From 2278964d88734994de48345561b30b2ab29aa1f9 Mon Sep 17 00:00:00 2001
From: Pascal Jahan Elahi <pascaljelahi@gmail.com>
Date: Thu, 30 Jul 2020 09:20:02 +0800
Subject: [PATCH 01/13] Initial update to HDF5 C API

---
 CMakeLists.txt   |  35 ++-
 Converter.cc     |  48 ++--
 Converter.h      |  11 +-
 FastConverter.cc |   4 +-
 HDF5Wrapper.cc   | 599 +++++++++++++++++++++++++++++++++++++++++++++++
 HDF5Wrapper.h    | 128 ++++++++++
 MipMap.cc        |  24 +-
 MipMap.h         |   6 +-
 SlowConverter.cc |   6 +-
 Stats.cc         |  53 +++--
 Stats.h          |  28 ++-
 Util.cc          | 184 +++++++--------
 Util.h           |  24 +-
 13 files changed, 969 insertions(+), 181 deletions(-)
 create mode 100644 HDF5Wrapper.cc
 create mode 100644 HDF5Wrapper.h
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 409fe63..074eee4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,9 +1,32 @@
-cmake_minimum_required(VERSION 2.8)
+cmake_minimum_required(VERSION 3.13)
 project(hdf_convert)
 
 set(default_build_type "Release")
 
 set(CMAKE_CXX_STANDARD 11)
+
+# macro(find_hdf5)
+# 	find_package(HDF5 REQUIRED COMPONENTS C)
+# 	if (HDF5_FOUND)
+# 		include_directories(${HDF5_INCLUDE_DIRS})
+# 		if (HDF5_IS_PARALLEL HDF5_CONVERTER_ALLOWPARALLELHDF5)
+# 			set (ENV{HDF5_PREFER_PARALLEL} true)
+# 			set(HDF5_CONVERTER_HAS_PARALLEL_HDF5 Yes)
+# 			list(APPEND HDF5_CONVERTER_DEFINES USEPARALLELHDF)
+# 			if (HDF5_VERSION VERSION_GREATER "1.10.0" AND HDF5_CONVERTER_ALLOWCOMPRESSIONPARALLELHDF5)
+# 				set(HDF5_CONVERTER_HAS_COMPRESSED_HDF5 Yes)
+# 				list(APPEND HDF5_CONVERTER_DEFINES USEHDFCOMPRESSION)
+# 				list(APPEND HDF5_CONVERTER_DEFINES PARALLELCOMPRESSIONACTIVE)
+# 			endif()
+# 		else()
+# 			if (HDF5_CONVERTER_ALLOWCOMPRESSIONHDF5)
+# 				set(HDF5_CONVERTER_HAS_COMPRESSED_HDF5 Yes)
+# 				list(APPEND HDF5_CONVERTER_DEFINES USEHDFCOMPRESSION)
+# 			endif()
+# 		endif()
+#     endif()
+# endmacro()
+
 FIND_PACKAGE(HDF5 REQUIRED COMPONENTS CXX)
 INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR})
 find_package(OpenMP)
@@ -26,13 +49,15 @@ set(LINK_LIBS ${LINK_LIBS} cfitsio ${HDF5_LIBRARIES})
 
 set(SOURCE_FILES
     ${SOURCE_FILES}
-    main.cc
-    Stats.cc
-    MipMap.cc
+    HDF5Wrapper.cc
     Converter.cc
     FastConverter.cc
     SlowConverter.cc
-    Util.cc)
+    Stats.cc
+    MipMap.cc
+    Util.cc
+    main.cc
+    )
 
 add_executable(hdf_convert ${SOURCE_FILES})
 target_link_libraries(hdf_convert ${LINK_LIBS})
diff --git a/Converter.cc b/Converter.cc
index 9fca065..587c0ec 100644
--- a/Converter.cc
+++ b/Converter.cc
@@ -43,7 +43,7 @@ Converter::Converter(std::string inputFileName, std::string outputFileName, bool
 
 Converter::~Converter() {
     // TODO this is probably unnecessary; the file object destructor should close the file properly.
-    outputFile.close();
+    HDF5outputFile.close();
 }
 
 std::unique_ptr<Converter> Converter::getConverter(std::string inputFileName, std::string outputFileName, bool slow, bool progress) {
@@ -67,28 +67,34 @@ void Converter::convert() {
     
     // TODO dataset variables should be local and passed into the copy function?
     
-    outputFile = H5::H5File(tempOutputFileName, H5F_ACC_TRUNC);
-    outputGroup = outputFile.createGroup("0");
+//     outputFile = H5::H5File(tempOutputFileName, H5F_ACC_TRUNC);
+//     outputGroup = outputFile.createGroup("0");
+
+    H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC);
+    hid_t gid = H5outputfile.create_group("0");
     
     std::vector<hsize_t> chunkDims;
     if (useChunks(standardDims)) {
         chunkDims = tileDims;
     }
     
-    H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
-    floatType.setOrder(H5T_ORDER_LE);
-    createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims);
+//     H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
+//     floatType.setOrder(H5T_ORDER_LE);
+//     createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims);
+//????
+
+    
     
-    statsXY.createDatasets(outputGroup, "XY");
+//     statsXY.createDatasets(outputGroup, "XY");
 
     if (depth > 1) {
-        statsXYZ.createDatasets(outputGroup, "XYZ");
-        statsZ.createDatasets(outputGroup, "Z");
+//         statsXYZ.createDatasets(outputGroup, "XYZ");
+//         statsZ.createDatasets(outputGroup, "Z");
         
-        auto swizzledGroup = outputGroup.createGroup("SwizzledData");
+//         auto swizzledGroup = outputGroup.createGroup("SwizzledData");
         // We use this name in papers because it sounds more serious. :)
-        outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData");
-        createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims);
+//         outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData");
+//         createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims);
     }
     
     mipMaps.createDatasets(outputGroup);
@@ -97,9 +103,9 @@ void Converter::convert() {
     
     TIMER(timer.start("Headers"););
     
-    writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION));
-    writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER));
-    writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION));
+//     writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION));
+//     writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER));
+//     writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION));
 
     int numAttributes;
     readFitsHeader(inputFilePtr, numAttributes);
@@ -123,16 +129,16 @@ void Converter::convert() {
                     // STRING
                     std::string attributeValueStr;
                     readFitsStringAttribute(inputFilePtr, attributeName, attributeValueStr);
-                    writeHdf5Attribute(outputGroup, attributeName, attributeValueStr);
+//                     writeHdf5Attribute(outputGroup, attributeName, attributeValueStr);
                 } else if (attributeValue == "T" || attributeValue == "F") {
                     // BOOLEAN
                     bool attributeValueBool = (attributeValue == "T");
-                    writeHdf5Attribute(outputGroup, attributeName, attributeValueBool);
+//                     writeHdf5Attribute(outputGroup, attributeName, attributeValueBool);
                 } else if (attributeValue.find('.') != std::string::npos) {
                     // TRY TO PARSE AS DOUBLE
                     try {
                         double attributeValueDouble = std::stod(attributeValue);
-                        writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble);
+//                         writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble);
                     } catch (const std::invalid_argument& ia) {
                         std::cout << "Warning: could not parse attribute '" << attributeName << "' as a float." << std::endl;
                         parsingFailure = true;
@@ -140,7 +146,7 @@ void Converter::convert() {
                         // Special handling for subnormal numbers
                         long double attributeValueLongDouble = std::stold(attributeValue);
                         double attributeValueDouble = (double) attributeValueLongDouble;
-                        writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble);
+//                         writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble);
                         
                         std::ostringstream ostream;
                         ostream.precision(13);
@@ -158,7 +164,7 @@ void Converter::convert() {
                     // TRY TO PARSE AS INTEGER
                     try {
                         int64_t attributeValueInt = std::stoi(attributeValue);
-                        writeHdf5Attribute(outputGroup, attributeName, attributeValueInt);
+//                         writeHdf5Attribute(outputGroup, attributeName, attributeValueInt);
                     } catch (const std::invalid_argument& ia) {
                         std::cout << "Warning: could not parse attribute '" << attributeName << "' as an integer." << std::endl;
                         parsingFailure = true;
@@ -167,7 +173,7 @@ void Converter::convert() {
                 
                 if (parsingFailure) {
                     // FALL BACK TO STRING
-                    writeHdf5Attribute(outputGroup, attributeName, attributeValue);
+//                     writeHdf5Attribute(outputGroup, attributeName, attributeValue);
                 }
             }
         }
diff --git a/Converter.h b/Converter.h
index 223d22e..e0f36e9 100644
--- a/Converter.h
+++ b/Converter.h
@@ -6,6 +6,7 @@
 #include "MipMap.h"
 #include "Timer.h"
 #include "Util.h"
+#include "HDF5Wrapper.h"
 
 class Converter {
 public:
@@ -28,10 +29,12 @@ class Converter {
     fitsfile* inputFilePtr;
     
     // Main HDF5 objects
-    H5::H5File outputFile;
-    H5::Group outputGroup;
-    H5::DataSet standardDataSet;
-    H5::DataSet swizzledDataSet;
+//     H5::H5File outputFile;
+//     H5::Group outputGroup;
+//     H5::DataSet standardDataSet;
+//     H5::DataSet swizzledDataSet;
+
+    H5OutputFile H5outputfile;
     
     float* standardCube;
     float* rotatedCube;
diff --git a/FastConverter.cc b/FastConverter.cc
index b32f1ed..ecf4af7 100644
--- a/FastConverter.cc
+++ b/FastConverter.cc
@@ -253,14 +253,14 @@ void FastConverter::copyAndCalculate() {
         std::vector<hsize_t> memDims = {depth, height, width};
         std::vector<hsize_t> count = trimAxes({1, depth, height, width}, N);
         std::vector<hsize_t> start = trimAxes({currentStokes, 0, 0, 0}, N);
-        writeHdf5Data(standardDataSet, standardCube, memDims, count, start);
+//         writeHdf5Data(standardDataSet, standardCube, memDims, count, start);
         
         if (depth > 1) {
             // This all technically worked if we reused the standard filespace and memspace
             // But it's probably not a good idea to rely on two incorrect values cancelling each other out
             std::vector<hsize_t> swizzledCount = trimAxes({1, width, height, depth}, N);
             std::vector<hsize_t> swizzledMemDims = {width, height, depth};
-            writeHdf5Data(swizzledDataSet, rotatedCube, swizzledMemDims, swizzledCount, start);
+//             writeHdf5Data(swizzledDataSet, rotatedCube, swizzledMemDims, swizzledCount, start);
         }
 
         // After writing and before mipmaps, we free the swizzled memory. We allocate it again next Stokes.
diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
new file mode 100644
index 0000000..2e5bc7a
--- /dev/null
+++ b/HDF5Wrapper.cc
@@ -0,0 +1,599 @@
+#include "HDF5Wrapper.h" 
+
+void H5OutputFile::create(std::string filename, hid_t flag,
+    int taskID, bool iparallelopen)
+{
+    if(file_id >= 0) io_error("Attempted to create file when already open!");
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+    if (iparallelopen && taskID ==-1) {
+        parallel_access_id = H5Pcreate (H5P_FILE_ACCESS);
+        if (parallel_access_id < 0) io_error("Parallel access creation failed");
+        herr_t ret = H5Pset_fapl_mpio(parallel_access_id, comm, info);
+        if (ret < 0) io_error("Parallel access failed");
+        // create the file collectively
+        file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, parallel_access_id);
+        if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename);
+        ret = H5Pclose(parallel_access_id);
+        if (ret < 0) io_error("Parallel release failed");
+        parallel_access_id = -1;
+    }
+    else {
+        if (taskID <0 || taskID > NProcsWrite) io_error(std::string("MPI Task ID asked to create file out of range. Task ID is ")+to_std::string(taskID));
+        if (ThisWriteTask == taskID) {
+            file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, H5P_DEFAULT);
+            if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename);
+            parallel_access_id = -1;
+        }
+        else {
+            parallel_access_id = -2;
+        }
+        MPI_Barrier(comm);
+    }
+#else
+    file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, H5P_DEFAULT);
+    if(file_id < 0)io_error(std::string("Failed to create output file: ")+filename);
+#endif
+
+}
+
+void H5OutputFile::append(std::string filename, hid_t flag,
+    int taskID, bool iparallelopen)
+{
+    if(file_id >= 0)io_error("Attempted to open and append to file when already open!");
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+    if (iparallelopen && taskID ==-1) {
+        parallel_access_id = H5Pcreate (H5P_FILE_ACCESS);
+        if (parallel_access_id < 0) io_error("Parallel access creation failed");
+        herr_t ret = H5Pset_fapl_mpio(parallel_access_id, comm, info);
+        if (ret < 0) io_error("Parallel access failed");
+        // create the file collectively
+        file_id = H5Fopen(filename.c_str(), flag, parallel_access_id);
+        if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename);
+        ret = H5Pclose(parallel_access_id);
+        if (ret < 0) io_error("Parallel release failed");
+        parallel_access_id = -1;
+    }
+    else {
+        if (taskID <0 || taskID > NProcsWrite) io_error(std::string("MPI Task ID asked to create file out of range. Task ID is ")+to_std::string(taskID));
+        if (ThisWriteTask == taskID) {
+            file_id = H5Fopen(filename.c_str(),flag, H5P_DEFAULT);
+            if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename);
+            parallel_access_id = -1;
+        }
+        else {
+            parallel_access_id = -2;
+        }
+        MPI_Barrier(comm);
+    }
+#else
+    file_id = H5Fopen(filename.c_str(), flag, H5P_DEFAULT);
+    if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename);
+#endif
+}
+
+// Close the file
+void H5OutputFile::close()
+{
+#ifdef USEPARALLELHDF
+    if(file_id < 0 && parallel_access_id == -1) io_error("Attempted to close file which is not open!");
+    if (parallel_access_id == -1) H5Fclose(file_id);
+#else
+    if(file_id < 0) io_error("Attempted to close file which is not open!");
+    H5Fclose(file_id);
+#endif
+    file_id = -1;
+#ifdef USEPARALLELHDF
+    parallel_access_id = -1;
+#endif
+}
+
+/// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
+/// the input data if not explicitly specified with the filetype_id parameter.
+template <typename T> void H5OutputFile::write_dataset(std::string name, hsize_t len, T *data,
+    hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_hyperslab, bool flag_collective)
+{
+    int rank = 1;
+    hsize_t dims[1] = {len};
+    if (memtype_id == -1) memtype_id = hdf5_type(T{});
+    write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective);
+}
+
+void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective)
+{
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+#endif
+    int rank = 1;
+    hsize_t dims[1] = {len};
+
+    hid_t memtype_id, filetype_id, dspace_id, dset_id, xfer_plist;
+    herr_t status, ret;
+    memtype_id = H5Tcopy (H5T_C_S1);
+    status = H5Tset_size (memtype_id, data.size());
+    filetype_id = H5Tcopy (H5T_C_S1);
+    status = H5Tset_size (filetype_id, data.size());
+
+    // Create the dataspace
+    dspace_id = H5Screate_simple(rank, dims, NULL);
+
+    // Create the dataset
+    dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
+        H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+#ifdef USEPARALLELHDF
+    if (flag_parallel) {
+        // set up the collective transfer properties list
+        xfer_plist = H5Pcreate(H5P_DATASET_XFER);
+        if (xfer_plist < 0) io_error(std::string("Failed to set up parallel transfer: ")+name);
+        if (flag_collective) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+        else ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT);
+        if (ret < 0) io_error(std::string("Failed to set up parallel transfer: ")+name);
+        // the result of above should be that all processors write to the same
+        // point of the hdf file.
+    }
+#endif
+    // Write the data
+    if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0)
+    io_error(std::string("Failed to write dataset: ")+name);
+
+    // Clean up (note that dtype_id is NOT a new object so don't need to close it)
+#ifdef USEPARALLELHDF
+    if (flag_parallel) H5Pclose(xfer_plist);
+#endif
+    H5Sclose(dspace_id);
+    H5Dclose(dset_id);
+}
+void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data,
+    hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective)
+{
+    int rank = 1;
+    hsize_t dims[1] = {len};
+    if (memtype_id == -1) {
+        throw std::runtime_error("Write data set called with void pointer but no type info passed.");
+    }
+    write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective);
+}
+
+
+/// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
+/// the input data if not explicitly specified with the filetype_id parameter.
+template <typename T> void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
+    hid_t memtype_id, hid_t filetype_id,
+    bool flag_parallel, bool flag_first_dim_parallel,
+    bool flag_hyperslab, bool flag_collective)
+{
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+#endif
+    hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
+    std::vector<hsize_t> chunks(rank);
+
+    // Get HDF5 data type of the array in memory
+    if (memtype_id == -1) memtype_id = hdf5_type(T{});
+
+    // Determine type of the dataset to create
+    if(filetype_id < 0) filetype_id = memtype_id;
+
+#ifdef USEPARALLELHDF
+    std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
+    if (flag_parallel) {
+        //if parallel hdf5 get the full extent of the data
+        //this bit of code communicating information can probably be done elsewhere
+        //minimize number of mpi communications
+        for (auto i=0;i<rank;i++) dims_single[i]=dims[i];
+        MPI_Allgather(dims_single.data(), rank, MPI_UNSIGNED_LONG_LONG, mpi_hdf_dims.data(), rank, MPI_UNSIGNED_LONG_LONG, comm);
+        MPI_Allreduce(dims_single.data(), mpi_hdf_dims_tot.data(), rank, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm);
+        for (auto i=0;i<rank;i++) {
+            dims_offset[i] = 0;
+            if (flag_first_dim_parallel && i > 0) continue;
+            for (auto j=1;j<=ThisWriteTask;j++) {
+                dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1];
+            }
+        }
+        if (flag_first_dim_parallel && rank > 1) {
+            for (auto i=1; i<rank;i++) mpi_hdf_dims_tot[i] = dims[i];
+        }
+    }
+#endif
+
+    // Determine if going to compress data in chunks
+    // Only chunk non-zero size datasets
+    int nonzero_size = 1;
+    for(int i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
+        }
+        else {
+            if(dims[i]==0) nonzero_size = 0;
+        }
+#else
+        if(dims[i]==0) nonzero_size = 0;
+#endif
+    }
+    // Only chunk datasets where we would have >1 chunk
+    int large_dataset = 0;
+    for(int i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+        else {
+            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+#else
+        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+#endif
+    }
+    if(nonzero_size && large_dataset)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
+        }
+        else {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+        }
+#else
+        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+#endif
+    }
+
+    // Create the dataspace
+#ifdef USEPARALLELHDF
+    if (flag_parallel) {
+        //then all threads create the same simple data space
+        //so the meta information is the same
+        if (flag_hyperslab) {
+            //allocate the space spanning the file
+            dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
+            //allocate the memory space
+            //allocate the memory space
+            memspace_id = H5Screate_simple(rank, dims, NULL);
+        }
+        else {
+            dspace_id = H5Screate_simple(rank, dims, NULL);
+            memspace_id = dspace_id;
+        }
+    }
+    else {
+        dspace_id = H5Screate_simple(rank, dims, NULL);
+        memspace_id = dspace_id;
+    }
+#else
+    dspace_id = H5Screate_simple(rank, dims, NULL);
+    memspace_id = dspace_id;
+#endif
+
+    // Dataset creation properties
+    prop_id = H5P_DEFAULT;
+#ifdef USEHDFCOMPRESSION
+    // this defines compression
+    if(nonzero_size && large_dataset)
+    {
+        prop_id = H5Pcreate(H5P_DATASET_CREATE);
+        H5Pset_layout(prop_id, H5D_CHUNKED);
+        H5Pset_chunk(prop_id, rank, chunks.data());
+        H5Pset_deflate(prop_id, HDFDEFLATE);
+    }
+#endif
+    // Create the dataset
+    dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
+        H5P_DEFAULT, prop_id, H5P_DEFAULT);
+    if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name);
+    H5Pclose(prop_id);
+
+    prop_id = H5P_DEFAULT;
+#ifdef USEPARALLELHDF
+    if (flag_parallel) {
+        // set up the collective transfer properties list
+        prop_id = H5Pcreate(H5P_DATASET_XFER);
+        //if all tasks are participating in the writes
+        if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE);
+        else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT);
+        if (flag_hyperslab) {
+            H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL);
+            if (dims[0] == 0) {
+                H5Sselect_none(dspace_id);
+                H5Sselect_none(memspace_id);
+            }
+
+        }
+        if (mpi_hdf_dims_tot[0] > 0) {
+            // Write the data
+            ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+            if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+        }
+    }
+    else if (dims[0] > 0)
+    {
+        // Write the data
+        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    }
+
+#else
+    // Write the data
+    if (dims[0] > 0) {
+        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    }
+#endif
+
+    // Clean up (note that dtype_id is NOT a new object so don't need to close it)
+    H5Pclose(prop_id);
+#ifdef USEPARALLELHDF
+    if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
+#endif
+    H5Sclose(dspace_id);
+    H5Dclose(dset_id);
+}
+void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
+    hid_t memtype_id, hid_t filetype_id,
+    bool flag_parallel, bool flag_first_dim_parallel,
+    bool flag_hyperslab, bool flag_collective)
+{
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+#endif
+    hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
+    std::vector<hsize_t> chunks(rank);
+    // Get HDF5 data type of the array in memory
+    if (memtype_id == -1) {
+        throw std::runtime_error("Write data set called with void pointer but no type info passed.");
+    }
+    // Determine type of the dataset to create
+    if(filetype_id < 0) filetype_id = memtype_id;
+
+#ifdef USEPARALLELHDF
+    std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
+    //if parallel hdf5 get the full extent of the data
+    //this bit of code communicating information can probably be done elsewhere
+    //minimize number of mpi communications
+    if (flag_parallel) {
+        //if parallel hdf5 get the full extent of the data
+        //this bit of code communicating information can probably be done elsewhere
+        //minimize number of mpi communications
+        for (auto i=0;i<rank;i++) dims_single[i]=dims[i];
+        MPI_Allgather(dims_single.data(), rank, MPI_UNSIGNED_LONG_LONG, mpi_hdf_dims.data(), rank, MPI_UNSIGNED_LONG_LONG, comm);
+        MPI_Allreduce(dims_single.data(), mpi_hdf_dims_tot.data(), rank, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm);
+        for (auto i=0;i<rank;i++) {
+            dims_offset[i] = 0;
+            if (flag_first_dim_parallel && i > 0) continue;
+            for (auto j=1;j<=ThisWriteTask;j++) {
+                dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1];
+            }
+        }
+        if (flag_first_dim_parallel && rank > 1) {
+            for (auto i=1; i<rank;i++) mpi_hdf_dims_tot[i] = dims[i];
+        }
+    }
+#endif
+
+    // Determine if going to compress data in chunks
+    // Only chunk non-zero size datasets
+    int nonzero_size = 1;
+    for(int i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
+        }
+        else {
+            if(dims[i]==0) nonzero_size = 0;
+        }
+#else
+        if(dims[i]==0) nonzero_size = 0;
+#endif
+    }
+    // Only chunk datasets where we would have >1 chunk
+    unsigned int large_dataset = 0;
+    for(int i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+        else {
+            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+#else
+        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+#endif
+    }
+    if(nonzero_size && large_dataset)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
+        }
+        else {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+        }
+#else
+        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+#endif
+    }
+
+    // Create the dataspace
+#ifdef USEPARALLELHDF
+    if (flag_parallel) {
+        //then all threads create the same simple data space
+        //so the meta information is the same
+        if (flag_hyperslab) {
+            //allocate the space spanning the file
+            dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
+            //allocate the memory space
+            memspace_id = H5Screate_simple(rank, dims, NULL);
+        }
+        else {
+            dspace_id = H5Screate_simple(rank, dims, NULL);
+            memspace_id = dspace_id;
+        }
+    }
+    else {
+        dspace_id = H5Screate_simple(rank, dims, NULL);
+        memspace_id = dspace_id;
+    }
+#else
+    dspace_id = H5Screate_simple(rank, dims, NULL);
+    memspace_id = dspace_id;
+#endif
+
+    // Dataset creation properties
+    prop_id = H5P_DEFAULT;
+#ifdef USEHDFCOMPRESSION
+    // this defines compression
+    if(nonzero_size && large_dataset)
+    {
+        prop_id = H5Pcreate(H5P_DATASET_CREATE);
+        H5Pset_layout(prop_id, H5D_CHUNKED);
+        H5Pset_chunk(prop_id, rank, chunks.data());
+        H5Pset_deflate(prop_id, HDFDEFLATE);
+    }
+#endif
+
+    // Create the dataset
+    dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
+        H5P_DEFAULT, prop_id, H5P_DEFAULT);
+    if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name);
+    H5Pclose(prop_id);
+
+    prop_id = H5P_DEFAULT;
+
+#ifdef USEPARALLELHDF
+    if (flag_parallel) {
+        // set up the collective transfer properties list
+        prop_id = H5Pcreate(H5P_DATASET_XFER);
+        //if all tasks are participating in the writes
+        if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE);
+        else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT);
+        if (flag_hyperslab) {
+            H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL);
+            if (dims[0] == 0) {
+                H5Sselect_none(dspace_id);
+                H5Sselect_none(memspace_id);
+            }
+
+        }
+        if (mpi_hdf_dims_tot[0] > 0) {
+            // Write the data
+            ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+            if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+        }
+    }
+    else if (dims[0] > 0)
+    {
+        // Write the data
+        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    }
+
+#else
+    // Write the data
+    if (dims[0] > 0) {
+        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    }
+#endif
+
+    // Clean up (note that dtype_id is NOT a new object so don't need to close it)
+    H5Pclose(prop_id);
+#ifdef USEPARALLELHDF
+    if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
+#endif
+    H5Sclose(dspace_id);
+    H5Dclose(dset_id);
+}
+
+/// write an attribute
+template <typename T> void H5OutputFile::write_attribute(std::string parent, std::string name, std::vector<T> data)
+{
+    // Get HDF5 data type of the value to write
+    hid_t dtype_id = hdf5_type(data[0]);
+    hsize_t size = data.size();
+
+    // Open the parent object
+    hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
+    if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
+
+    // Create dataspace
+    hid_t dspace_id = H5Screate(H5S_SIMPLE);
+    hid_t dspace_extent  = H5Sset_extent_simple(dspace_id, 1, &size, NULL);
+
+    // Create attribute
+    hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
+    if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
+
+    // Write the attribute
+    if(H5Awrite(attr_id, dtype_id, data.data()) < 0)
+    io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
+
+    // Clean up
+    H5Aclose(attr_id);
+    H5Sclose(dspace_id);
+    H5Oclose(parent_id);
+}
+
+template <typename T> void H5OutputFile::write_attribute(std::string parent, std::string name, T data)
+{
+    // Get HDF5 data type of the value to write
+    hid_t dtype_id = hdf5_type(data);
+
+    // Open the parent object
+    hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
+    if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
+
+    // Create dataspace
+    hid_t dspace_id = H5Screate(H5S_SCALAR);
+
+    // Create attribute
+    hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
+    if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
+
+    // Write the attribute
+    if(H5Awrite(attr_id, dtype_id, &data) < 0)
+    io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
+
+    // Clean up
+    H5Aclose(attr_id);
+    H5Sclose(dspace_id);
+    H5Oclose(parent_id);
+}
+
+void H5OutputFile::write_attribute(std::string parent, std::string name, std::string data)
+{
+    // Get HDF5 data type of the value to write
+    hid_t dtype_id = H5Tcopy(H5T_C_S1);
+    if (data.size() == 0) data=" ";
+    H5Tset_size(dtype_id, data.size());
+    H5Tset_strpad(dtype_id, H5T_STR_NULLTERM);
+
+    // Open the parent object
+    hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
+    if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
+
+    // Create dataspace
+    hid_t dspace_id = H5Screate(H5S_SCALAR);
+
+    // Create attribute
+    hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
+    if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
+
+    // Write the attribute
+    if(H5Awrite(attr_id, dtype_id, data.c_str()) < 0)
+    io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
+
+    // Clean up
+    H5Aclose(attr_id);
+    H5Sclose(dspace_id);
+    H5Oclose(parent_id);
+}
diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h
new file mode 100644
index 0000000..1cd5d3f
--- /dev/null
+++ b/HDF5Wrapper.h
@@ -0,0 +1,128 @@
+#ifndef __HDF5WRAPPER_H
+#define __HDF5WRAPPER_H
+
+#include "common.h"
+
+#ifdef USEMPI
+#include <mpi.h>
+#endif 
+
+// Overloaded function to return HDF5 type given a C type
+static inline hid_t hdf5_type(float dummy)              {return H5T_NATIVE_FLOAT;}
+static inline hid_t hdf5_type(double dummy)             {return H5T_NATIVE_DOUBLE;}
+static inline hid_t hdf5_type(short dummy)              {return H5T_NATIVE_SHORT;}
+static inline hid_t hdf5_type(int dummy)                {return H5T_NATIVE_INT;}
+static inline hid_t hdf5_type(long dummy)               {return H5T_NATIVE_LONG;}
+static inline hid_t hdf5_type(long long dummy)          {return H5T_NATIVE_LLONG;}
+static inline hid_t hdf5_type(unsigned short dummy)     {return H5T_NATIVE_USHORT;}
+static inline hid_t hdf5_type(unsigned int dummy)       {return H5T_NATIVE_UINT;}
+static inline hid_t hdf5_type(unsigned long dummy)      {return H5T_NATIVE_ULONG;}
+static inline hid_t hdf5_type(unsigned long long dummy) {return H5T_NATIVE_ULLONG;}
+static inline hid_t hdf5_type(std::string dummy)        {return H5T_C_S1;}
+static inline hid_t hdf5_type_from_string(std::string dummy) 
+{
+    if (dummy == std::string("float32")) return H5T_NATIVE_FLOAT;
+    else if (dummy == std::string("float64")) return H5T_NATIVE_DOUBLE;
+    else if (dummy == std::string("float64")) return H5T_NATIVE_INT;
+    else if (dummy == std::string("int64")) return H5T_NATIVE_LLONG;
+    else return H5T_C_S1;
+}
+
+///\name HDF class to manage writing information
+class H5OutputFile
+{
+
+private:
+
+    hid_t file_id;
+#ifdef USEPARALLELHDF
+    hid_t parallel_access_id;
+#endif
+
+protected:
+
+    // Called if a HDF5 call fails (might need to MPI_Abort)
+    void io_error(std::string message) {
+        std::cerr << message << std::endl;
+#ifdef USEMPI
+        MPI_Abort(MPI_COMM_WORLD, 1);
+#endif
+        abort();
+    }
+    unsigned int HDFOUTPUTCHUNKSIZE = 8192;
+public:
+
+    // Constructor
+    H5OutputFile() {
+        file_id = -1;
+#ifdef USEPARALLELHDF
+        parallel_access_id = -1;
+#endif
+    }
+
+    // Create a new file
+    void create(std::string filename, hid_t flag = H5F_ACC_TRUNC,
+        int taskID = -1, bool iparallelopen = true);
+
+    void append(std::string filename, hid_t flag = H5F_ACC_RDWR,
+        int taskID = -1, bool iparallelopen = true);
+
+    // Close the file
+    void close();
+
+    hid_t create_group(std::string groupname) {
+        hid_t group_id = H5Gcreate(file_id, groupname.c_str(),
+            H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        return group_id;
+    }
+    herr_t close_group(hid_t gid) {
+        herr_t status = H5Gclose(gid);
+        return status;
+    }
+
+    //????
+    hid_t create_dataset(std::string dsetname, hid_t file) {
+        hid_t dset_id H5Dcreate(file_id, datasetname, -1, -1, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        hid_t group_id = H5Dcreate(file_id, groupname.c_str(),
+            H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        return group_id;
+    }
+    herr_t close_dataset(hid_t did) {
+        herr_t status = H5Gclose(gid);
+        return status;
+    }
+
+    // Destructor closes the file if it's open
+    ~H5OutputFile()
+    {
+        if(file_id >= 0) close();
+    }
+
+    /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
+    /// the input data if not explicitly specified with the filetype_id parameter.
+    template <typename T> void write_dataset(std::string name, hsize_t len, T *data,
+       hid_t memtype_id = -1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true);
+    void write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel = true, bool flag_collective = true);
+    void write_dataset(std::string name, hsize_t len, void *data,
+       hid_t memtype_id=-1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true);
+
+    /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
+    /// the input data if not explicitly specified with the filetype_id parameter.
+    template <typename T> void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
+        hid_t memtype_id = -1, hid_t filetype_id = -1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true);
+    void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
+        hid_t memtype_id = -1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true);
+
+    /// write an attribute
+    template <typename T> void write_attribute(std::string parent, std::string name, std::vector<T> data);
+
+    template <typename T> void write_attribute(std::string parent, std::string name, T data);
+
+    void write_attribute(std::string parent, std::string name, std::string data);
+};
+
+#endif
diff --git a/MipMap.cc b/MipMap.cc
index fea92bc..35ffb5c 100644
--- a/MipMap.cc
+++ b/MipMap.cc
@@ -18,11 +18,13 @@ void MipMap::createDataset(H5::Group group, const std::vector<hsize_t>& chunkDim
     std::ostringstream mipMapName;
     mipMapName << "MipMaps/DATA/DATA_XY_" << mip;
     
-    if (useChunks(datasetDims)) {
-        createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims, chunkDims);
-    } else {
-        createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims);
-    }
+//     if (useChunks(datasetDims)) {
+//         createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims, chunkDims);
+//     } else {
+//         createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims);
+//     }
+//???
+    
 }
 
 void MipMap::createBuffers(std::vector<hsize_t>& bufferDims) {
@@ -88,11 +90,17 @@ hsize_t MipMaps::size(const std::vector<hsize_t>& standardDims, const std::vecto
     return size;
 }
 
-void MipMaps::createDatasets(H5::Group group) {
+// void MipMaps::createDatasets(H5::Group group) {
+//     for (auto& mipMap : mipMaps) {
+//         mipMap.createDataset(group, chunkDims);
+//     }
+//     
+// }
+
+void MipMaps::createDatasets(H5OutputFile H5outputfile , hid_t gid) {
     for (auto& mipMap : mipMaps) {
-        mipMap.createDataset(group, chunkDims);
+        mipMap.createDataset(H5outputfile, gid, chunkDims);
     }
-    
 }
 
 void MipMaps::createBuffers(const std::vector<hsize_t>& standardBufferDims) {
diff --git a/MipMap.h b/MipMap.h
index ee2c7a8..5716c94 100644
--- a/MipMap.h
+++ b/MipMap.h
@@ -10,7 +10,8 @@ struct MipMap {
     MipMap(const std::vector<hsize_t>& datasetDims, int mip);
     ~MipMap();
     
-    void createDataset(H5::Group group, const std::vector<hsize_t>& chunkDims);
+//     void createDataset(H5::Group group, const std::vector<hsize_t>& chunkDims);
+    void createDataset(H5Outputfile h5outputfile, hid_t gid, const std::vector<hsize_t>& chunkDims);
     void createBuffers(std::vector<hsize_t>& bufferDims);
     
     void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) {
@@ -57,7 +58,8 @@ struct MipMaps {
     // We need the dataset dimensions to work out how many mipmaps we have
     static hsize_t size(const std::vector<hsize_t>& standardDims, const std::vector<hsize_t>& standardBufferDims);
     
-    void createDatasets(H5::Group group);
+//     void createDatasets(H5::Group group);
+    void createDatasets(H5OutputFile H5Outputfile, hid_t gid);
     void createBuffers(const std::vector<hsize_t>& standardBufferDims);
     
     void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) {
diff --git a/SlowConverter.cc b/SlowConverter.cc
index bbc75d9..ae09122 100644
--- a/SlowConverter.cc
+++ b/SlowConverter.cc
@@ -80,7 +80,7 @@ void SlowConverter::copyAndCalculate() {
             TIMER(timer.start("Write"););
             
             std::vector<hsize_t> start = trimAxes({s, c, 0, 0}, N);
-            writeHdf5Data(standardDataSet, standardCube, memDims, count, start);
+//             writeHdf5Data(standardDataSet, standardCube, memDims, count, start);
             
             DEBUG(std::cout << " Accumulating XY stats and mipmaps..." << std::flush;);
             TIMER(timer.start(timerLabelStatsMipmaps););
@@ -293,7 +293,7 @@ void SlowConverter::copyAndCalculate() {
                     auto standardCount = trimAxes({1, depth, ySize, xSize}, N);
                     auto standardStart = trimAxes({s, 0, yOffset, xOffset}, N);
                     
-                    readHdf5Data(standardDataSet, standardSlice, standardMemDims, standardCount, standardStart);
+//                     readHdf5Data(standardDataSet, standardSlice, standardMemDims, standardCount, standardStart);
                     
                     // rotate tile slice
                     DEBUG(std::cout << " Calculating rotation..." << std::flush;);
@@ -345,7 +345,7 @@ void SlowConverter::copyAndCalculate() {
                     auto swizzledCount = trimAxes({1, xSize, ySize, depth}, N);
                     auto swizzledStart = trimAxes({s, xOffset, yOffset, 0}, N);
                     
-                    writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart);
+//                     writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart);
                     
                     DEBUG(std::cout << " Writing Z statistics..." << std::endl;);
                     // write Z statistics
diff --git a/Stats.cc b/Stats.cc
index 17ed207..f62d893 100644
--- a/Stats.cc
+++ b/Stats.cc
@@ -1,6 +1,7 @@
 #include "Stats.h"
 
-Stats::Stats(const std::vector<hsize_t>& basicDatasetDims, hsize_t numBins) : basicDatasetDims(basicDatasetDims), numBins(numBins), partialHistMultiplier(0) {}
+Stats::Stats(const std::vector<hsize_t>& basicDatasetDims, hsize_t numBins) : basicDatasetDims(basicDatasetDims), numBins(numBins), partialHistMultiplier(0) {
+}
 
 Stats::~Stats() {
     if (!fullBasicBufferDims.empty()) {
@@ -21,22 +22,24 @@ hsize_t Stats::size(std::vector<hsize_t> dims, hsize_t numBins, hsize_t partialH
     return (2 * sizeof(float) + 2 * sizeof(double) + sizeof(int64_t)) * statsSize + sizeof(int64_t) * (statsSize * numBins + statsSize * numBins * partialHistMultiplier);
 }
 
-void Stats::createDatasets(H5::Group group, std::string name) {
-    H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
-    floatType.setOrder(H5T_ORDER_LE);
-    
-    H5::IntType intType(H5::PredType::NATIVE_INT64);
-    intType.setOrder(H5T_ORDER_LE);
-    
-    createHdf5Dataset(minDset, group, "Statistics/" + name + "/MIN", floatType, basicDatasetDims);
-    createHdf5Dataset(maxDset, group, "Statistics/" + name + "/MAX", floatType, basicDatasetDims);
-    createHdf5Dataset(sumDset, group, "Statistics/" + name + "/SUM", floatType, basicDatasetDims);
-    createHdf5Dataset(ssqDset, group, "Statistics/" + name + "/SUM_SQ", floatType, basicDatasetDims);
-    createHdf5Dataset(nanDset, group, "Statistics/" + name + "/NAN_COUNT", intType, basicDatasetDims);
+// void Stats::createDatasets(H5::Group group, std::string name) {
+void Stats::createDatasets(H5OutputFile H5outputfile, hid_t gid, std::string name) {
+//     H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
+//     floatType.setOrder(H5T_ORDER_LE);
+//     
+//     H5::IntType intType(H5::PredType::NATIVE_INT64);
+//     intType.setOrder(H5T_ORDER_LE);
+//     
+//     createHdf5Dataset(minDset, group, "Statistics/" + name + "/MIN", floatType, basicDatasetDims);
+//     createHdf5Dataset(maxDset, group, "Statistics/" + name + "/MAX", floatType, basicDatasetDims);
+//     createHdf5Dataset(sumDset, group, "Statistics/" + name + "/SUM", floatType, basicDatasetDims);
+//     createHdf5Dataset(ssqDset, group, "Statistics/" + name + "/SUM_SQ", floatType, basicDatasetDims);
+//     createHdf5Dataset(nanDset, group, "Statistics/" + name + "/NAN_COUNT", intType, basicDatasetDims);
+//     
+//     if (numBins) {
+//         createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins}));
+//     }
     
-    if (numBins) {
-        createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins}));
-    }
 }
 
 void Stats::createBuffers(std::vector<hsize_t> dims, hsize_t partialHistMultiplier) {
@@ -84,14 +87,16 @@ void Stats::write(const std::vector<hsize_t>& basicBufferDims, const std::vector
     }
 }
     
-void Stats::writeBasic(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-    writeHdf5Data(minDset, minVals, basicBufferDims, count, start);
-    writeHdf5Data(maxDset, maxVals, basicBufferDims, count, start);
-    writeHdf5Data(sumDset, sums, basicBufferDims, count, start);
-    writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start);
-    writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start);
+// void Stats::writeBasic(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+void Stats::writeBasic(H5OutputFile H5outputfile, std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+//     writeHdf5Data(minDset, minVals, basicBufferDims, count, start);
+//     writeHdf5Data(maxDset, maxVals, basicBufferDims, count, start);
+//     writeHdf5Data(sumDset, sums, basicBufferDims, count, start);
+//     writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start);
+//     writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start);
 }
 
-void Stats::writeHistogram(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-    writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start);
+// void Stats::writeHistogram(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+void Stats::writeHistogram(H5OutputFile H5outputfile, const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+//     writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start);
 }
diff --git a/Stats.h b/Stats.h
index 50abda2..50dbde8 100644
--- a/Stats.h
+++ b/Stats.h
@@ -3,6 +3,7 @@
 
 #include "common.h"
 #include "Util.h"
+#include "HDF5Wrapper.h"
 
 struct StatsCounter {
     StatsCounter() : minVal(std::numeric_limits<float>::max()), maxVal(-std::numeric_limits<float>::max()), sum(0), sumSq(0), nanCount(0) {
@@ -44,14 +45,15 @@ struct StatsCounter {
 };
 
 struct Stats {
-    Stats() {}
+    Stats();
     Stats(const std::vector<hsize_t>& basicDatasetDims, hsize_t numBins = 0);
     ~Stats();
     
     static hsize_t size(std::vector<hsize_t> dims, hsize_t numBins = 0, hsize_t partialHistMultiplier = 0);
     
     // Setup
-    void createDatasets(H5::Group group, std::string name);
+    //void createDatasets(H5::Group group, std::string name);
+    void createDatasets(H5Outputfile H5outputfile, hid_t gid, std::string name);
     void createBuffers(std::vector<hsize_t> dims, hsize_t partialHistMultiplier = 0);
     
     // Basic stats
@@ -115,13 +117,22 @@ struct Stats {
     hsize_t numBins;
     
     // Datasets
-    H5::DataSet minDset;
-    H5::DataSet maxDset;
-    H5::DataSet sumDset;
-    H5::DataSet ssqDset;
-    H5::DataSet nanDset;
+//     H5::DataSet minDset;
+//     H5::DataSet maxDset;
+//     H5::DataSet sumDset;
+//     H5::DataSet ssqDset;
+//     H5::DataSet nanDset;
+//     
+//     H5::DataSet histDset;
     
-    H5::DataSet histDset;
+    hid_t minDset;
+    hid_t maxDset;
+    hid_t sumDset;
+    hid_t ssqDset;
+    hid_t nanDset;
+    
+    hid_t histDset;
+
     
     // Buffer dimensions
     
@@ -129,6 +140,7 @@ struct Stats {
     hsize_t partialHistMultiplier;
 
     // Buffers
+    //why not change this to vectors ???
     float* minVals;
     float* maxVals;
     double* sums;
diff --git a/Util.cc b/Util.cc
index d5f42b5..2197ee5 100644
--- a/Util.cc
+++ b/Util.cc
@@ -145,95 +145,95 @@ void readFitsData(fitsfile* filePtr, hsize_t channel, unsigned int stokes, hsize
 }
 
 // Only available in C++ API from 1.10.1
-bool hdf5Exists(H5::H5Location& location, const std::string& name) {
-    return H5Lexists(location.getId(), name.c_str(), H5P_DEFAULT) > 0;
-}
-
-void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims) {
-    auto splitPath = split(path, '/');
-    
-    auto name = splitPath.back();
-    splitPath.pop_back();
-    
-    for (auto& groupname : splitPath) {
-        if (!hdf5Exists(group, groupname)) {
-            group = group.createGroup(groupname);
-        } else {
-            group = group.openGroup(groupname);
-        }
-    }
-    
-    H5::DSetCreatPropList propList;
-    if (!chunkDims.empty()) {
-        propList.setChunk(chunkDims.size(), chunkDims.data());
-    }
-    
-    auto dataSpace = H5::DataSpace(dims.size(), dims.data());
-    dataset = group.createDataSet(name, dataType, dataSpace, propList);
-}
-
-void writeHdf5Attribute(H5::Group group, std::string name, std::string value) {
-    H5::StrType strType(H5::PredType::C_S1, 256);
-    H5::DataSpace dataSpace(H5S_SCALAR);
-    auto attribute = group.createAttribute(name, strType, dataSpace);
-    attribute.write(strType, value);
-}
-
-void writeHdf5Attribute(H5::Group group, std::string name, int64_t value) {
-    H5::IntType intType(H5::PredType::NATIVE_INT64);
-    intType.setOrder(H5T_ORDER_LE);
-    H5::DataSpace dataSpace(H5S_SCALAR);
-    auto attribute = group.createAttribute(name, intType, dataSpace);
-    attribute.write(intType, &value);
-}
-
-void writeHdf5Attribute(H5::Group group, std::string name, double value) {
-    H5::FloatType doubleType(H5::PredType::NATIVE_DOUBLE);
-    doubleType.setOrder(H5T_ORDER_LE);
-    H5::DataSpace dataSpace(H5S_SCALAR);
-    auto attribute = group.createAttribute(name, doubleType, dataSpace);
-    attribute.write(doubleType, &value);
-}
-
-void writeHdf5Attribute(H5::Group group, std::string name, bool value) {
-    H5::IntType boolType(H5::PredType::NATIVE_HBOOL);
-    H5::DataSpace dataSpace(H5S_SCALAR);
-    auto attribute = group.createAttribute(name, boolType, dataSpace);
-    attribute.write(boolType, &value);
-}
-
-void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-    H5::DataSpace memSpace(dims.size(), dims.data());
-    auto fileSpace = dataset.getSpace();
-    if (!count.empty() && !start.empty()) {
-        fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
-    }
-    dataset.write(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace);
-}
-
-void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-    H5::DataSpace memSpace(dims.size(), dims.data());
-    auto fileSpace = dataset.getSpace();
-    if (!count.empty() && !start.empty()) {
-        fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
-    }
-    dataset.write(data, H5::PredType::NATIVE_DOUBLE, memSpace, fileSpace);
-}
-
-void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-    H5::DataSpace memSpace(dims.size(), dims.data());
-    auto fileSpace = dataset.getSpace();
-    if (!count.empty() && !start.empty()) {
-        fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
-    }
-    dataset.write(data, H5::PredType::NATIVE_INT64, memSpace, fileSpace);
-}
-
-void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-    H5::DataSpace memSpace(dims.size(), dims.data());
-    auto fileSpace = dataset.getSpace();
-    if (!count.empty() && !start.empty()) {
-        fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
-    }
-    dataset.read(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace);
-}
+// bool hdf5Exists(H5::H5Location& location, const std::string& name) {
+//     return H5Lexists(location.getId(), name.c_str(), H5P_DEFAULT) > 0;
+// }
+// 
+// void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims) {
+//     auto splitPath = split(path, '/');
+//     
+//     auto name = splitPath.back();
+//     splitPath.pop_back();
+//     
+//     for (auto& groupname : splitPath) {
+//         if (!hdf5Exists(group, groupname)) {
+//             group = group.createGroup(groupname);
+//         } else {
+//             group = group.openGroup(groupname);
+//         }
+//     }
+//     
+//     H5::DSetCreatPropList propList;
+//     if (!chunkDims.empty()) {
+//         propList.setChunk(chunkDims.size(), chunkDims.data());
+//     }
+//     
+//     auto dataSpace = H5::DataSpace(dims.size(), dims.data());
+//     dataset = group.createDataSet(name, dataType, dataSpace, propList);
+// }
+// 
+// void writeHdf5Attribute(H5::Group group, std::string name, std::string value) {
+//     H5::StrType strType(H5::PredType::C_S1, 256);
+//     H5::DataSpace dataSpace(H5S_SCALAR);
+//     auto attribute = group.createAttribute(name, strType, dataSpace);
+//     attribute.write(strType, value);
+// }
+// 
+// void writeHdf5Attribute(H5::Group group, std::string name, int64_t value) {
+//     H5::IntType intType(H5::PredType::NATIVE_INT64);
+//     intType.setOrder(H5T_ORDER_LE);
+//     H5::DataSpace dataSpace(H5S_SCALAR);
+//     auto attribute = group.createAttribute(name, intType, dataSpace);
+//     attribute.write(intType, &value);
+// }
+// 
+// void writeHdf5Attribute(H5::Group group, std::string name, double value) {
+//     H5::FloatType doubleType(H5::PredType::NATIVE_DOUBLE);
+//     doubleType.setOrder(H5T_ORDER_LE);
+//     H5::DataSpace dataSpace(H5S_SCALAR);
+//     auto attribute = group.createAttribute(name, doubleType, dataSpace);
+//     attribute.write(doubleType, &value);
+// }
+// 
+// void writeHdf5Attribute(H5::Group group, std::string name, bool value) {
+//     H5::IntType boolType(H5::PredType::NATIVE_HBOOL);
+//     H5::DataSpace dataSpace(H5S_SCALAR);
+//     auto attribute = group.createAttribute(name, boolType, dataSpace);
+//     attribute.write(boolType, &value);
+// }
+// 
+// void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+//     H5::DataSpace memSpace(dims.size(), dims.data());
+//     auto fileSpace = dataset.getSpace();
+//     if (!count.empty() && !start.empty()) {
+//         fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
+//     }
+//     dataset.write(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace);
+// }
+// 
+// void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+//     H5::DataSpace memSpace(dims.size(), dims.data());
+//     auto fileSpace = dataset.getSpace();
+//     if (!count.empty() && !start.empty()) {
+//         fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
+//     }
+//     dataset.write(data, H5::PredType::NATIVE_DOUBLE, memSpace, fileSpace);
+// }
+// 
+// void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+//     H5::DataSpace memSpace(dims.size(), dims.data());
+//     auto fileSpace = dataset.getSpace();
+//     if (!count.empty() && !start.empty()) {
+//         fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
+//     }
+//     dataset.write(data, H5::PredType::NATIVE_INT64, memSpace, fileSpace);
+// }
+// 
+// void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+//     H5::DataSpace memSpace(dims.size(), dims.data());
+//     auto fileSpace = dataset.getSpace();
+//     if (!count.empty() && !start.empty()) {
+//         fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data());
+//     }
+//     dataset.read(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace);
+// }
diff --git a/Util.h b/Util.h
index dbd119a..51f7773 100644
--- a/Util.h
+++ b/Util.h
@@ -29,17 +29,17 @@ void readFitsStringAttribute(fitsfile* filePtr, const std::string& name, std::st
 void readFitsData(fitsfile* filePtr, hsize_t channel, unsigned int stokes, hsize_t size, float* destination);
 
 // Only available in C++ API from 1.10.1
-bool hdf5Exists(H5::H5Location& location, const std::string& name);
-void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims = EMPTY_DIMS);
-void writeHdf5Attribute(H5::Group group, std::string name, std::string value);
-void writeHdf5Attribute(H5::Group group, std::string name, int64_t value);
-void writeHdf5Attribute(H5::Group group, std::string name, double value);
-void writeHdf5Attribute(H5::Group group, std::string name, bool value);
-
-void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
-void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
-void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
-
-void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
+// bool hdf5Exists(H5::H5Location& location, const std::string& name);
+// void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims = EMPTY_DIMS);
+// void writeHdf5Attribute(H5::Group group, std::string name, std::string value);
+// void writeHdf5Attribute(H5::Group group, std::string name, int64_t value);
+// void writeHdf5Attribute(H5::Group group, std::string name, double value);
+// void writeHdf5Attribute(H5::Group group, std::string name, bool value);
+
+// void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
+// void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
+// void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
+
+// void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector<hsize_t>& dims, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
 
 #endif

From 03a053b42253a34087a8ed9b00d15cdadb478e6c Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Thu, 30 Jul 2020 14:27:37 +0800
Subject: [PATCH 02/13] Update to HDF5 Wrapper API

---
 .gitignore     |   1 +
 CMakeLists.txt |   2 +-
 Converter.cc   | 104 +++++++++++---------
 Converter.h    |  31 +++---
 HDF5Wrapper.cc | 259 ++++++++++++++++++++++++++++++++++++++++++++++++-
 HDF5Wrapper.h  | 128 +++++++++++++++++++-----
 MipMap.h       |  32 +++---
 Stats.cc       |  21 ++--
 Stats.h        |  36 +++----
 UI.h           |  86 ++++++++++++++++
 main.cc        |  91 ++---------------
 11 files changed, 578 insertions(+), 213 deletions(-)
 create mode 100644 UI.h

diff --git a/.gitignore b/.gitignore
index 87140a3..e62ad2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 cmake-build-*/
 .idea/
 build/
+*.h.gch
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 074eee4..b8764ca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,7 +27,7 @@ set(CMAKE_CXX_STANDARD 11)
 #     endif()
 # endmacro()
 
-FIND_PACKAGE(HDF5 REQUIRED COMPONENTS CXX)
+FIND_PACKAGE(HDF5 REQUIRED COMPONENTS C)
 INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR})
 find_package(OpenMP)
 
diff --git a/Converter.cc b/Converter.cc
index 587c0ec..4438c11 100644
--- a/Converter.cc
+++ b/Converter.cc
@@ -1,49 +1,51 @@
 #include "Converter.h"
 
-Converter::Converter(std::string inputFileName, std::string outputFileName, bool progress) : timer(), progress(progress) {
+Converter::Converter(std::string inputFileName, std::string outputFileName, bool progress) :
+  timer(), progress(progress)
+{
     TIMER(timer.start("Setup"););
-    
+
     openFitsFile(&inputFilePtr, inputFileName);
-    
+
     long dims[4];
-    
+
     getFitsDims(inputFilePtr, N, dims);
-        
+
     stokes = N == 4 ? dims[3] : 1;
     depth = N >= 3 ? dims[2] : 1;
     height = dims[1];
     width = dims[0];
-    
+
     swizzledName = N == 3 ? "ZYX" : "ZYXW";
-    
+
     standardDims = trimAxes({stokes, depth, height, width}, N);
     tileDims = trimAxes({1, 1, TILE_SIZE, TILE_SIZE}, N);
-    
+
     numBins = int(std::max(std::sqrt(width * height), 2.0));
-    
+
     // STATS OBJECTS
 
     auto statsXYDims = trimAxes({stokes, depth}, N - 2);
     statsXY = Stats(statsXYDims, numBins);
-    
+
     if (depth > 1) {
         swizzledDims = trimAxes({stokes, width, height, depth}, N);
         statsZ = Stats(trimAxes({stokes, height, width}, N - 1));
         auto statsXYZDims = trimAxes({stokes}, N - 3);
         statsXYZ = Stats(statsXYZDims, numBins);
     }
-    
+
     // MIPMAPS
     mipMaps = MipMaps(standardDims, tileDims);
-    
+
     // Prepare output file
     this->outputFileName = outputFileName;
-    tempOutputFileName = outputFileName + ".tmp";        
+    tempOutputFileName = outputFileName + ".tmp";
 }
 
 Converter::~Converter() {
     // TODO this is probably unnecessary; the file object destructor should close the file properly.
-    HDF5outputFile.close();
+    H5outputfile.close();
 }
 
 std::unique_ptr<Converter> Converter::getConverter(std::string inputFileName, std::string outputFileName, bool slow, bool progress) {
@@ -54,91 +56,100 @@ std::unique_ptr<Converter> Converter::getConverter(std::string inputFileName, st
     }
 }
 
-void Converter::copyAndCalculate() {
-    // implemented in subclasses
-}
-
-void Converter::reportMemoryUsage() {
-    // implemented in subclasses
-}
+// void Converter::copyAndCalculate() {
+//     // implemented in subclasses
+// }
+//
+// void Converter::reportMemoryUsage() {
+//     // implemented in subclasses
+// }
+//
 
 void Converter::convert() {
     // CREATE OUTPUT FILE
-    
+
     // TODO dataset variables should be local and passed into the copy function?
-    
+
 //     outputFile = H5::H5File(tempOutputFileName, H5F_ACC_TRUNC);
 //     outputGroup = outputFile.createGroup("0");
 
     H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC);
-    hid_t gid = H5outputfile.create_group("0");
-    
+    std::string parentpath = "0";
+    hid_t gid = H5outputfile.create_group(parentpath);
+
     std::vector<hsize_t> chunkDims;
     if (useChunks(standardDims)) {
         chunkDims = tileDims;
     }
-    
+
 //     H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
 //     floatType.setOrder(H5T_ORDER_LE);
 //     createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims);
 //????
 
-    
-    
+
 //     statsXY.createDatasets(outputGroup, "XY");
 
     if (depth > 1) {
 //         statsXYZ.createDatasets(outputGroup, "XYZ");
 //         statsZ.createDatasets(outputGroup, "Z");
-        
+
 //         auto swizzledGroup = outputGroup.createGroup("SwizzledData");
         // We use this name in papers because it sounds more serious. :)
 //         outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData");
 //         createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims);
     }
-    
-    mipMaps.createDatasets(outputGroup);
-    
+
+    /// mipMaps.createDatasets(outputGroup);
+
     // COPY HEADERS
-    
+
     TIMER(timer.start("Headers"););
-    
+
 //     writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION));
 //     writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER));
 //     writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION));
+    H5outputfile.write_attribute(parentpath, "SCHEMA_VERSION", std::string(SCHEMA_VERSION));
+    H5outputfile.write_attribute(parentpath, "HDF5_CONVERTER", std::string(HDF5_CONVERTER));
+    H5outputfile.write_attribute(parentpath, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION));
+
 
     int numAttributes;
     readFitsHeader(inputFilePtr, numAttributes);
-    
+
     // IMPORTANT: This is 1-indexed!
-    for (int i = 1; i <= numAttributes; i++) {        
+    for (int i = 1; i <= numAttributes; i++) {
         std::string attributeName;
         std::string attributeValue;
         readFitsAttribute(inputFilePtr, i, attributeName, attributeValue);
-        
+
         if (attributeName.empty() || attributeName.find("COMMENT") == 0 || attributeName.find("HISTORY") == 0) {
             // TODO we should actually do something about these
         } else {
-            if (outputGroup.attrExists(attributeName)) {
+            // if (outputGroup.attrExists(attributeName)) {
+            if (H5outputfile.exists_attribute(parentpath, attributeName)) {
                 std::cout << "Warning: Skipping duplicate attribute '" << attributeName << "'" << std::endl;
             } else {
                 bool parsingFailure(false);
-                
+
                 if (attributeValue.length() >= 2 && attributeValue.find('\'') == 0 &&
                     attributeValue.find_last_of('\'') == attributeValue.length() - 1) {
                     // STRING
                     std::string attributeValueStr;
                     readFitsStringAttribute(inputFilePtr, attributeName, attributeValueStr);
 //                     writeHdf5Attribute(outputGroup, attributeName, attributeValueStr);
+                    H5outputfile.write_attribute(parentpath, attributeName, attributeValueStr);
                 } else if (attributeValue == "T" || attributeValue == "F") {
                     // BOOLEAN
                     bool attributeValueBool = (attributeValue == "T");
 //                     writeHdf5Attribute(outputGroup, attributeName, attributeValueBool);
+                    H5outputfile.write_attribute(parentpath, attributeName, attributeValueBool);
                 } else if (attributeValue.find('.') != std::string::npos) {
                     // TRY TO PARSE AS DOUBLE
                     try {
                         double attributeValueDouble = std::stod(attributeValue);
 //                         writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble);
+                        H5outputfile.write_attribute(parentpath, attributeName, attributeValueDouble);
                     } catch (const std::invalid_argument& ia) {
                         std::cout << "Warning: could not parse attribute '" << attributeName << "' as a float." << std::endl;
                         parsingFailure = true;
@@ -147,7 +158,8 @@ void Converter::convert() {
                         long double attributeValueLongDouble = std::stold(attributeValue);
                         double attributeValueDouble = (double) attributeValueLongDouble;
 //                         writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble);
-                        
+                        H5outputfile.write_attribute(parentpath, attributeName, attributeValueDouble);
+
                         std::ostringstream ostream;
                         ostream.precision(13);
                         ostream << attributeValueDouble;
@@ -155,7 +167,7 @@ void Converter::convert() {
                         std::string round_trip(ostream.str());
                         transform(original.begin(), original.end(), original.begin(), ::toupper);
                         transform(round_trip.begin(), round_trip.end(), round_trip.begin(), ::toupper);
-                                                
+
                         if (original != round_trip) {
                             std::cout << "Warning: the value of attribute  '" << attributeName << "' is not representable as a normalised double precision floating point number. Some precision has been lost.\nOriginal string representation:\n'" << original << "'\nFinal string representation:\n'" << round_trip << "'" << std::endl;
                         }
@@ -165,26 +177,28 @@ void Converter::convert() {
                     try {
                         int64_t attributeValueInt = std::stoi(attributeValue);
 //                         writeHdf5Attribute(outputGroup, attributeName, attributeValueInt);
+                        H5outputfile.write_attribute(parentpath, attributeName, attributeValueInt);
                     } catch (const std::invalid_argument& ia) {
                         std::cout << "Warning: could not parse attribute '" << attributeName << "' as an integer." << std::endl;
                         parsingFailure = true;
                     }
                 }
-                
+
                 if (parsingFailure) {
                     // FALL BACK TO STRING
 //                     writeHdf5Attribute(outputGroup, attributeName, attributeValue);
+                    H5outputfile.write_attribute(parentpath, attributeName, attributeValue);
                 }
             }
         }
     }
-    
+
     // MAIN CONVERSION AND CALCULATION FUNCTION
 
     copyAndCalculate();
-            
+
     TIMER(timer.print(product(standardDims)););
-    
+
     // Rename from temp file
     rename(tempOutputFileName.c_str(), outputFileName.c_str());
 }
diff --git a/Converter.h b/Converter.h
index e0f36e9..a356f25 100644
--- a/Converter.h
+++ b/Converter.h
@@ -13,50 +13,51 @@ class Converter {
     Converter() {}
     Converter(std::string inputFileName, std::string outputFileName, bool progress);
     ~Converter();
-    
+
     static std::unique_ptr<Converter> getConverter(std::string inputFileName, std::string outputFileName, bool slow, bool progress);
     void convert();
     virtual void reportMemoryUsage();
-    
+
+    H5OutputFile H5outputfile;
+
 protected:
     virtual void copyAndCalculate();
-    
+
     Timer timer;
     bool progress;
-    
+
     std::string tempOutputFileName;
     std::string outputFileName;
     fitsfile* inputFilePtr;
-    
+
     // Main HDF5 objects
 //     H5::H5File outputFile;
 //     H5::Group outputGroup;
 //     H5::DataSet standardDataSet;
 //     H5::DataSet swizzledDataSet;
 
-    H5OutputFile H5outputfile;
-    
+
     float* standardCube;
     float* rotatedCube;
-    
+
     // Stats
     Stats statsXY;
     Stats statsZ;
     Stats statsXYZ;
-    
+
     // MipMaps
     MipMaps mipMaps;
-    
+
     int N;
     hsize_t stokes, depth, height, width;
     hsize_t numBins;
-    
+
     // Dataset dimensions
-    
+
     std::vector<hsize_t> standardDims;
     std::vector<hsize_t> swizzledDims;
     std::vector<hsize_t> tileDims;
-    
+
     std::string swizzledName;
 };
 
@@ -65,7 +66,7 @@ class FastConverter : public Converter {
 public:
     FastConverter(std::string inputFileName, std::string outputFileName, bool progress);
     void reportMemoryUsage() override;
-    
+
 protected:
     void copyAndCalculate() override;
 };
@@ -75,7 +76,7 @@ class SlowConverter : public Converter {
 public:
     SlowConverter(std::string inputFileName, std::string outputFileName, bool progress);
     void reportMemoryUsage() override;
-    
+
 protected:
     void copyAndCalculate() override;
 };
diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
index 2e5bc7a..25d91ca 100644
--- a/HDF5Wrapper.cc
+++ b/HDF5Wrapper.cc
@@ -1,4 +1,4 @@
-#include "HDF5Wrapper.h" 
+#include "HDF5Wrapper.h"
 
 void H5OutputFile::create(std::string filename, hid_t flag,
     int taskID, bool iparallelopen)
@@ -91,6 +91,263 @@ void H5OutputFile::close()
 #endif
 }
 
+std::vector<std::string> H5OutputFile::_tokenize(const std::string &s)
+{
+    std::string delims("/");
+    std::string::size_type lastPos = s.find_first_not_of(delims, 0);
+    std::string::size_type pos     = s.find_first_of(delims, lastPos);
+
+    std::vector<std::string> tokens;
+    while (std::string::npos != pos || std::string::npos != lastPos) {
+        tokens.push_back(s.substr(lastPos, pos - lastPos));
+        lastPos = s.find_first_not_of(delims, pos);
+        pos = s.find_first_of(delims, lastPos);
+    }
+    return tokens;
+}
+
+/// get an attribute going to list of hids
+void H5OutputFile::_get_attribute(std::vector<hid_t> &ids, const std::string attr_name)
+{
+    //can use H5Aexists as it is the C interface but how to access it?
+    auto exists = H5Aexists(ids.back(), attr_name.c_str());
+    if (exists == 0) {
+        throw std::invalid_argument(std::string("attribute not found ") + attr_name);
+    }
+    else if (exists < 0) {
+        throw std::runtime_error("Error on H5Aexists");
+    }
+    auto attr = H5Aopen(ids.back(), attr_name.c_str(), H5P_DEFAULT);
+    ids.push_back(attr);
+}
+/// get attributes parsing list of ids and vector of strings
+void H5OutputFile::_get_attribute(std::vector<hid_t> &ids, const std::vector<std::string> &parts)
+{
+    // This is the attribute name, so open it and store the id
+    if (parts.size() == 1) {
+        _get_attribute(ids, parts[0]);
+    }
+    else
+    {
+        //otherwise enter group and recursively call funciotn
+        H5O_info_t object_info;
+        hid_t newid;
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT);
+        if (object_info.type == H5O_TYPE_GROUP) {
+            newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT);
+        }
+        else if (object_info.type == H5O_TYPE_DATASET) {
+            newid = H5Dopen(ids.back(),parts[0].c_str(),H5P_DEFAULT);
+        }
+        ids.push_back(newid);
+        //get the substring
+        std::vector<std::string> subparts(parts.begin() + 1, parts.end());
+        //call function again
+        _get_attribute(ids, subparts);
+    }
+}
+
+/// get attribute in file, storing relevant ids in vector
+void H5OutputFile::get_attribute(std::vector<hid_t> &ids, const std::string &name)
+{
+    std::vector<std::string> parts = _tokenize(name);
+    ids.push_back(file_id);
+    _get_attribute(ids, parts);
+}
+
+/// get an attribute going to list of hids
+void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::string dset_name)
+{
+    auto dset_id = H5Dopen(ids.back(), dset_name.c_str(), H5P_DEFAULT);
+    if (dset_id < 0) {
+        throw std::invalid_argument(std::string("dataset not found ") + dset_name);
+    }
+    ids.push_back(dset_id);
+}
+/// get attributes parsing list of ids and vector of strings
+void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::vector<std::string> &parts)
+{
+    // This is the attribute name, so open it and store the id
+    if (parts.size() == 1) {
+        _get_dataset(ids, parts[0]);
+    }
+    else
+    {
+        //otherwise enter group and recursively call funciotn
+        H5O_info_t object_info;
+        hid_t newid;
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT);
+        if (object_info.type == H5O_TYPE_GROUP) {
+            newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT);
+        }
+        else if (object_info.type == H5O_TYPE_DATASET) {
+            throw std::runtime_error("Incorrect path to data set, encountered another data set in path");
+        }
+        ids.push_back(newid);
+        //get the substring
+        std::vector<std::string> subparts(parts.begin() + 1, parts.end());
+        //call function again
+        _get_dataset(ids, subparts);
+    }
+}
+
+/// get attribute in file, storing relevant ids in vector
+void H5OutputFile::get_dataset(std::vector<hid_t> &ids, const std::string &name)
+{
+    std::vector<std::string> parts = _tokenize(name);
+    ids.push_back(file_id);
+    _get_dataset(ids, parts);
+}
+
+
+/// close open hids stored in vector
+void H5OutputFile::close_hdf_ids(std::vector<hid_t> &ids)
+{
+    H5O_info_t object_info;
+    for (auto &id:ids)
+    {
+        H5Oget_info(id, &object_info);
+        if (object_info.type == H5O_TYPE_GROUP) {
+            H5Gclose(id);
+        }
+        else if (object_info.type == H5O_TYPE_GROUP) {
+            H5Dclose(id);
+        }
+    }
+}
+
+/// read a scalar attribute
+template<typename T> void H5OutputFile::_do_read(const hid_t &attr, const hid_t &type, T &val)
+{
+    if (hdf5_type(T{}) == H5T_C_S1)
+    {
+      _do_read_string(attr, type, val);
+    }
+    else {
+      H5Aread(attr, type, &val);
+    }
+}
+
+/// read a string attribute
+void H5OutputFile::_do_read_string(const hid_t &attr, const hid_t &type, std::string &val)
+{
+    std::vector<char> buf;
+    hid_t type_in_file = H5Aget_type(attr);
+    hid_t type_in_memory = H5Tcopy(type); // copy memory type because we'll need to modify it
+    size_t length = H5Tget_size(type_in_file); // get length of the string in the file
+    buf.resize(length+1); // resize buffer in memory, allowing for null terminator
+    H5Tset_size(type_in_memory, length+1); // tell HDF5 the length of the buffer in memory
+    H5Tset_strpad(type_in_memory, H5T_STR_NULLTERM); // specify that we want a null terminated string
+    H5Aread(attr, type_in_memory, buf.data());
+    H5Tclose(type_in_memory);
+    H5Tclose(type_in_file);
+    val=std::string(buf.data());
+}
+
+/// read a vector attribute
+template<typename T> void H5OutputFile::_do_read_v(const hid_t &attr, const hid_t &type, std::vector<T> &val)
+{
+    hid_t space = H5Aget_space (attr);
+    int npoints = H5Sget_simple_extent_npoints(space);
+    val.resize(npoints);
+    H5Aread(attr, type, val.data());
+    H5Sclose(space);
+}
+
+template<typename T> const T H5OutputFile::read_attribute(const std::string &name) {
+    std::string attr_name;
+    T val;
+    hid_t type;
+    H5O_info_t object_info;
+    std::vector <hid_t> ids;
+    //traverse the file to get to the attribute, storing the ids of the
+    //groups, data spaces, etc that have been opened.
+    get_attribute(ids, name);
+    //now reverse ids and load attribute
+    reverse(ids.begin(),ids.end());
+    //determine hdf5 type of the array in memory
+    type = hdf5_type(T{});
+    // read the data
+    _do_read<T>(ids[0], type, val);
+    H5Aclose(ids[0]);
+    ids.erase(ids.begin());
+    //now have hdf5 ids traversed to get to desired attribute so move along to close all
+    //based on their object type
+    close_hdf_ids(ids);
+    return val;
+}
+
+//read vector attribute
+template<typename T> const std::vector<T> H5OutputFile::read_attribute_v(const std::string &name) {
+    std::string attr_name;
+    std::vector<T> val;
+    hid_t type;
+    H5O_info_t object_info;
+    std::vector <hid_t> ids;
+    //traverse the file to get to the attribute, storing the ids of the
+    //groups, data spaces, etc that have been opened.
+    get_attribute(ids, name);
+    //now reverse ids and load attribute
+    reverse(ids.begin(),ids.end());
+    //determine hdf5 type of the array in memory
+    type = hdf5_type(T{});
+    // read the data
+    _do_read_v<T>(ids[0], type, val);
+    H5Aclose(ids[0]);
+    ids.erase(ids.begin());
+    //now have hdf5 ids traversed to get to desired attribute so move along to close all
+    //based on their object type
+    close_hdf_ids(ids);
+    return val;
+}
+
+
+bool H5OutputFile::exists_attribute(const std::string &parent, const std::string &name) {
+    std::string attr_name = parent+std::string("/")+name;
+    std::vector <hid_t> ids;
+    bool exists = true;
+    //groups, data spaces, etc that have been opened.
+    try {
+      get_attribute(ids, attr_name);
+    }
+    catch (const std::invalid_argument& ia)
+    {
+      exists = false;
+    }
+    //now reverse ids and load attribute
+    reverse(ids.begin(),ids.end());
+    //check if present
+    if (exists) {
+      H5Aclose(ids[0]);
+      ids.erase(ids.begin());
+    }
+    close_hdf_ids(ids);
+    return exists;
+}
+
+bool H5OutputFile::exists_dataset(const std::string &parent, const std::string &name) {
+    std::string dset_name = parent+std::string("/")+name;
+    std::vector <hid_t> ids;
+    bool exists = true;
+    //groups, data spaces, etc that have been opened.
+    try {
+      get_dataset(ids, dset_name);
+    }
+    catch (const std::invalid_argument& ia)
+    {
+      exists = false;
+    }
+    //now reverse ids and load attribute
+    reverse(ids.begin(),ids.end());
+    //check if present
+    if (exists) {
+      H5Dclose(ids[0]);
+      ids.erase(ids.begin());
+    }
+    close_hdf_ids(ids);
+    return exists;
+}
+
 /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
 /// the input data if not explicitly specified with the filetype_id parameter.
 template <typename T> void H5OutputFile::write_dataset(std::string name, hsize_t len, T *data,
diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h
index 1cd5d3f..2180a98 100644
--- a/HDF5Wrapper.h
+++ b/HDF5Wrapper.h
@@ -5,7 +5,7 @@
 
 #ifdef USEMPI
 #include <mpi.h>
-#endif 
+#endif
 
 // Overloaded function to return HDF5 type given a C type
 static inline hid_t hdf5_type(float dummy)              {return H5T_NATIVE_FLOAT;}
@@ -19,15 +19,25 @@ static inline hid_t hdf5_type(unsigned int dummy)       {return H5T_NATIVE_UINT;
 static inline hid_t hdf5_type(unsigned long dummy)      {return H5T_NATIVE_ULONG;}
 static inline hid_t hdf5_type(unsigned long long dummy) {return H5T_NATIVE_ULLONG;}
 static inline hid_t hdf5_type(std::string dummy)        {return H5T_C_S1;}
-static inline hid_t hdf5_type_from_string(std::string dummy) 
+static inline hid_t hdf5_type_from_string(std::string dummy)
 {
     if (dummy == std::string("float32")) return H5T_NATIVE_FLOAT;
     else if (dummy == std::string("float64")) return H5T_NATIVE_DOUBLE;
-    else if (dummy == std::string("float64")) return H5T_NATIVE_INT;
+    else if (dummy == std::string("int16")) return H5T_NATIVE_SHORT;
+    else if (dummy == std::string("int32")) return H5T_NATIVE_INT;
     else if (dummy == std::string("int64")) return H5T_NATIVE_LLONG;
+    else if (dummy == std::string("uint16")) return H5T_NATIVE_USHORT;
+    else if (dummy == std::string("uint32")) return H5T_NATIVE_UINT;
+    else if (dummy == std::string("uint64")) return H5T_NATIVE_ULLONG;
     else return H5T_C_S1;
 }
 
+#if H5_VERSION_GE(1,10,1)
+#endif
+
+#if H5_VERSION_GE(1,12,0)
+#endif
+
 ///\name HDF class to manage writing information
 class H5OutputFile
 {
@@ -41,7 +51,10 @@ class H5OutputFile
 
 protected:
 
-    // Called if a HDF5 call fails (might need to MPI_Abort)
+    /// size of chunks when compressing
+    unsigned int HDFOUTPUTCHUNKSIZE = 8192;
+
+    /// Called if a HDF5 call fails (might need to MPI_Abort)
     void io_error(std::string message) {
         std::cerr << message << std::endl;
 #ifdef USEMPI
@@ -49,7 +62,25 @@ class H5OutputFile
 #endif
         abort();
     }
-    unsigned int HDFOUTPUTCHUNKSIZE = 8192;
+
+    /// tokenize a path given an input string
+    std::vector<std::string> _tokenize(const std::string &s);
+    /// get attribute id
+    void _get_attribute(std::vector<hid_t> &ids, const std::string attr_name);
+    /// get attribute id from tokenized string
+    void _get_attribute(std::vector<hid_t> &ids, const std::vector<std::string> &parts);
+    /// wrapper for reading scalar
+    template<typename T> void _do_read(const hid_t &attr, const hid_t &type, T &val);
+    /// wrapper for reading string
+    void _do_read_string(const hid_t &attr, const hid_t &type, std::string &val);
+    /// wrapper for reading vector
+    template<typename T> void _do_read_v(const hid_t &attr, const hid_t &type, std::vector<T> &val);
+
+    /// get dataset id
+    void _get_dataset(std::vector<hid_t> &ids, const std::string dset_name);
+    /// get attribute id from tokenized string
+    void _get_dataset(std::vector<hid_t> &ids, const std::vector<std::string> &parts);
+
 public:
 
     // Constructor
@@ -60,44 +91,85 @@ class H5OutputFile
 #endif
     }
 
-    // Create a new file
+    // Destructor closes the file if it's open
+    ~H5OutputFile()
+    {
+        if(file_id >= 0) close();
+    }
+
+    /// Create a new file
     void create(std::string filename, hid_t flag = H5F_ACC_TRUNC,
         int taskID = -1, bool iparallelopen = true);
-
+    /// Append to a file
     void append(std::string filename, hid_t flag = H5F_ACC_RDWR,
         int taskID = -1, bool iparallelopen = true);
 
-    // Close the file
+    /// Close the file
     void close();
 
+    /// create a group
     hid_t create_group(std::string groupname) {
         hid_t group_id = H5Gcreate(file_id, groupname.c_str(),
             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
         return group_id;
     }
+    /// close group
     herr_t close_group(hid_t gid) {
         herr_t status = H5Gclose(gid);
         return status;
     }
 
-    //????
-    hid_t create_dataset(std::string dsetname, hid_t file) {
-        hid_t dset_id H5Dcreate(file_id, datasetname, -1, -1, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-        hid_t group_id = H5Dcreate(file_id, groupname.c_str(),
-            H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-        return group_id;
+    /// close hids stored in vector
+    void close_hdf_ids(std::vector<hid_t> &ids);
+
+    /// create a dataspace
+    hid_t create_dataspace(const std::vector<hsize_t> &dims)
+    {
+        hid_t dspace_id;
+        int rank = dims.size();
+        dspace_id = H5Screate_simple(rank, dims.data(), NULL);
+        return dspace_id;
     }
-    herr_t close_dataset(hid_t did) {
-        herr_t status = H5Gclose(gid);
+    hid_t create_dataspace(int rank, hsize_t *dims)
+    {
+        hid_t dspace_id;
+        dspace_id = H5Screate_simple(rank, dims, NULL);
+        return dspace_id;
+    }
+    hid_t create_dataspace(hsize_t len)
+    {
+        int rank = 1;
+        hsize_t dims[1] = {len};
+        hid_t dspace_id = H5Screate_simple(rank, dims, NULL);
+        return dspace_id;
+    }
+    /// close data set
+    herr_t close_dataspace(hid_t dspace_id) {
+        herr_t status = H5Dclose(dspace_id);
         return status;
     }
 
-    // Destructor closes the file if it's open
-    ~H5OutputFile()
+    /// create a dataset
+    hid_t create_dataset(std::string dsetname, hid_t type_id, hid_t dspace_id)
     {
-        if(file_id >= 0) close();
+        hid_t dset_id;
+        dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id,
+          H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        return dset_id;
+    }
+    template <typename T> hid_t create_dataset(std::string dsetname, T *data, hid_t dspace_id)
+    {
+        hid_t dset_id;
+        hid_t type_id = hdf5_type(T{});
+        dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id,
+          H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+        return dset_id;
+    }
+    /// close data set
+    herr_t close_dataset(hid_t dset_id) {
+        herr_t status = H5Dclose(dset_id);
+        return status;
     }
-
     /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
     /// the input data if not explicitly specified with the filetype_id parameter.
     template <typename T> void write_dataset(std::string name, hsize_t len, T *data,
@@ -117,12 +189,24 @@ class H5OutputFile
         bool flag_parallel = true, bool flag_first_dim_parallel = true,
         bool flag_hyperslab = true, bool flag_collective = true);
 
+    /// get a dataset with full path given by name
+    void get_dataset(std::vector<hid_t> &ids, const std::string &name);
+    /// check if dataset exits
+    bool exists_dataset(const std::string &parent, const std::string &name);
+
+    /// get an attribute with full path given by name
+    void get_attribute(std::vector<hid_t> &ids, const std::string &name);
+    /// read scalar attribute
+    template<typename T> const T read_attribute(const std::string &name);
+    /// read vector attribute
+    template<typename T> const std::vector<T> read_attribute_v(const std::string &name);
+    /// sees if attribute exits
+    bool exists_attribute(const std::string &parent, const std::string &name);
     /// write an attribute
     template <typename T> void write_attribute(std::string parent, std::string name, std::vector<T> data);
-
     template <typename T> void write_attribute(std::string parent, std::string name, T data);
-
     void write_attribute(std::string parent, std::string name, std::string data);
+
 };
 
 #endif
diff --git a/MipMap.h b/MipMap.h
index 5716c94..d9195cc 100644
--- a/MipMap.h
+++ b/MipMap.h
@@ -9,11 +9,11 @@ struct MipMap {
     MipMap() {};
     MipMap(const std::vector<hsize_t>& datasetDims, int mip);
     ~MipMap();
-    
+
 //     void createDataset(H5::Group group, const std::vector<hsize_t>& chunkDims);
-    void createDataset(H5Outputfile h5outputfile, hid_t gid, const std::vector<hsize_t>& chunkDims);
+    void createDataset(H5OutputFile &h5outputfile, hid_t gid, const std::vector<hsize_t>& chunkDims);
     void createBuffers(std::vector<hsize_t>& bufferDims);
-    
+
     void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) {
         hsize_t mipIndex = totalChannelOffset * width * height + (y / mip) * width + (x / mip);
         vals[mipIndex] += val;
@@ -29,23 +29,23 @@ struct MipMap {
             }
         }
     }
-    
+
     void write(hsize_t stokesOffset, hsize_t channelOffset);
     void resetBuffers();
-    
+
     std::vector<hsize_t> datasetDims;
     int mip;
-    
+
     H5::DataSet dataset;
-    
+
     std::vector<hsize_t> bufferDims;
     hsize_t bufferSize;
-    
+
     hsize_t width;
     hsize_t height;
     hsize_t depth;
     hsize_t stokes;
-    
+
     double* vals;
     int* count;
 };
@@ -54,14 +54,14 @@ struct MipMap {
 struct MipMaps {
     MipMaps() {};
     MipMaps(std::vector<hsize_t> standardDims, const std::vector<hsize_t>& chunkDims);
-    
+
     // We need the dataset dimensions to work out how many mipmaps we have
     static hsize_t size(const std::vector<hsize_t>& standardDims, const std::vector<hsize_t>& standardBufferDims);
-    
+
 //     void createDatasets(H5::Group group);
-    void createDatasets(H5OutputFile H5Outputfile, hid_t gid);
+    void createDatasets(H5OutputFile &H5outputfile, hid_t gid);
     void createBuffers(const std::vector<hsize_t>& standardBufferDims);
-    
+
     void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) {
         for (auto& mipMap : mipMaps) {
             mipMap.accumulate(val, x, y, totalChannelOffset);
@@ -73,16 +73,16 @@ struct MipMaps {
             mipMap.calculate();
         }
     }
-    
+
     // TODO if we ever want a tiled mipmap calculation
     // we'll need to implement options to pass in custom buffer dims
     // and additional x and y offsets
     void write(hsize_t stokesOffset, hsize_t channelOffset);
     void resetBuffers();
-    
+
     std::vector<hsize_t> standardDims;
     std::vector<hsize_t> chunkDims;
-    
+
     std::vector<MipMap> mipMaps;
 };
 
diff --git a/Stats.cc b/Stats.cc
index f62d893..f0ff618 100644
--- a/Stats.cc
+++ b/Stats.cc
@@ -23,35 +23,36 @@ hsize_t Stats::size(std::vector<hsize_t> dims, hsize_t numBins, hsize_t partialH
 }
 
 // void Stats::createDatasets(H5::Group group, std::string name) {
-void Stats::createDatasets(H5OutputFile H5outputfile, hid_t gid, std::string name) {
+void Stats::createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string name) {
+
 //     H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
 //     floatType.setOrder(H5T_ORDER_LE);
-//     
+//
 //     H5::IntType intType(H5::PredType::NATIVE_INT64);
 //     intType.setOrder(H5T_ORDER_LE);
-//     
+//
 //     createHdf5Dataset(minDset, group, "Statistics/" + name + "/MIN", floatType, basicDatasetDims);
 //     createHdf5Dataset(maxDset, group, "Statistics/" + name + "/MAX", floatType, basicDatasetDims);
 //     createHdf5Dataset(sumDset, group, "Statistics/" + name + "/SUM", floatType, basicDatasetDims);
 //     createHdf5Dataset(ssqDset, group, "Statistics/" + name + "/SUM_SQ", floatType, basicDatasetDims);
 //     createHdf5Dataset(nanDset, group, "Statistics/" + name + "/NAN_COUNT", intType, basicDatasetDims);
-//     
+//
 //     if (numBins) {
 //         createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins}));
 //     }
-    
+
 }
 
 void Stats::createBuffers(std::vector<hsize_t> dims, hsize_t partialHistMultiplier) {
     fullBasicBufferDims = dims;
     auto statsSize = product(dims);
-    
+
     minVals = new float[statsSize];
     maxVals = new float[statsSize];
     sums = new double[statsSize];
     sumsSq = new double[statsSize];
     nanCounts = new int64_t[statsSize];
-    
+
     if (numBins) {
         histograms = new int64_t[statsSize * numBins];
         partialHistograms = new int64_t[statsSize * numBins * partialHistMultiplier];
@@ -67,7 +68,7 @@ void Stats::clearHistogramBuffers() {
 
 void Stats::write() {
     writeBasic(fullBasicBufferDims);
-    
+
     if (numBins) {
         writeHistogram(fullBasicBufferDims);
     }
@@ -80,13 +81,13 @@ void Stats::write(const std::vector<hsize_t>& count, const std::vector<hsize_t>&
 void Stats::write(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
     auto basicN = basicDatasetDims.size();
     writeBasic(basicBufferDims, trimAxes(count, basicN), trimAxes(start, basicN));
-    
+
     if (numBins) {
         auto histN = basicN + 1;
         writeHistogram(basicBufferDims, trimAxes(extend(count, {numBins}), histN), trimAxes(extend(start, {0}), histN));
     }
 }
-    
+
 // void Stats::writeBasic(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
 void Stats::writeBasic(H5OutputFile H5outputfile, std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
 //     writeHdf5Data(minDset, minVals, basicBufferDims, count, start);
diff --git a/Stats.h b/Stats.h
index 50dbde8..a244a9d 100644
--- a/Stats.h
+++ b/Stats.h
@@ -8,7 +8,7 @@
 struct StatsCounter {
     StatsCounter() : minVal(std::numeric_limits<float>::max()), maxVal(-std::numeric_limits<float>::max()), sum(0), sumSq(0), nanCount(0) {
     }
-        
+
     void accumulateFinite(float val) {
         minVal = fmin(minVal, val);
         maxVal = fmax(maxVal, val);
@@ -48,16 +48,16 @@ struct Stats {
     Stats();
     Stats(const std::vector<hsize_t>& basicDatasetDims, hsize_t numBins = 0);
     ~Stats();
-    
+
     static hsize_t size(std::vector<hsize_t> dims, hsize_t numBins = 0, hsize_t partialHistMultiplier = 0);
-    
+
     // Setup
     //void createDatasets(H5::Group group, std::string name);
-    void createDatasets(H5Outputfile H5outputfile, hid_t gid, std::string name);
+    void createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string name);
     void createBuffers(std::vector<hsize_t> dims, hsize_t partialHistMultiplier = 0);
-    
+
     // Basic stats
-    
+
     void accumulateStatsToCounter(StatsCounter& counter, hsize_t index) {
         if (std::isfinite(maxVals[index])) {
             counter.sum += sums[index];
@@ -67,7 +67,7 @@ struct Stats {
         }
         counter.nanCount += nanCounts[index];
     }
-    
+
     void copyStatsFromCounter(hsize_t index, hsize_t totalVals, const StatsCounter& counter) {
         if ((hsize_t)counter.nanCount == totalVals) {
             minVals[index] = NAN;
@@ -80,11 +80,11 @@ struct Stats {
         sumsSq[index] = counter.sumSq;
         nanCounts[index] = counter.nanCount;
     }
-   
+
     // Histograms
 
     // Histograms
-    
+
     void clearHistogramBuffers();
 
     void accumulateHistogram(float val, double min, double range, hsize_t offset) {
@@ -104,38 +104,38 @@ struct Stats {
             }
         }
     }
-    
+
     // Writing
     void write();
     void write(const std::vector<hsize_t>& count, const std::vector<hsize_t>& start);
     void write(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start);
     void writeBasic(const std::vector<hsize_t>& basicBufferDims = EMPTY_DIMS, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
     void writeHistogram(const std::vector<hsize_t>& basicBufferDims = EMPTY_DIMS, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
-    
+
     // Dataset dimensions
     std::vector<hsize_t> basicDatasetDims;
     hsize_t numBins;
-    
+
     // Datasets
 //     H5::DataSet minDset;
 //     H5::DataSet maxDset;
 //     H5::DataSet sumDset;
 //     H5::DataSet ssqDset;
 //     H5::DataSet nanDset;
-//     
+//
 //     H5::DataSet histDset;
-    
+
     hid_t minDset;
     hid_t maxDset;
     hid_t sumDset;
     hid_t ssqDset;
     hid_t nanDset;
-    
+
     hid_t histDset;
 
-    
+
     // Buffer dimensions
-    
+
     std::vector<hsize_t> fullBasicBufferDims;
     hsize_t partialHistMultiplier;
 
@@ -146,7 +146,7 @@ struct Stats {
     double* sums;
     double* sumsSq;
     int64_t* nanCounts;
-    
+
     int64_t* histograms;
     int64_t* partialHistograms;
 };
diff --git a/UI.h b/UI.h
new file mode 100644
index 0000000..2926b9e
--- /dev/null
+++ b/UI.h
@@ -0,0 +1,86 @@
+#ifndef __UI_H
+#define __UI_H
+
+#include <getopt.h>
+#include "common.h"
+
+bool getOptions(int argc, char** argv, std::string& inputFileName, std::string& outputFileName, bool& slow, bool& progress, bool& onlyReportMemory) {
+    extern int optind;
+    extern char *optarg;
+
+    int opt;
+    bool err(false);
+
+    std::ostringstream usage;
+    usage << "IDIA FITS to HDF5 converter version " << HDF5_CONVERTER_VERSION
+    << " using IDIA schema version " << SCHEMA_VERSION << std::endl
+    << "Usage: hdf_convert [-o output_filename] [-s] [-p] [-m] input_filename" << std::endl << std::endl
+    << "Options:" << std::endl
+    << "-o\tOutput filename" << std::endl
+    << "-s\tUse slower but less memory-intensive method (enable if memory allocation fails)" << std::endl
+    << "-p\tPrint progress output (by default the program is silent)" << std::endl
+    << "-m\tReport predicted memory usage and exit without performing the conversion" << std::endl
+    << "-q\tSuppress all non-error output. Deprecated; this is now the default." << std::endl;
+
+    while ((opt = getopt(argc, argv, ":o:spqm")) != -1) {
+        switch (opt) {
+            case 'o':
+                outputFileName.assign(optarg);
+                break;
+            case 's':
+                // use slower but less memory-intensive method
+                slow = true;
+                break;
+            case 'p':
+                progress = true;
+                break;
+            case 'q':
+                std::cerr << "The -q flag is deprecated. The converter is quiet by default." << std::endl;
+                break;
+            case 'm':
+                // only print memory usage and exit
+                onlyReportMemory = true;
+                break;
+            case ':':
+                err = true;
+                std::cerr << "Missing argument for option " << opt << "." << std::endl;
+                break;
+            case '?':
+                err = true;
+                std::cerr << "Unknown option " << opt << "." << std::endl;
+                break;
+        }
+    }
+
+    if (optind >= argc) {
+        err = true;
+        std::cerr << "Missing input filename parameter." << std::endl;
+    } else {
+        inputFileName.assign(argv[optind]);
+        optind++;
+    }
+
+    if (argc > optind) {
+        err = true;
+        std::cerr << "Unexpected additional parameters." << std::endl;
+    }
+
+    if (err) {
+        std::cerr << std::endl << usage.str() << std::endl;
+        return false;
+    }
+
+    if (outputFileName.empty()) {
+        auto fitsIndex = inputFileName.find_last_of(".fits");
+        if (fitsIndex != std::string::npos) {
+            outputFileName = inputFileName.substr(0, fitsIndex - 4);
+            outputFileName += ".hdf5";
+        } else {
+            outputFileName = inputFileName + ".hdf5";
+        }
+    }
+
+    return true;
+}
+
+#endif
diff --git a/main.cc b/main.cc
index f0ac7a6..d816679 100644
--- a/main.cc
+++ b/main.cc
@@ -1,106 +1,27 @@
-#include <getopt.h>
+#include "UI.h"
 #include "Converter.h"
 
-bool getOptions(int argc, char** argv, std::string& inputFileName, std::string& outputFileName, bool& slow, bool& progress, bool& onlyReportMemory) {
-    extern int optind;
-    extern char *optarg;
-    
-    int opt;
-    bool err(false);
-    
-    std::ostringstream usage;
-    usage << "IDIA FITS to HDF5 converter version " << HDF5_CONVERTER_VERSION 
-    << " using IDIA schema version " << SCHEMA_VERSION << std::endl
-    << "Usage: hdf_convert [-o output_filename] [-s] [-p] [-m] input_filename" << std::endl << std::endl
-    << "Options:" << std::endl 
-    << "-o\tOutput filename" << std::endl 
-    << "-s\tUse slower but less memory-intensive method (enable if memory allocation fails)" << std::endl 
-    << "-p\tPrint progress output (by default the program is silent)" << std::endl
-    << "-m\tReport predicted memory usage and exit without performing the conversion" << std::endl
-    << "-q\tSuppress all non-error output. Deprecated; this is now the default." << std::endl;
-    
-    while ((opt = getopt(argc, argv, ":o:spqm")) != -1) {
-        switch (opt) {
-            case 'o':
-                outputFileName.assign(optarg);
-                break;
-            case 's':
-                // use slower but less memory-intensive method
-                slow = true;
-                break;
-            case 'p':
-                progress = true;
-                break;
-            case 'q':
-                std::cerr << "The -q flag is deprecated. The converter is quiet by default." << std::endl;
-                break;
-            case 'm':
-                // only print memory usage and exit
-                onlyReportMemory = true;
-                break;
-            case ':':
-                err = true;
-                std::cerr << "Missing argument for option " << opt << "." << std::endl;
-                break;
-            case '?':
-                err = true;
-                std::cerr << "Unknown option " << opt << "." << std::endl;
-                break;
-        }
-    }
-    
-    if (optind >= argc) {
-        err = true;
-        std::cerr << "Missing input filename parameter." << std::endl;
-    } else {
-        inputFileName.assign(argv[optind]);
-        optind++;
-    }
-    
-    if (argc > optind) {
-        err = true;
-        std::cerr << "Unexpected additional parameters." << std::endl;
-    }
-        
-    if (err) {
-        std::cerr << std::endl << usage.str() << std::endl;
-        return false;
-    }
-    
-    if (outputFileName.empty()) {
-        auto fitsIndex = inputFileName.find_last_of(".fits");
-        if (fitsIndex != std::string::npos) {
-            outputFileName = inputFileName.substr(0, fitsIndex - 4);
-            outputFileName += ".hdf5";
-        } else {
-            outputFileName = inputFileName + ".hdf5";
-        }
-    }
-    
-    return true;
-}
-
 int main(int argc, char** argv) {
     std::string inputFileName;
     std::string outputFileName;
     bool slow(false);
     bool progress(false);
     bool onlyReportMemory(false);
-    
+
     if (!getOptions(argc, argv, inputFileName, outputFileName, slow, progress, onlyReportMemory)) {
         return 1;
     }
-    
+
     std::unique_ptr<Converter> converter;
-        
+
     try {
         converter = Converter::getConverter(inputFileName, outputFileName, slow, progress);
-        
+
         if (onlyReportMemory) {
             converter->reportMemoryUsage();
             return 0;
         }
-    
+
         DEBUG(std::cout << "Converting FITS file " << inputFileName << " to HDF5 file " << outputFileName << (slow ? " using slower, memory-efficient method" : "") << std::endl;);
 
         converter->convert();

From 81d4c03612d6ee1b6356dd647caae960ab231dd2 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Fri, 31 Jul 2020 09:38:46 +0800
Subject: [PATCH 03/13] Update to HDF Wrapper API

ensured template classes defined in header, added some functionality. Still requires updates to Converter, Stats and MinMap classes to use appropriate calls
---
 Converter.cc     |  24 +--
 Converter.h      |   6 +-
 FastConverter.cc | 129 +++++++--------
 HDF5Wrapper.cc   | 415 +++++++++++++----------------------------------
 HDF5Wrapper.h    | 398 +++++++++++++++++++++++++++++++++++++++++----
 MipMap.cc        |  51 +++---
 MipMap.h         |   8 +-
 SlowConverter.cc | 174 ++++++++++----------
 Stats.cc         |  23 +--
 Stats.h          |  10 +-
 10 files changed, 694 insertions(+), 544 deletions(-)

diff --git a/Converter.cc b/Converter.cc
index 4438c11..b81a947 100644
--- a/Converter.cc
+++ b/Converter.cc
@@ -56,14 +56,14 @@ std::unique_ptr<Converter> Converter::getConverter(std::string inputFileName, st
     }
 }
 
-// void Converter::copyAndCalculate() {
-//     // implemented in subclasses
-// }
-//
-// void Converter::reportMemoryUsage() {
-//     // implemented in subclasses
-// }
-//
+void Converter::copyAndCalculate() {
+    // implemented in subclasses
+}
+
+void Converter::reportMemoryUsage() {
+    // implemented in subclasses
+}
+
 
 void Converter::convert() {
     // CREATE OUTPUT FILE
@@ -74,7 +74,7 @@ void Converter::convert() {
 //     outputGroup = outputFile.createGroup("0");
 
     H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC);
-    std::string parentpath = "0";
+    std::string parentpath("0");
     hid_t gid = H5outputfile.create_group(parentpath);
 
     std::vector<hsize_t> chunkDims;
@@ -109,9 +109,9 @@ void Converter::convert() {
 //     writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION));
 //     writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER));
 //     writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION));
-    H5outputfile.write_attribute(parentpath, "SCHEMA_VERSION", std::string(SCHEMA_VERSION));
-    H5outputfile.write_attribute(parentpath, "HDF5_CONVERTER", std::string(HDF5_CONVERTER));
-    H5outputfile.write_attribute(parentpath, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION));
+    H5outputfile.write_attribute(parentpath, std::string("SCHEMA_VERSION"), std::string(SCHEMA_VERSION));
+    H5outputfile.write_attribute(parentpath, std::string("HDF5_CONVERTER"), std::string(HDF5_CONVERTER));
+    H5outputfile.write_attribute(parentpath, std::string("HDF5_CONVERTER_VERSION"), std::string(HDF5_CONVERTER_VERSION));
 
 
     int numAttributes;
diff --git a/Converter.h b/Converter.h
index a356f25..d09dafe 100644
--- a/Converter.h
+++ b/Converter.h
@@ -2,15 +2,15 @@
 #define __IMAGE_H
 
 #include "common.h"
+#include "Util.h"
+#include "HDF5Wrapper.h"
 #include "Stats.h"
 #include "MipMap.h"
 #include "Timer.h"
-#include "Util.h"
-#include "HDF5Wrapper.h"
 
 class Converter {
 public:
-    Converter() {}
+    Converter(){};
     Converter(std::string inputFileName, std::string outputFileName, bool progress);
     ~Converter();
 
diff --git a/FastConverter.cc b/FastConverter.cc
index ecf4af7..f4625c7 100644
--- a/FastConverter.cc
+++ b/FastConverter.cc
@@ -1,7 +1,8 @@
 #include "Converter.h"
 
 // TODO do we need these?
-FastConverter::FastConverter(std::string inputFileName, std::string outputFileName, bool progress) : Converter(inputFileName, outputFileName, progress) {}
+FastConverter::FastConverter(std::string inputFileName, std::string outputFileName, bool progress) :
+Converter(inputFileName, outputFileName, progress) {}
 
 void FastConverter::reportMemoryUsage() {
     std::unordered_map<std::string, hsize_t> sizes;
@@ -9,22 +10,22 @@ void FastConverter::reportMemoryUsage() {
     sizes["Main dataset"] = depth * height * width * sizeof(float);
     sizes["Mipmaps"] = MipMaps::size(standardDims, {depth, height, width});
     sizes["XY stats"] = Stats::size({depth}, numBins);
-    
+
     if (depth > 1) {
         sizes["Rotation"] = sizes["Main dataset"];
         sizes["XYZ stats"] = Stats::size({}, numBins, depth);
         sizes["Z stats"] = Stats::size({height, width});
     }
-    
+
     hsize_t total(0);
-    
+
     std::cout << "APPROXIMATE MEMORY REQUIREMENTS:" << std::endl;
-    
+
     for (auto& kv : sizes) {
         std::cout << kv.first << ":\t" << kv.second * 1e-9 << " GB" << std::endl;
         total += kv.second;
     }
-    
+
     std::string note = "";
     if (depth > 1) {
         total -= std::min(sizes["Mipmaps"], sizes["Rotation"]);
@@ -37,22 +38,22 @@ void FastConverter::reportMemoryUsage() {
 void FastConverter::copyAndCalculate() {
     const hsize_t pixelProgressStride = std::max((hsize_t)1, (hsize_t)(width * height / 100));
     const hsize_t channelProgressStride = std::max((hsize_t)1, (hsize_t)(depth / 100));
-    
+
     TIMER(timer.start("Allocate"););
-    
+
     // Process one stokes at a time
     hsize_t cubeSize = depth * height * width;
     standardCube = new float[cubeSize];
-    
+
     statsXY.createBuffers({depth});
-    
+
     if (depth > 1) {
         statsXYZ.createBuffers({}, depth);
         statsZ.createBuffers({height, width});
     }
-    
+
     mipMaps.createBuffers({depth, height, width});
-    
+
     std::string timerLabelXYRotation = depth > 1 ? "XY statistics and rotation" : "XY statistics";
 
     for (unsigned int currentStokes = 0; currentStokes < stokes; currentStokes++) {
@@ -63,48 +64,48 @@ void FastConverter::copyAndCalculate() {
         TIMER(timer.start("Read"););
         DEBUG(std::cout << "+ Reading main dataset..." << std::flush;);
         readFitsData(inputFilePtr, 0, currentStokes, cubeSize, standardCube);
-        
+
         // We have to allocate the swizzled cube for each stokes because we free it to make room for mipmaps
         if (depth > 1) {
             TIMER(timer.start("Allocate"););
             rotatedCube = new float[cubeSize];
         }
-        
+
         DEBUG(std::cout << " " << timerLabelXYRotation <<  "..." << std::flush;);
         PROGRESS("\tMain loop\t");
         TIMER(timer.start(timerLabelXYRotation););
 
         // First loop calculates stats for each XY slice and rotates the dataset
-        
+
 #pragma omp parallel for
         for (hsize_t i = 0; i < depth; i++) {
             PROGRESS_DECIMATED(i, channelProgressStride, "|");
             StatsCounter counterXY;
-            
+
             auto& indexXY = i;
             std::function<void(float)> accumulate;
-            
+
             auto lazy_accumulate = [&] (float val) {
                 counterXY.accumulateFiniteLazy(val);
             };
-            
+
             auto first_accumulate = [&] (float val) {
                 counterXY.accumulateFiniteLazyFirst(val);
                 accumulate = lazy_accumulate;
             };
-            
+
             accumulate = first_accumulate;
-            
+
             for (hsize_t j = 0; j < height; j++) {
                 for (hsize_t k = 0; k < width; k++) {
                     auto sourceIndex = k + width * j + (height * width) * i;
                     auto destIndex = i + depth * j + (height * depth) * k;
                     auto& val = standardCube[sourceIndex];
-                    
+
                     if (depth > 1) {
                         rotatedCube[destIndex] = val;
                     }
-                    
+
                     // Accumulate XY stats
                     if (std::isfinite(val)) {
                         accumulate(val);
@@ -113,11 +114,11 @@ void FastConverter::copyAndCalculate() {
                     }
                 }
             }
-            
+
             // Final correction of XY min and max
             statsXY.copyStatsFromCounter(indexXY, height * width, counterXY);
         }
-        
+
         PROGRESS(std::endl);
 
         if (depth > 1) {
@@ -125,7 +126,7 @@ void FastConverter::copyAndCalculate() {
             DEBUG(std::cout << " XYZ statistics..." << std::flush;);
             PROGRESS("\tXYZ stats" << std::endl);
             TIMER(timer.start("XYZ statistics"););
-            
+
             StatsCounter counterXYZ;
 
             for (hsize_t i = 0; i < depth; i++) {
@@ -136,7 +137,7 @@ void FastConverter::copyAndCalculate() {
             statsXYZ.copyStatsFromCounter(0, depth * height * width, counterXYZ);
 
             // Second loop calculates stats for each Z profile (i.e. average/min/max XY slices)
-            
+
             DEBUG(std::cout << " Z statistics... " << std::flush;);
             PROGRESS("\tZ stats\t\t");
             TIMER(timer.start("Z statistics"););
@@ -145,10 +146,10 @@ void FastConverter::copyAndCalculate() {
             for (hsize_t j = 0; j < height; j++) {
                 for (hsize_t k = 0; k < width; k++) {
                     StatsCounter counterZ;
-                    
+
                     auto indexZ = k + j * width;
                     PROGRESS_DECIMATED(indexZ, pixelProgressStride, ".");
-                    
+
                     for (hsize_t i = 0; i < depth; i++) {
                         auto sourceIndex = k + width * j + (height * width) * i;
                         auto& val = standardCube[sourceIndex];
@@ -160,7 +161,7 @@ void FastConverter::copyAndCalculate() {
                             counterZ.accumulateNonFinite();
                         }
                     }
-                    
+
                     statsZ.copyStatsFromCounter(indexZ, depth, counterZ);
                 }
             }
@@ -169,62 +170,62 @@ void FastConverter::copyAndCalculate() {
         }
 
         // Third loop handles histograms
-        
+
         DEBUG(std::cout << " Histograms..." << std::flush;);
         PROGRESS("\tHistograms\t");
         TIMER(timer.start("Histograms"););
-        
+
         double cubeMin;
         double cubeMax;
         double cubeRange;
         bool cubeHist(false);
-                    
+
         if (depth > 1) {
             cubeMin = statsXYZ.minVals[0];
             cubeMax = statsXYZ.maxVals[0];
             cubeRange = cubeMax - cubeMin;
             cubeHist = std::isfinite(cubeMin) && std::isfinite(cubeMax) && cubeRange > 0;
         }
-        
+
         statsXY.clearHistogramBuffers();
         statsXYZ.clearHistogramBuffers();
 
 #pragma omp parallel for
         for (hsize_t i = 0; i < depth; i++) {
             PROGRESS_DECIMATED(i, channelProgressStride, "|");
-            
+
             auto& indexXY = i;
             double chanMin = statsXY.minVals[indexXY];
             double chanMax = statsXY.maxVals[indexXY];
             double chanRange = chanMax - chanMin;
-            
+
             bool chanHist(std::isfinite(chanMin) && std::isfinite(chanMax) && chanRange > 0);
-            
+
             if (!chanHist && !cubeHist) {
                 continue; // skip the loop entirely
             }
-            
+
             auto doChannelHistogram = [&] (float val) {
                 // XY histogram
                 statsXY.accumulateHistogram(val, chanMin, chanRange, i);
             };
-            
+
             auto doCubeHistogram = [&] (float val) {
                 // Partial XYZ histogram
                 statsXYZ.accumulatePartialHistogram(val, cubeMin, cubeRange, i);
             };
-            
+
             auto doNothing = [&] (float val) {
                 UNUSED(val);
             };
-            
+
             std::function<void(float)> channelHistogramFunc = doChannelHistogram;
             std::function<void(float)> cubeHistogramFunc = doCubeHistogram;
-                        
+
             if (!chanHist) {
                 channelHistogramFunc = doNothing;
             }
-            
+
             if (!cubeHist) {
                 cubeHistogramFunc = doNothing;
             }
@@ -238,23 +239,23 @@ void FastConverter::copyAndCalculate() {
                 }
             } // end of XY loop
         } // end of parallel Z loop
-        
+
         if (depth > 1) {
             // Consolidate partial XYZ histograms into final histogram
             statsXYZ.consolidatePartialHistogram();
         }
-        
+
         PROGRESS(std::endl);
 
         DEBUG(std::cout << " Writing main and rotated datasets... " << std::flush;);
         PROGRESS("\tWrite data" << std::endl);
         TIMER(timer.start("Write"););
-                    
+
         std::vector<hsize_t> memDims = {depth, height, width};
         std::vector<hsize_t> count = trimAxes({1, depth, height, width}, N);
         std::vector<hsize_t> start = trimAxes({currentStokes, 0, 0, 0}, N);
 //         writeHdf5Data(standardDataSet, standardCube, memDims, count, start);
-        
+
         if (depth > 1) {
             // This all technically worked if we reused the standard filespace and memspace
             // But it's probably not a good idea to rely on two incorrect values cancelling each other out
@@ -267,17 +268,17 @@ void FastConverter::copyAndCalculate() {
         if (depth > 1) {
             DEBUG(std::cout << " Freeing memory from rotated dataset..." << std::flush;);
             TIMER(timer.start("Free"););
-            
+
             delete[] rotatedCube;
         }
-        
+
         // Fourth loop handles mipmaps
-        
+
         // In the fast algorithm, we keep one Stokes of mipmaps in memory at once and parallelise by channel
         DEBUG(std::cout << " Mipmaps..." << std::endl;);
         PROGRESS("\tMipmaps\t\t");
         TIMER(timer.start("Mipmaps"););
-        
+
 #pragma omp parallel for
         for (hsize_t c = 0; c < depth; c++) {
             PROGRESS_DECIMATED(c, channelProgressStride, "|");
@@ -292,33 +293,33 @@ void FastConverter::copyAndCalculate() {
             }
         } // end of mipmap loop
         PROGRESS(std::endl);
-        
+
         // Final mipmap calculation
         mipMaps.calculate();
-        
+
         TIMER(timer.start("Write"););
         PROGRESS("\tWrite stats & mipmaps" << std::endl);
-        
+
         // Write the mipmaps
-        mipMaps.write(currentStokes, 0);
-        
-        // Write the statistics                
-        statsXY.write({1, depth}, {currentStokes, 0});
-        
+        mipMaps.write(H5outputfile, currentStokes, 0);
+
+        // Write the statistics
+        statsXY.write(H5outputfile,{1, depth}, {currentStokes, 0});
+
         if (depth > 1) {
-            statsXYZ.write({1}, {currentStokes});
-            statsZ.write({1, height, width}, {currentStokes, 0, 0});
+            statsXYZ.write(H5outputfile, {1}, {currentStokes});
+            statsZ.write(H5outputfile, {1, height, width}, {currentStokes, 0, 0});
         }
-                
+
         // Clear the mipmaps before the next Stokes
         TIMER(timer.start("Mipmaps"););
         mipMaps.resetBuffers();
-        
+
     } // end of Stokes loop
-    
+
     // Free memory
     DEBUG(std::cout << "Freeing memory from main dataset... " << std::endl;);
     TIMER(timer.start("Free"););
-    
+
     delete[] standardCube;
 }
diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
index 25d91ca..b358ee0 100644
--- a/HDF5Wrapper.cc
+++ b/HDF5Wrapper.cc
@@ -1,5 +1,20 @@
 #include "HDF5Wrapper.h"
 
+// Constructor
+H5OutputFile::H5OutputFile()
+{
+    file_id = -1;
+#ifdef USEPARALLELHDF
+    parallel_access_id = -1;
+#endif
+}
+
+// Destructor closes the file if it's open
+H5OutputFile::~H5OutputFile()
+{
+    if(file_id >= 0) close();
+}
+
 void H5OutputFile::create(std::string filename, hid_t flag,
     int taskID, bool iparallelopen)
 {
@@ -150,8 +165,7 @@ void H5OutputFile::_get_attribute(std::vector<hid_t> &ids, const std::vector<std
 /// get attribute in file, storing relevant ids in vector
 void H5OutputFile::get_attribute(std::vector<hid_t> &ids, const std::string &name)
 {
-    std::vector<std::string> parts = _tokenize(name);
-    ids.push_back(file_id);
+    auto parts = _tokenize(name);
     _get_attribute(ids, parts);
 }
 
@@ -164,7 +178,7 @@ void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::string dset_
     }
     ids.push_back(dset_id);
 }
-/// get attributes parsing list of ids and vector of strings
+/// get dataset parsing list of ids and vector of strings
 void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::vector<std::string> &parts)
 {
     // This is the attribute name, so open it and store the id
@@ -194,8 +208,7 @@ void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::vector<std::
 /// get attribute in file, storing relevant ids in vector
 void H5OutputFile::get_dataset(std::vector<hid_t> &ids, const std::string &name)
 {
-    std::vector<std::string> parts = _tokenize(name);
-    ids.push_back(file_id);
+    auto parts = _tokenize(name);
     _get_dataset(ids, parts);
 }
 
@@ -216,16 +229,13 @@ void H5OutputFile::close_hdf_ids(std::vector<hid_t> &ids)
     }
 }
 
-/// read a scalar attribute
-template<typename T> void H5OutputFile::_do_read(const hid_t &attr, const hid_t &type, T &val)
+/// close open hids stored in vector
+void H5OutputFile::close_path(std::string path)
 {
-    if (hdf5_type(T{}) == H5T_C_S1)
-    {
-      _do_read_string(attr, type, val);
-    }
-    else {
-      H5Aread(attr, type, &val);
-    }
+    auto parts = _tokenize(path);
+    std::vector<hid_t> ids;
+    // _get_path(ids, parts);
+    close_hdf_ids(ids);
 }
 
 /// read a string attribute
@@ -244,64 +254,6 @@ void H5OutputFile::_do_read_string(const hid_t &attr, const hid_t &type, std::st
     val=std::string(buf.data());
 }
 
-/// read a vector attribute
-template<typename T> void H5OutputFile::_do_read_v(const hid_t &attr, const hid_t &type, std::vector<T> &val)
-{
-    hid_t space = H5Aget_space (attr);
-    int npoints = H5Sget_simple_extent_npoints(space);
-    val.resize(npoints);
-    H5Aread(attr, type, val.data());
-    H5Sclose(space);
-}
-
-template<typename T> const T H5OutputFile::read_attribute(const std::string &name) {
-    std::string attr_name;
-    T val;
-    hid_t type;
-    H5O_info_t object_info;
-    std::vector <hid_t> ids;
-    //traverse the file to get to the attribute, storing the ids of the
-    //groups, data spaces, etc that have been opened.
-    get_attribute(ids, name);
-    //now reverse ids and load attribute
-    reverse(ids.begin(),ids.end());
-    //determine hdf5 type of the array in memory
-    type = hdf5_type(T{});
-    // read the data
-    _do_read<T>(ids[0], type, val);
-    H5Aclose(ids[0]);
-    ids.erase(ids.begin());
-    //now have hdf5 ids traversed to get to desired attribute so move along to close all
-    //based on their object type
-    close_hdf_ids(ids);
-    return val;
-}
-
-//read vector attribute
-template<typename T> const std::vector<T> H5OutputFile::read_attribute_v(const std::string &name) {
-    std::string attr_name;
-    std::vector<T> val;
-    hid_t type;
-    H5O_info_t object_info;
-    std::vector <hid_t> ids;
-    //traverse the file to get to the attribute, storing the ids of the
-    //groups, data spaces, etc that have been opened.
-    get_attribute(ids, name);
-    //now reverse ids and load attribute
-    reverse(ids.begin(),ids.end());
-    //determine hdf5 type of the array in memory
-    type = hdf5_type(T{});
-    // read the data
-    _do_read_v<T>(ids[0], type, val);
-    H5Aclose(ids[0]);
-    ids.erase(ids.begin());
-    //now have hdf5 ids traversed to get to desired attribute so move along to close all
-    //based on their object type
-    close_hdf_ids(ids);
-    return val;
-}
-
-
 bool H5OutputFile::exists_attribute(const std::string &parent, const std::string &name) {
     std::string attr_name = parent+std::string("/")+name;
     std::vector <hid_t> ids;
@@ -348,95 +300,38 @@ bool H5OutputFile::exists_dataset(const std::string &parent, const std::string &
     return exists;
 }
 
-/// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
-/// the input data if not explicitly specified with the filetype_id parameter.
-template <typename T> void H5OutputFile::write_dataset(std::string name, hsize_t len, T *data,
-    hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_hyperslab, bool flag_collective)
-{
-    int rank = 1;
-    hsize_t dims[1] = {len};
-    if (memtype_id == -1) memtype_id = hdf5_type(T{});
-    write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective);
-}
-
-void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective)
+/// create a dataset
+hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id,
+  std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims,
+  bool flag_parallel, bool flag_hyperslab, bool flag_collective)
 {
 #ifdef USEPARALLELHDF
     MPI_Comm comm = mpi_comm_write;
     MPI_Info info = MPI_INFO_NULL;
 #endif
-    int rank = 1;
-    hsize_t dims[1] = {len};
-
-    hid_t memtype_id, filetype_id, dspace_id, dset_id, xfer_plist;
-    herr_t status, ret;
-    memtype_id = H5Tcopy (H5T_C_S1);
-    status = H5Tset_size (memtype_id, data.size());
-    filetype_id = H5Tcopy (H5T_C_S1);
-    status = H5Tset_size (filetype_id, data.size());
-
-    // Create the dataspace
-    dspace_id = H5Screate_simple(rank, dims, NULL);
-
-    // Create the dataset
-    dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
-        H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
-#ifdef USEPARALLELHDF
-    if (flag_parallel) {
-        // set up the collective transfer properties list
-        xfer_plist = H5Pcreate(H5P_DATASET_XFER);
-        if (xfer_plist < 0) io_error(std::string("Failed to set up parallel transfer: ")+name);
-        if (flag_collective) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
-        else ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT);
-        if (ret < 0) io_error(std::string("Failed to set up parallel transfer: ")+name);
-        // the result of above should be that all processors write to the same
-        // point of the hdf file.
-    }
-#endif
-    // Write the data
-    if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0)
-    io_error(std::string("Failed to write dataset: ")+name);
-
-    // Clean up (note that dtype_id is NOT a new object so don't need to close it)
-#ifdef USEPARALLELHDF
-    if (flag_parallel) H5Pclose(xfer_plist);
-#endif
-    H5Sclose(dspace_id);
-    H5Dclose(dset_id);
-}
-void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data,
-    hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective)
-{
-    int rank = 1;
-    hsize_t dims[1] = {len};
-    if (memtype_id == -1) {
-        throw std::runtime_error("Write data set called with void pointer but no type info passed.");
+    auto splitPath = _tokenize(path);
+    auto dsetname = splitPath.back();
+    splitPath.pop_back();
+    hid_t curr_id = file_id;
+    hid_t memspace_id, dspace_id, dset_id, prop_id = H5P_DEFAULT;
+    auto rank = dims.size();
+    std::vector<hsize_t> chunks = chunkDims;
+
+    /// not certain what to do with the open groups here.
+    for (auto& groupname : splitPath)
+    {
+      if (H5Lexists(curr_id, groupname.c_str(), H5P_DEFAULT) == 0)
+      {
+        curr_id = create_group(groupname);
+      }
+      else
+      {
+        curr_id = open_group(groupname);
+      }
     }
-    write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective);
-}
-
-
-/// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
-/// the input data if not explicitly specified with the filetype_id parameter.
-template <typename T> void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
-    hid_t memtype_id, hid_t filetype_id,
-    bool flag_parallel, bool flag_first_dim_parallel,
-    bool flag_hyperslab, bool flag_collective)
-{
-#ifdef USEPARALLELHDF
-    MPI_Comm comm = mpi_comm_write;
-    MPI_Info info = MPI_INFO_NULL;
-#endif
-    hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
-    std::vector<hsize_t> chunks(rank);
-
-    // Get HDF5 data type of the array in memory
-    if (memtype_id == -1) memtype_id = hdf5_type(T{});
-
-    // Determine type of the dataset to create
-    if(filetype_id < 0) filetype_id = memtype_id;
 
 #ifdef USEPARALLELHDF
+    /// will really have to update for MPI
     std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
     if (flag_parallel) {
         //if parallel hdf5 get the full extent of the data
@@ -475,31 +370,17 @@ template <typename T> void H5OutputFile::write_dataset_nd(std::string name, int
 #endif
     }
     // Only chunk datasets where we would have >1 chunk
-    int large_dataset = 0;
-    for(int i=0; i<rank; i++)
+    if(nonzero_size && !chunks.empty())
     {
 #ifdef USEPARALLELHDF
         if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+            for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], mpi_hdf_dims_tot[i]);
         }
         else {
-            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+            for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], dims[i]);
         }
 #else
-        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-#endif
-    }
-    if(nonzero_size && large_dataset)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
-        }
-        else {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
-        }
-#else
-        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+        for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], dims[i]);
 #endif
     }
 
@@ -513,85 +394,93 @@ template <typename T> void H5OutputFile::write_dataset_nd(std::string name, int
             dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
             //allocate the memory space
             //allocate the memory space
-            memspace_id = H5Screate_simple(rank, dims, NULL);
+            memspace_id = H5Screate_simple(rank, dims.data(), NULL);
         }
         else {
-            dspace_id = H5Screate_simple(rank, dims, NULL);
+            dspace_id = H5Screate_simple(rank, dims.data(), NULL);
             memspace_id = dspace_id;
         }
     }
     else {
-        dspace_id = H5Screate_simple(rank, dims, NULL);
+        dspace_id = H5Screate_simple(rank, dims.data(), NULL);
         memspace_id = dspace_id;
     }
 #else
-    dspace_id = H5Screate_simple(rank, dims, NULL);
+    dspace_id = H5Screate_simple(rank, dims.data(), NULL);
     memspace_id = dspace_id;
 #endif
 
-    // Dataset creation properties
-    prop_id = H5P_DEFAULT;
 #ifdef USEHDFCOMPRESSION
-    // this defines compression
-    if(nonzero_size && large_dataset)
-    {
-        prop_id = H5Pcreate(H5P_DATASET_CREATE);
-        H5Pset_layout(prop_id, H5D_CHUNKED);
-        H5Pset_chunk(prop_id, rank, chunks.data());
-        H5Pset_deflate(prop_id, HDFDEFLATE);
+    if (!chunk.empty()) {
+      prop_id = H5Pcreate(H5P_DATASET_CREATE);
+      H5Pset_layout(prop_id, H5D_CHUNKED);
+      H5Pset_chunk(prop_id, rank, chunks.data());
+      H5Pset_deflate(prop_id, HDFDEFLATE);
     }
 #endif
+
+    dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id,
+      H5P_DEFAULT, prop_id, H5P_DEFAULT);
+    return dset_id;
+}
+
+void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective)
+{
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+#endif
+    int rank = 1;
+    hsize_t dims[1] = {len};
+
+    hid_t memtype_id, filetype_id, dspace_id, dset_id, xfer_plist;
+    herr_t status, ret;
+    memtype_id = H5Tcopy (H5T_C_S1);
+    status = H5Tset_size (memtype_id, data.size());
+    filetype_id = H5Tcopy (H5T_C_S1);
+    status = H5Tset_size (filetype_id, data.size());
+
+    // Create the dataspace
+    dspace_id = H5Screate_simple(rank, dims, NULL);
+
     // Create the dataset
     dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
-        H5P_DEFAULT, prop_id, H5P_DEFAULT);
-    if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name);
-    H5Pclose(prop_id);
-
-    prop_id = H5P_DEFAULT;
+        H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
 #ifdef USEPARALLELHDF
     if (flag_parallel) {
         // set up the collective transfer properties list
-        prop_id = H5Pcreate(H5P_DATASET_XFER);
-        //if all tasks are participating in the writes
-        if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE);
-        else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT);
-        if (flag_hyperslab) {
-            H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL);
-            if (dims[0] == 0) {
-                H5Sselect_none(dspace_id);
-                H5Sselect_none(memspace_id);
-            }
-
-        }
-        if (mpi_hdf_dims_tot[0] > 0) {
-            // Write the data
-            ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
-            if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
-        }
-    }
-    else if (dims[0] > 0)
-    {
-        // Write the data
-        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
-        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
-    }
-
-#else
-    // Write the data
-    if (dims[0] > 0) {
-        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
-        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+        xfer_plist = H5Pcreate(H5P_DATASET_XFER);
+        if (xfer_plist < 0) io_error(std::string("Failed to set up parallel transfer: ")+name);
+        if (flag_collective) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+        else ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT);
+        if (ret < 0) io_error(std::string("Failed to set up parallel transfer: ")+name);
+        // the result of above should be that all processors write to the same
+        // point of the hdf file.
     }
 #endif
+    // Write the data
+    if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0)
+    io_error(std::string("Failed to write dataset: ")+name);
 
     // Clean up (note that dtype_id is NOT a new object so don't need to close it)
-    H5Pclose(prop_id);
 #ifdef USEPARALLELHDF
-    if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
+    if (flag_parallel) H5Pclose(xfer_plist);
 #endif
     H5Sclose(dspace_id);
     H5Dclose(dset_id);
 }
+
+void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data,
+    hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective)
+{
+    int rank = 1;
+    hsize_t dims[1] = {len};
+    if (memtype_id == -1) {
+        throw std::runtime_error("Write data set called with void pointer but no type info passed.");
+    }
+    write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective);
+}
+
 void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
     hid_t memtype_id, hid_t filetype_id,
     bool flag_parallel, bool flag_first_dim_parallel,
@@ -770,87 +659,3 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
     H5Sclose(dspace_id);
     H5Dclose(dset_id);
 }
-
-/// write an attribute
-template <typename T> void H5OutputFile::write_attribute(std::string parent, std::string name, std::vector<T> data)
-{
-    // Get HDF5 data type of the value to write
-    hid_t dtype_id = hdf5_type(data[0]);
-    hsize_t size = data.size();
-
-    // Open the parent object
-    hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
-    if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
-
-    // Create dataspace
-    hid_t dspace_id = H5Screate(H5S_SIMPLE);
-    hid_t dspace_extent  = H5Sset_extent_simple(dspace_id, 1, &size, NULL);
-
-    // Create attribute
-    hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
-    if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
-
-    // Write the attribute
-    if(H5Awrite(attr_id, dtype_id, data.data()) < 0)
-    io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
-
-    // Clean up
-    H5Aclose(attr_id);
-    H5Sclose(dspace_id);
-    H5Oclose(parent_id);
-}
-
-template <typename T> void H5OutputFile::write_attribute(std::string parent, std::string name, T data)
-{
-    // Get HDF5 data type of the value to write
-    hid_t dtype_id = hdf5_type(data);
-
-    // Open the parent object
-    hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
-    if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
-
-    // Create dataspace
-    hid_t dspace_id = H5Screate(H5S_SCALAR);
-
-    // Create attribute
-    hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
-    if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
-
-    // Write the attribute
-    if(H5Awrite(attr_id, dtype_id, &data) < 0)
-    io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
-
-    // Clean up
-    H5Aclose(attr_id);
-    H5Sclose(dspace_id);
-    H5Oclose(parent_id);
-}
-
-void H5OutputFile::write_attribute(std::string parent, std::string name, std::string data)
-{
-    // Get HDF5 data type of the value to write
-    hid_t dtype_id = H5Tcopy(H5T_C_S1);
-    if (data.size() == 0) data=" ";
-    H5Tset_size(dtype_id, data.size());
-    H5Tset_strpad(dtype_id, H5T_STR_NULLTERM);
-
-    // Open the parent object
-    hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
-    if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
-
-    // Create dataspace
-    hid_t dspace_id = H5Screate(H5S_SCALAR);
-
-    // Create attribute
-    hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
-    if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
-
-    // Write the attribute
-    if(H5Awrite(attr_id, dtype_id, data.c_str()) < 0)
-    io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
-
-    // Clean up
-    H5Aclose(attr_id);
-    H5Sclose(dspace_id);
-    H5Oclose(parent_id);
-}
diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h
index 2180a98..80fb846 100644
--- a/HDF5Wrapper.h
+++ b/HDF5Wrapper.h
@@ -70,11 +70,27 @@ class H5OutputFile
     /// get attribute id from tokenized string
     void _get_attribute(std::vector<hid_t> &ids, const std::vector<std::string> &parts);
     /// wrapper for reading scalar
-    template<typename T> void _do_read(const hid_t &attr, const hid_t &type, T &val);
+    template<typename T> void _do_read(const hid_t &attr, const hid_t &type, T &val)
+    {
+        if (hdf5_type(T{}) == H5T_C_S1)
+        {
+          _do_read_string(attr, type, val);
+        }
+        else {
+          H5Aread(attr, type, &val);
+        }
+    }
     /// wrapper for reading string
     void _do_read_string(const hid_t &attr, const hid_t &type, std::string &val);
     /// wrapper for reading vector
-    template<typename T> void _do_read_v(const hid_t &attr, const hid_t &type, std::vector<T> &val);
+    template<typename T> void _do_read_v(const hid_t &attr, const hid_t &type, std::vector<T> &val)
+    {
+        hid_t space = H5Aget_space (attr);
+        int npoints = H5Sget_simple_extent_npoints(space);
+        val.resize(npoints);
+        H5Aread(attr, type, val.data());
+        H5Sclose(space);
+    }
 
     /// get dataset id
     void _get_dataset(std::vector<hid_t> &ids, const std::string dset_name);
@@ -84,18 +100,9 @@ class H5OutputFile
 public:
 
     // Constructor
-    H5OutputFile() {
-        file_id = -1;
-#ifdef USEPARALLELHDF
-        parallel_access_id = -1;
-#endif
-    }
-
+    H5OutputFile();
     // Destructor closes the file if it's open
-    ~H5OutputFile()
-    {
-        if(file_id >= 0) close();
-    }
+    ~H5OutputFile();
 
     /// Create a new file
     void create(std::string filename, hid_t flag = H5F_ACC_TRUNC,
@@ -113,12 +120,18 @@ class H5OutputFile
             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
         return group_id;
     }
+    hid_t open_group(std::string groupname) {
+        hid_t group_id = H5Gopen(file_id, groupname.c_str(), H5P_DEFAULT);
+        return group_id;
+    }
     /// close group
     herr_t close_group(hid_t gid) {
         herr_t status = H5Gclose(gid);
         return status;
     }
 
+    /// close path
+    void close_path(std::string path);
     /// close hids stored in vector
     void close_hdf_ids(std::vector<hid_t> &ids);
 
@@ -165,25 +178,222 @@ class H5OutputFile
           H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
         return dset_id;
     }
+
+    template <typename T> hid_t create_dataset(std::string path, T&data,
+      std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims,
+      bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
+    {
+      return create_dataset(path, hdf5_type(T{}), dims, chunkDims,
+      flag_parallel, flag_hyperslab, flag_collective);
+    }
+    hid_t create_dataset(std::string path, hid_t datatype,
+      std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims,
+      bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true);
     /// close data set
     herr_t close_dataset(hid_t dset_id) {
         herr_t status = H5Dclose(dset_id);
         return status;
     }
-    /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
-    /// the input data if not explicitly specified with the filetype_id parameter.
-    template <typename T> void write_dataset(std::string name, hsize_t len, T *data,
-       hid_t memtype_id = -1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true);
-    void write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel = true, bool flag_collective = true);
+
+
+    //write 1D data sets of string or input data with defined data type
+    void write_dataset(std::string name, hsize_t len, std::string data,
+      bool flag_parallel = true, bool flag_collective = true);
     void write_dataset(std::string name, hsize_t len, void *data,
-       hid_t memtype_id=-1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true);
+       hid_t memtype_id=-1, hid_t filetype_id=-1,
+       bool flag_parallel = true, bool flag_first_dim_parallel = true,
+       bool flag_hyperslab = true, bool flag_collective = true);
+
+  /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
+  /// the input data if not explicitly specified with the filetype_id parameter.
+  /// template function so defined here
+  template <typename T> void write_dataset(std::string name, hsize_t len, T *data,
+     hid_t memtype_id = -1, hid_t filetype_id=-1,
+     bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
+  {
+     int rank = 1;
+     hsize_t dims[1] = {len};
+     if (memtype_id == -1) memtype_id = hdf5_type(T{});
+     write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective);
+  }
+
+  /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
+  /// the input data if not explicitly specified with the filetype_id parameter.
+  template <typename T> void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
+      hid_t memtype_id = -1, hid_t filetype_id = -1,
+      bool flag_parallel = true, bool flag_first_dim_parallel = true,
+      bool flag_hyperslab = true, bool flag_collective = true)
+  {
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+#endif
+    hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
+    std::vector<hsize_t> chunks(rank);
+
+    // Get HDF5 data type of the array in memory
+    if (memtype_id == -1) memtype_id = hdf5_type(T{});
+
+    // Determine type of the dataset to create
+    if(filetype_id < 0) filetype_id = memtype_id;
+
+#ifdef USEPARALLELHDF
+    std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
+    if (flag_parallel) {
+        //if parallel hdf5 get the full extent of the data
+        //this bit of code communicating information can probably be done elsewhere
+        //minimize number of mpi communications
+        for (auto i=0;i<rank;i++) dims_single[i]=dims[i];
+        MPI_Allgather(dims_single.data(), rank, MPI_UNSIGNED_LONG_LONG, mpi_hdf_dims.data(), rank, MPI_UNSIGNED_LONG_LONG, comm);
+        MPI_Allreduce(dims_single.data(), mpi_hdf_dims_tot.data(), rank, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm);
+        for (auto i=0;i<rank;i++) {
+            dims_offset[i] = 0;
+            if (flag_first_dim_parallel && i > 0) continue;
+            for (auto j=1;j<=ThisWriteTask;j++) {
+                dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1];
+            }
+        }
+        if (flag_first_dim_parallel && rank > 1) {
+            for (auto i=1; i<rank;i++) mpi_hdf_dims_tot[i] = dims[i];
+        }
+    }
+#endif
+
+    // Determine if going to compress data in chunks
+    // Only chunk non-zero size datasets
+    int nonzero_size = 1;
+    for(int i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
+        }
+        else {
+            if(dims[i]==0) nonzero_size = 0;
+        }
+#else
+        if(dims[i]==0) nonzero_size = 0;
+#endif
+    }
+    // Only chunk datasets where we would have >1 chunk
+    int large_dataset = 0;
+    for(int i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+        else {
+            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+#else
+        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+#endif
+    }
+    if(nonzero_size && large_dataset)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
+        }
+        else {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+        }
+#else
+        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+#endif
+    }
+
+    // Create the dataspace
+#ifdef USEPARALLELHDF
+    if (flag_parallel) {
+        //then all threads create the same simple data space
+        //so the meta information is the same
+        if (flag_hyperslab) {
+            //allocate the space spanning the file
+            dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
+            //allocate the memory space
+            //allocate the memory space
+            memspace_id = H5Screate_simple(rank, dims, NULL);
+        }
+        else {
+            dspace_id = H5Screate_simple(rank, dims, NULL);
+            memspace_id = dspace_id;
+        }
+    }
+    else {
+        dspace_id = H5Screate_simple(rank, dims, NULL);
+        memspace_id = dspace_id;
+    }
+#else
+    dspace_id = H5Screate_simple(rank, dims, NULL);
+    memspace_id = dspace_id;
+#endif
+
+    // Dataset creation properties
+    prop_id = H5P_DEFAULT;
+#ifdef USEHDFCOMPRESSION
+    // this defines compression
+    if(nonzero_size && large_dataset)
+    {
+        prop_id = H5Pcreate(H5P_DATASET_CREATE);
+        H5Pset_layout(prop_id, H5D_CHUNKED);
+        H5Pset_chunk(prop_id, rank, chunks.data());
+        H5Pset_deflate(prop_id, HDFDEFLATE);
+    }
+#endif
+    // Create the dataset
+    dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
+        H5P_DEFAULT, prop_id, H5P_DEFAULT);
+    if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name);
+    H5Pclose(prop_id);
+
+    prop_id = H5P_DEFAULT;
+#ifdef USEPARALLELHDF
+    if (flag_parallel) {
+        // set up the collective transfer properties list
+        prop_id = H5Pcreate(H5P_DATASET_XFER);
+        //if all tasks are participating in the writes
+        if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE);
+        else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT);
+        if (flag_hyperslab) {
+            H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL);
+            if (dims[0] == 0) {
+                H5Sselect_none(dspace_id);
+                H5Sselect_none(memspace_id);
+            }
+
+        }
+        if (mpi_hdf_dims_tot[0] > 0) {
+            // Write the data
+            ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+            if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+        }
+    }
+    else if (dims[0] > 0)
+    {
+        // Write the data
+        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    }
+
+#else
+    // Write the data
+    if (dims[0] > 0) {
+        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    }
+#endif
+
+    // Clean up (note that dtype_id is NOT a new object so don't need to close it)
+    H5Pclose(prop_id);
+#ifdef USEPARALLELHDF
+    if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
+#endif
+    H5Sclose(dspace_id);
+    H5Dclose(dset_id);
+  }
 
-    /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
-    /// the input data if not explicitly specified with the filetype_id parameter.
-    template <typename T> void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
-        hid_t memtype_id = -1, hid_t filetype_id = -1,
-        bool flag_parallel = true, bool flag_first_dim_parallel = true,
-        bool flag_hyperslab = true, bool flag_collective = true);
     void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
         hid_t memtype_id = -1, hid_t filetype_id=-1,
         bool flag_parallel = true, bool flag_first_dim_parallel = true,
@@ -196,16 +406,142 @@ class H5OutputFile
 
     /// get an attribute with full path given by name
     void get_attribute(std::vector<hid_t> &ids, const std::string &name);
+
     /// read scalar attribute
-    template<typename T> const T read_attribute(const std::string &name);
+    template<typename T> const T read_attribute(const std::string &name)
+    {
+        std::string attr_name;
+        T val;
+        hid_t type;
+        H5O_info_t object_info;
+        std::vector <hid_t> ids;
+        //traverse the file to get to the attribute, storing the ids of the
+        //groups, data spaces, etc that have been opened.
+        get_attribute(ids, name);
+        //now reverse ids and load attribute
+        reverse(ids.begin(),ids.end());
+        //determine hdf5 type of the array in memory
+        type = hdf5_type(T{});
+        // read the data
+        _do_read<T>(ids[0], type, val);
+        H5Aclose(ids[0]);
+        ids.erase(ids.begin());
+        //now have hdf5 ids traversed to get to desired attribute so move along to close all
+        //based on their object type
+        close_hdf_ids(ids);
+        return val;
+    }
     /// read vector attribute
-    template<typename T> const std::vector<T> read_attribute_v(const std::string &name);
+    template<typename T> const std::vector<T> read_attribute_v(const std::string &name)
+    {
+        std::string attr_name;
+        std::vector<T> val;
+        hid_t type;
+        H5O_info_t object_info;
+        std::vector <hid_t> ids;
+        //traverse the file to get to the attribute, storing the ids of the
+        //groups, data spaces, etc that have been opened.
+        get_attribute(ids, name);
+        //now reverse ids and load attribute
+        reverse(ids.begin(),ids.end());
+        //determine hdf5 type of the array in memory
+        type = hdf5_type(T{});
+        // read the data
+        _do_read_v<T>(ids[0], type, val);
+        H5Aclose(ids[0]);
+        ids.erase(ids.begin());
+        //now have hdf5 ids traversed to get to desired attribute so move along to close all
+        //based on their object type
+        close_hdf_ids(ids);
+        return val;
+    }
+
     /// sees if attribute exits
     bool exists_attribute(const std::string &parent, const std::string &name);
-    /// write an attribute
-    template <typename T> void write_attribute(std::string parent, std::string name, std::vector<T> data);
-    template <typename T> void write_attribute(std::string parent, std::string name, T data);
-    void write_attribute(std::string parent, std::string name, std::string data);
+
+    /// write an attribute, not that since these are template function, define here in the header
+    template <typename T> void write_attribute(const std::string &parent, const std::string &name, const std::vector<T> &data)
+    {
+        // Get HDF5 data type of the value to write
+        hid_t dtype_id = hdf5_type(data[0]);
+        hsize_t size = data.size();
+
+        // Open the parent object
+        hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
+        if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
+
+        // Create dataspace
+        hid_t dspace_id = H5Screate(H5S_SIMPLE);
+        hid_t dspace_extent  = H5Sset_extent_simple(dspace_id, 1, &size, NULL);
+
+        // Create attribute
+        hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
+        if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
+
+        // Write the attribute
+        if(H5Awrite(attr_id, dtype_id, data.data()) < 0)
+        io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
+
+        // Clean up
+        H5Aclose(attr_id);
+        H5Sclose(dspace_id);
+        H5Oclose(parent_id);
+    }
+
+    template <typename T> void write_attribute(const std::string &parent, const std::string &name, const T &data)
+    {
+        // Get HDF5 data type of the value to write
+        hid_t dtype_id = hdf5_type(data);
+
+        // Open the parent object
+        hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
+        if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
+
+        // Create dataspace
+        hid_t dspace_id = H5Screate(H5S_SCALAR);
+
+        // Create attribute
+        hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
+        if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
+
+        // Write the attribute
+        if(H5Awrite(attr_id, dtype_id, &data) < 0)
+        io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
+
+        // Clean up
+        H5Aclose(attr_id);
+        H5Sclose(dspace_id);
+        H5Oclose(parent_id);
+    }
+
+    void write_attribute(const std::string parent, const std::string &name, std::string data)
+    {
+        // Get HDF5 data type of the value to write
+        hid_t dtype_id = H5Tcopy(H5T_C_S1);
+        if (data.size() == 0) data=" ";
+        H5Tset_size(dtype_id, data.size());
+        H5Tset_strpad(dtype_id, H5T_STR_NULLTERM);
+
+        // Open the parent object
+        hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT);
+        if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name);
+
+        // Create dataspace
+        hid_t dspace_id = H5Screate(H5S_SCALAR);
+
+        // Create attribute
+        hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT);
+        if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent);
+
+        // Write the attribute
+        if(H5Awrite(attr_id, dtype_id, data.c_str()) < 0)
+        io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent);
+
+        // Clean up
+        H5Aclose(attr_id);
+        H5Sclose(dspace_id);
+        H5Oclose(parent_id);
+    }
 
 };
 
diff --git a/MipMap.cc b/MipMap.cc
index 35ffb5c..8708c12 100644
--- a/MipMap.cc
+++ b/MipMap.cc
@@ -11,46 +11,49 @@ MipMap::~MipMap() {
     }
 }
 
-void MipMap::createDataset(H5::Group group, const std::vector<hsize_t>& chunkDims) {
-    H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
-    floatType.setOrder(H5T_ORDER_LE);
-    
-    std::ostringstream mipMapName;
-    mipMapName << "MipMaps/DATA/DATA_XY_" << mip;
-    
+// void MipMap::createDataset(H5::Group group, const std::vector<hsize_t>& chunkDims) {
+//     H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
+//     floatType.setOrder(H5T_ORDER_LE);
+//
+//     std::ostringstream mipMapName;
+//     mipMapName << "MipMaps/DATA/DATA_XY_" << mip;
+//
 //     if (useChunks(datasetDims)) {
 //         createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims, chunkDims);
 //     } else {
 //         createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims);
 //     }
-//???
-    
+//
+// }
+
+void MipMap::createDataset(H5OutputFile &H5outputfile, hid_t gid, const std::vector<hsize_t>& chunkDims) {
 }
 
 void MipMap::createBuffers(std::vector<hsize_t>& bufferDims) {
     bufferSize = product(bufferDims);
-    
+
     vals = new double[bufferSize];
     count = new int[bufferSize];
-    
+
     resetBuffers();
-    
+
     this->bufferDims = bufferDims;
-    
+
     auto N = bufferDims.size();
-    
+
     width = bufferDims[N - 1];
     height = bufferDims[N - 2];
     depth = N > 2 ? bufferDims[N - 3] : 1;
-    stokes = N > 3 ? bufferDims[N - 4] : 1;        
+    stokes = N > 3 ? bufferDims[N - 4] : 1;
 }
 
-void MipMap::write(hsize_t stokesOffset, hsize_t channelOffset) {
+void MipMap::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset) {
     int N = datasetDims.size();
     std::vector<hsize_t> count = trimAxes({1, depth, height, width}, N);
     std::vector<hsize_t> start = trimAxes({stokesOffset, channelOffset, 0, 0}, N);
-    
-    writeHdf5Data(dataset, vals, bufferDims, count, start);
+
+    // writeHdf5Data(dataset, vals, bufferDims, count, start);
+    //???
 }
 
 void MipMap::resetBuffers() {
@@ -64,7 +67,7 @@ MipMaps::MipMaps(std::vector<hsize_t> standardDims, const std::vector<hsize_t>&
     auto dims = standardDims;
     int N = dims.size();
     int mip = 1;
-    
+
     // We keep going until we have a mipmap which fits entirely within the minimum size
     while (dims[N - 1] > MIN_MIPMAP_SIZE || dims[N - 2] > MIN_MIPMAP_SIZE) {
         mip *= 2;
@@ -79,7 +82,7 @@ hsize_t MipMaps::size(const std::vector<hsize_t>& standardDims, const std::vecto
     auto datasetDims = standardDims;
     auto bufferDims = standardBufferDims;
     int N = standardDims.size();
-    
+
     while (datasetDims[N - 1] > MIN_MIPMAP_SIZE || datasetDims[N - 2] > MIN_MIPMAP_SIZE) {
         mip *= 2;
         datasetDims = mipDims(datasetDims, 2);
@@ -94,10 +97,10 @@ hsize_t MipMaps::size(const std::vector<hsize_t>& standardDims, const std::vecto
 //     for (auto& mipMap : mipMaps) {
 //         mipMap.createDataset(group, chunkDims);
 //     }
-//     
+//
 // }
 
-void MipMaps::createDatasets(H5OutputFile H5outputfile , hid_t gid) {
+void MipMaps::createDatasets(H5OutputFile &H5outputfile, hid_t gid) {
     for (auto& mipMap : mipMaps) {
         mipMap.createDataset(H5outputfile, gid, chunkDims);
     }
@@ -110,9 +113,9 @@ void MipMaps::createBuffers(const std::vector<hsize_t>& standardBufferDims) {
     }
 }
 
-void MipMaps::write(hsize_t stokesOffset, hsize_t channelOffset) {
+void MipMaps::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset) {
     for (auto& mipMap : mipMaps) {
-        mipMap.write(stokesOffset, channelOffset);
+        mipMap.write(H5outputfile, stokesOffset, channelOffset);
     }
 }
 
diff --git a/MipMap.h b/MipMap.h
index d9195cc..b0e776f 100644
--- a/MipMap.h
+++ b/MipMap.h
@@ -3,6 +3,7 @@
 
 #include "common.h"
 #include "Util.h"
+#include "HDF5Wrapper.h"
 
 // A single mipmap
 struct MipMap {
@@ -30,13 +31,14 @@ struct MipMap {
         }
     }
 
-    void write(hsize_t stokesOffset, hsize_t channelOffset);
+    void write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset);
     void resetBuffers();
 
     std::vector<hsize_t> datasetDims;
     int mip;
 
-    H5::DataSet dataset;
+    // H5::DataSet dataset;
+    hid_t dset_id;
 
     std::vector<hsize_t> bufferDims;
     hsize_t bufferSize;
@@ -77,7 +79,7 @@ struct MipMaps {
     // TODO if we ever want a tiled mipmap calculation
     // we'll need to implement options to pass in custom buffer dims
     // and additional x and y offsets
-    void write(hsize_t stokesOffset, hsize_t channelOffset);
+    void write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset);
     void resetBuffers();
 
     std::vector<hsize_t> standardDims;
diff --git a/SlowConverter.cc b/SlowConverter.cc
index ae09122..7cd0332 100644
--- a/SlowConverter.cc
+++ b/SlowConverter.cc
@@ -8,22 +8,22 @@ void SlowConverter::reportMemoryUsage() {
     sizes["Main dataset"] = height * width * sizeof(float);
     sizes["Mipmaps"] = MipMaps::size(standardDims, {1, height, width});
     sizes["XY stats"] = Stats::size({depth}, numBins);
-    
+
     if (depth > 1) {
         sizes["Rotation"] = 2 * product(trimAxes({stokes, depth, TILE_SIZE, TILE_SIZE}, N)) * sizeof(float);
         sizes["XYZ stats"] = Stats::size({}, numBins, depth);
         sizes["Z stats"] = Stats::size({TILE_SIZE, TILE_SIZE});
     }
-    
+
     hsize_t total(0);
-    
+
     std::cout << "APPROXIMATE MEMORY REQUIREMENTS:" << std::endl;
-    
+
     for (auto& kv : sizes) {
         std::cout << kv.first << ":\t" << kv.second * 1e-9 << " GB" << std::endl;
         total += kv.second;
     }
-    
+
     std::string note = "";
     if (depth > 1) {
         total -= std::min(sizes["Main dataset"], sizes["Rotation"] + sizes["Z stats"]);
@@ -37,35 +37,35 @@ void SlowConverter::copyAndCalculate() {
     const hsize_t channelProgressStride = std::max((hsize_t)1, (hsize_t)(depth / 100));
     hsize_t numTiles = std::ceil(width / TILE_SIZE) * std::ceil(height / TILE_SIZE);
     const hsize_t tileProgressStride = std::max((hsize_t)1, (hsize_t)(numTiles / 100));
-    
+
     // Allocate one channel at a time, and no swizzled data
     hsize_t cubeSize = height * width;
     TIMER(timer.start("Allocate"););
     standardCube = new float[cubeSize];
-    
+
     // Allocate one stokes of stats at a time
     statsXY.createBuffers({depth});
-    
+
     if (depth > 1) {
         statsXYZ.createBuffers({}, depth);
     }
-    
+
     mipMaps.createBuffers({1, height, width});
 
     std::vector<hsize_t> count = trimAxes({1, 1, height, width}, N);
     std::vector<hsize_t> memDims = {height, width};
-    
+
     std::string timerLabelStatsMipmaps = depth > 1 ? "XY and XYZ statistics and mipmaps" : "XY statistics and mipmaps";
 
-    
+
     for (unsigned int s = 0; s < stokes; s++) {
         DEBUG(std::cout << "Processing Stokes " << s << "... " << std::endl;);
         PROGRESS("Stokes " << s << ":" << std::endl);
-        
+
         PROGRESS("\tMain loop\t");
-        
+
         StatsCounter counterXYZ;
-        
+
         for (hsize_t c = 0; c < depth; c++) {
             PROGRESS_DECIMATED(c, channelProgressStride, "|");
             // read one channel
@@ -73,80 +73,80 @@ void SlowConverter::copyAndCalculate() {
             DEBUG(std::cout << " Reading main dataset..." << std::flush;);
             TIMER(timer.start("Read"););
             readFitsData(inputFilePtr, c, s, cubeSize, standardCube);
-            
+
             // Write the standard dataset
-            
+
             DEBUG(std::cout << " Writing main dataset..." << std::flush;);
             TIMER(timer.start("Write"););
-            
+
             std::vector<hsize_t> start = trimAxes({s, c, 0, 0}, N);
 //             writeHdf5Data(standardDataSet, standardCube, memDims, count, start);
-            
+
             DEBUG(std::cout << " Accumulating XY stats and mipmaps..." << std::flush;);
             TIMER(timer.start(timerLabelStatsMipmaps););
 
             StatsCounter counterXY;
             auto indexXY = c;
             std::function<void(float)> accumulate;
-            
+
             auto lazy_accumulate = [&] (float val) {
                 counterXY.accumulateFiniteLazy(val);
             };
-            
+
             auto first_accumulate = [&] (float val) {
                 counterXY.accumulateFiniteLazyFirst(val);
                 accumulate = lazy_accumulate;
             };
-            
+
             accumulate = first_accumulate;
-            
+
             for (hsize_t y = 0; y < height; y++) {
                 for (hsize_t x = 0; x < width; x++) {
                     auto pos = y * width + x; // relative to channel slice
                     auto& val = standardCube[pos];
-                                        
+
                     if (std::isfinite(val)) {
                         // XY statistics
                         accumulate(val);
-                        
+
                         // Accumulate mipmaps
                         mipMaps.accumulate(val, x, y, 0);
-                        
+
                     } else {
                         counterXY.accumulateNonFinite();
                     }
                 }
             } // end of XY loop
-            
+
             // Final correction of XY min and max
             DEBUG(std::cout << " Final XY stats..." << std::flush;);
             statsXY.copyStatsFromCounter(indexXY, height * width, counterXY);
-            
+
             // Accumulate XYZ statistics
             if (depth > 1) {
                 DEBUG(std::cout << " Accumulating XYZ stats..." << std::flush;);
                 statsXY.accumulateStatsToCounter(counterXYZ, indexXY);
             }
-            
+
             // Final mipmap calculation
             DEBUG(std::cout << " Final mipmaps..." << std::flush;);
             mipMaps.calculate();
-            
-            
+
+
             // Write the mipmaps
             DEBUG(std::cout << " Writing mipmaps..." << std::flush;);
             TIMER(timer.start("Write"););
-            mipMaps.write(s, c);
-            
+            mipMaps.write(H5outputfile, s, c);
+
             // Reset mipmaps before next channel
             DEBUG(std::cout << " Resetting mipmap objects..." << std::endl;);
             TIMER(timer.start(timerLabelStatsMipmaps););
             mipMaps.resetBuffers();
-            
+
         } // end of first channel loop
-        
+
         PROGRESS(std::endl);
-        
+
         if (depth > 1) {
             // Final correction of XYZ min and max
             DEBUG(std::cout << " Final XYZ stats..." << std::flush;);
@@ -154,81 +154,81 @@ void SlowConverter::copyAndCalculate() {
             TIMER(timer.start(timerLabelStatsMipmaps););
             statsXYZ.copyStatsFromCounter(0, depth * height * width, counterXYZ);
         }
-        
+
         // XY and XYZ histograms
         // We need a second pass over all channels because we need cube min and max (and channel min and max per channel)
         // We do the second pass backwards to take advantage of caching
         DEBUG(std::cout << " Histograms..." << std::endl;);
         PROGRESS("\tHistograms\t");
         TIMER(timer.start("Histograms"););
-        
+
         double cubeMin;
         double cubeMax;
         double cubeRange;
         bool cubeHist(false);
-        
+
         if (depth > 1) {
             cubeMin = statsXYZ.minVals[0];
             cubeMax = statsXYZ.maxVals[0];
             cubeRange = cubeMax - cubeMin;
             cubeHist = std::isfinite(cubeMin) && std::isfinite(cubeMax) && cubeRange > 0;
         }
-        
+
         statsXY.clearHistogramBuffers();
         statsXYZ.clearHistogramBuffers();
-        
+
         DEBUG(std::cout << "+ Will " << (cubeHist ? "" : "not ") << "calculate cube histogram." << std::endl;);
-        
+
         for (hsize_t c = depth; c-- > 0; ) {
             DEBUG(std::cout << "+ Processing channel " << c << "... " << std::flush;);
             PROGRESS_DECIMATED(c, channelProgressStride, "|");
             auto indexXY = c;
-                            
+
             double chanMin = statsXY.minVals[indexXY];
             double chanMax = statsXY.maxVals[indexXY];
             double chanRange = chanMax - chanMin;
-            
+
             bool chanHist(std::isfinite(chanMin) && std::isfinite(chanMax) && chanRange > 0);
             DEBUG(std::cout << " Will " << (chanHist ? "" : "not ") << "calculate channel histogram." << std::flush;);
-            
+
             if (!chanHist && !cubeHist) {
                 continue;
             }
-            
+
             auto doChannelHistogram = [&] (float val) {
                 // XY histogram
                 statsXY.accumulateHistogram(val, chanMin, chanRange, c);
             };
-            
+
             auto doCubeHistogram = [&] (float val) {
                 // XYZ histogram
                 statsXYZ.accumulateHistogram(val, cubeMin, cubeRange, 0);
             };
-            
+
             auto doNothing = [&] (float val) {
                 UNUSED(val);
             };
-            
+
             std::function<void(float)> channelHistogramFunc = doChannelHistogram;
             std::function<void(float)> cubeHistogramFunc = doCubeHistogram;
-            
+
             if (!chanHist) {
                 channelHistogramFunc = doNothing;
             }
-            
+
             if (!cubeHist) {
                 cubeHistogramFunc = doNothing;
             }
-            
+
             // read one channel
             DEBUG(std::cout << " Reading main dataset..." << std::flush;);
             TIMER(timer.start("Read"););
-            
+
             readFitsData(inputFilePtr, c, s, cubeSize, standardCube);
 
             DEBUG(std::cout << " Calculating histogram(s)..." << std::endl;);
             TIMER(timer.start("Histograms"););
-            
+
             for (hsize_t p = 0; p < width * height; p++) {
                 auto& val = standardCube[p];
                     if (std::isfinite(val)) {
@@ -237,94 +237,94 @@ void SlowConverter::copyAndCalculate() {
                     }
             } // end of XY loop
         } // end of second channel loop (XY and XYZ histograms)
-        
+
         PROGRESS(std::endl);
-        
+
         // Write the statistics
         TIMER(timer.start("Write"););
         PROGRESS("\tWrite stats & mipmaps" << std::endl);
-                
-        statsXY.write({1, depth}, {s, 0});
-        
+
+        statsXY.write(H5outputfile, {1, depth}, {s, 0});
+
         if (depth > 1) {
-            statsXYZ.write({1}, {s});
+            statsXYZ.write(H5outputfile, {1}, {s});
         }
-    
+
     } // end of stokes
-    
+
     // Free memory
     DEBUG(std::cout << "Freeing memory from main dataset... " << std::endl;);
     TIMER(timer.start("Free"););
-    
+
     delete[] standardCube;
-            
+
     // Swizzle
     if (depth > 1) {
         DEBUG(std::cout << "Performing tiled rotation." << std::endl;);
         PROGRESS("Tiled rotation & Z stats" << std::endl);
         TIMER(timer.start("Allocate"););
-        
+
         hsize_t sliceSize = product(trimAxes({stokes, depth, TILE_SIZE, TILE_SIZE}, N));
         float* standardSlice = new float[sliceSize];
         float* rotatedSlice = new float[sliceSize];
-        
+
         statsZ.createBuffers({TILE_SIZE, TILE_SIZE});
-        
+
         for (unsigned int s = 0; s < stokes; s++) {
             DEBUG(std::cout << "Processing Stokes " << s << "..." << std::endl;);
             PROGRESS("\tStokes " << s << "\t");
-            
+
             hsize_t tileCount(0);
-            
+
             for (hsize_t xOffset = 0; xOffset < width; xOffset += TILE_SIZE) {
                 for (hsize_t yOffset = 0; yOffset < height; yOffset += TILE_SIZE) {
                     tileCount++;
                     hsize_t xSize = std::min(TILE_SIZE, width - xOffset);
                     hsize_t ySize = std::min(TILE_SIZE, height - yOffset);
-                    
+
                     DEBUG(std::cout << "+ Processing tile slice at " << xOffset << ", " << yOffset << "..." << std::flush;);
                     PROGRESS_DECIMATED(tileCount, tileProgressStride, "#");
-                    
+
                     // read tile slice
                     DEBUG(std::cout << " Reading main dataset..." << std::flush;);
                     TIMER(timer.start("Read"););
-                    
+
                     auto standardMemDims = trimAxes({1, depth, ySize, xSize}, N);
                     auto standardCount = trimAxes({1, depth, ySize, xSize}, N);
                     auto standardStart = trimAxes({s, 0, yOffset, xOffset}, N);
-                    
+
 //                     readHdf5Data(standardDataSet, standardSlice, standardMemDims, standardCount, standardStart);
-                    
+
                     // rotate tile slice
                     DEBUG(std::cout << " Calculating rotation..." << std::flush;);
                     TIMER(timer.start("Rotation"););
-                    
+
                     for (hsize_t i = 0; i < depth; i++) {
                         for (hsize_t j = 0; j < ySize; j++) {
                             for (hsize_t k = 0; k < xSize; k++) {
                                 auto sourceIndex = k + xSize * j + (ySize * xSize) * i;
                                 auto& val = standardSlice[sourceIndex];
-                                
+
                                 // rotation
                                 auto destIndex = i + depth * j + (ySize * depth) * k;
                                 rotatedSlice[destIndex] = val;
                             }
                         }
                     }
-                    
-                    // A separate pass over the same slice depth-last 
+
+                    // A separate pass over the same slice depth-last
                     DEBUG(std::cout << " Calculating Z statistics..." << std::flush;);
                     TIMER(timer.start("Z statistics"););
-                    
+
                     for (hsize_t j = 0; j < ySize; j++) {
                         for (hsize_t k = 0; k < xSize; k++) {
                             StatsCounter counterZ;
                             auto indexZ = k + xSize * j;
-                            
+
                             for (hsize_t i = 0; i < depth; i++) {
                                 auto sourceIndex = k + xSize * j + (ySize * xSize) * i;
                                 auto& val = standardSlice[sourceIndex];
-                                
+
                                 if (std::isfinite(val)) {
                                     // Not lazy; too much risk of encountering an ascending / descending sequence.
                                     counterZ.accumulateFinite(val);
@@ -332,29 +332,29 @@ void SlowConverter::copyAndCalculate() {
                                     counterZ.accumulateNonFinite();
                                 }
                             }
-                            
+
                             statsZ.copyStatsFromCounter(indexZ, depth, counterZ);
                         }
                     }
-                    
+
                     // write tile slice
                     DEBUG(std::cout << " Writing rotated dataset..." << std::endl;);
                     TIMER(timer.start("Write"););
-                    
+
                     auto swizzledMemDims = trimAxes({1, xSize, ySize, depth}, N);
                     auto swizzledCount = trimAxes({1, xSize, ySize, depth}, N);
                     auto swizzledStart = trimAxes({s, xOffset, yOffset, 0}, N);
-                    
+
 //                     writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart);
-                    
+
                     DEBUG(std::cout << " Writing Z statistics..." << std::endl;);
                     // write Z statistics
-                    statsZ.write({ySize, xSize}, {1, ySize, xSize}, {s, yOffset, xOffset});
+                    statsZ.write(H5outputfile, {ySize, xSize}, {1, ySize, xSize}, {s, yOffset, xOffset});
                 }
             }
             PROGRESS(std::endl);
         }
-        
+
         TIMER(timer.start("Free"););
         DEBUG(std::cout << "Freeing memory from main and rotated dataset slices... " << std::endl;);
         delete[] standardSlice;
diff --git a/Stats.cc b/Stats.cc
index f0ff618..a798e3b 100644
--- a/Stats.cc
+++ b/Stats.cc
@@ -1,5 +1,7 @@
 #include "Stats.h"
 
+Stats::Stats(){};
+
 Stats::Stats(const std::vector<hsize_t>& basicDatasetDims, hsize_t numBins) : basicDatasetDims(basicDatasetDims), numBins(numBins), partialHistMultiplier(0) {
 }
 
@@ -66,30 +68,31 @@ void Stats::clearHistogramBuffers() {
     memset(partialHistograms, 0, sizeof(int64_t) * statsSize * numBins * partialHistMultiplier);
 }
 
-void Stats::write() {
-    writeBasic(fullBasicBufferDims);
+void Stats::write(H5OutputFile &H5outputfile) {
+    writeBasic(H5outputfile, fullBasicBufferDims);
 
     if (numBins) {
-        writeHistogram(fullBasicBufferDims);
+        writeHistogram(H5outputfile, fullBasicBufferDims);
     }
 }
 
-void Stats::write(const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-    write(fullBasicBufferDims, count, start);
+void Stats::write(H5OutputFile &H5outputfile, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+    write(H5outputfile, fullBasicBufferDims, count, start);
 }
 
-void Stats::write(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+void Stats::write(H5OutputFile &H5outputfile, const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
     auto basicN = basicDatasetDims.size();
-    writeBasic(basicBufferDims, trimAxes(count, basicN), trimAxes(start, basicN));
+    writeBasic(H5outputfile, basicBufferDims, trimAxes(count, basicN), trimAxes(start, basicN));
 
     if (numBins) {
         auto histN = basicN + 1;
-        writeHistogram(basicBufferDims, trimAxes(extend(count, {numBins}), histN), trimAxes(extend(start, {0}), histN));
+        writeHistogram(H5outputfile, basicBufferDims, trimAxes(extend(count, {numBins}), histN), trimAxes(extend(start, {0}), histN));
     }
 }
 
 // void Stats::writeBasic(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-void Stats::writeBasic(H5OutputFile H5outputfile, std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+void Stats::writeBasic(H5OutputFile &H5outputfile, const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start)
+{
 //     writeHdf5Data(minDset, minVals, basicBufferDims, count, start);
 //     writeHdf5Data(maxDset, maxVals, basicBufferDims, count, start);
 //     writeHdf5Data(sumDset, sums, basicBufferDims, count, start);
@@ -98,6 +101,6 @@ void Stats::writeBasic(H5OutputFile H5outputfile, std::vector<hsize_t>& basicBuf
 }
 
 // void Stats::writeHistogram(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
-void Stats::writeHistogram(H5OutputFile H5outputfile, const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
+void Stats::writeHistogram(H5OutputFile &H5outputfile, const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
 //     writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start);
 }
diff --git a/Stats.h b/Stats.h
index a244a9d..09b7528 100644
--- a/Stats.h
+++ b/Stats.h
@@ -106,11 +106,11 @@ struct Stats {
     }
 
     // Writing
-    void write();
-    void write(const std::vector<hsize_t>& count, const std::vector<hsize_t>& start);
-    void write(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start);
-    void writeBasic(const std::vector<hsize_t>& basicBufferDims = EMPTY_DIMS, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
-    void writeHistogram(const std::vector<hsize_t>& basicBufferDims = EMPTY_DIMS, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
+    void write(H5OutputFile &H5outputfile);
+    void write(H5OutputFile &H5outputfile, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start);
+    void write(H5OutputFile &H5outputfile, const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start);
+    void writeBasic(H5OutputFile &H5outputfile, const std::vector<hsize_t>& basicBufferDims = EMPTY_DIMS, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
+    void writeHistogram(H5OutputFile &H5outputfile, const std::vector<hsize_t>& basicBufferDims = EMPTY_DIMS, const std::vector<hsize_t>& count = EMPTY_DIMS, const std::vector<hsize_t>& start = EMPTY_DIMS);
 
     // Dataset dimensions
     std::vector<hsize_t> basicDatasetDims;

From 3c793455f2d4e0417bcd0be929f1b7f8c7b78301 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Mon, 3 Aug 2020 11:59:26 +0800
Subject: [PATCH 04/13] Update to HDF5 Wrapper API

Added in extra checks for creating groups, datasets, create links (which still needs work) and some code clean. Also updated dataset creation in Stats and MipMap.
---
 Converter.cc   | 15 ++++++---
 HDF5Wrapper.cc | 88 +++++++++++++++++++++++++++++++++++++++++++++++---
 HDF5Wrapper.h  | 27 +++++++++++++---
 MipMap.cc      | 14 ++++++--
 MipMap.h       |  4 +--
 Stats.cc       |  9 +++++-
 Stats.h        | 12 +++----
 7 files changed, 142 insertions(+), 27 deletions(-)

diff --git a/Converter.cc b/Converter.cc
index b81a947..66767ae 100644
--- a/Converter.cc
+++ b/Converter.cc
@@ -74,7 +74,7 @@ void Converter::convert() {
 //     outputGroup = outputFile.createGroup("0");
 
     H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC);
-    std::string parentpath("0");
+    std::string parentpath("0/");
     hid_t gid = H5outputfile.create_group(parentpath);
 
     std::vector<hsize_t> chunkDims;
@@ -85,19 +85,24 @@ void Converter::convert() {
 //     H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
 //     floatType.setOrder(H5T_ORDER_LE);
 //     createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims);
-//????
-
-
+    H5outputfile.create_dataset(parentpath, H5T_NATIVE_FLOAT,
+        standardDims, chunkDims);
 //     statsXY.createDatasets(outputGroup, "XY");
+    statsXY.createDatasets(H5outputfile, parentpath, "XY");
 
     if (depth > 1) {
 //         statsXYZ.createDatasets(outputGroup, "XYZ");
 //         statsZ.createDatasets(outputGroup, "Z");
-
 //         auto swizzledGroup = outputGroup.createGroup("SwizzledData");
         // We use this name in papers because it sounds more serious. :)
 //         outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData");
 //         createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims);
+        statsXYZ.createDatasets(H5outputfile, parentpath, "XYZ");
+        statsZ.createDatasets(H5outputfile, parentpath, "Z");
+        auto swizzledGroup = H5outputfile.create_group("0/SwizzledData");
+        // We use this name in papers because it sounds more serious. :)
+        H5outputfile.create_link("0/SwizzledData", "0/PermutedData");
+        // ??? H5outputfile.create_dataset();
     }
 
     /// mipMaps.createDatasets(outputGroup);
diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
index b358ee0..eac39d4 100644
--- a/HDF5Wrapper.cc
+++ b/HDF5Wrapper.cc
@@ -169,7 +169,7 @@ void H5OutputFile::get_attribute(std::vector<hid_t> &ids, const std::string &nam
     _get_attribute(ids, parts);
 }
 
-/// get an attribute going to list of hids
+/// get a dataset going to list of hids
 void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::string dset_name)
 {
     auto dset_id = H5Dopen(ids.back(), dset_name.c_str(), H5P_DEFAULT);
@@ -205,13 +205,75 @@ void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::vector<std::
     }
 }
 
-/// get attribute in file, storing relevant ids in vector
+/// get dataset in file, storing relevant ids in vector
 void H5OutputFile::get_dataset(std::vector<hid_t> &ids, const std::string &name)
 {
     auto parts = _tokenize(name);
     _get_dataset(ids, parts);
 }
 
+/// get a hdf5 id  going to list of hids
+void H5OutputFile::_get_hdf5_id(std::vector<hid_t> &ids, const std::string name)
+{
+    auto id = H5Oopen(ids.back(), name.c_str(), H5P_DEFAULT);
+    if (id < 0) {
+        throw std::invalid_argument(std::string("hdf5 object not found ") + name);
+    }
+    ids.push_back(id);
+}
+/// get hdf5 ids parsing list of ids and vector of strings
+void H5OutputFile::_get_hdf5_id(std::vector<hid_t> &ids, const std::vector<std::string> &parts)
+{
+    // This is the attribute name, so open it and store the id
+    if (parts.size() == 1) {
+        _get_hdf5_id(ids, parts[0]);
+    }
+    else
+    {
+        //otherwise enter group and recursively call funciotn
+        H5O_info_t object_info;
+        hid_t newid;
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT);
+        if (object_info.type == H5O_TYPE_GROUP) {
+            newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT);
+        }
+        else {
+            throw std::runtime_error("Incorrect path, encountered something other than group");
+        }
+        ids.push_back(newid);
+        //get the substring
+        std::vector<std::string> subparts(parts.begin() + 1, parts.end());
+        //call function again
+        _get_dataset(ids, subparts);
+    }
+}
+
+/// get hdf5 id in file, storing relevant ids in vector
+void H5OutputFile::get_hdf5_id(std::vector<hid_t> &ids, const std::string &name)
+{
+    auto parts = _tokenize(name);
+    _get_hdf5_id(ids, parts);
+}
+hid_t H5OutputFile::get_hdf5_id(std::string path, std::string name, bool closeids)
+{
+    return get_hdf5_id(path+name, closeids);
+}
+hid_t H5OutputFile::get_hdf5_id(std::string fullname, bool closeids)
+{
+    hid_t id;
+    std::vector<hid_t> ids;
+    try {
+        get_hdf5_id(ids, fullname);
+        id = ids.back();
+    }
+    catch (const std::invalid_argument& ia)
+    {
+        id = -1;
+    }
+    ///\todo question of whether I should close the objects that have been opened
+    if (closeids) close_hdf_ids(ids);
+    return id;
+}
 
 /// close open hids stored in vector
 void H5OutputFile::close_hdf_ids(std::vector<hid_t> &ids)
@@ -234,7 +296,7 @@ void H5OutputFile::close_path(std::string path)
 {
     auto parts = _tokenize(path);
     std::vector<hid_t> ids;
-    // _get_path(ids, parts);
+    _get_hdf5_id(ids, parts);
     close_hdf_ids(ids);
 }
 
@@ -302,7 +364,7 @@ bool H5OutputFile::exists_dataset(const std::string &parent, const std::string &
 
 /// create a dataset
 hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id,
-  std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims,
+  std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims,
   bool flag_parallel, bool flag_hyperslab, bool flag_collective)
 {
 #ifdef USEPARALLELHDF
@@ -424,6 +486,24 @@ hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id,
     return dset_id;
 }
 
+/// create a link
+herr_t H5OutputFile::create_link(std::string orgname, std::string linkname, bool ihard) {
+    std::vector<hid_t> orgids;
+    hid_t orgid = get_hdf5_id(orgname);
+    hid_t linkid;
+
+    if (H5Lexists(linkid, linkname.c_str(), H5P_DEFAULT))  {
+        throw std::runtime_error("Link already exists, cannot create new link");
+    }
+    if (ihard) {
+        return H5Lcreate_hard(orgid, orgname.c_str(), linkid, linkname.c_str(), H5P_DEFAULT, H5P_DEFAULT);
+    }
+    else {
+        return H5Lcreate_soft(orgname.c_str(), linkid, linkname.c_str(), H5P_DEFAULT, H5P_DEFAULT);
+    }
+}
+
+
 void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective)
 {
 #ifdef USEPARALLELHDF
diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h
index 80fb846..4ad1f55 100644
--- a/HDF5Wrapper.h
+++ b/HDF5Wrapper.h
@@ -65,10 +65,12 @@ class H5OutputFile
 
     /// tokenize a path given an input string
     std::vector<std::string> _tokenize(const std::string &s);
+
     /// get attribute id
     void _get_attribute(std::vector<hid_t> &ids, const std::string attr_name);
     /// get attribute id from tokenized string
     void _get_attribute(std::vector<hid_t> &ids, const std::vector<std::string> &parts);
+
     /// wrapper for reading scalar
     template<typename T> void _do_read(const hid_t &attr, const hid_t &type, T &val)
     {
@@ -94,9 +96,14 @@ class H5OutputFile
 
     /// get dataset id
     void _get_dataset(std::vector<hid_t> &ids, const std::string dset_name);
-    /// get attribute id from tokenized string
+    /// get dataset id from tokenized string
     void _get_dataset(std::vector<hid_t> &ids, const std::vector<std::string> &parts);
 
+    /// get dataset id
+    void _get_hdf5_id(std::vector<hid_t> &ids, const std::string name);
+    /// get attribute id from tokenized string
+    void _get_hdf5_id(std::vector<hid_t> &ids, const std::vector<std::string> &parts);
+
 public:
 
     // Constructor
@@ -116,7 +123,11 @@ class H5OutputFile
 
     /// create a group
     hid_t create_group(std::string groupname) {
-        hid_t group_id = H5Gcreate(file_id, groupname.c_str(),
+        hid_t group_id;
+        if (H5Lexists(group_id, groupname.c_str(), H5P_DEFAULT)) {
+            throw std::invalid_argument("Group "+groupname+"already present, not creating group");
+        }
+        group_id = H5Gcreate(file_id, groupname.c_str(),
             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
         return group_id;
     }
@@ -180,14 +191,14 @@ class H5OutputFile
     }
 
     template <typename T> hid_t create_dataset(std::string path, T&data,
-      std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims,
+      std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
       bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
     {
       return create_dataset(path, hdf5_type(T{}), dims, chunkDims,
       flag_parallel, flag_hyperslab, flag_collective);
     }
     hid_t create_dataset(std::string path, hid_t datatype,
-      std::vector<hsize_t> dims, const std::vector<hsize_t>& chunkDims,
+      std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
       bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true);
     /// close data set
     herr_t close_dataset(hid_t dset_id) {
@@ -195,6 +206,14 @@ class H5OutputFile
         return status;
     }
 
+    /// find and hdf5 object
+    void get_hdf5_id(std::vector<hid_t> &ids, const std::string &name);
+    hid_t get_hdf5_id(std::string path, std::string name, bool closeids = true);
+    hid_t get_hdf5_id(std::string fullname, bool closeids = true);
+
+    /// create a link
+    ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL
+    herr_t create_link(std::string orgname, std::string linkname, bool ihard = true);
 
     //write 1D data sets of string or input data with defined data type
     void write_dataset(std::string name, hsize_t len, std::string data,
diff --git a/MipMap.cc b/MipMap.cc
index 8708c12..9d9e61a 100644
--- a/MipMap.cc
+++ b/MipMap.cc
@@ -26,7 +26,15 @@ MipMap::~MipMap() {
 //
 // }
 
-void MipMap::createDataset(H5OutputFile &H5outputfile, hid_t gid, const std::vector<hsize_t>& chunkDims) {
+void MipMap::createDataset(H5OutputFile &H5outputfile, std::string path, const std::vector<hsize_t>& chunkDims) {
+
+    std::ostringstream mipMapName;
+    mipMapName << "MipMaps/DATA/DATA_XY_" << mip;
+    if (useChunks(datasetDims)) {
+        H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims, chunkDims);
+    } else {
+        H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims);
+    }
 }
 
 void MipMap::createBuffers(std::vector<hsize_t>& bufferDims) {
@@ -100,9 +108,9 @@ hsize_t MipMaps::size(const std::vector<hsize_t>& standardDims, const std::vecto
 //
 // }
 
-void MipMaps::createDatasets(H5OutputFile &H5outputfile, hid_t gid) {
+void MipMaps::createDatasets(H5OutputFile &H5outputfile, std::string path) {
     for (auto& mipMap : mipMaps) {
-        mipMap.createDataset(H5outputfile, gid, chunkDims);
+        mipMap.createDataset(H5outputfile, path, chunkDims);
     }
 }
 
diff --git a/MipMap.h b/MipMap.h
index b0e776f..d4835de 100644
--- a/MipMap.h
+++ b/MipMap.h
@@ -12,7 +12,7 @@ struct MipMap {
     ~MipMap();
 
 //     void createDataset(H5::Group group, const std::vector<hsize_t>& chunkDims);
-    void createDataset(H5OutputFile &h5outputfile, hid_t gid, const std::vector<hsize_t>& chunkDims);
+    void createDataset(H5OutputFile &h5outputfile, std::string path, const std::vector<hsize_t>& chunkDims);
     void createBuffers(std::vector<hsize_t>& bufferDims);
 
     void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) {
@@ -61,7 +61,7 @@ struct MipMaps {
     static hsize_t size(const std::vector<hsize_t>& standardDims, const std::vector<hsize_t>& standardBufferDims);
 
 //     void createDatasets(H5::Group group);
-    void createDatasets(H5OutputFile &H5outputfile, hid_t gid);
+    void createDatasets(H5OutputFile &H5outputfile, std::string path);
     void createBuffers(const std::vector<hsize_t>& standardBufferDims);
 
     void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) {
diff --git a/Stats.cc b/Stats.cc
index a798e3b..4774221 100644
--- a/Stats.cc
+++ b/Stats.cc
@@ -25,7 +25,7 @@ hsize_t Stats::size(std::vector<hsize_t> dims, hsize_t numBins, hsize_t partialH
 }
 
 // void Stats::createDatasets(H5::Group group, std::string name) {
-void Stats::createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string name) {
+void Stats::createDatasets(H5OutputFile &H5outputfile, std::string pathname, std::string name) {
 
 //     H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
 //     floatType.setOrder(H5T_ORDER_LE);
@@ -43,6 +43,13 @@ void Stats::createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string na
 //         createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins}));
 //     }
 
+    for (auto i = 0; i<datasetnames.size(); i++) {
+        datasetids.push_back(H5outputfile.create_dataset(pathname + "/Statistics/" + name + datasetnames[i], datasettypes[i], basicDatasetDims));
+    }
+    if (numBins) {
+        datasetids.push_back(H5outputfile.create_dataset(pathname + "/Statistics/" + name + "/HISTOGRAMS",
+        H5T_NATIVE_LLONG, extend(basicDatasetDims, {numBins})));
+    }
 }
 
 void Stats::createBuffers(std::vector<hsize_t> dims, hsize_t partialHistMultiplier) {
diff --git a/Stats.h b/Stats.h
index 09b7528..6a33002 100644
--- a/Stats.h
+++ b/Stats.h
@@ -53,7 +53,7 @@ struct Stats {
 
     // Setup
     //void createDatasets(H5::Group group, std::string name);
-    void createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string name);
+    void createDatasets(H5OutputFile &H5outputfile, std::string path, std::string name);
     void createBuffers(std::vector<hsize_t> dims, hsize_t partialHistMultiplier = 0);
 
     // Basic stats
@@ -117,13 +117,9 @@ struct Stats {
     hsize_t numBins;
 
     // Datasets
-//     H5::DataSet minDset;
-//     H5::DataSet maxDset;
-//     H5::DataSet sumDset;
-//     H5::DataSet ssqDset;
-//     H5::DataSet nanDset;
-//
-//     H5::DataSet histDset;
+    std::vector<std::string> datasetnames = {"/MIN", "/MAX", "/SUM", "/SUM_SQ", "/NAN_COUNT"};
+    std::vector<hid_t> datasettypes = {H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_LLONG};
+    std::vector<hid_t> datasetids;
 
     hid_t minDset;
     hid_t maxDset;

From de48d807f6b264f9d9df35de78957ebd97976238 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Mon, 3 Aug 2020 15:17:17 +0800
Subject: [PATCH 05/13] Update to HDF5 Wrapper API

Update to HDF5 wrapper, some code clean up and as well, added CFITSIO cmake check
---
 CMakeLists.txt          |   3 +-
 Converter.cc            |  17 +-
 Converter.h             |   2 +
 FastConverter.cc        |   4 +
 HDF5Wrapper.cc          | 338 +++++++++++++++++++++++++++-------------
 HDF5Wrapper.h           | 299 ++++++++++++++++++++++++++---------
 MipMap.cc               |   5 +-
 README.md               |   2 +-
 SlowConverter.cc        |   3 +
 Stats.cc                |  15 ++
 cmake/FindCFITSIO.cmake |  74 +++++++++
 11 files changed, 572 insertions(+), 190 deletions(-)
 create mode 100644 cmake/FindCFITSIO.cmake

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b8764ca..9179d76 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.13)
 project(hdf_convert)
 
 set(default_build_type "Release")
-
+set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
 set(CMAKE_CXX_STANDARD 11)
 
 # macro(find_hdf5)
@@ -27,6 +27,7 @@ set(CMAKE_CXX_STANDARD 11)
 #     endif()
 # endmacro()
 
+FIND_PACKAGE(CFITSIO 3.000 REQUIRED)
 FIND_PACKAGE(HDF5 REQUIRED COMPONENTS C)
 INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR})
 find_package(OpenMP)
diff --git a/Converter.cc b/Converter.cc
index 66767ae..7aaa0be 100644
--- a/Converter.cc
+++ b/Converter.cc
@@ -74,7 +74,7 @@ void Converter::convert() {
 //     outputGroup = outputFile.createGroup("0");
 
     H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC);
-    std::string parentpath("0/");
+    std::string parentpath("0");
     hid_t gid = H5outputfile.create_group(parentpath);
 
     std::vector<hsize_t> chunkDims;
@@ -85,9 +85,10 @@ void Converter::convert() {
 //     H5::FloatType floatType(H5::PredType::NATIVE_FLOAT);
 //     floatType.setOrder(H5T_ORDER_LE);
 //     createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims);
-    H5outputfile.create_dataset(parentpath, H5T_NATIVE_FLOAT,
-        standardDims, chunkDims);
 //     statsXY.createDatasets(outputGroup, "XY");
+    standardDataSet = parentpath + std::string("/DATA");
+    standardDataSetID = H5outputfile.create_dataset(standardDataSet, H5T_NATIVE_FLOAT,
+        standardDims, chunkDims);
     statsXY.createDatasets(H5outputfile, parentpath, "XY");
 
     if (depth > 1) {
@@ -99,13 +100,17 @@ void Converter::convert() {
 //         createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims);
         statsXYZ.createDatasets(H5outputfile, parentpath, "XYZ");
         statsZ.createDatasets(H5outputfile, parentpath, "Z");
-        auto swizzledGroup = H5outputfile.create_group("0/SwizzledData");
+        std::string swizzledpath = parentpath + "/SwizzledData";
+        std::string linkpath = parentpath + "/PermutedData";
+        auto swizzledGroup = H5outputfile.create_group(swizzledpath);
         // We use this name in papers because it sounds more serious. :)
-        H5outputfile.create_link("0/SwizzledData", "0/PermutedData");
-        // ??? H5outputfile.create_dataset();
+        H5outputfile.create_link(swizzledpath, linkpath);
+        swizzledDataSet = swizzledpath + "/" + swizzledName;
+        swizzledDataSetID = H5outputfile.create_dataset(swizzledDataSet, H5T_NATIVE_FLOAT, swizzledDims);
     }
 
     /// mipMaps.createDatasets(outputGroup);
+    mipMaps.createDatasets(H5outputfile, parentpath);
 
     // COPY HEADERS
 
diff --git a/Converter.h b/Converter.h
index d09dafe..492d9c5 100644
--- a/Converter.h
+++ b/Converter.h
@@ -35,6 +35,8 @@ class Converter {
 //     H5::Group outputGroup;
 //     H5::DataSet standardDataSet;
 //     H5::DataSet swizzledDataSet;
+    std::string standardDataSet, swizzledDataSet;
+    hid_t standardDataSetID, swizzledDataSetID;
 
 
     float* standardCube;
diff --git a/FastConverter.cc b/FastConverter.cc
index f4625c7..7c41454 100644
--- a/FastConverter.cc
+++ b/FastConverter.cc
@@ -255,6 +255,8 @@ void FastConverter::copyAndCalculate() {
         std::vector<hsize_t> count = trimAxes({1, depth, height, width}, N);
         std::vector<hsize_t> start = trimAxes({currentStokes, 0, 0, 0}, N);
 //         writeHdf5Data(standardDataSet, standardCube, memDims, count, start);
+        H5outputfile.write_dataset_nd(standardDataSet,
+            memDims, standardCube, count, start);
 
         if (depth > 1) {
             // This all technically worked if we reused the standard filespace and memspace
@@ -262,6 +264,8 @@ void FastConverter::copyAndCalculate() {
             std::vector<hsize_t> swizzledCount = trimAxes({1, width, height, depth}, N);
             std::vector<hsize_t> swizzledMemDims = {width, height, depth};
 //             writeHdf5Data(swizzledDataSet, rotatedCube, swizzledMemDims, swizzledCount, start);
+            H5outputfile.write_dataset_nd(swizzledDataSet,
+                swizzledMemDims, rotatedCube, swizzledCount, start);
         }
 
         // After writing and before mipmaps, we free the swizzled memory. We allocate it again next Stokes.
diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
index eac39d4..ca1b3ae 100644
--- a/HDF5Wrapper.cc
+++ b/HDF5Wrapper.cc
@@ -106,6 +106,83 @@ void H5OutputFile::close()
 #endif
 }
 
+#ifdef USEPARALLELHDF
+void H5OutputFile::_set_mpi_dim_and_offset(MPI_Comm &comm,
+    hsize_t rank, std::vector<hsize_t> &dims,
+    std::vector<unsigned long long> &dims_single,
+    std::vector<unsigned long long> &dims_offset
+    std::vector<unsigned long long> &mpi_hdf_dims,
+    std::vector<unsigned long long> &mpi_hdf_dims_tot,
+    bool flag_parallel, bool flag_first_dim_parallel
+    )
+{
+    if (!flag_parallel) return;
+    //if parallel hdf5 get the full extent of the data
+    //this bit of code communicating information can probably be done elsewhere
+    //minimize number of mpi communications
+    for (auto i=0;i<rank;i++) dims_single[i]=dims[i];
+    MPI_Allgather(dims_single.data(), rank, MPI_UNSIGNED_LONG_LONG, mpi_hdf_dims.data(), rank, MPI_UNSIGNED_LONG_LONG, comm);
+    MPI_Allreduce(dims_single.data(), mpi_hdf_dims_tot.data(), rank, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm);
+    for (auto i=0;i<rank;i++) {
+        dims_offset[i] = 0;
+        if (flag_first_dim_parallel && i > 0) continue;
+        for (auto j=1;j<=ThisWriteTask;j++) {
+            dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1];
+        }
+    }
+    if (flag_first_dim_parallel && rank > 1) {
+        for (auto i=1; i<rank;i++) mpi_hdf_dims_tot[i] = dims[i];
+    }
+}
+void H5OutputFile::_set_mpi_hyperslab(hid_t &dspace_id, hid_t &memspace_id,
+    hsize_t rank, std::vector<hsize_t> &dims,
+    std::vector<unsigned long long> &mpi_hdf_dims_tot,
+    bool flag_parallel, bool flag_hyperslab)
+{
+    if (flag_parallel) {
+        //then all threads create the same simple data space
+        //so the meta information is the same
+        if (flag_hyperslab) {
+            //allocate the space spanning the file
+            dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
+            //allocate the memory space
+            //allocate the memory space
+            memspace_id = H5Screate_simple(rank, dims.data(), NULL);
+        }
+        else {
+            dspace_id = H5Screate_simple(rank, dims.data(), NULL);
+            memspace_id = dspace_id;
+        }
+    }
+    else {
+        dspace_id = H5Screate_simple(rank, dims.data(), NULL);
+        memspace_id = dspace_id;
+    }
+}
+
+void H5OutputFile::_set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite,
+    hid_t &dspace_id, hid_t &memspace_id,
+    hsize_t rank, std::vector<hsize_t> dims, std::vector<hsize_t> dims_offset,
+    bool flag_parallel, bool flag_collective, bool flag_hyperslab)
+{
+    if (!flag_parallel) return;
+    // set up the collective transfer properties list
+    prop_id = H5Pcreate(H5P_DATASET_XFER);
+    //if all tasks are participating in the writes
+    if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE);
+    else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT);
+    if (flag_hyperslab) {
+        H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL);
+        if (dims[0] == 0) {
+            H5Sselect_none(dspace_id);
+            H5Sselect_none(memspace_id);
+        }
+    }
+    iwrite = (mpi_hdf_dims_tot[0] > 0);
+}
+
+#endif
+
 std::vector<std::string> H5OutputFile::_tokenize(const std::string &s)
 {
     std::string delims("/");
@@ -363,7 +440,7 @@ bool H5OutputFile::exists_dataset(const std::string &parent, const std::string &
 }
 
 /// create a dataset
-hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id,
+hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id,
   std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims,
   bool flag_parallel, bool flag_hyperslab, bool flag_collective)
 {
@@ -371,7 +448,7 @@ hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id,
     MPI_Comm comm = mpi_comm_write;
     MPI_Info info = MPI_INFO_NULL;
 #endif
-    auto splitPath = _tokenize(path);
+    auto splitPath = _tokenize(fullname);
     auto dsetname = splitPath.back();
     splitPath.pop_back();
     hid_t curr_id = file_id;
@@ -393,26 +470,8 @@ hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id,
     }
 
 #ifdef USEPARALLELHDF
-    /// will really have to update for MPI
     std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
-    if (flag_parallel) {
-        //if parallel hdf5 get the full extent of the data
-        //this bit of code communicating information can probably be done elsewhere
-        //minimize number of mpi communications
-        for (auto i=0;i<rank;i++) dims_single[i]=dims[i];
-        MPI_Allgather(dims_single.data(), rank, MPI_UNSIGNED_LONG_LONG, mpi_hdf_dims.data(), rank, MPI_UNSIGNED_LONG_LONG, comm);
-        MPI_Allreduce(dims_single.data(), mpi_hdf_dims_tot.data(), rank, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm);
-        for (auto i=0;i<rank;i++) {
-            dims_offset[i] = 0;
-            if (flag_first_dim_parallel && i > 0) continue;
-            for (auto j=1;j<=ThisWriteTask;j++) {
-                dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1];
-            }
-        }
-        if (flag_first_dim_parallel && rank > 1) {
-            for (auto i=1; i<rank;i++) mpi_hdf_dims_tot[i] = dims[i];
-        }
-    }
+    if (flag_parallel) _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_first_dim_parallel);
 #endif
 
     // Determine if going to compress data in chunks
@@ -448,25 +507,7 @@ hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id,
 
     // Create the dataspace
 #ifdef USEPARALLELHDF
-    if (flag_parallel) {
-        //then all threads create the same simple data space
-        //so the meta information is the same
-        if (flag_hyperslab) {
-            //allocate the space spanning the file
-            dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
-            //allocate the memory space
-            //allocate the memory space
-            memspace_id = H5Screate_simple(rank, dims.data(), NULL);
-        }
-        else {
-            dspace_id = H5Screate_simple(rank, dims.data(), NULL);
-            memspace_id = dspace_id;
-        }
-    }
-    else {
-        dspace_id = H5Screate_simple(rank, dims.data(), NULL);
-        memspace_id = dspace_id;
-    }
+    _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
 #else
     dspace_id = H5Screate_simple(rank, dims.data(), NULL);
     memspace_id = dspace_id;
@@ -504,7 +545,8 @@ herr_t H5OutputFile::create_link(std::string orgname, std::string linkname, bool
 }
 
 
-void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective)
+void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data,
+    bool flag_parallel, bool flag_collective)
 {
 #ifdef USEPARALLELHDF
     MPI_Comm comm = mpi_comm_write;
@@ -551,14 +593,16 @@ void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data
 }
 
 void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data,
-    hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective)
+    hid_t memtype_id, hid_t filetype_id,
+    bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective)
 {
     int rank = 1;
     hsize_t dims[1] = {len};
     if (memtype_id == -1) {
         throw std::runtime_error("Write data set called with void pointer but no type info passed.");
     }
-    write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective);
+    write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id,
+        flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective);
 }
 
 void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
@@ -581,27 +625,7 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
 
 #ifdef USEPARALLELHDF
     std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
-    //if parallel hdf5 get the full extent of the data
-    //this bit of code communicating information can probably be done elsewhere
-    //minimize number of mpi communications
-    if (flag_parallel) {
-        //if parallel hdf5 get the full extent of the data
-        //this bit of code communicating information can probably be done elsewhere
-        //minimize number of mpi communications
-        for (auto i=0;i<rank;i++) dims_single[i]=dims[i];
-        MPI_Allgather(dims_single.data(), rank, MPI_UNSIGNED_LONG_LONG, mpi_hdf_dims.data(), rank, MPI_UNSIGNED_LONG_LONG, comm);
-        MPI_Allreduce(dims_single.data(), mpi_hdf_dims_tot.data(), rank, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm);
-        for (auto i=0;i<rank;i++) {
-            dims_offset[i] = 0;
-            if (flag_first_dim_parallel && i > 0) continue;
-            for (auto j=1;j<=ThisWriteTask;j++) {
-                dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1];
-            }
-        }
-        if (flag_first_dim_parallel && rank > 1) {
-            for (auto i=1; i<rank;i++) mpi_hdf_dims_tot[i] = dims[i];
-        }
-    }
+    _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel);
 #endif
 
     // Determine if going to compress data in chunks
@@ -651,24 +675,7 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
 
     // Create the dataspace
 #ifdef USEPARALLELHDF
-    if (flag_parallel) {
-        //then all threads create the same simple data space
-        //so the meta information is the same
-        if (flag_hyperslab) {
-            //allocate the space spanning the file
-            dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
-            //allocate the memory space
-            memspace_id = H5Screate_simple(rank, dims, NULL);
-        }
-        else {
-            dspace_id = H5Screate_simple(rank, dims, NULL);
-            memspace_id = dspace_id;
-        }
-    }
-    else {
-        dspace_id = H5Screate_simple(rank, dims, NULL);
-        memspace_id = dspace_id;
-    }
+    _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
 #else
     dspace_id = H5Screate_simple(rank, dims, NULL);
     memspace_id = dspace_id;
@@ -694,47 +701,164 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
     H5Pclose(prop_id);
 
     prop_id = H5P_DEFAULT;
+    bool iwrite = (dims[0] > 0);
+#ifdef USEPARALLELHDF
+    _set_mpi_dataset_properties(prop_id, iwrite,
+        dspace_id, memspace_id,
+        rank, dims, dims_offset,
+        flag_parallel, flag_collective, flag_hyperslab);
+#endif
+    if (iwrite) {
+        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    }
 
+    // Clean up (note that dtype_id is NOT a new object so don't need to close it)
+    H5Pclose(prop_id);
 #ifdef USEPARALLELHDF
-    if (flag_parallel) {
-        // set up the collective transfer properties list
-        prop_id = H5Pcreate(H5P_DATASET_XFER);
-        //if all tasks are participating in the writes
-        if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE);
-        else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT);
-        if (flag_hyperslab) {
-            H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL);
-            if (dims[0] == 0) {
-                H5Sselect_none(dspace_id);
-                H5Sselect_none(memspace_id);
-            }
+    if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
+#endif
+    H5Sclose(dspace_id);
+    H5Dclose(dset_id);
+}
 
+/// Write data set with hyperslab set by count and start
+void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data,
+    const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+    hid_t memtype_id, hid_t filetype_id,
+    bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective)
+{
+    int rank = 1;
+    hsize_t dims[1] = {len};
+    if (memtype_id == -1) {
+        throw std::runtime_error("Write data set called with void pointer but no type info passed.");
+    }
+    write_dataset_nd(name, rank, dims, data,
+        count, start,
+        memtype_id, filetype_id,
+        flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective);
+}
+
+///\todo need to update this to general mpi hyper slab selection
+void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
+    const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+    hid_t memtype_id, hid_t filetype_id,
+    bool flag_parallel, bool flag_first_dim_parallel,
+    bool flag_hyperslab, bool flag_collective)
+{
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+#endif
+    hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
+    std::vector<hsize_t> chunks(rank);
+    // Get HDF5 data type of the array in memory
+    if (memtype_id == -1) {
+        throw std::runtime_error("Write data set called with void pointer but no type info passed.");
+    }
+    // Determine type of the dataset to create
+    if(filetype_id < 0) filetype_id = memtype_id;
+
+#ifdef USEPARALLELHDF
+    std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
+    // _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel);
+#endif
+
+    // Determine if going to compress data in chunks
+    // Only chunk non-zero size datasets
+    int nonzero_size = 1;
+    for(int i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
         }
-        if (mpi_hdf_dims_tot[0] > 0) {
-            // Write the data
-            ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
-            if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+        else {
+            if(dims[i]==0) nonzero_size = 0;
         }
+#else
+        if(dims[i]==0) nonzero_size = 0;
+#endif
     }
-    else if (dims[0] > 0)
+    // Only chunk datasets where we would have >1 chunk
+    unsigned int large_dataset = 0;
+    for(int i=0; i<rank; i++)
     {
-        // Write the data
-        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
-        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+        else {
+            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+#else
+        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+#endif
     }
-
+    if(nonzero_size && large_dataset)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
+        }
+        else {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+        }
 #else
-    // Write the data
-    if (dims[0] > 0) {
+        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+#endif
+    }
+
+    // Create the dataspace
+// #ifdef USEPARALLELHDF
+//     _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
+// #else
+//     dspace_id = H5Screate_simple(rank, dims, NULL);
+//     memspace_id = dspace_id;
+// #endif
+
+    // Dataset creation properties
+    prop_id = H5P_DEFAULT;
+#ifdef USEHDFCOMPRESSION
+    // this defines compression
+    if(nonzero_size && large_dataset)
+    {
+        prop_id = H5Pcreate(H5P_DATASET_CREATE);
+        H5Pset_layout(prop_id, H5D_CHUNKED);
+        H5Pset_chunk(prop_id, rank, chunks.data());
+        H5Pset_deflate(prop_id, HDFDEFLATE);
+    }
+#endif
+
+    // Create the dataset
+    dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
+        H5P_DEFAULT, prop_id, H5P_DEFAULT);
+    if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name);
+    H5Pclose(prop_id);
+
+    prop_id = H5P_DEFAULT;
+    bool iwrite = (dims[0] > 0);
+
+    //set hyperslab
+    if (!count.empty() && !start.empty()) {
+        herr_t ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL);
+    }
+
+#ifdef USEPARALLELHDF
+    // _set_mpi_dataset_properties(prop_id, iwrite,
+    //     dspace_id, memspace_id,
+    //     rank, dims, dims_offset,
+    //     flag_parallel, flag_collective, flag_hyperslab);
+#endif
+    if (iwrite) {
         ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
         if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
     }
-#endif
 
     // Clean up (note that dtype_id is NOT a new object so don't need to close it)
     H5Pclose(prop_id);
 #ifdef USEPARALLELHDF
-    if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
+    // if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
 #endif
     H5Sclose(dspace_id);
     H5Dclose(dset_id);
diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h
index 4ad1f55..c21a29c 100644
--- a/HDF5Wrapper.h
+++ b/HDF5Wrapper.h
@@ -63,6 +63,27 @@ class H5OutputFile
         abort();
     }
 
+    /// parallel hdf5 mpi routines
+#ifdef USEPARALLELHDF
+    void _set_mpi_dim_and_offset(MPI_Comm &comm,
+        hsize_t rank, std::vector<hsize_t> &dims,
+        std::vector<unsigned long long> &dims_single,
+        std::vector<unsigned long long> &dims_offset,
+        std::vector<unsigned long long> &mpi_hdf_dims,
+        std::vector<unsigned long long> &mpi_hdf_dims_tot,
+        bool flag_parallel, bool flag_first_dim_parallel
+    );
+    void _set_mpi_hyperslab(hid_t &dspace_id, hid_t &memspace_id,
+        hsize_t rank, std::vector<hsize_t> &dims,
+        std::vector<unsigned long long> &mpi_hdf_dims_tot,
+        bool flag_parallel, bool flag_hyperslab);
+
+    void _set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite,
+        hid_t &dspace_id, hid_t &memspace_id,
+        hsize_t rank, std::vector<hsize_t> dims, std::vector<hsize_t> dims_offset,
+        bool flag_parallel, bool flag_collective, bool flag_hyperslab)
+#endif
+
     /// tokenize a path given an input string
     std::vector<std::string> _tokenize(const std::string &s);
 
@@ -190,16 +211,31 @@ class H5OutputFile
         return dset_id;
     }
 
-    template <typename T> hid_t create_dataset(std::string path, T&data,
-      std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
-      bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
+    template <typename T> hid_t create_dataset(std::string path, std::string name, T&data,
+        std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
+        bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
+    {
+        return create_dataset(path+"/"+name, hdf5_type(T{}), dims, chunkDims,
+            flag_parallel, flag_hyperslab, flag_collective);
+    }
+    hid_t create_dataset(std::string path, std::string name, hid_t datatype,
+        std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
+        bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
+    {
+        return create_dataset(path+"/"+name, datatype, dims, chunkDims,
+            flag_parallel, flag_hyperslab, flag_collective);
+    }
+    template <typename T> hid_t create_dataset(std::string fullname, T&data,
+        std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
+        bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
     {
-      return create_dataset(path, hdf5_type(T{}), dims, chunkDims,
-      flag_parallel, flag_hyperslab, flag_collective);
+        return create_dataset(fullname, hdf5_type(T{}), dims, chunkDims,
+            flag_parallel, flag_hyperslab, flag_collective);
     }
-    hid_t create_dataset(std::string path, hid_t datatype,
+    hid_t create_dataset(std::string fullname, hid_t datatype,
       std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
       bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true);
+
     /// close data set
     herr_t close_dataset(hid_t dset_id) {
         herr_t status = H5Dclose(dset_id);
@@ -215,13 +251,23 @@ class H5OutputFile
     ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL
     herr_t create_link(std::string orgname, std::string linkname, bool ihard = true);
 
-    //write 1D data sets of string or input data with defined data type
+    /// write 1D data sets of string or input data with defined data type
+    /// without hyperslab selection
     void write_dataset(std::string name, hsize_t len, std::string data,
-      bool flag_parallel = true, bool flag_collective = true);
+        bool flag_parallel = true, bool flag_collective = true);
     void write_dataset(std::string name, hsize_t len, void *data,
-       hid_t memtype_id=-1, hid_t filetype_id=-1,
-       bool flag_parallel = true, bool flag_first_dim_parallel = true,
-       bool flag_hyperslab = true, bool flag_collective = true);
+        hid_t memtype_id=-1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true);
+    /// with a hyperslab selection defined by count, start
+    void write_dataset(std::string name, hsize_t len, std::string data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        bool flag_parallel = true, bool flag_collective = true);
+    void write_dataset(std::string name, hsize_t len, void *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id=-1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true);
 
   /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
   /// the input data if not explicitly specified with the filetype_id parameter.
@@ -235,7 +281,6 @@ class H5OutputFile
      if (memtype_id == -1) memtype_id = hdf5_type(T{});
      write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective);
   }
-
   /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
   /// the input data if not explicitly specified with the filetype_id parameter.
   template <typename T> void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
@@ -258,24 +303,7 @@ class H5OutputFile
 
 #ifdef USEPARALLELHDF
     std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
-    if (flag_parallel) {
-        //if parallel hdf5 get the full extent of the data
-        //this bit of code communicating information can probably be done elsewhere
-        //minimize number of mpi communications
-        for (auto i=0;i<rank;i++) dims_single[i]=dims[i];
-        MPI_Allgather(dims_single.data(), rank, MPI_UNSIGNED_LONG_LONG, mpi_hdf_dims.data(), rank, MPI_UNSIGNED_LONG_LONG, comm);
-        MPI_Allreduce(dims_single.data(), mpi_hdf_dims_tot.data(), rank, MPI_UNSIGNED_LONG_LONG, MPI_SUM, comm);
-        for (auto i=0;i<rank;i++) {
-            dims_offset[i] = 0;
-            if (flag_first_dim_parallel && i > 0) continue;
-            for (auto j=1;j<=ThisWriteTask;j++) {
-                dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1];
-            }
-        }
-        if (flag_first_dim_parallel && rank > 1) {
-            for (auto i=1; i<rank;i++) mpi_hdf_dims_tot[i] = dims[i];
-        }
-    }
+    _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel);
 #endif
 
     // Determine if going to compress data in chunks
@@ -325,25 +353,7 @@ class H5OutputFile
 
     // Create the dataspace
 #ifdef USEPARALLELHDF
-    if (flag_parallel) {
-        //then all threads create the same simple data space
-        //so the meta information is the same
-        if (flag_hyperslab) {
-            //allocate the space spanning the file
-            dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
-            //allocate the memory space
-            //allocate the memory space
-            memspace_id = H5Screate_simple(rank, dims, NULL);
-        }
-        else {
-            dspace_id = H5Screate_simple(rank, dims, NULL);
-            memspace_id = dspace_id;
-        }
-    }
-    else {
-        dspace_id = H5Screate_simple(rank, dims, NULL);
-        memspace_id = dspace_id;
-    }
+    _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
 #else
     dspace_id = H5Screate_simple(rank, dims, NULL);
     memspace_id = dspace_id;
@@ -368,41 +378,183 @@ class H5OutputFile
     H5Pclose(prop_id);
 
     prop_id = H5P_DEFAULT;
+    bool iwrite = (dims[0] > 0);
+#ifdef USEPARALLELHDF
+    _set_mpi_dataset_properties(prop_id, iwrite,
+        dspace_id, memspace_id,
+        rank, dims, dims_offset,
+        flag_parallel, flag_collective, flag_hyperslab);
+#endif
+    if (iwrite) {
+        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    }
+
+    // Clean up (note that dtype_id is NOT a new object so don't need to close it)
+    H5Pclose(prop_id);
 #ifdef USEPARALLELHDF
-    if (flag_parallel) {
-        // set up the collective transfer properties list
-        prop_id = H5Pcreate(H5P_DATASET_XFER);
-        //if all tasks are participating in the writes
-        if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE);
-        else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT);
-        if (flag_hyperslab) {
-            H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL);
-            if (dims[0] == 0) {
-                H5Sselect_none(dspace_id);
-                H5Sselect_none(memspace_id);
-            }
+    if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
+#endif
+    H5Sclose(dspace_id);
+    H5Dclose(dset_id);
+  }
+
+    //write dataset with hyperslab selection
+    void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
+        hid_t memtype_id = -1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true);
+
+    /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
+    /// the input data if not explicitly specified with the filetype_id parameter.
+    /// template function so defined here
+    template <typename T> void write_dataset(std::string name, hsize_t len, T *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id = -1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
+    {
+        int rank = 1;
+        hsize_t dims[1] = {len};
+        if (memtype_id == -1) memtype_id = hdf5_type(T{});
+        write_dataset_nd(name, rank, dims, data,
+            count, start, memtype_id, filetype_id,
+            flag_parallel, flag_hyperslab, flag_collective);
+    }
+    template <typename T> void write_dataset_nd(std::string name, std::vector<hsize_t> dims, T *data,
+        hid_t memtype_id = -1, hid_t filetype_id = -1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true)
+    {
+        write_dataset_nd(name, dims.size(), dims.data(), data,
+            memtype_id, filetype_id,
+            flag_parallel, flag_first_dim_parallel,
+            flag_hyperslab, flag_collective);
+    }
+
+    /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
+    /// the input data if not explicitly specified with the filetype_id parameter.
+    template <typename T> void write_dataset_nd(std::string name, std::vector<hsize_t> dims, T *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id = -1, hid_t filetype_id = -1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true)
+    {
+        write_dataset_nd(name, dims.size(), dims.data(), data,
+            count, start,
+            memtype_id, filetype_id,
+            flag_parallel, flag_first_dim_parallel,
+            flag_hyperslab, flag_collective);
+    }
+    template <typename T> void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id = -1, hid_t filetype_id = -1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true)
+    {
+#ifdef USEPARALLELHDF
+    MPI_Comm comm = mpi_comm_write;
+    MPI_Info info = MPI_INFO_NULL;
+#endif
+    hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
+    std::vector<hsize_t> chunks(rank);
+
+    // Get HDF5 data type of the array in memory
+    if (memtype_id == -1) memtype_id = hdf5_type(T{});
+
+    // Determine type of the dataset to create
+    if(filetype_id < 0) filetype_id = memtype_id;
+
+#ifdef USEPARALLELHDF
+    std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
+    // _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel);
+#endif
 
+    // Determine if going to compress data in chunks
+    // Only chunk non-zero size datasets
+    int nonzero_size = 1;
+    for(int i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
         }
-        if (mpi_hdf_dims_tot[0] > 0) {
-            // Write the data
-            ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
-            if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+        else {
+            if(dims[i]==0) nonzero_size = 0;
         }
+#else
+        if(dims[i]==0) nonzero_size = 0;
+#endif
     }
-    else if (dims[0] > 0)
+    // Only chunk datasets where we would have >1 chunk
+    int large_dataset = 0;
+    for(int i=0; i<rank; i++)
     {
-        // Write the data
-        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
-        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+        else {
+            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+        }
+#else
+        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
+#endif
+    }
+    if(nonzero_size && large_dataset)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
+        }
+        else {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+        }
+#else
+        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+#endif
     }
 
+    // Create the dataspace
+#ifdef USEPARALLELHDF
+    // _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
 #else
-    // Write the data
-    if (dims[0] > 0) {
+    dspace_id = H5Screate_simple(rank, dims, NULL);
+    memspace_id = dspace_id;
+#endif
+
+    // Dataset creation properties
+    prop_id = H5P_DEFAULT;
+#ifdef USEHDFCOMPRESSION
+    // this defines compression
+    if(nonzero_size && large_dataset)
+    {
+        prop_id = H5Pcreate(H5P_DATASET_CREATE);
+        H5Pset_layout(prop_id, H5D_CHUNKED);
+        H5Pset_chunk(prop_id, rank, chunks.data());
+        H5Pset_deflate(prop_id, HDFDEFLATE);
+    }
+#endif
+    // Create the dataset
+    dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
+        H5P_DEFAULT, prop_id, H5P_DEFAULT);
+    if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name);
+    H5Pclose(prop_id);
+
+    prop_id = H5P_DEFAULT;
+    bool iwrite = (dims[0] > 0);
+    if (!count.empty() && !start.empty()) {
+        herr_t ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL);
+    }
+#ifdef USEPARALLELHDF
+    // _set_mpi_dataset_properties(prop_id, iwrite,
+    //     dspace_id, memspace_id,
+    //     rank, dims, dims_offset,
+    //     flag_parallel, flag_collective, flag_hyperslab);
+#endif
+    if (iwrite) {
         ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
         if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
     }
-#endif
 
     // Clean up (note that dtype_id is NOT a new object so don't need to close it)
     H5Pclose(prop_id);
@@ -411,9 +563,10 @@ class H5OutputFile
 #endif
     H5Sclose(dspace_id);
     H5Dclose(dset_id);
-  }
+    }
 
     void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
         hid_t memtype_id = -1, hid_t filetype_id=-1,
         bool flag_parallel = true, bool flag_first_dim_parallel = true,
         bool flag_hyperslab = true, bool flag_collective = true);
diff --git a/MipMap.cc b/MipMap.cc
index 9d9e61a..0ae092a 100644
--- a/MipMap.cc
+++ b/MipMap.cc
@@ -29,7 +29,7 @@ MipMap::~MipMap() {
 void MipMap::createDataset(H5OutputFile &H5outputfile, std::string path, const std::vector<hsize_t>& chunkDims) {
 
     std::ostringstream mipMapName;
-    mipMapName << "MipMaps/DATA/DATA_XY_" << mip;
+    mipMapName << "/MipMaps/DATA/DATA_XY_" << mip;
     if (useChunks(datasetDims)) {
         H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims, chunkDims);
     } else {
@@ -61,7 +61,8 @@ void MipMap::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t cha
     std::vector<hsize_t> start = trimAxes({stokesOffset, channelOffset, 0, 0}, N);
 
     // writeHdf5Data(dataset, vals, bufferDims, count, start);
-    //???
+    ///\todo need to figure out if I use the name or the id from which I get the name
+    // H5outputfile.write_dataset_nd(dataset, bufferDims, vals, count, start);
 }
 
 void MipMap::resetBuffers() {
diff --git a/README.md b/README.md
index 9017529..bf1a1c9 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ C++ implementation of FITS to IDIA-HDF5 converter, optimised using OpenMP
 
 ## Installation
 
-Dependencies: CFITSIO, HDF5, HDF5 C++ bindings
+Dependencies: CFITSIO, HDF5
 
 To install:
 
diff --git a/SlowConverter.cc b/SlowConverter.cc
index 7cd0332..7cd8626 100644
--- a/SlowConverter.cc
+++ b/SlowConverter.cc
@@ -81,6 +81,8 @@ void SlowConverter::copyAndCalculate() {
 
             std::vector<hsize_t> start = trimAxes({s, c, 0, 0}, N);
 //             writeHdf5Data(standardDataSet, standardCube, memDims, count, start);
+            H5outputfile.write_dataset_nd(standardDataSet,
+                memDims.size(), memDims.data(), standardCube, count, start);
 
             DEBUG(std::cout << " Accumulating XY stats and mipmaps..." << std::flush;);
             TIMER(timer.start(timerLabelStatsMipmaps););
@@ -346,6 +348,7 @@ void SlowConverter::copyAndCalculate() {
                     auto swizzledStart = trimAxes({s, xOffset, yOffset, 0}, N);
 
 //                     writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart);
+                    H5outputfile.write_dataset_nd(swizzledDataSet, swizzledMemDims, rotatedSlice, swizzledCount, swizzledStart);
 
                     DEBUG(std::cout << " Writing Z statistics..." << std::endl;);
                     // write Z statistics
diff --git a/Stats.cc b/Stats.cc
index 4774221..7c1ef68 100644
--- a/Stats.cc
+++ b/Stats.cc
@@ -105,9 +105,24 @@ void Stats::writeBasic(H5OutputFile &H5outputfile, const std::vector<hsize_t>& b
 //     writeHdf5Data(sumDset, sums, basicBufferDims, count, start);
 //     writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start);
 //     writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start);
+
+
+    ///\todo need to get the names of the ids stored in datasetids
+    ///or do I just use the names?
+    //???
+    for (auto i = 0; i<datasetnames.size(); i++) {
+        // get_name(datasetids);
+    }
+    // H5outputfile.write_dataset_nd(minDset, basicBufferDims, minVals, count, start);
+    // H5outputfile.write_dataset_nd(maxDset, basicBufferDims, maxVals, count, start);
+    // H5outputfile.write_dataset_nd(sumDset, basicBufferDims, sums, count, start);
+    // H5outputfile.write_dataset_nd(ssqDset, basicBufferDims, sumsSq, count, start);
+    // H5outputfile.write_dataset_nd(nanDset, basicBufferDims, nanCounts, count, start);
 }
 
 // void Stats::writeHistogram(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
 void Stats::writeHistogram(H5OutputFile &H5outputfile, const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
 //     writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start);
+    //???
+    // H5outputfile.write_dataset_nd(histDset, extend(basicBufferDims, {numBins}, histograms, count, start);
 }
diff --git a/cmake/FindCFITSIO.cmake b/cmake/FindCFITSIO.cmake
new file mode 100644
index 0000000..a749701
--- /dev/null
+++ b/cmake/FindCFITSIO.cmake
@@ -0,0 +1,74 @@
+# - Try to find CFITSIO.
+# Variables used by this module:
+#  CFITSIO_ROOT_DIR     - CFITSIO root directory
+# Variables defined by this module:
+#  CFITSIO_FOUND        - system has CFITSIO
+#  CFITSIO_INCLUDE_DIR  - the CFITSIO include directory (cached)
+#  CFITSIO_INCLUDE_DIRS - the CFITSIO include directories
+#                         (identical to CFITSIO_INCLUDE_DIR)
+#  CFITSIO_LIBRARY      - the CFITSIO library (cached)
+#  CFITSIO_LIBRARIES    - the CFITSIO libraries
+#                         (identical to CFITSIO_LIBRARY)
+#  CFITSIO_VERSION_STRING the found version of CFITSIO, padded to 3 digits
+
+# Copyright (C) 2009
+# ASTRON (Netherlands Institute for Radio Astronomy)
+# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# This file is part of the LOFAR software suite.
+# The LOFAR software suite is free software: you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# The LOFAR software suite is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+#
+# $Id$
+
+if(NOT CFITSIO_FOUND)
+
+  find_path(CFITSIO_INCLUDE_DIR fitsio.h
+    HINTS ${CFITSIO_ROOT_DIR} PATH_SUFFIXES include include/cfitsio
+             include/libcfitsio0)
+
+  if(CFITSIO_INCLUDE_DIR)
+    FILE(READ "${CFITSIO_INCLUDE_DIR}/fitsio.h" CFITSIO_H)
+    set(CFITSIO_VERSION_REGEX ".*#define CFITSIO_VERSION[^0-9]*([0-9]+)\\.([0-9]+).*")
+    if ("${CFITSIO_H}" MATCHES ${CFITSIO_VERSION_REGEX})
+      # Pad CFITSIO minor version to three digit because 3.181 is older than 3.35
+      STRING(REGEX REPLACE ${CFITSIO_VERSION_REGEX}
+                           "\\1.\\200" CFITSIO_VERSION_STRING "${CFITSIO_H}")
+      STRING(SUBSTRING ${CFITSIO_VERSION_STRING} 0 5 CFITSIO_VERSION_STRING)
+      STRING(REGEX REPLACE "^([0-9]+)[.]([0-9]+)" "\\1" CFITSIO_VERSION_MAJOR ${CFITSIO_VERSION_STRING})
+      # CFITSIO_VERSION_MINOR will contain 80 for 3.08, 181 for 3.181 and 200 for 3.2
+      STRING(REGEX REPLACE "^([0-9]+)[.]0*([0-9]+)" "\\2" CFITSIO_VERSION_MINOR ${CFITSIO_VERSION_STRING})
+    else ()
+      set(CFITSIO_VERSION_STRING "Unknown")
+    endif()
+  endif(CFITSIO_INCLUDE_DIR)
+
+  find_library(CFITSIO_LIBRARY cfitsio
+    HINTS ${CFITSIO_ROOT_DIR} PATH_SUFFIXES lib)
+  find_library(M_LIBRARY m)
+  mark_as_advanced(CFITSIO_INCLUDE_DIR CFITSIO_LIBRARY M_LIBRARY)
+
+  if(CMAKE_VERSION VERSION_LESS "2.8.3")
+    find_package_handle_standard_args(CFITSIO DEFAULT_MSG
+      CFITSIO_LIBRARY M_LIBRARY CFITSIO_INCLUDE_DIR)
+  else ()
+    include(FindPackageHandleStandardArgs)
+    find_package_handle_standard_args(CFITSIO
+      REQUIRED_VARS CFITSIO_LIBRARY M_LIBRARY CFITSIO_INCLUDE_DIR
+      VERSION_VAR CFITSIO_VERSION_STRING)
+  endif ()
+
+  set(CFITSIO_INCLUDE_DIRS ${CFITSIO_INCLUDE_DIR})
+  set(CFITSIO_LIBRARIES ${CFITSIO_LIBRARY} ${M_LIBRARY})
+
+endif(NOT CFITSIO_FOUND)

From 2783620043781d93788b7397f9a802cc3d2d29a6 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Mon, 3 Aug 2020 16:13:43 +0800
Subject: [PATCH 06/13] Added some comments

---
 Converter.h   | 8 ++++++++
 HDF5Wrapper.h | 4 ++++
 2 files changed, 12 insertions(+)

diff --git a/Converter.h b/Converter.h
index 492d9c5..a24f6d8 100644
--- a/Converter.h
+++ b/Converter.h
@@ -64,6 +64,10 @@ class Converter {
 };
 
 
+/*!
+Converter class that makes use of OpenMP parallelisation
+when calculating statistics and histograms
+*/
 class FastConverter : public Converter {
 public:
     FastConverter(std::string inputFileName, std::string outputFileName, bool progress);
@@ -74,6 +78,10 @@ class FastConverter : public Converter {
 };
 
 
+/*!
+Converter class that has minimmal memory overhead but has no parallelisation 
+when calculating statistics and histograms
+*/
 class SlowConverter : public Converter {
 public:
     SlowConverter(std::string inputFileName, std::string outputFileName, bool progress);
diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h
index c21a29c..e769875 100644
--- a/HDF5Wrapper.h
+++ b/HDF5Wrapper.h
@@ -251,6 +251,10 @@ class H5OutputFile
     ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL
     herr_t create_link(std::string orgname, std::string linkname, bool ihard = true);
 
+    ///\todo add a write_to_dataset which is similar to below but provide the
+    ///dataset id. Does require storing the data space somewhere to close
+    ///it elsewhere.
+
     /// write 1D data sets of string or input data with defined data type
     /// without hyperslab selection
     void write_dataset(std::string name, hsize_t len, std::string data,

From 3e753dc8c16e1aa7e2f788d80cbabed41e005d41 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Tue, 4 Aug 2020 10:28:44 +0800
Subject: [PATCH 07/13] Update to HDF5 Wrapper

---
 HDF5Wrapper.cc | 248 +++++++++++++++--------------------
 HDF5Wrapper.h  | 350 +++++++++++++++++++++----------------------------
 2 files changed, 256 insertions(+), 342 deletions(-)

diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
index ca1b3ae..c65f77c 100644
--- a/HDF5Wrapper.cc
+++ b/HDF5Wrapper.cc
@@ -183,6 +183,73 @@ void H5OutputFile::_set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite,
 
 #endif
 
+void H5OutputFile::_set_chunks(std::vector<hsize_t> &chunks,
+    int rank, hsize_t *dims,
+#ifdef USEPARALLELHDF
+    std::vector<hsize_t> &mpi_hdf_dims_tot,
+#endif
+    bool flag_parallel
+)
+{
+    // check if data set has non-zero size and if it large
+    auto nonzero_size = true;
+    auto large_dataset = true;
+    for(auto i=0; i<rank; i++)
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel)
+            if(mpi_hdf_dims_tot[i]==0) nonzero_size = false;
+            if(mpi_hdf_dims_tot[i] < HDFOUTPUTCHUNKSIZE) large_dataset = false;
+        }
+        else {
+            if(dims[i]==0) nonzero_size = false;
+            if(dims[i] < HDFOUTPUTCHUNKSIZE) large_dataset = false;
+        }
+#else
+        if(dims[i]==0) nonzero_size = false;
+        if(dims[i] < HDFOUTPUTCHUNKSIZE) large_dataset = false;
+#endif
+    }
+    if (!nonzero_size) {
+        chunks.clear();
+        return;
+    }
+
+    // now if chunks is empty and have large data set, set chunks to
+    // the suggested chunk size
+    if(nonzero_size && large_dataset && chunks.empty())
+    {
+        chunks.resize(rank, HDFOUTPUTCHUNKSIZE);
+    }
+    // Otherwise, set passed chunk size to min of extend if above
+    else if(nonzero_size && !chunks.empty())
+    {
+#ifdef USEPARALLELHDF
+        if (flag_parallel) {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], mpi_hdf_dims_tot[i]);
+        }
+        else {
+            for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], dims[i]);
+        }
+#else
+        for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], dims[i]);
+#endif
+    }
+}
+
+hid_t H5OutputFile::_set_compression(int rank, std::vector<hsize_t> &chunks)
+{
+#ifdef USEHDFCOMPRESSION
+    if (chunks.empty()) return H5P_DEFAULT;
+    hid_t prop_id = H5Pcreate(H5P_DATASET_CREATE);
+    H5Pset_layout(prop_id, H5D_CHUNKED);
+    H5Pset_chunk(prop_id, rank, chunks.data());
+    H5Pset_deflate(prop_id, HDFDEFLATE);
+#else
+    return H5P_DEFAULT;
+#endif
+}
+
 std::vector<std::string> H5OutputFile::_tokenize(const std::string &s)
 {
     std::string delims("/");
@@ -442,6 +509,7 @@ bool H5OutputFile::exists_dataset(const std::string &parent, const std::string &
 /// create a dataset
 hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id,
   std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims,
+  bool flag_closedataset,
   bool flag_parallel, bool flag_hyperslab, bool flag_collective)
 {
 #ifdef USEPARALLELHDF
@@ -455,57 +523,31 @@ hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id,
     hid_t memspace_id, dspace_id, dset_id, prop_id = H5P_DEFAULT;
     auto rank = dims.size();
     std::vector<hsize_t> chunks = chunkDims;
+    std::vector<hid_t> ids;
 
-    /// not certain what to do with the open groups here.
     for (auto& groupname : splitPath)
     {
-      if (H5Lexists(curr_id, groupname.c_str(), H5P_DEFAULT) == 0)
-      {
-        curr_id = create_group(groupname);
-      }
-      else
-      {
-        curr_id = open_group(groupname);
-      }
+      if (H5Lexists(curr_id, groupname.c_str(), H5P_DEFAULT) == 0) curr_id = create_group(groupname);
+      else curr_id = open_group(groupname);
+      ids.push_back(curr_id);
     }
 
 #ifdef USEPARALLELHDF
     std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
-    if (flag_parallel) _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_first_dim_parallel);
+    _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel);
 #endif
 
     // Determine if going to compress data in chunks
     // Only chunk non-zero size datasets
-    int nonzero_size = 1;
-    for(int i=0; i<rank; i++)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
-        }
-        else {
-            if(dims[i]==0) nonzero_size = 0;
-        }
-#else
-        if(dims[i]==0) nonzero_size = 0;
-#endif
-    }
-    // Only chunk datasets where we would have >1 chunk
-    if(nonzero_size && !chunks.empty())
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], mpi_hdf_dims_tot[i]);
-        }
-        else {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], dims[i]);
-        }
-#else
-        for(auto i=0; i<rank; i++) chunks[i] = std::min(chunks[i], dims[i]);
-#endif
-    }
-
-    // Create the dataspace
+    _set_chunks(chunks, dims,
+    #ifdef USEPARALLELHDF
+        mpi_hdf_dims_tot,
+    #endif
+        flag_parallel
+    );
+
+    // Create the dataspace where the data space might have a hyperslab
+    // selection if using parallel hdf5
 #ifdef USEPARALLELHDF
     _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
 #else
@@ -514,16 +556,20 @@ hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id,
 #endif
 
 #ifdef USEHDFCOMPRESSION
-    if (!chunk.empty()) {
-      prop_id = H5Pcreate(H5P_DATASET_CREATE);
-      H5Pset_layout(prop_id, H5D_CHUNKED);
-      H5Pset_chunk(prop_id, rank, chunks.data());
-      H5Pset_deflate(prop_id, HDFDEFLATE);
-    }
+    prop_id = _set_compression(rank, chunks);
 #endif
 
     dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id,
-      H5P_DEFAULT, prop_id, H5P_DEFAULT);
+        H5P_DEFAULT, prop_id, H5P_DEFAULT);
+    H5Pclose(prop_id);
+
+    if (flag_closedataset)
+    {
+        H5Sclose(dspace_id);
+        H5Dclose(dset_id);
+        reverse(ids.begin(),ids.end());
+        close_hdf_ids(ids);
+    }
     return dset_id;
 }
 
@@ -630,48 +676,12 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
 
     // Determine if going to compress data in chunks
     // Only chunk non-zero size datasets
-    int nonzero_size = 1;
-    for(int i=0; i<rank; i++)
-    {
+    _set_chunks(chunks, rank, dims,
 #ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
-        }
-        else {
-            if(dims[i]==0) nonzero_size = 0;
-        }
-#else
-        if(dims[i]==0) nonzero_size = 0;
+        mpi_hdf_dims_tot,
 #endif
-    }
-    // Only chunk datasets where we would have >1 chunk
-    unsigned int large_dataset = 0;
-    for(int i=0; i<rank; i++)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-        }
-        else {
-            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-        }
-#else
-        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-#endif
-    }
-    if(nonzero_size && large_dataset)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
-        }
-        else {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
-        }
-#else
-        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
-#endif
-    }
+        flag_parallel
+    );
 
     // Create the dataspace
 #ifdef USEPARALLELHDF
@@ -684,14 +694,7 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
     // Dataset creation properties
     prop_id = H5P_DEFAULT;
 #ifdef USEHDFCOMPRESSION
-    // this defines compression
-    if(nonzero_size && large_dataset)
-    {
-        prop_id = H5Pcreate(H5P_DATASET_CREATE);
-        H5Pset_layout(prop_id, H5D_CHUNKED);
-        H5Pset_chunk(prop_id, rank, chunks.data());
-        H5Pset_deflate(prop_id, HDFDEFLATE);
-    }
+    prop_id = _set_compression(rank, chunks);
 #endif
 
     // Create the dataset
@@ -766,48 +769,12 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
 
     // Determine if going to compress data in chunks
     // Only chunk non-zero size datasets
-    int nonzero_size = 1;
-    for(int i=0; i<rank; i++)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
-        }
-        else {
-            if(dims[i]==0) nonzero_size = 0;
-        }
-#else
-        if(dims[i]==0) nonzero_size = 0;
-#endif
-    }
-    // Only chunk datasets where we would have >1 chunk
-    unsigned int large_dataset = 0;
-    for(int i=0; i<rank; i++)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-        }
-        else {
-            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-        }
-#else
-        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-#endif
-    }
-    if(nonzero_size && large_dataset)
-    {
+    _set_chunks(chunks, rank, dims,
 #ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
-        }
-        else {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
-        }
-#else
-        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
+        mpi_hdf_dims_tot,
 #endif
-    }
+        flag_parallel
+    );
 
     // Create the dataspace
 // #ifdef USEPARALLELHDF
@@ -820,16 +787,8 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
     // Dataset creation properties
     prop_id = H5P_DEFAULT;
 #ifdef USEHDFCOMPRESSION
-    // this defines compression
-    if(nonzero_size && large_dataset)
-    {
-        prop_id = H5Pcreate(H5P_DATASET_CREATE);
-        H5Pset_layout(prop_id, H5D_CHUNKED);
-        H5Pset_chunk(prop_id, rank, chunks.data());
-        H5Pset_deflate(prop_id, HDFDEFLATE);
-    }
+    prop_id = _set_compression(rank, chunks);
 #endif
-
     // Create the dataset
     dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
         H5P_DEFAULT, prop_id, H5P_DEFAULT);
@@ -854,7 +813,6 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
         ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
         if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
     }
-
     // Clean up (note that dtype_id is NOT a new object so don't need to close it)
     H5Pclose(prop_id);
 #ifdef USEPARALLELHDF
diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h
index e769875..bbe3901 100644
--- a/HDF5Wrapper.h
+++ b/HDF5Wrapper.h
@@ -84,6 +84,31 @@ class H5OutputFile
         bool flag_parallel, bool flag_collective, bool flag_hyperslab)
 #endif
 
+    /// set chunks size for a dataset
+    void _set_chunks(std::vector<hsize_t> &chunks,
+        int rank, hsize_t *dims,
+    #ifdef USEPARALLELHDF
+        std::vector<hsize_t> &mpi_hdf_dims_tot,
+    #endif
+        bool flag_parallel
+    );
+    void _set_chunks(std::vector<hsize_t> &chunks,
+        std::vector<hsize_t> &dims,
+#ifdef USEPARALLELHDF
+        std::vector<hsize_t> &mpi_hdf_dims_tot,
+#endif
+        bool flag_parallel
+    )
+    {
+        _set_chunks(chunks, dims.size(), dims.data(),
+#ifdef USEPARALLELHDF
+            mpi_hdf_dims_tot,
+#endif
+            flag_parallel
+        );
+    }
+    hid_t _set_compression(int rank, std::vector<hsize_t> &chunks);
+
     /// tokenize a path given an input string
     std::vector<std::string> _tokenize(const std::string &s);
 
@@ -188,9 +213,9 @@ class H5OutputFile
         hid_t dspace_id = H5Screate_simple(rank, dims, NULL);
         return dspace_id;
     }
-    /// close data set
+    /// close data space
     herr_t close_dataspace(hid_t dspace_id) {
-        herr_t status = H5Dclose(dspace_id);
+        herr_t status = H5Sclose(dspace_id);
         return status;
     }
 
@@ -210,30 +235,38 @@ class H5OutputFile
           H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
         return dset_id;
     }
-
+    /// create data set and return hid
+    /// and possibly close all groups, data spaces and data set itself after creation
     template <typename T> hid_t create_dataset(std::string path, std::string name, T&data,
         std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
+        bool flag_closedataset = true,
         bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
     {
         return create_dataset(path+"/"+name, hdf5_type(T{}), dims, chunkDims,
+            flag_closedataset,
             flag_parallel, flag_hyperslab, flag_collective);
     }
     hid_t create_dataset(std::string path, std::string name, hid_t datatype,
         std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
+        bool flag_closedataset = true,
         bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
     {
         return create_dataset(path+"/"+name, datatype, dims, chunkDims,
+            flag_closedataset,
             flag_parallel, flag_hyperslab, flag_collective);
     }
     template <typename T> hid_t create_dataset(std::string fullname, T&data,
         std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
+        bool flag_closedataset = true,
         bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
     {
         return create_dataset(fullname, hdf5_type(T{}), dims, chunkDims,
+            flag_closedataset,
             flag_parallel, flag_hyperslab, flag_collective);
     }
     hid_t create_dataset(std::string fullname, hid_t datatype,
       std::vector<hsize_t> dims, std::vector<hsize_t> chunkDims = std::vector<hsize_t>(0),
+      bool flag_closedataset = true,
       bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true);
 
     /// close data set
@@ -251,164 +284,140 @@ class H5OutputFile
     ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL
     herr_t create_link(std::string orgname, std::string linkname, bool ihard = true);
 
-    ///\todo add a write_to_dataset which is similar to below but provide the
-    ///dataset id. Does require storing the data space somewhere to close
-    ///it elsewhere.
-
-    /// write 1D data sets of string or input data with defined data type
-    /// without hyperslab selection
-    void write_dataset(std::string name, hsize_t len, std::string data,
+    /// writes to an existing data set
+    /// with a hyperslab selection defined by count, start
+    void write_to_dataset(std::string name, hsize_t len, std::string data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
         bool flag_parallel = true, bool flag_collective = true);
-    void write_dataset(std::string name, hsize_t len, void *data,
+    void write_to_dataset(std::string name, hsize_t len, void *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
         hid_t memtype_id=-1, hid_t filetype_id=-1,
         bool flag_parallel = true, bool flag_first_dim_parallel = true,
         bool flag_hyperslab = true, bool flag_collective = true);
-    /// with a hyperslab selection defined by count, start
+
+    /// write 1D data sets of string or input data with defined data type
+    /// without hyperslab selection, creates dataset, writes to it and closes
     void write_dataset(std::string name, hsize_t len, std::string data,
-        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
         bool flag_parallel = true, bool flag_collective = true);
     void write_dataset(std::string name, hsize_t len, void *data,
-        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
         hid_t memtype_id=-1, hid_t filetype_id=-1,
         bool flag_parallel = true, bool flag_first_dim_parallel = true,
         bool flag_hyperslab = true, bool flag_collective = true);
-
-  /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
-  /// the input data if not explicitly specified with the filetype_id parameter.
-  /// template function so defined here
-  template <typename T> void write_dataset(std::string name, hsize_t len, T *data,
-     hid_t memtype_id = -1, hid_t filetype_id=-1,
-     bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
-  {
-     int rank = 1;
-     hsize_t dims[1] = {len};
-     if (memtype_id == -1) memtype_id = hdf5_type(T{});
-     write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective);
-  }
-  /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
-  /// the input data if not explicitly specified with the filetype_id parameter.
-  template <typename T> void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
-      hid_t memtype_id = -1, hid_t filetype_id = -1,
-      bool flag_parallel = true, bool flag_first_dim_parallel = true,
-      bool flag_hyperslab = true, bool flag_collective = true)
-  {
-#ifdef USEPARALLELHDF
-    MPI_Comm comm = mpi_comm_write;
-    MPI_Info info = MPI_INFO_NULL;
-#endif
-    hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
-    std::vector<hsize_t> chunks(rank);
-
-    // Get HDF5 data type of the array in memory
-    if (memtype_id == -1) memtype_id = hdf5_type(T{});
-
-    // Determine type of the dataset to create
-    if(filetype_id < 0) filetype_id = memtype_id;
-
-#ifdef USEPARALLELHDF
-    std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
-    _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel);
-#endif
-
-    // Determine if going to compress data in chunks
-    // Only chunk non-zero size datasets
-    int nonzero_size = 1;
-    for(int i=0; i<rank; i++)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
-        }
-        else {
-            if(dims[i]==0) nonzero_size = 0;
-        }
-#else
-        if(dims[i]==0) nonzero_size = 0;
-#endif
-    }
-    // Only chunk datasets where we would have >1 chunk
-    int large_dataset = 0;
-    for(int i=0; i<rank; i++)
+    /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
+    /// the input data if not explicitly specified with the filetype_id parameter.
+    /// template function so defined here
+    template <typename T> void write_dataset(std::string name, hsize_t len, T *data,
+        hid_t memtype_id = -1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
     {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-        }
-        else {
-            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-        }
-#else
-        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-#endif
+        int rank = 1;
+        hsize_t dims[1] = {len};
+        if (memtype_id == -1) memtype_id = hdf5_type(T{});
+        write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective);
     }
-    if(nonzero_size && large_dataset)
+    /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
+    /// the input data if not explicitly specified with the filetype_id parameter.
+    template <typename T> void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
+        hid_t memtype_id = -1, hid_t filetype_id = -1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true)
     {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
-        }
-        else {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
-        }
-#else
-        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
-#endif
+        memtype_id = hdf5_type(T{});
+        write_dataset_nd(name, rank, dims, (void*)data,
+            memtype_id, filetype_id,
+            flag_parallel, flag_first_dim_parallel,
+            flag_hyperslab, flag_collective);
+// #ifdef USEPARALLELHDF
+//         MPI_Comm comm = mpi_comm_write;
+//         MPI_Info info = MPI_INFO_NULL;
+// #endif
+//         hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
+//         std::vector<hsize_t> chunks;
+//
+//         // Get HDF5 data type of the array in memory
+//         if (memtype_id == -1) memtype_id = hdf5_type(T{});
+//
+//         // Determine type of the dataset to create
+//         if(filetype_id < 0) filetype_id = memtype_id;
+//
+// #ifdef USEPARALLELHDF
+//         std::vector<unsigned long long> mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank);
+//         _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel);
+// #endif
+//
+//         // Determine if going to compress data in chunks
+//         // Only chunk non-zero size datasets
+//         _set_chunks(chunks, rank, dims,
+// #ifdef USEPARALLELHDF
+//             mpi_hdf_dims_tot,
+// #endif
+//             flag_parallel
+//         );
+//
+//         // Create the dataspace
+// #ifdef USEPARALLELHDF
+//         _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
+// #else
+//         dspace_id = H5Screate_simple(rank, dims, NULL);
+//         memspace_id = dspace_id;
+// #endif
+//
+//         // Dataset creation properties
+//         prop_id = H5P_DEFAULT;
+// #ifdef USEHDFCOMPRESSION
+//         prop_id = _set_compression(rank, chunks);
+// #endif
+//         // Create the dataset
+//         dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
+//             H5P_DEFAULT, prop_id, H5P_DEFAULT);
+//         if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name);
+//         H5Pclose(prop_id);
+//
+//         prop_id = H5P_DEFAULT;
+//         bool iwrite = (dims[0] > 0);
+// #ifdef USEPARALLELHDF
+//         _set_mpi_dataset_properties(prop_id, iwrite,
+//             dspace_id, memspace_id,
+//             rank, dims, dims_offset,
+//             flag_parallel, flag_collective, flag_hyperslab);
+// #endif
+//         if (iwrite) {
+//             ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+//             if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+//         }
+//         // Clean up (note that dtype_id is NOT a new object so don't need to close it)
+//         H5Pclose(prop_id);
+// #ifdef USEPARALLELHDF
+//         if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
+// #endif
+//         H5Sclose(dspace_id);
+//         H5Dclose(dset_id);
     }
-
-    // Create the dataspace
-#ifdef USEPARALLELHDF
-    _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
-#else
-    dspace_id = H5Screate_simple(rank, dims, NULL);
-    memspace_id = dspace_id;
-#endif
-
-    // Dataset creation properties
-    prop_id = H5P_DEFAULT;
-#ifdef USEHDFCOMPRESSION
-    // this defines compression
-    if(nonzero_size && large_dataset)
+    template <typename T> void write_dataset_nd(std::string name, std::vector<hsize_t> dims, T *data,
+        hid_t memtype_id = -1, hid_t filetype_id = -1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true)
     {
-        prop_id = H5Pcreate(H5P_DATASET_CREATE);
-        H5Pset_layout(prop_id, H5D_CHUNKED);
-        H5Pset_chunk(prop_id, rank, chunks.data());
-        H5Pset_deflate(prop_id, HDFDEFLATE);
-    }
-#endif
-    // Create the dataset
-    dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,
-        H5P_DEFAULT, prop_id, H5P_DEFAULT);
-    if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name);
-    H5Pclose(prop_id);
-
-    prop_id = H5P_DEFAULT;
-    bool iwrite = (dims[0] > 0);
-#ifdef USEPARALLELHDF
-    _set_mpi_dataset_properties(prop_id, iwrite,
-        dspace_id, memspace_id,
-        rank, dims, dims_offset,
-        flag_parallel, flag_collective, flag_hyperslab);
-#endif
-    if (iwrite) {
-        ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
-        if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+        write_dataset_nd(name, dims.size(), dims.data(), data,
+            memtype_id, filetype_id,
+            flag_parallel, flag_first_dim_parallel,
+            flag_hyperslab, flag_collective);
     }
-
-    // Clean up (note that dtype_id is NOT a new object so don't need to close it)
-    H5Pclose(prop_id);
-#ifdef USEPARALLELHDF
-    if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id);
-#endif
-    H5Sclose(dspace_id);
-    H5Dclose(dset_id);
-  }
-
     //write dataset with hyperslab selection
     void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
         hid_t memtype_id = -1, hid_t filetype_id=-1,
         bool flag_parallel = true, bool flag_first_dim_parallel = true,
         bool flag_hyperslab = true, bool flag_collective = true);
 
+    /// with a hyperslab selection defined by count, start
+    void write_dataset(std::string name, hsize_t len, std::string data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        bool flag_parallel = true, bool flag_collective = true);
+    void write_dataset(std::string name, hsize_t len, void *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id=-1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true);
     /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of
     /// the input data if not explicitly specified with the filetype_id parameter.
     /// template function so defined here
@@ -424,16 +433,6 @@ class H5OutputFile
             count, start, memtype_id, filetype_id,
             flag_parallel, flag_hyperslab, flag_collective);
     }
-    template <typename T> void write_dataset_nd(std::string name, std::vector<hsize_t> dims, T *data,
-        hid_t memtype_id = -1, hid_t filetype_id = -1,
-        bool flag_parallel = true, bool flag_first_dim_parallel = true,
-        bool flag_hyperslab = true, bool flag_collective = true)
-    {
-        write_dataset_nd(name, dims.size(), dims.data(), data,
-            memtype_id, filetype_id,
-            flag_parallel, flag_first_dim_parallel,
-            flag_hyperslab, flag_collective);
-    }
 
     /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
     /// the input data if not explicitly specified with the filetype_id parameter.
@@ -475,48 +474,12 @@ class H5OutputFile
 
     // Determine if going to compress data in chunks
     // Only chunk non-zero size datasets
-    int nonzero_size = 1;
-    for(int i=0; i<rank; i++)
-    {
+    _set_chunks(chunks, rank, dims,
 #ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i]==0) nonzero_size = 0;
-        }
-        else {
-            if(dims[i]==0) nonzero_size = 0;
-        }
-#else
-        if(dims[i]==0) nonzero_size = 0;
+        mpi_hdf_dims_tot,
 #endif
-    }
-    // Only chunk datasets where we would have >1 chunk
-    int large_dataset = 0;
-    for(int i=0; i<rank; i++)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-        }
-        else {
-            if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-        }
-#else
-        if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1;
-#endif
-    }
-    if(nonzero_size && large_dataset)
-    {
-#ifdef USEPARALLELHDF
-        if (flag_parallel) {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, mpi_hdf_dims_tot[i]);
-        }
-        else {
-            for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
-        }
-#else
-        for(auto i=0; i<rank; i++) chunks[i] = std::min((hsize_t) HDFOUTPUTCHUNKSIZE, dims[i]);
-#endif
-    }
+        flag_parallel
+    );
 
     // Create the dataspace
 #ifdef USEPARALLELHDF
@@ -529,14 +492,7 @@ class H5OutputFile
     // Dataset creation properties
     prop_id = H5P_DEFAULT;
 #ifdef USEHDFCOMPRESSION
-    // this defines compression
-    if(nonzero_size && large_dataset)
-    {
-        prop_id = H5Pcreate(H5P_DATASET_CREATE);
-        H5Pset_layout(prop_id, H5D_CHUNKED);
-        H5Pset_chunk(prop_id, rank, chunks.data());
-        H5Pset_deflate(prop_id, HDFDEFLATE);
-    }
+    prop_id = _set_compression(rank, chunks);
 #endif
     // Create the dataset
     dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id,

From 5f5d281d8ed446622cdd7e8eda9bc44743542dc8 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Tue, 4 Aug 2020 11:21:24 +0800
Subject: [PATCH 08/13] Update to hdf include

---
 common.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/common.h b/common.h
index dfc21d6..3cc0b2f 100644
--- a/common.h
+++ b/common.h
@@ -14,7 +14,8 @@
 #include <iterator>
 #include <cstring>
 
-#include <H5Cpp.h>
+// #include <H5Cpp.h>
+#include <hdf5.h>
 #include <fitsio.h>
 
 #define SCHEMA_VERSION "0.3"

From 7cbc69b7c416bfbf3214a1b21ef476fa7f4f1a18 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Tue, 4 Aug 2020 11:21:54 +0800
Subject: [PATCH 09/13] Update to cmake

Added report, allowed possibility of parallel compressed hdf5
---
 CMakeLists.txt | 129 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 104 insertions(+), 25 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9179d76..8b73644 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,37 +5,72 @@ set(default_build_type "Release")
 set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
 set(CMAKE_CXX_STANDARD 11)
 
-# macro(find_hdf5)
-# 	find_package(HDF5 REQUIRED COMPONENTS C)
-# 	if (HDF5_FOUND)
-# 		include_directories(${HDF5_INCLUDE_DIRS})
-# 		if (HDF5_IS_PARALLEL HDF5_CONVERTER_ALLOWPARALLELHDF5)
-# 			set (ENV{HDF5_PREFER_PARALLEL} true)
-# 			set(HDF5_CONVERTER_HAS_PARALLEL_HDF5 Yes)
-# 			list(APPEND HDF5_CONVERTER_DEFINES USEPARALLELHDF)
-# 			if (HDF5_VERSION VERSION_GREATER "1.10.0" AND HDF5_CONVERTER_ALLOWCOMPRESSIONPARALLELHDF5)
-# 				set(HDF5_CONVERTER_HAS_COMPRESSED_HDF5 Yes)
-# 				list(APPEND HDF5_CONVERTER_DEFINES USEHDFCOMPRESSION)
-# 				list(APPEND HDF5_CONVERTER_DEFINES PARALLELCOMPRESSIONACTIVE)
-# 			endif()
-# 		else()
-# 			if (HDF5_CONVERTER_ALLOWCOMPRESSIONHDF5)
-# 				set(HDF5_CONVERTER_HAS_COMPRESSED_HDF5 Yes)
-# 				list(APPEND HDF5_CONVERTER_DEFINES USEHDFCOMPRESSION)
-# 			endif()
-# 		endif()
-#     endif()
-# endmacro()
-
-FIND_PACKAGE(CFITSIO 3.000 REQUIRED)
-FIND_PACKAGE(HDF5 REQUIRED COMPONENTS C)
-INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR})
+#
+# Options users can give on the command line via -D
+#
+macro(hdfconvert_option optname optdesc status)
+	option(HDFCONVERT_${optname} "${optdesc}" "${status}")
+endmacro()
+
+# set default options
+hdfconvert_option(USEMPI "Use MPI" OFF)
+hdfconvert_option(USEOPENMP "Use OpenMP" ON)
+hdfconvert_option(ALLOWCOMPRESSIONHDF5 "Attempt to include HDF5 compression support " ON)
+hdfconvert_option(ALLOWPARALLELHDF5 "Attempt to include parallel HDF5 support " ON)
+hdfconvert_option(ALLOWCOMPRESSIONPARALLELHDF5 "Attempt to include parallel HDF5 compression support " OFF)
+
+# find hdf5 macro where flags set if parallel, if particular version, if compression
+macro(find_hdf5)
+	find_package(HDF5 REQUIRED COMPONENTS C)
+	if (HDF5_FOUND)
+		include_directories(${HDF5_INCLUDE_DIRS})
+		if (HDF5_IS_PARALLEL AND HDFCONVERT_ALLOWPARALLELHDF5)
+			set (ENV{HDF5_PREFER_PARALLEL} true)
+			set(HDFCONVERT_HAS_PARALLEL_HDF5 Yes)
+			list(APPEND HDFCONVERT_DEFINES USEPARALLELHDF)
+			if (HDF5_VERSION VERSION_GREATER "1.10.0" AND HDFCONVERT_ALLOWCOMPRESSIONPARALLELHDF5)
+				set(HDFCONVERT_HAS_COMPRESSED_HDF5 Yes)
+				list(APPEND HDFCONVERT_DEFINES USEHDFCOMPRESSION)
+				list(APPEND HDFCONVERT_DEFINES PARALLELCOMPRESSIONACTIVE)
+			endif()
+		else()
+			if (HDFCONVERT_ALLOWCOMPRESSIONHDF5)
+				set(HDFCONVERT_HAS_COMPRESSED_HDF5 Yes)
+				list(APPEND HDFCONVERT_DEFINES USEHDFCOMPRESSION)
+			endif()
+		endif()
+    endif()
+endmacro()
+
+#
+# How we find MPI and set it up
+#
+macro(find_mpi)
+	find_package(MPI)
+	if (MPI_FOUND)
+        include_directories(${MPI_CXX_INCLUDE_PATH})
+		list(APPEND HDF5_CONVERT_DEFINES USEMPI)
+		set(HDFCONVERT_HAS_MPI Yes)
+	endif()
+endmacro()
+
+find_package(CFITSIO 3.000 REQUIRED)
+set(HDFCONVERT_HAS_OPENMP No)
 find_package(OpenMP)
+set(HDFCONVERT_HAS_COMPRESSED_HDF5 No)
+set(HDFCONVERT_HAS_PARALLEL_HDF5 No)
+find_hdf5()
+set(HDFCONVERT_HAS_MPI No)
+if (HDFCONVERT_MPI)
+	find_mpi()
+endif()
+
 
 if (OPENMP_FOUND)
     set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
     set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+    set (HDFCONVERT_USEOPENMP)
 endif()
 
 if (Verbose)
@@ -46,6 +81,50 @@ if (Timer)
     ADD_DEFINITIONS(-D_TIMER_)
 endif()
 
+#
+# Tell the world what what we are doing
+#
+macro(hdf_convert_report feature)
+
+	# Output feature name and underscore it in the next line
+	message("\n${feature}")
+	string(REGEX REPLACE "." "-" _underscores ${feature})
+	message("${_underscores}\n")
+
+	set(_args "${ARGN}")
+	list(LENGTH _args _nargs)
+	math(EXPR _nargs "${_nargs} - 1")
+	foreach(_idx RANGE 0 ${_nargs} 2)
+
+		# Items in the list come with a message first, then the variable name
+		list(GET _args ${_idx} _msg)
+		math(EXPR _idx2 "${_idx} + 1")
+		list(GET _args ${_idx2} _varname)
+
+		# We try to keep things up to 80 cols
+		string(LENGTH ${_msg} _len)
+		math(EXPR _nspaces "75 - ${_len}")
+		string(RANDOM LENGTH ${_nspaces} _spaces)
+		string(REGEX REPLACE "." " " _spaces "${_spaces}")
+		string(CONCAT _msg "${_msg}" ${_spaces})
+		message(" ${_msg} ${HDFCONVERT_HAS_${_varname}}")
+	endforeach()
+endmacro()
+
+message("\nHDF Converter successfully configured with the following settings:")
+hdf_convert_report("HDF5 Settings"
+    "Compressed HDF5" COMPRESSED_HDF5
+    "Parallel HDF5" PARALLEL_HDF5
+)
+if (HDFCONVERT_HAS_COMPRESSED_HDF5 AND HDFCONVERT_HAS_PARALLEL_HDF5)
+	message("\n WARNING: Parallel Compression HDF5 active, use with caution as it is unstable!\n")
+endif()
+hdf_convert_report("Parallel settings"
+    "OpenMP" OPENMP
+    "MPI" MPI
+)
+
+
 set(LINK_LIBS ${LINK_LIBS} cfitsio ${HDF5_LIBRARIES})
 
 set(SOURCE_FILES

From 14a956cd8c613f585a644e8ce087348d1280daf3 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Tue, 4 Aug 2020 11:40:41 +0800
Subject: [PATCH 10/13] Update for HDF5 1.12 compatibility

---
 HDF5Wrapper.cc | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
index c65f77c..4fe91e7 100644
--- a/HDF5Wrapper.cc
+++ b/HDF5Wrapper.cc
@@ -291,7 +291,13 @@ void H5OutputFile::_get_attribute(std::vector<hid_t> &ids, const std::vector<std
         //otherwise enter group and recursively call funciotn
         H5O_info_t object_info;
         hid_t newid;
-        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT);
+        unsigned int fields = H5O_INFO_ALL;
+        hid_t lapl_id = H5P_DEFAULT;
+#if H5_VERSION_GE(1,12,0)
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id);
+#else
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id);
+#endif
         if (object_info.type == H5O_TYPE_GROUP) {
             newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT);
         }
@@ -334,7 +340,13 @@ void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::vector<std::
         //otherwise enter group and recursively call funciotn
         H5O_info_t object_info;
         hid_t newid;
-        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT);
+        unsigned int fields = H5O_INFO_ALL;
+        hid_t lapl_id = H5P_DEFAULT;
+#if H5_VERSION_GE(1,12,0)
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id);
+#else
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id);
+#endif
         if (object_info.type == H5O_TYPE_GROUP) {
             newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT);
         }
@@ -377,7 +389,13 @@ void H5OutputFile::_get_hdf5_id(std::vector<hid_t> &ids, const std::vector<std::
         //otherwise enter group and recursively call funciotn
         H5O_info_t object_info;
         hid_t newid;
-        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT);
+        unsigned int fields = H5O_INFO_ALL;
+        hid_t lapl_id = H5P_DEFAULT;
+#if H5_VERSION_GE(1,12,0)
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id);
+#else
+        H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id);
+#endif
         if (object_info.type == H5O_TYPE_GROUP) {
             newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT);
         }
@@ -425,7 +443,12 @@ void H5OutputFile::close_hdf_ids(std::vector<hid_t> &ids)
     H5O_info_t object_info;
     for (auto &id:ids)
     {
+        hid_t lapl_id = H5P_DEFAULT;
+#if H5_VERSION_GE(1,12,0)
+        H5Oget_info(id, &object_info, lapl_id);
+#else
         H5Oget_info(id, &object_info);
+#endif
         if (object_info.type == H5O_TYPE_GROUP) {
             H5Gclose(id);
         }

From 680ded823d2b61bd6c2abd10b572f967d906c077 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Tue, 4 Aug 2020 12:27:33 +0800
Subject: [PATCH 11/13] Update to HDF5 Wrapper

Added functionality to write to existing data set
---
 HDF5Wrapper.cc | 80 +++++++++++++++++++++++++++++++++-----------------
 HDF5Wrapper.h  | 54 +++++++++++++++++++++++++++++++---
 2 files changed, 103 insertions(+), 31 deletions(-)

diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
index 4fe91e7..bc0e3fb 100644
--- a/HDF5Wrapper.cc
+++ b/HDF5Wrapper.cc
@@ -139,24 +139,16 @@ void H5OutputFile::_set_mpi_hyperslab(hid_t &dspace_id, hid_t &memspace_id,
     std::vector<unsigned long long> &mpi_hdf_dims_tot,
     bool flag_parallel, bool flag_hyperslab)
 {
-    if (flag_parallel) {
-        //then all threads create the same simple data space
-        //so the meta information is the same
-        if (flag_hyperslab) {
-            //allocate the space spanning the file
-            dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
-            //allocate the memory space
-            //allocate the memory space
-            memspace_id = H5Screate_simple(rank, dims.data(), NULL);
-        }
-        else {
-            dspace_id = H5Screate_simple(rank, dims.data(), NULL);
-            memspace_id = dspace_id;
-        }
-    }
-    else {
-        dspace_id = H5Screate_simple(rank, dims.data(), NULL);
-        memspace_id = dspace_id;
+    // if parallel and doing parallel hyperslab
+    // then all threads create the same simple data space
+    // so the meta information is the same
+    if (flag_parallel && flag_hyperslab) {
+        /// close the previously allocated space
+        H5Sclose(dspace_id);
+        //allocate the space spanning the file
+        dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL);
+        //allocate the memory space
+        memspace_id = H5Screate_simple(rank, dims.data(), NULL);
     }
 }
 
@@ -571,11 +563,10 @@ hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id,
 
     // Create the dataspace where the data space might have a hyperslab
     // selection if using parallel hdf5
-#ifdef USEPARALLELHDF
-    _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
-#else
     dspace_id = H5Screate_simple(rank, dims.data(), NULL);
     memspace_id = dspace_id;
+#ifdef USEPARALLELHDF
+    _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
 #endif
 
 #ifdef USEHDFCOMPRESSION
@@ -613,6 +604,42 @@ herr_t H5OutputFile::create_link(std::string orgname, std::string linkname, bool
     }
 }
 
+//write dataset with hyperslab selection
+void H5OutputFile::write_to_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
+    const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+    hid_t memtype_id, hid_t filetype_id,
+    bool flag_parallel, bool flag_first_dim_parallel,
+    bool flag_hyperslab, bool flag_collective)
+{
+    // Open the dataset
+    hid_t dspace_id, memspace_id, prop_id, dset_id;
+    herr_t ret;
+    prop_id = H5P_DEFAULT;
+    dset_id = H5Dopen(file_id, name.c_str(), H5P_DEFAULT);
+    // create simple data space
+    dspace_id = H5Screate_simple(rank, dims, NULL);
+    memspace_id = dspace_id;
+    ///\todo question of what to do in the case of a parallel data space regarding hyperslab
+    ///selection
+#ifdef USEPARALLELHDF
+#endif
+    ///\todo question regarding compression since data set could be created with
+    /// some compression or chunked data. Should already be set
+
+    //set local hyperslab
+    if (!count.empty() && !start.empty()) {
+        ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL);
+        if (ret < 0) io_error(std::string("Failed to select hyperslab for writing to dataset: ")+name);
+    }
+    ///\todo question of what to do in terms of mpi transfer properties
+#ifdef USEPARALLELHDF
+#endif
+    ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data);
+    if (ret < 0) io_error(std::string("Failed to write dataset: ")+name);
+    if (memspace_id != dspace_id) H5Sclose(memspace_id);
+    H5Sclose(dspace_id);
+    H5Dclose(dset_id);
+}
 
 void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data,
     bool flag_parallel, bool flag_collective)
@@ -685,6 +712,7 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
 #endif
     hid_t dspace_id, dset_id, prop_id, memspace_id, ret;
     std::vector<hsize_t> chunks(rank);
+    prop_id = H5P_DEFAULT;
     // Get HDF5 data type of the array in memory
     if (memtype_id == -1) {
         throw std::runtime_error("Write data set called with void pointer but no type info passed.");
@@ -707,11 +735,10 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
     );
 
     // Create the dataspace
-#ifdef USEPARALLELHDF
-    _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
-#else
     dspace_id = H5Screate_simple(rank, dims, NULL);
     memspace_id = dspace_id;
+#ifdef USEPARALLELHDF
+    _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
 #endif
 
     // Dataset creation properties
@@ -799,12 +826,11 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v
         flag_parallel
     );
 
+    dspace_id = H5Screate_simple(rank, dims, NULL);
+    memspace_id = dspace_id;
     // Create the dataspace
 // #ifdef USEPARALLELHDF
 //     _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab);
-// #else
-//     dspace_id = H5Screate_simple(rank, dims, NULL);
-//     memspace_id = dspace_id;
 // #endif
 
     // Dataset creation properties
diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h
index bbe3901..1856563 100644
--- a/HDF5Wrapper.h
+++ b/HDF5Wrapper.h
@@ -39,6 +39,8 @@ static inline hid_t hdf5_type_from_string(std::string dummy)
 #endif
 
 ///\name HDF class to manage writing information
+///\todo need to look into whether one can open directly with
+/// full path or must open groups explicitly. If latter, updated needed
 class H5OutputFile
 {
 
@@ -65,6 +67,7 @@ class H5OutputFile
 
     /// parallel hdf5 mpi routines
 #ifdef USEPARALLELHDF
+    /// set the offset points for the mpi write
     void _set_mpi_dim_and_offset(MPI_Comm &comm,
         hsize_t rank, std::vector<hsize_t> &dims,
         std::vector<unsigned long long> &dims_single,
@@ -73,11 +76,12 @@ class H5OutputFile
         std::vector<unsigned long long> &mpi_hdf_dims_tot,
         bool flag_parallel, bool flag_first_dim_parallel
     );
+    /// select the hyperslab
     void _set_mpi_hyperslab(hid_t &dspace_id, hid_t &memspace_id,
         hsize_t rank, std::vector<hsize_t> &dims,
         std::vector<unsigned long long> &mpi_hdf_dims_tot,
         bool flag_parallel, bool flag_hyperslab);
-
+    /// and set the transfer properties
     void _set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite,
         hid_t &dspace_id, hid_t &memspace_id,
         hsize_t rank, std::vector<hsize_t> dims, std::vector<hsize_t> dims_offset,
@@ -286,14 +290,56 @@ class H5OutputFile
 
     /// writes to an existing data set
     /// with a hyperslab selection defined by count, start
-    void write_to_dataset(std::string name, hsize_t len, std::string data,
-        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
-        bool flag_parallel = true, bool flag_collective = true);
     void write_to_dataset(std::string name, hsize_t len, void *data,
         const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
         hid_t memtype_id=-1, hid_t filetype_id=-1,
         bool flag_parallel = true, bool flag_first_dim_parallel = true,
         bool flag_hyperslab = true, bool flag_collective = true);
+    template <typename T> void write_to_dataset(std::string name, hsize_t len, T *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id = -1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true)
+    {
+        int rank = 1;
+        hsize_t dims[1] = {len};
+        if (memtype_id == -1) memtype_id = hdf5_type(T{});
+        write_to_dataset_nd(name, rank, dims, data,
+            count, start,
+            memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective);
+    }
+    /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of
+    /// the input data if not explicitly specified with the filetype_id parameter.
+    template <typename T> void write_to_dataset_nd(std::string name, int rank, hsize_t *dims, T *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id = -1, hid_t filetype_id = -1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true)
+    {
+        memtype_id = hdf5_type(T{});
+        write_to_dataset_nd(name, rank, dims, (void*)data,
+            count, start,
+            memtype_id, filetype_id,
+            flag_parallel, flag_first_dim_parallel,
+            flag_hyperslab, flag_collective);
+    }
+    template <typename T> void write_to_dataset_nd(std::string name, std::vector<hsize_t> dims, T *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id = -1, hid_t filetype_id = -1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true)
+    {
+        write_to_dataset_nd(name, dims.size(), dims.data(),(void*)data,
+            count, start,
+            memtype_id, filetype_id,
+            flag_parallel, flag_first_dim_parallel,
+            flag_hyperslab, flag_collective);
+    }
+    //write dataset with hyperslab selection
+    void write_to_dataset_nd(std::string name, int rank, hsize_t *dims, void *data,
+        const std::vector<hsize_t>& count, const std::vector<hsize_t>& start,
+        hid_t memtype_id = -1, hid_t filetype_id=-1,
+        bool flag_parallel = true, bool flag_first_dim_parallel = true,
+        bool flag_hyperslab = true, bool flag_collective = true);
 
     /// write 1D data sets of string or input data with defined data type
     /// without hyperslab selection, creates dataset, writes to it and closes

From f50eaff18cb6909b1c0ccbadc57bb0006edc4d7b Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Tue, 4 Aug 2020 13:29:34 +0800
Subject: [PATCH 12/13] Update to use new HDF5 Wrapper

---
 MipMap.cc | 13 +++++--------
 MipMap.h  |  1 +
 Stats.cc  | 23 ++++++++---------------
 Stats.h   |  1 +
 4 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/MipMap.cc b/MipMap.cc
index 0ae092a..cacb4da 100644
--- a/MipMap.cc
+++ b/MipMap.cc
@@ -30,11 +30,10 @@ void MipMap::createDataset(H5OutputFile &H5outputfile, std::string path, const s
 
     std::ostringstream mipMapName;
     mipMapName << "/MipMaps/DATA/DATA_XY_" << mip;
-    if (useChunks(datasetDims)) {
-        H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims, chunkDims);
-    } else {
-        H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims);
-    }
+    std::vector<hsize_t> chunks = chunkDims;
+    datasetfullpath = path+mipMapName.str();
+    if (~useChunks(datasetDims)) chunks.clear();
+    H5outputfile.create_dataset(datasetfullpath, H5T_NATIVE_FLOAT, datasetDims, chunks);
 }
 
 void MipMap::createBuffers(std::vector<hsize_t>& bufferDims) {
@@ -59,10 +58,8 @@ void MipMap::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t cha
     int N = datasetDims.size();
     std::vector<hsize_t> count = trimAxes({1, depth, height, width}, N);
     std::vector<hsize_t> start = trimAxes({stokesOffset, channelOffset, 0, 0}, N);
-
     // writeHdf5Data(dataset, vals, bufferDims, count, start);
-    ///\todo need to figure out if I use the name or the id from which I get the name
-    // H5outputfile.write_dataset_nd(dataset, bufferDims, vals, count, start);
+    H5outputfile.write_to_dataset_nd(datasetfullpath, bufferDims, vals, count, start);
 }
 
 void MipMap::resetBuffers() {
diff --git a/MipMap.h b/MipMap.h
index d4835de..d949070 100644
--- a/MipMap.h
+++ b/MipMap.h
@@ -39,6 +39,7 @@ struct MipMap {
 
     // H5::DataSet dataset;
     hid_t dset_id;
+    std::string datasetfullpath;
 
     std::vector<hsize_t> bufferDims;
     hsize_t bufferSize;
diff --git a/Stats.cc b/Stats.cc
index 7c1ef68..ef625c0 100644
--- a/Stats.cc
+++ b/Stats.cc
@@ -44,9 +44,11 @@ void Stats::createDatasets(H5OutputFile &H5outputfile, std::string pathname, std
 //     }
 
     for (auto i = 0; i<datasetnames.size(); i++) {
+        datasetfullnames.push_back(pathname + "/Statistics/" + name + datasetnames[i]);
         datasetids.push_back(H5outputfile.create_dataset(pathname + "/Statistics/" + name + datasetnames[i], datasettypes[i], basicDatasetDims));
     }
     if (numBins) {
+        datasetfullnames.push_back(pathname + "/Statistics/" + name + "/HISTOGRAMS");
         datasetids.push_back(H5outputfile.create_dataset(pathname + "/Statistics/" + name + "/HISTOGRAMS",
         H5T_NATIVE_LLONG, extend(basicDatasetDims, {numBins})));
     }
@@ -105,24 +107,15 @@ void Stats::writeBasic(H5OutputFile &H5outputfile, const std::vector<hsize_t>& b
 //     writeHdf5Data(sumDset, sums, basicBufferDims, count, start);
 //     writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start);
 //     writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start);
-
-
-    ///\todo need to get the names of the ids stored in datasetids
-    ///or do I just use the names?
-    //???
-    for (auto i = 0; i<datasetnames.size(); i++) {
-        // get_name(datasetids);
-    }
-    // H5outputfile.write_dataset_nd(minDset, basicBufferDims, minVals, count, start);
-    // H5outputfile.write_dataset_nd(maxDset, basicBufferDims, maxVals, count, start);
-    // H5outputfile.write_dataset_nd(sumDset, basicBufferDims, sums, count, start);
-    // H5outputfile.write_dataset_nd(ssqDset, basicBufferDims, sumsSq, count, start);
-    // H5outputfile.write_dataset_nd(nanDset, basicBufferDims, nanCounts, count, start);
+    H5outputfile.write_to_dataset_nd(datasetfullnames[0], basicBufferDims, minVals, count, start);
+    H5outputfile.write_to_dataset_nd(datasetfullnames[1], basicBufferDims, maxVals, count, start);
+    H5outputfile.write_to_dataset_nd(datasetfullnames[2], basicBufferDims, sums, count, start);
+    H5outputfile.write_to_dataset_nd(datasetfullnames[3], basicBufferDims, sumsSq, count, start);
+    H5outputfile.write_to_dataset_nd(datasetfullnames[4], basicBufferDims, nanCounts, count, start);
 }
 
 // void Stats::writeHistogram(const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
 void Stats::writeHistogram(H5OutputFile &H5outputfile, const std::vector<hsize_t>& basicBufferDims, const std::vector<hsize_t>& count, const std::vector<hsize_t>& start) {
 //     writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start);
-    //???
-    // H5outputfile.write_dataset_nd(histDset, extend(basicBufferDims, {numBins}, histograms, count, start);
+    H5outputfile.write_to_dataset_nd(datasetfullnames[5], extend(basicBufferDims, {numBins}), histograms, count, start);
 }
diff --git a/Stats.h b/Stats.h
index 6a33002..a32fc65 100644
--- a/Stats.h
+++ b/Stats.h
@@ -120,6 +120,7 @@ struct Stats {
     std::vector<std::string> datasetnames = {"/MIN", "/MAX", "/SUM", "/SUM_SQ", "/NAN_COUNT"};
     std::vector<hid_t> datasettypes = {H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_LLONG};
     std::vector<hid_t> datasetids;
+    std::vector<std::string> datasetfullnames;
 
     hid_t minDset;
     hid_t maxDset;

From fbe8f57b838e38a5d24b565011207f59faaf0b30 Mon Sep 17 00:00:00 2001
From: Pascal <pascaljelahi@gmail.com>
Date: Tue, 4 Aug 2020 15:39:13 +0800
Subject: [PATCH 13/13] Update for backward compatibility

---
 HDF5Wrapper.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc
index bc0e3fb..3dc3cca 100644
--- a/HDF5Wrapper.cc
+++ b/HDF5Wrapper.cc
@@ -283,9 +283,9 @@ void H5OutputFile::_get_attribute(std::vector<hid_t> &ids, const std::vector<std
         //otherwise enter group and recursively call funciotn
         H5O_info_t object_info;
         hid_t newid;
-        unsigned int fields = H5O_INFO_ALL;
         hid_t lapl_id = H5P_DEFAULT;
 #if H5_VERSION_GE(1,12,0)
+        unsigned int fields = H5O_INFO_ALL;
         H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id);
 #else
         H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id);
@@ -332,9 +332,9 @@ void H5OutputFile::_get_dataset(std::vector<hid_t> &ids, const std::vector<std::
         //otherwise enter group and recursively call funciotn
         H5O_info_t object_info;
         hid_t newid;
-        unsigned int fields = H5O_INFO_ALL;
         hid_t lapl_id = H5P_DEFAULT;
 #if H5_VERSION_GE(1,12,0)
+        unsigned int fields = H5O_INFO_ALL;
         H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id);
 #else
         H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id);
@@ -381,9 +381,9 @@ void H5OutputFile::_get_hdf5_id(std::vector<hid_t> &ids, const std::vector<std::
         //otherwise enter group and recursively call funciotn
         H5O_info_t object_info;
         hid_t newid;
-        unsigned int fields = H5O_INFO_ALL;
         hid_t lapl_id = H5P_DEFAULT;
 #if H5_VERSION_GE(1,12,0)
+        unsigned int fields = H5O_INFO_ALL;
         H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id);
 #else
         H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id);