From 2278964d88734994de48345561b30b2ab29aa1f9 Mon Sep 17 00:00:00 2001 From: Pascal Jahan Elahi Date: Thu, 30 Jul 2020 09:20:02 +0800 Subject: [PATCH 01/13] Initial update to HDF5 C API --- CMakeLists.txt | 35 ++- Converter.cc | 48 ++-- Converter.h | 11 +- FastConverter.cc | 4 +- HDF5Wrapper.cc | 599 +++++++++++++++++++++++++++++++++++++++++++++++ HDF5Wrapper.h | 128 ++++++++++ MipMap.cc | 24 +- MipMap.h | 6 +- SlowConverter.cc | 6 +- Stats.cc | 53 +++-- Stats.h | 28 ++- Util.cc | 184 +++++++-------- Util.h | 24 +- 13 files changed, 969 insertions(+), 181 deletions(-) create mode 100644 HDF5Wrapper.cc create mode 100644 HDF5Wrapper.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 409fe63..074eee4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,32 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.13) project(hdf_convert) set(default_build_type "Release") set(CMAKE_CXX_STANDARD 11) + +# macro(find_hdf5) +# find_package(HDF5 REQUIRED COMPONENTS C) +# if (HDF5_FOUND) +# include_directories(${HDF5_INCLUDE_DIRS}) +# if (HDF5_IS_PARALLEL HDF5_CONVERTER_ALLOWPARALLELHDF5) +# set (ENV{HDF5_PREFER_PARALLEL} true) +# set(HDF5_CONVERTER_HAS_PARALLEL_HDF5 Yes) +# list(APPEND HDF5_CONVERTER_DEFINES USEPARALLELHDF) +# if (HDF5_VERSION VERSION_GREATER "1.10.0" AND HDF5_CONVERTER_ALLOWCOMPRESSIONPARALLELHDF5) +# set(HDF5_CONVERTER_HAS_COMPRESSED_HDF5 Yes) +# list(APPEND HDF5_CONVERTER_DEFINES USEHDFCOMPRESSION) +# list(APPEND HDF5_CONVERTER_DEFINES PARALLELCOMPRESSIONACTIVE) +# endif() +# else() +# if (HDF5_CONVERTER_ALLOWCOMPRESSIONHDF5) +# set(HDF5_CONVERTER_HAS_COMPRESSED_HDF5 Yes) +# list(APPEND HDF5_CONVERTER_DEFINES USEHDFCOMPRESSION) +# endif() +# endif() +# endif() +# endmacro() + FIND_PACKAGE(HDF5 REQUIRED COMPONENTS CXX) INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR}) find_package(OpenMP) @@ -26,13 +49,15 @@ set(LINK_LIBS ${LINK_LIBS} cfitsio ${HDF5_LIBRARIES}) set(SOURCE_FILES ${SOURCE_FILES} - main.cc - Stats.cc - MipMap.cc + HDF5Wrapper.cc Converter.cc FastConverter.cc SlowConverter.cc - Util.cc) + Stats.cc + MipMap.cc + Util.cc + main.cc + ) add_executable(hdf_convert ${SOURCE_FILES}) target_link_libraries(hdf_convert ${LINK_LIBS}) diff --git a/Converter.cc b/Converter.cc index 9fca065..587c0ec 100644 --- a/Converter.cc +++ b/Converter.cc @@ -43,7 +43,7 @@ Converter::Converter(std::string inputFileName, std::string outputFileName, bool Converter::~Converter() { // TODO this is probably unnecessary; the file object destructor should close the file properly. - outputFile.close(); + HDF5outputFile.close(); } std::unique_ptr Converter::getConverter(std::string inputFileName, std::string outputFileName, bool slow, bool progress) { @@ -67,28 +67,34 @@ void Converter::convert() { // TODO dataset variables should be local and passed into the copy function? - outputFile = H5::H5File(tempOutputFileName, H5F_ACC_TRUNC); - outputGroup = outputFile.createGroup("0"); +// outputFile = H5::H5File(tempOutputFileName, H5F_ACC_TRUNC); +// outputGroup = outputFile.createGroup("0"); + + H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC); + hid_t gid = H5outputfile.create_group("0"); std::vector chunkDims; if (useChunks(standardDims)) { chunkDims = tileDims; } - H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); - floatType.setOrder(H5T_ORDER_LE); - createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims); +// H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); +// floatType.setOrder(H5T_ORDER_LE); +// createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims); +//???? + + - statsXY.createDatasets(outputGroup, "XY"); +// statsXY.createDatasets(outputGroup, "XY"); if (depth > 1) { - statsXYZ.createDatasets(outputGroup, "XYZ"); - statsZ.createDatasets(outputGroup, "Z"); +// statsXYZ.createDatasets(outputGroup, "XYZ"); +// statsZ.createDatasets(outputGroup, "Z"); - auto swizzledGroup = outputGroup.createGroup("SwizzledData"); +// auto swizzledGroup = outputGroup.createGroup("SwizzledData"); // We use this name in papers because it sounds more serious. :) - outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData"); - createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims); +// outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData"); +// createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims); } mipMaps.createDatasets(outputGroup); @@ -97,9 +103,9 @@ void Converter::convert() { TIMER(timer.start("Headers");); - writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION)); - writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER)); - writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION)); +// writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION)); +// writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER)); +// writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION)); int numAttributes; readFitsHeader(inputFilePtr, numAttributes); @@ -123,16 +129,16 @@ void Converter::convert() { // STRING std::string attributeValueStr; readFitsStringAttribute(inputFilePtr, attributeName, attributeValueStr); - writeHdf5Attribute(outputGroup, attributeName, attributeValueStr); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueStr); } else if (attributeValue == "T" || attributeValue == "F") { // BOOLEAN bool attributeValueBool = (attributeValue == "T"); - writeHdf5Attribute(outputGroup, attributeName, attributeValueBool); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueBool); } else if (attributeValue.find('.') != std::string::npos) { // TRY TO PARSE AS DOUBLE try { double attributeValueDouble = std::stod(attributeValue); - writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); } catch (const std::invalid_argument& ia) { std::cout << "Warning: could not parse attribute '" << attributeName << "' as a float." << std::endl; parsingFailure = true; @@ -140,7 +146,7 @@ void Converter::convert() { // Special handling for subnormal numbers long double attributeValueLongDouble = std::stold(attributeValue); double attributeValueDouble = (double) attributeValueLongDouble; - writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); std::ostringstream ostream; ostream.precision(13); @@ -158,7 +164,7 @@ void Converter::convert() { // TRY TO PARSE AS INTEGER try { int64_t attributeValueInt = std::stoi(attributeValue); - writeHdf5Attribute(outputGroup, attributeName, attributeValueInt); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueInt); } catch (const std::invalid_argument& ia) { std::cout << "Warning: could not parse attribute '" << attributeName << "' as an integer." << std::endl; parsingFailure = true; @@ -167,7 +173,7 @@ void Converter::convert() { if (parsingFailure) { // FALL BACK TO STRING - writeHdf5Attribute(outputGroup, attributeName, attributeValue); +// writeHdf5Attribute(outputGroup, attributeName, attributeValue); } } } diff --git a/Converter.h b/Converter.h index 223d22e..e0f36e9 100644 --- a/Converter.h +++ b/Converter.h @@ -6,6 +6,7 @@ #include "MipMap.h" #include "Timer.h" #include "Util.h" +#include "HDF5Wrapper.h" class Converter { public: @@ -28,10 +29,12 @@ class Converter { fitsfile* inputFilePtr; // Main HDF5 objects - H5::H5File outputFile; - H5::Group outputGroup; - H5::DataSet standardDataSet; - H5::DataSet swizzledDataSet; +// H5::H5File outputFile; +// H5::Group outputGroup; +// H5::DataSet standardDataSet; +// H5::DataSet swizzledDataSet; + + H5OutputFile H5outputfile; float* standardCube; float* rotatedCube; diff --git a/FastConverter.cc b/FastConverter.cc index b32f1ed..ecf4af7 100644 --- a/FastConverter.cc +++ b/FastConverter.cc @@ -253,14 +253,14 @@ void FastConverter::copyAndCalculate() { std::vector memDims = {depth, height, width}; std::vector count = trimAxes({1, depth, height, width}, N); std::vector start = trimAxes({currentStokes, 0, 0, 0}, N); - writeHdf5Data(standardDataSet, standardCube, memDims, count, start); +// writeHdf5Data(standardDataSet, standardCube, memDims, count, start); if (depth > 1) { // This all technically worked if we reused the standard filespace and memspace // But it's probably not a good idea to rely on two incorrect values cancelling each other out std::vector swizzledCount = trimAxes({1, width, height, depth}, N); std::vector swizzledMemDims = {width, height, depth}; - writeHdf5Data(swizzledDataSet, rotatedCube, swizzledMemDims, swizzledCount, start); +// writeHdf5Data(swizzledDataSet, rotatedCube, swizzledMemDims, swizzledCount, start); } // After writing and before mipmaps, we free the swizzled memory. We allocate it again next Stokes. diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc new file mode 100644 index 0000000..2e5bc7a --- /dev/null +++ b/HDF5Wrapper.cc @@ -0,0 +1,599 @@ +#include "HDF5Wrapper.h" + +void H5OutputFile::create(std::string filename, hid_t flag, + int taskID, bool iparallelopen) +{ + if(file_id >= 0) io_error("Attempted to create file when already open!"); +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; + if (iparallelopen && taskID ==-1) { + parallel_access_id = H5Pcreate (H5P_FILE_ACCESS); + if (parallel_access_id < 0) io_error("Parallel access creation failed"); + herr_t ret = H5Pset_fapl_mpio(parallel_access_id, comm, info); + if (ret < 0) io_error("Parallel access failed"); + // create the file collectively + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, parallel_access_id); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); + ret = H5Pclose(parallel_access_id); + if (ret < 0) io_error("Parallel release failed"); + parallel_access_id = -1; + } + else { + if (taskID <0 || taskID > NProcsWrite) io_error(std::string("MPI Task ID asked to create file out of range. Task ID is ")+to_std::string(taskID)); + if (ThisWriteTask == taskID) { + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, H5P_DEFAULT); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); + parallel_access_id = -1; + } + else { + parallel_access_id = -2; + } + MPI_Barrier(comm); + } +#else + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, H5P_DEFAULT); + if(file_id < 0)io_error(std::string("Failed to create output file: ")+filename); +#endif + +} + +void H5OutputFile::append(std::string filename, hid_t flag, + int taskID, bool iparallelopen) +{ + if(file_id >= 0)io_error("Attempted to open and append to file when already open!"); +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; + if (iparallelopen && taskID ==-1) { + parallel_access_id = H5Pcreate (H5P_FILE_ACCESS); + if (parallel_access_id < 0) io_error("Parallel access creation failed"); + herr_t ret = H5Pset_fapl_mpio(parallel_access_id, comm, info); + if (ret < 0) io_error("Parallel access failed"); + // create the file collectively + file_id = H5Fopen(filename.c_str(), flag, parallel_access_id); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); + ret = H5Pclose(parallel_access_id); + if (ret < 0) io_error("Parallel release failed"); + parallel_access_id = -1; + } + else { + if (taskID <0 || taskID > NProcsWrite) io_error(std::string("MPI Task ID asked to create file out of range. Task ID is ")+to_std::string(taskID)); + if (ThisWriteTask == taskID) { + file_id = H5Fopen(filename.c_str(),flag, H5P_DEFAULT); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); + parallel_access_id = -1; + } + else { + parallel_access_id = -2; + } + MPI_Barrier(comm); + } +#else + file_id = H5Fopen(filename.c_str(), flag, H5P_DEFAULT); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); +#endif +} + +// Close the file +void H5OutputFile::close() +{ +#ifdef USEPARALLELHDF + if(file_id < 0 && parallel_access_id == -1) io_error("Attempted to close file which is not open!"); + if (parallel_access_id == -1) H5Fclose(file_id); +#else + if(file_id < 0) io_error("Attempted to close file which is not open!"); + H5Fclose(file_id); +#endif + file_id = -1; +#ifdef USEPARALLELHDF + parallel_access_id = -1; +#endif +} + +/// Write a new 1D dataset. Data type of the new dataset is taken to be the type of +/// the input data if not explicitly specified with the filetype_id parameter. +template void H5OutputFile::write_dataset(std::string name, hsize_t len, T *data, + hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_hyperslab, bool flag_collective) +{ + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); +} + +void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + int rank = 1; + hsize_t dims[1] = {len}; + + hid_t memtype_id, filetype_id, dspace_id, dset_id, xfer_plist; + herr_t status, ret; + memtype_id = H5Tcopy (H5T_C_S1); + status = H5Tset_size (memtype_id, data.size()); + filetype_id = H5Tcopy (H5T_C_S1); + status = H5Tset_size (filetype_id, data.size()); + + // Create the dataspace + dspace_id = H5Screate_simple(rank, dims, NULL); + + // Create the dataset + dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); +#ifdef USEPARALLELHDF + if (flag_parallel) { + // set up the collective transfer properties list + xfer_plist = H5Pcreate(H5P_DATASET_XFER); + if (xfer_plist < 0) io_error(std::string("Failed to set up parallel transfer: ")+name); + if (flag_collective) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + else ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT); + if (ret < 0) io_error(std::string("Failed to set up parallel transfer: ")+name); + // the result of above should be that all processors write to the same + // point of the hdf file. + } +#endif + // Write the data + if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0) + io_error(std::string("Failed to write dataset: ")+name); + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) +#ifdef USEPARALLELHDF + if (flag_parallel) H5Pclose(xfer_plist); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); +} +void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data, + hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective) +{ + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective); +} + + +/// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of +/// the input data if not explicitly specified with the filetype_id parameter. +template void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, + bool flag_hyperslab, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + hid_t dspace_id, dset_id, prop_id, memspace_id, ret; + std::vector chunks(rank); + + // Get HDF5 data type of the array in memory + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + if (flag_parallel) { + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + for (auto i=0;i 0) continue; + for (auto j=1;j<=ThisWriteTask;j++) { + dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; + } + } + if (flag_first_dim_parallel && rank > 1) { + for (auto i=1; i1 chunk + int large_dataset = 0; + for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } + else { + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } +#else + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; +#endif + } + if(nonzero_size && large_dataset) + { +#ifdef USEPARALLELHDF + if (flag_parallel) { + for(auto i=0; i 0) { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + } + else if (dims[0] > 0) + { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + +#else + // Write the data + if (dims[0] > 0) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } +#endif + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); +#ifdef USEPARALLELHDF + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); +} +void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, + bool flag_hyperslab, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + hid_t dspace_id, dset_id, prop_id, memspace_id, ret; + std::vector chunks(rank); + // Get HDF5 data type of the array in memory + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + if (flag_parallel) { + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + for (auto i=0;i 0) continue; + for (auto j=1;j<=ThisWriteTask;j++) { + dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; + } + } + if (flag_first_dim_parallel && rank > 1) { + for (auto i=1; i1 chunk + unsigned int large_dataset = 0; + for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } + else { + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } +#else + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; +#endif + } + if(nonzero_size && large_dataset) + { +#ifdef USEPARALLELHDF + if (flag_parallel) { + for(auto i=0; i 0) { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + } + else if (dims[0] > 0) + { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + +#else + // Write the data + if (dims[0] > 0) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } +#endif + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); +#ifdef USEPARALLELHDF + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); +} + +/// write an attribute +template void H5OutputFile::write_attribute(std::string parent, std::string name, std::vector data) +{ + // Get HDF5 data type of the value to write + hid_t dtype_id = hdf5_type(data[0]); + hsize_t size = data.size(); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SIMPLE); + hid_t dspace_extent = H5Sset_extent_simple(dspace_id, 1, &size, NULL); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, data.data()) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); +} + +template void H5OutputFile::write_attribute(std::string parent, std::string name, T data) +{ + // Get HDF5 data type of the value to write + hid_t dtype_id = hdf5_type(data); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SCALAR); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, &data) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); +} + +void H5OutputFile::write_attribute(std::string parent, std::string name, std::string data) +{ + // Get HDF5 data type of the value to write + hid_t dtype_id = H5Tcopy(H5T_C_S1); + if (data.size() == 0) data=" "; + H5Tset_size(dtype_id, data.size()); + H5Tset_strpad(dtype_id, H5T_STR_NULLTERM); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SCALAR); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, data.c_str()) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); +} diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h new file mode 100644 index 0000000..1cd5d3f --- /dev/null +++ b/HDF5Wrapper.h @@ -0,0 +1,128 @@ +#ifndef __HDF5WRAPPER_H +#define __HDF5WRAPPER_H + +#include "common.h" + +#ifdef USEMPI +#include +#endif + +// Overloaded function to return HDF5 type given a C type +static inline hid_t hdf5_type(float dummy) {return H5T_NATIVE_FLOAT;} +static inline hid_t hdf5_type(double dummy) {return H5T_NATIVE_DOUBLE;} +static inline hid_t hdf5_type(short dummy) {return H5T_NATIVE_SHORT;} +static inline hid_t hdf5_type(int dummy) {return H5T_NATIVE_INT;} +static inline hid_t hdf5_type(long dummy) {return H5T_NATIVE_LONG;} +static inline hid_t hdf5_type(long long dummy) {return H5T_NATIVE_LLONG;} +static inline hid_t hdf5_type(unsigned short dummy) {return H5T_NATIVE_USHORT;} +static inline hid_t hdf5_type(unsigned int dummy) {return H5T_NATIVE_UINT;} +static inline hid_t hdf5_type(unsigned long dummy) {return H5T_NATIVE_ULONG;} +static inline hid_t hdf5_type(unsigned long long dummy) {return H5T_NATIVE_ULLONG;} +static inline hid_t hdf5_type(std::string dummy) {return H5T_C_S1;} +static inline hid_t hdf5_type_from_string(std::string dummy) +{ + if (dummy == std::string("float32")) return H5T_NATIVE_FLOAT; + else if (dummy == std::string("float64")) return H5T_NATIVE_DOUBLE; + else if (dummy == std::string("float64")) return H5T_NATIVE_INT; + else if (dummy == std::string("int64")) return H5T_NATIVE_LLONG; + else return H5T_C_S1; +} + +///\name HDF class to manage writing information +class H5OutputFile +{ + +private: + + hid_t file_id; +#ifdef USEPARALLELHDF + hid_t parallel_access_id; +#endif + +protected: + + // Called if a HDF5 call fails (might need to MPI_Abort) + void io_error(std::string message) { + std::cerr << message << std::endl; +#ifdef USEMPI + MPI_Abort(MPI_COMM_WORLD, 1); +#endif + abort(); + } + unsigned int HDFOUTPUTCHUNKSIZE = 8192; +public: + + // Constructor + H5OutputFile() { + file_id = -1; +#ifdef USEPARALLELHDF + parallel_access_id = -1; +#endif + } + + // Create a new file + void create(std::string filename, hid_t flag = H5F_ACC_TRUNC, + int taskID = -1, bool iparallelopen = true); + + void append(std::string filename, hid_t flag = H5F_ACC_RDWR, + int taskID = -1, bool iparallelopen = true); + + // Close the file + void close(); + + hid_t create_group(std::string groupname) { + hid_t group_id = H5Gcreate(file_id, groupname.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + return group_id; + } + herr_t close_group(hid_t gid) { + herr_t status = H5Gclose(gid); + return status; + } + + //???? + hid_t create_dataset(std::string dsetname, hid_t file) { + hid_t dset_id H5Dcreate(file_id, datasetname, -1, -1, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + hid_t group_id = H5Dcreate(file_id, groupname.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + return group_id; + } + herr_t close_dataset(hid_t did) { + herr_t status = H5Gclose(gid); + return status; + } + + // Destructor closes the file if it's open + ~H5OutputFile() + { + if(file_id >= 0) close(); + } + + /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_dataset(std::string name, hsize_t len, T *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); + void write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel = true, bool flag_collective = true); + void write_dataset(std::string name, hsize_t len, void *data, + hid_t memtype_id=-1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); + + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + + /// write an attribute + template void write_attribute(std::string parent, std::string name, std::vector data); + + template void write_attribute(std::string parent, std::string name, T data); + + void write_attribute(std::string parent, std::string name, std::string data); +}; + +#endif diff --git a/MipMap.cc b/MipMap.cc index fea92bc..35ffb5c 100644 --- a/MipMap.cc +++ b/MipMap.cc @@ -18,11 +18,13 @@ void MipMap::createDataset(H5::Group group, const std::vector& chunkDim std::ostringstream mipMapName; mipMapName << "MipMaps/DATA/DATA_XY_" << mip; - if (useChunks(datasetDims)) { - createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims, chunkDims); - } else { - createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims); - } +// if (useChunks(datasetDims)) { +// createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims, chunkDims); +// } else { +// createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims); +// } +//??? + } void MipMap::createBuffers(std::vector& bufferDims) { @@ -88,11 +90,17 @@ hsize_t MipMaps::size(const std::vector& standardDims, const std::vecto return size; } -void MipMaps::createDatasets(H5::Group group) { +// void MipMaps::createDatasets(H5::Group group) { +// for (auto& mipMap : mipMaps) { +// mipMap.createDataset(group, chunkDims); +// } +// +// } + +void MipMaps::createDatasets(H5OutputFile H5outputfile , hid_t gid) { for (auto& mipMap : mipMaps) { - mipMap.createDataset(group, chunkDims); + mipMap.createDataset(H5outputfile, gid, chunkDims); } - } void MipMaps::createBuffers(const std::vector& standardBufferDims) { diff --git a/MipMap.h b/MipMap.h index ee2c7a8..5716c94 100644 --- a/MipMap.h +++ b/MipMap.h @@ -10,7 +10,8 @@ struct MipMap { MipMap(const std::vector& datasetDims, int mip); ~MipMap(); - void createDataset(H5::Group group, const std::vector& chunkDims); +// void createDataset(H5::Group group, const std::vector& chunkDims); + void createDataset(H5Outputfile h5outputfile, hid_t gid, const std::vector& chunkDims); void createBuffers(std::vector& bufferDims); void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) { @@ -57,7 +58,8 @@ struct MipMaps { // We need the dataset dimensions to work out how many mipmaps we have static hsize_t size(const std::vector& standardDims, const std::vector& standardBufferDims); - void createDatasets(H5::Group group); +// void createDatasets(H5::Group group); + void createDatasets(H5OutputFile H5Outputfile, hid_t gid); void createBuffers(const std::vector& standardBufferDims); void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) { diff --git a/SlowConverter.cc b/SlowConverter.cc index bbc75d9..ae09122 100644 --- a/SlowConverter.cc +++ b/SlowConverter.cc @@ -80,7 +80,7 @@ void SlowConverter::copyAndCalculate() { TIMER(timer.start("Write");); std::vector start = trimAxes({s, c, 0, 0}, N); - writeHdf5Data(standardDataSet, standardCube, memDims, count, start); +// writeHdf5Data(standardDataSet, standardCube, memDims, count, start); DEBUG(std::cout << " Accumulating XY stats and mipmaps..." << std::flush;); TIMER(timer.start(timerLabelStatsMipmaps);); @@ -293,7 +293,7 @@ void SlowConverter::copyAndCalculate() { auto standardCount = trimAxes({1, depth, ySize, xSize}, N); auto standardStart = trimAxes({s, 0, yOffset, xOffset}, N); - readHdf5Data(standardDataSet, standardSlice, standardMemDims, standardCount, standardStart); +// readHdf5Data(standardDataSet, standardSlice, standardMemDims, standardCount, standardStart); // rotate tile slice DEBUG(std::cout << " Calculating rotation..." << std::flush;); @@ -345,7 +345,7 @@ void SlowConverter::copyAndCalculate() { auto swizzledCount = trimAxes({1, xSize, ySize, depth}, N); auto swizzledStart = trimAxes({s, xOffset, yOffset, 0}, N); - writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart); +// writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart); DEBUG(std::cout << " Writing Z statistics..." << std::endl;); // write Z statistics diff --git a/Stats.cc b/Stats.cc index 17ed207..f62d893 100644 --- a/Stats.cc +++ b/Stats.cc @@ -1,6 +1,7 @@ #include "Stats.h" -Stats::Stats(const std::vector& basicDatasetDims, hsize_t numBins) : basicDatasetDims(basicDatasetDims), numBins(numBins), partialHistMultiplier(0) {} +Stats::Stats(const std::vector& basicDatasetDims, hsize_t numBins) : basicDatasetDims(basicDatasetDims), numBins(numBins), partialHistMultiplier(0) { +} Stats::~Stats() { if (!fullBasicBufferDims.empty()) { @@ -21,22 +22,24 @@ hsize_t Stats::size(std::vector dims, hsize_t numBins, hsize_t partialH return (2 * sizeof(float) + 2 * sizeof(double) + sizeof(int64_t)) * statsSize + sizeof(int64_t) * (statsSize * numBins + statsSize * numBins * partialHistMultiplier); } -void Stats::createDatasets(H5::Group group, std::string name) { - H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); - floatType.setOrder(H5T_ORDER_LE); - - H5::IntType intType(H5::PredType::NATIVE_INT64); - intType.setOrder(H5T_ORDER_LE); - - createHdf5Dataset(minDset, group, "Statistics/" + name + "/MIN", floatType, basicDatasetDims); - createHdf5Dataset(maxDset, group, "Statistics/" + name + "/MAX", floatType, basicDatasetDims); - createHdf5Dataset(sumDset, group, "Statistics/" + name + "/SUM", floatType, basicDatasetDims); - createHdf5Dataset(ssqDset, group, "Statistics/" + name + "/SUM_SQ", floatType, basicDatasetDims); - createHdf5Dataset(nanDset, group, "Statistics/" + name + "/NAN_COUNT", intType, basicDatasetDims); +// void Stats::createDatasets(H5::Group group, std::string name) { +void Stats::createDatasets(H5OutputFile H5outputfile, hid_t gid, std::string name) { +// H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); +// floatType.setOrder(H5T_ORDER_LE); +// +// H5::IntType intType(H5::PredType::NATIVE_INT64); +// intType.setOrder(H5T_ORDER_LE); +// +// createHdf5Dataset(minDset, group, "Statistics/" + name + "/MIN", floatType, basicDatasetDims); +// createHdf5Dataset(maxDset, group, "Statistics/" + name + "/MAX", floatType, basicDatasetDims); +// createHdf5Dataset(sumDset, group, "Statistics/" + name + "/SUM", floatType, basicDatasetDims); +// createHdf5Dataset(ssqDset, group, "Statistics/" + name + "/SUM_SQ", floatType, basicDatasetDims); +// createHdf5Dataset(nanDset, group, "Statistics/" + name + "/NAN_COUNT", intType, basicDatasetDims); +// +// if (numBins) { +// createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins})); +// } - if (numBins) { - createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins})); - } } void Stats::createBuffers(std::vector dims, hsize_t partialHistMultiplier) { @@ -84,14 +87,16 @@ void Stats::write(const std::vector& basicBufferDims, const std::vector } } -void Stats::writeBasic(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { - writeHdf5Data(minDset, minVals, basicBufferDims, count, start); - writeHdf5Data(maxDset, maxVals, basicBufferDims, count, start); - writeHdf5Data(sumDset, sums, basicBufferDims, count, start); - writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start); - writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start); +// void Stats::writeBasic(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +void Stats::writeBasic(H5OutputFile H5outputfile, std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +// writeHdf5Data(minDset, minVals, basicBufferDims, count, start); +// writeHdf5Data(maxDset, maxVals, basicBufferDims, count, start); +// writeHdf5Data(sumDset, sums, basicBufferDims, count, start); +// writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start); +// writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start); } -void Stats::writeHistogram(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { - writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start); +// void Stats::writeHistogram(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +void Stats::writeHistogram(H5OutputFile H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +// writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start); } diff --git a/Stats.h b/Stats.h index 50abda2..50dbde8 100644 --- a/Stats.h +++ b/Stats.h @@ -3,6 +3,7 @@ #include "common.h" #include "Util.h" +#include "HDF5Wrapper.h" struct StatsCounter { StatsCounter() : minVal(std::numeric_limits::max()), maxVal(-std::numeric_limits::max()), sum(0), sumSq(0), nanCount(0) { @@ -44,14 +45,15 @@ struct StatsCounter { }; struct Stats { - Stats() {} + Stats(); Stats(const std::vector& basicDatasetDims, hsize_t numBins = 0); ~Stats(); static hsize_t size(std::vector dims, hsize_t numBins = 0, hsize_t partialHistMultiplier = 0); // Setup - void createDatasets(H5::Group group, std::string name); + //void createDatasets(H5::Group group, std::string name); + void createDatasets(H5Outputfile H5outputfile, hid_t gid, std::string name); void createBuffers(std::vector dims, hsize_t partialHistMultiplier = 0); // Basic stats @@ -115,13 +117,22 @@ struct Stats { hsize_t numBins; // Datasets - H5::DataSet minDset; - H5::DataSet maxDset; - H5::DataSet sumDset; - H5::DataSet ssqDset; - H5::DataSet nanDset; +// H5::DataSet minDset; +// H5::DataSet maxDset; +// H5::DataSet sumDset; +// H5::DataSet ssqDset; +// H5::DataSet nanDset; +// +// H5::DataSet histDset; - H5::DataSet histDset; + hid_t minDset; + hid_t maxDset; + hid_t sumDset; + hid_t ssqDset; + hid_t nanDset; + + hid_t histDset; + // Buffer dimensions @@ -129,6 +140,7 @@ struct Stats { hsize_t partialHistMultiplier; // Buffers + //why not change this to vectors ??? float* minVals; float* maxVals; double* sums; diff --git a/Util.cc b/Util.cc index d5f42b5..2197ee5 100644 --- a/Util.cc +++ b/Util.cc @@ -145,95 +145,95 @@ void readFitsData(fitsfile* filePtr, hsize_t channel, unsigned int stokes, hsize } // Only available in C++ API from 1.10.1 -bool hdf5Exists(H5::H5Location& location, const std::string& name) { - return H5Lexists(location.getId(), name.c_str(), H5P_DEFAULT) > 0; -} - -void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector dims, const std::vector& chunkDims) { - auto splitPath = split(path, '/'); - - auto name = splitPath.back(); - splitPath.pop_back(); - - for (auto& groupname : splitPath) { - if (!hdf5Exists(group, groupname)) { - group = group.createGroup(groupname); - } else { - group = group.openGroup(groupname); - } - } - - H5::DSetCreatPropList propList; - if (!chunkDims.empty()) { - propList.setChunk(chunkDims.size(), chunkDims.data()); - } - - auto dataSpace = H5::DataSpace(dims.size(), dims.data()); - dataset = group.createDataSet(name, dataType, dataSpace, propList); -} - -void writeHdf5Attribute(H5::Group group, std::string name, std::string value) { - H5::StrType strType(H5::PredType::C_S1, 256); - H5::DataSpace dataSpace(H5S_SCALAR); - auto attribute = group.createAttribute(name, strType, dataSpace); - attribute.write(strType, value); -} - -void writeHdf5Attribute(H5::Group group, std::string name, int64_t value) { - H5::IntType intType(H5::PredType::NATIVE_INT64); - intType.setOrder(H5T_ORDER_LE); - H5::DataSpace dataSpace(H5S_SCALAR); - auto attribute = group.createAttribute(name, intType, dataSpace); - attribute.write(intType, &value); -} - -void writeHdf5Attribute(H5::Group group, std::string name, double value) { - H5::FloatType doubleType(H5::PredType::NATIVE_DOUBLE); - doubleType.setOrder(H5T_ORDER_LE); - H5::DataSpace dataSpace(H5S_SCALAR); - auto attribute = group.createAttribute(name, doubleType, dataSpace); - attribute.write(doubleType, &value); -} - -void writeHdf5Attribute(H5::Group group, std::string name, bool value) { - H5::IntType boolType(H5::PredType::NATIVE_HBOOL); - H5::DataSpace dataSpace(H5S_SCALAR); - auto attribute = group.createAttribute(name, boolType, dataSpace); - attribute.write(boolType, &value); -} - -void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count, const std::vector& start) { - H5::DataSpace memSpace(dims.size(), dims.data()); - auto fileSpace = dataset.getSpace(); - if (!count.empty() && !start.empty()) { - fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); - } - dataset.write(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace); -} - -void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector& dims, const std::vector& count, const std::vector& start) { - H5::DataSpace memSpace(dims.size(), dims.data()); - auto fileSpace = dataset.getSpace(); - if (!count.empty() && !start.empty()) { - fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); - } - dataset.write(data, H5::PredType::NATIVE_DOUBLE, memSpace, fileSpace); -} - -void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector& dims, const std::vector& count, const std::vector& start) { - H5::DataSpace memSpace(dims.size(), dims.data()); - auto fileSpace = dataset.getSpace(); - if (!count.empty() && !start.empty()) { - fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); - } - dataset.write(data, H5::PredType::NATIVE_INT64, memSpace, fileSpace); -} - -void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count, const std::vector& start) { - H5::DataSpace memSpace(dims.size(), dims.data()); - auto fileSpace = dataset.getSpace(); - if (!count.empty() && !start.empty()) { - fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); - } - dataset.read(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace); -} +// bool hdf5Exists(H5::H5Location& location, const std::string& name) { +// return H5Lexists(location.getId(), name.c_str(), H5P_DEFAULT) > 0; +// } +// +// void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector dims, const std::vector& chunkDims) { +// auto splitPath = split(path, '/'); +// +// auto name = splitPath.back(); +// splitPath.pop_back(); +// +// for (auto& groupname : splitPath) { +// if (!hdf5Exists(group, groupname)) { +// group = group.createGroup(groupname); +// } else { +// group = group.openGroup(groupname); +// } +// } +// +// H5::DSetCreatPropList propList; +// if (!chunkDims.empty()) { +// propList.setChunk(chunkDims.size(), chunkDims.data()); +// } +// +// auto dataSpace = H5::DataSpace(dims.size(), dims.data()); +// dataset = group.createDataSet(name, dataType, dataSpace, propList); +// } +// +// void writeHdf5Attribute(H5::Group group, std::string name, std::string value) { +// H5::StrType strType(H5::PredType::C_S1, 256); +// H5::DataSpace dataSpace(H5S_SCALAR); +// auto attribute = group.createAttribute(name, strType, dataSpace); +// attribute.write(strType, value); +// } +// +// void writeHdf5Attribute(H5::Group group, std::string name, int64_t value) { +// H5::IntType intType(H5::PredType::NATIVE_INT64); +// intType.setOrder(H5T_ORDER_LE); +// H5::DataSpace dataSpace(H5S_SCALAR); +// auto attribute = group.createAttribute(name, intType, dataSpace); +// attribute.write(intType, &value); +// } +// +// void writeHdf5Attribute(H5::Group group, std::string name, double value) { +// H5::FloatType doubleType(H5::PredType::NATIVE_DOUBLE); +// doubleType.setOrder(H5T_ORDER_LE); +// H5::DataSpace dataSpace(H5S_SCALAR); +// auto attribute = group.createAttribute(name, doubleType, dataSpace); +// attribute.write(doubleType, &value); +// } +// +// void writeHdf5Attribute(H5::Group group, std::string name, bool value) { +// H5::IntType boolType(H5::PredType::NATIVE_HBOOL); +// H5::DataSpace dataSpace(H5S_SCALAR); +// auto attribute = group.createAttribute(name, boolType, dataSpace); +// attribute.write(boolType, &value); +// } +// +// void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count, const std::vector& start) { +// H5::DataSpace memSpace(dims.size(), dims.data()); +// auto fileSpace = dataset.getSpace(); +// if (!count.empty() && !start.empty()) { +// fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); +// } +// dataset.write(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace); +// } +// +// void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector& dims, const std::vector& count, const std::vector& start) { +// H5::DataSpace memSpace(dims.size(), dims.data()); +// auto fileSpace = dataset.getSpace(); +// if (!count.empty() && !start.empty()) { +// fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); +// } +// dataset.write(data, H5::PredType::NATIVE_DOUBLE, memSpace, fileSpace); +// } +// +// void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector& dims, const std::vector& count, const std::vector& start) { +// H5::DataSpace memSpace(dims.size(), dims.data()); +// auto fileSpace = dataset.getSpace(); +// if (!count.empty() && !start.empty()) { +// fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); +// } +// dataset.write(data, H5::PredType::NATIVE_INT64, memSpace, fileSpace); +// } +// +// void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count, const std::vector& start) { +// H5::DataSpace memSpace(dims.size(), dims.data()); +// auto fileSpace = dataset.getSpace(); +// if (!count.empty() && !start.empty()) { +// fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); +// } +// dataset.read(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace); +// } diff --git a/Util.h b/Util.h index dbd119a..51f7773 100644 --- a/Util.h +++ b/Util.h @@ -29,17 +29,17 @@ void readFitsStringAttribute(fitsfile* filePtr, const std::string& name, std::st void readFitsData(fitsfile* filePtr, hsize_t channel, unsigned int stokes, hsize_t size, float* destination); // Only available in C++ API from 1.10.1 -bool hdf5Exists(H5::H5Location& location, const std::string& name); -void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector dims, const std::vector& chunkDims = EMPTY_DIMS); -void writeHdf5Attribute(H5::Group group, std::string name, std::string value); -void writeHdf5Attribute(H5::Group group, std::string name, int64_t value); -void writeHdf5Attribute(H5::Group group, std::string name, double value); -void writeHdf5Attribute(H5::Group group, std::string name, bool value); - -void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); -void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); -void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); - -void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); +// bool hdf5Exists(H5::H5Location& location, const std::string& name); +// void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector dims, const std::vector& chunkDims = EMPTY_DIMS); +// void writeHdf5Attribute(H5::Group group, std::string name, std::string value); +// void writeHdf5Attribute(H5::Group group, std::string name, int64_t value); +// void writeHdf5Attribute(H5::Group group, std::string name, double value); +// void writeHdf5Attribute(H5::Group group, std::string name, bool value); + +// void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); +// void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); +// void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); + +// void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); #endif From 03a053b42253a34087a8ed9b00d15cdadb478e6c Mon Sep 17 00:00:00 2001 From: Pascal Date: Thu, 30 Jul 2020 14:27:37 +0800 Subject: [PATCH 02/13] Update to HDF5 Wrapper API --- .gitignore | 1 + CMakeLists.txt | 2 +- Converter.cc | 104 +++++++++++--------- Converter.h | 31 +++--- HDF5Wrapper.cc | 259 ++++++++++++++++++++++++++++++++++++++++++++++++- HDF5Wrapper.h | 128 +++++++++++++++++++----- MipMap.h | 32 +++--- Stats.cc | 21 ++-- Stats.h | 36 +++---- UI.h | 86 ++++++++++++++++ main.cc | 91 ++--------------- 11 files changed, 578 insertions(+), 213 deletions(-) create mode 100644 UI.h diff --git a/.gitignore b/.gitignore index 87140a3..e62ad2f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ cmake-build-*/ .idea/ build/ +*.h.gch diff --git a/CMakeLists.txt b/CMakeLists.txt index 074eee4..b8764ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ set(CMAKE_CXX_STANDARD 11) # endif() # endmacro() -FIND_PACKAGE(HDF5 REQUIRED COMPONENTS CXX) +FIND_PACKAGE(HDF5 REQUIRED COMPONENTS C) INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR}) find_package(OpenMP) diff --git a/Converter.cc b/Converter.cc index 587c0ec..4438c11 100644 --- a/Converter.cc +++ b/Converter.cc @@ -1,49 +1,51 @@ #include "Converter.h" -Converter::Converter(std::string inputFileName, std::string outputFileName, bool progress) : timer(), progress(progress) { +Converter::Converter(std::string inputFileName, std::string outputFileName, bool progress) : + timer(), progress(progress) +{ TIMER(timer.start("Setup");); - + openFitsFile(&inputFilePtr, inputFileName); - + long dims[4]; - + getFitsDims(inputFilePtr, N, dims); - + stokes = N == 4 ? dims[3] : 1; depth = N >= 3 ? dims[2] : 1; height = dims[1]; width = dims[0]; - + swizzledName = N == 3 ? "ZYX" : "ZYXW"; - + standardDims = trimAxes({stokes, depth, height, width}, N); tileDims = trimAxes({1, 1, TILE_SIZE, TILE_SIZE}, N); - + numBins = int(std::max(std::sqrt(width * height), 2.0)); - + // STATS OBJECTS auto statsXYDims = trimAxes({stokes, depth}, N - 2); statsXY = Stats(statsXYDims, numBins); - + if (depth > 1) { swizzledDims = trimAxes({stokes, width, height, depth}, N); statsZ = Stats(trimAxes({stokes, height, width}, N - 1)); auto statsXYZDims = trimAxes({stokes}, N - 3); statsXYZ = Stats(statsXYZDims, numBins); } - + // MIPMAPS mipMaps = MipMaps(standardDims, tileDims); - + // Prepare output file this->outputFileName = outputFileName; - tempOutputFileName = outputFileName + ".tmp"; + tempOutputFileName = outputFileName + ".tmp"; } Converter::~Converter() { // TODO this is probably unnecessary; the file object destructor should close the file properly. - HDF5outputFile.close(); + H5outputfile.close(); } std::unique_ptr Converter::getConverter(std::string inputFileName, std::string outputFileName, bool slow, bool progress) { @@ -54,91 +56,100 @@ std::unique_ptr Converter::getConverter(std::string inputFileName, st } } -void Converter::copyAndCalculate() { - // implemented in subclasses -} - -void Converter::reportMemoryUsage() { - // implemented in subclasses -} +// void Converter::copyAndCalculate() { +// // implemented in subclasses +// } +// +// void Converter::reportMemoryUsage() { +// // implemented in subclasses +// } +// void Converter::convert() { // CREATE OUTPUT FILE - + // TODO dataset variables should be local and passed into the copy function? - + // outputFile = H5::H5File(tempOutputFileName, H5F_ACC_TRUNC); // outputGroup = outputFile.createGroup("0"); H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC); - hid_t gid = H5outputfile.create_group("0"); - + std::string parentpath = "0"; + hid_t gid = H5outputfile.create_group(parentpath); + std::vector chunkDims; if (useChunks(standardDims)) { chunkDims = tileDims; } - + // H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); // floatType.setOrder(H5T_ORDER_LE); // createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims); //???? - - + // statsXY.createDatasets(outputGroup, "XY"); if (depth > 1) { // statsXYZ.createDatasets(outputGroup, "XYZ"); // statsZ.createDatasets(outputGroup, "Z"); - + // auto swizzledGroup = outputGroup.createGroup("SwizzledData"); // We use this name in papers because it sounds more serious. :) // outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData"); // createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims); } - - mipMaps.createDatasets(outputGroup); - + + /// mipMaps.createDatasets(outputGroup); + // COPY HEADERS - + TIMER(timer.start("Headers");); - + // writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION)); // writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER)); // writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION)); + H5outputfile.write_attribute(parentpath, "SCHEMA_VERSION", std::string(SCHEMA_VERSION)); + H5outputfile.write_attribute(parentpath, "HDF5_CONVERTER", std::string(HDF5_CONVERTER)); + H5outputfile.write_attribute(parentpath, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION)); + int numAttributes; readFitsHeader(inputFilePtr, numAttributes); - + // IMPORTANT: This is 1-indexed! - for (int i = 1; i <= numAttributes; i++) { + for (int i = 1; i <= numAttributes; i++) { std::string attributeName; std::string attributeValue; readFitsAttribute(inputFilePtr, i, attributeName, attributeValue); - + if (attributeName.empty() || attributeName.find("COMMENT") == 0 || attributeName.find("HISTORY") == 0) { // TODO we should actually do something about these } else { - if (outputGroup.attrExists(attributeName)) { + // if (outputGroup.attrExists(attributeName)) { + if (H5outputfile.exists_attribute(parentpath, attributeName)) { std::cout << "Warning: Skipping duplicate attribute '" << attributeName << "'" << std::endl; } else { bool parsingFailure(false); - + if (attributeValue.length() >= 2 && attributeValue.find('\'') == 0 && attributeValue.find_last_of('\'') == attributeValue.length() - 1) { // STRING std::string attributeValueStr; readFitsStringAttribute(inputFilePtr, attributeName, attributeValueStr); // writeHdf5Attribute(outputGroup, attributeName, attributeValueStr); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueStr); } else if (attributeValue == "T" || attributeValue == "F") { // BOOLEAN bool attributeValueBool = (attributeValue == "T"); // writeHdf5Attribute(outputGroup, attributeName, attributeValueBool); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueBool); } else if (attributeValue.find('.') != std::string::npos) { // TRY TO PARSE AS DOUBLE try { double attributeValueDouble = std::stod(attributeValue); // writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueDouble); } catch (const std::invalid_argument& ia) { std::cout << "Warning: could not parse attribute '" << attributeName << "' as a float." << std::endl; parsingFailure = true; @@ -147,7 +158,8 @@ void Converter::convert() { long double attributeValueLongDouble = std::stold(attributeValue); double attributeValueDouble = (double) attributeValueLongDouble; // writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); - + H5outputfile.write_attribute(parentpath, attributeName, attributeValueDouble); + std::ostringstream ostream; ostream.precision(13); ostream << attributeValueDouble; @@ -155,7 +167,7 @@ void Converter::convert() { std::string round_trip(ostream.str()); transform(original.begin(), original.end(), original.begin(), ::toupper); transform(round_trip.begin(), round_trip.end(), round_trip.begin(), ::toupper); - + if (original != round_trip) { std::cout << "Warning: the value of attribute '" << attributeName << "' is not representable as a normalised double precision floating point number. Some precision has been lost.\nOriginal string representation:\n'" << original << "'\nFinal string representation:\n'" << round_trip << "'" << std::endl; } @@ -165,26 +177,28 @@ void Converter::convert() { try { int64_t attributeValueInt = std::stoi(attributeValue); // writeHdf5Attribute(outputGroup, attributeName, attributeValueInt); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueInt); } catch (const std::invalid_argument& ia) { std::cout << "Warning: could not parse attribute '" << attributeName << "' as an integer." << std::endl; parsingFailure = true; } } - + if (parsingFailure) { // FALL BACK TO STRING // writeHdf5Attribute(outputGroup, attributeName, attributeValue); + H5outputfile.write_attribute(parentpath, attributeName, attributeValue); } } } } - + // MAIN CONVERSION AND CALCULATION FUNCTION copyAndCalculate(); - + TIMER(timer.print(product(standardDims));); - + // Rename from temp file rename(tempOutputFileName.c_str(), outputFileName.c_str()); } diff --git a/Converter.h b/Converter.h index e0f36e9..a356f25 100644 --- a/Converter.h +++ b/Converter.h @@ -13,50 +13,51 @@ class Converter { Converter() {} Converter(std::string inputFileName, std::string outputFileName, bool progress); ~Converter(); - + static std::unique_ptr getConverter(std::string inputFileName, std::string outputFileName, bool slow, bool progress); void convert(); virtual void reportMemoryUsage(); - + + H5OutputFile H5outputfile; + protected: virtual void copyAndCalculate(); - + Timer timer; bool progress; - + std::string tempOutputFileName; std::string outputFileName; fitsfile* inputFilePtr; - + // Main HDF5 objects // H5::H5File outputFile; // H5::Group outputGroup; // H5::DataSet standardDataSet; // H5::DataSet swizzledDataSet; - H5OutputFile H5outputfile; - + float* standardCube; float* rotatedCube; - + // Stats Stats statsXY; Stats statsZ; Stats statsXYZ; - + // MipMaps MipMaps mipMaps; - + int N; hsize_t stokes, depth, height, width; hsize_t numBins; - + // Dataset dimensions - + std::vector standardDims; std::vector swizzledDims; std::vector tileDims; - + std::string swizzledName; }; @@ -65,7 +66,7 @@ class FastConverter : public Converter { public: FastConverter(std::string inputFileName, std::string outputFileName, bool progress); void reportMemoryUsage() override; - + protected: void copyAndCalculate() override; }; @@ -75,7 +76,7 @@ class SlowConverter : public Converter { public: SlowConverter(std::string inputFileName, std::string outputFileName, bool progress); void reportMemoryUsage() override; - + protected: void copyAndCalculate() override; }; diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc index 2e5bc7a..25d91ca 100644 --- a/HDF5Wrapper.cc +++ b/HDF5Wrapper.cc @@ -1,4 +1,4 @@ -#include "HDF5Wrapper.h" +#include "HDF5Wrapper.h" void H5OutputFile::create(std::string filename, hid_t flag, int taskID, bool iparallelopen) @@ -91,6 +91,263 @@ void H5OutputFile::close() #endif } +std::vector H5OutputFile::_tokenize(const std::string &s) +{ + std::string delims("/"); + std::string::size_type lastPos = s.find_first_not_of(delims, 0); + std::string::size_type pos = s.find_first_of(delims, lastPos); + + std::vector tokens; + while (std::string::npos != pos || std::string::npos != lastPos) { + tokens.push_back(s.substr(lastPos, pos - lastPos)); + lastPos = s.find_first_not_of(delims, pos); + pos = s.find_first_of(delims, lastPos); + } + return tokens; +} + +/// get an attribute going to list of hids +void H5OutputFile::_get_attribute(std::vector &ids, const std::string attr_name) +{ + //can use H5Aexists as it is the C interface but how to access it? + auto exists = H5Aexists(ids.back(), attr_name.c_str()); + if (exists == 0) { + throw std::invalid_argument(std::string("attribute not found ") + attr_name); + } + else if (exists < 0) { + throw std::runtime_error("Error on H5Aexists"); + } + auto attr = H5Aopen(ids.back(), attr_name.c_str(), H5P_DEFAULT); + ids.push_back(attr); +} +/// get attributes parsing list of ids and vector of strings +void H5OutputFile::_get_attribute(std::vector &ids, const std::vector &parts) +{ + // This is the attribute name, so open it and store the id + if (parts.size() == 1) { + _get_attribute(ids, parts[0]); + } + else + { + //otherwise enter group and recursively call funciotn + H5O_info_t object_info; + hid_t newid; + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT); + if (object_info.type == H5O_TYPE_GROUP) { + newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + else if (object_info.type == H5O_TYPE_DATASET) { + newid = H5Dopen(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + ids.push_back(newid); + //get the substring + std::vector subparts(parts.begin() + 1, parts.end()); + //call function again + _get_attribute(ids, subparts); + } +} + +/// get attribute in file, storing relevant ids in vector +void H5OutputFile::get_attribute(std::vector &ids, const std::string &name) +{ + std::vector parts = _tokenize(name); + ids.push_back(file_id); + _get_attribute(ids, parts); +} + +/// get an attribute going to list of hids +void H5OutputFile::_get_dataset(std::vector &ids, const std::string dset_name) +{ + auto dset_id = H5Dopen(ids.back(), dset_name.c_str(), H5P_DEFAULT); + if (dset_id < 0) { + throw std::invalid_argument(std::string("dataset not found ") + dset_name); + } + ids.push_back(dset_id); +} +/// get attributes parsing list of ids and vector of strings +void H5OutputFile::_get_dataset(std::vector &ids, const std::vector &parts) +{ + // This is the attribute name, so open it and store the id + if (parts.size() == 1) { + _get_dataset(ids, parts[0]); + } + else + { + //otherwise enter group and recursively call funciotn + H5O_info_t object_info; + hid_t newid; + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT); + if (object_info.type == H5O_TYPE_GROUP) { + newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + else if (object_info.type == H5O_TYPE_DATASET) { + throw std::runtime_error("Incorrect path to data set, encountered another data set in path"); + } + ids.push_back(newid); + //get the substring + std::vector subparts(parts.begin() + 1, parts.end()); + //call function again + _get_dataset(ids, subparts); + } +} + +/// get attribute in file, storing relevant ids in vector +void H5OutputFile::get_dataset(std::vector &ids, const std::string &name) +{ + std::vector parts = _tokenize(name); + ids.push_back(file_id); + _get_dataset(ids, parts); +} + + +/// close open hids stored in vector +void H5OutputFile::close_hdf_ids(std::vector &ids) +{ + H5O_info_t object_info; + for (auto &id:ids) + { + H5Oget_info(id, &object_info); + if (object_info.type == H5O_TYPE_GROUP) { + H5Gclose(id); + } + else if (object_info.type == H5O_TYPE_GROUP) { + H5Dclose(id); + } + } +} + +/// read a scalar attribute +template void H5OutputFile::_do_read(const hid_t &attr, const hid_t &type, T &val) +{ + if (hdf5_type(T{}) == H5T_C_S1) + { + _do_read_string(attr, type, val); + } + else { + H5Aread(attr, type, &val); + } +} + +/// read a string attribute +void H5OutputFile::_do_read_string(const hid_t &attr, const hid_t &type, std::string &val) +{ + std::vector buf; + hid_t type_in_file = H5Aget_type(attr); + hid_t type_in_memory = H5Tcopy(type); // copy memory type because we'll need to modify it + size_t length = H5Tget_size(type_in_file); // get length of the string in the file + buf.resize(length+1); // resize buffer in memory, allowing for null terminator + H5Tset_size(type_in_memory, length+1); // tell HDF5 the length of the buffer in memory + H5Tset_strpad(type_in_memory, H5T_STR_NULLTERM); // specify that we want a null terminated string + H5Aread(attr, type_in_memory, buf.data()); + H5Tclose(type_in_memory); + H5Tclose(type_in_file); + val=std::string(buf.data()); +} + +/// read a vector attribute +template void H5OutputFile::_do_read_v(const hid_t &attr, const hid_t &type, std::vector &val) +{ + hid_t space = H5Aget_space (attr); + int npoints = H5Sget_simple_extent_npoints(space); + val.resize(npoints); + H5Aread(attr, type, val.data()); + H5Sclose(space); +} + +template const T H5OutputFile::read_attribute(const std::string &name) { + std::string attr_name; + T val; + hid_t type; + H5O_info_t object_info; + std::vector ids; + //traverse the file to get to the attribute, storing the ids of the + //groups, data spaces, etc that have been opened. + get_attribute(ids, name); + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //determine hdf5 type of the array in memory + type = hdf5_type(T{}); + // read the data + _do_read(ids[0], type, val); + H5Aclose(ids[0]); + ids.erase(ids.begin()); + //now have hdf5 ids traversed to get to desired attribute so move along to close all + //based on their object type + close_hdf_ids(ids); + return val; +} + +//read vector attribute +template const std::vector H5OutputFile::read_attribute_v(const std::string &name) { + std::string attr_name; + std::vector val; + hid_t type; + H5O_info_t object_info; + std::vector ids; + //traverse the file to get to the attribute, storing the ids of the + //groups, data spaces, etc that have been opened. + get_attribute(ids, name); + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //determine hdf5 type of the array in memory + type = hdf5_type(T{}); + // read the data + _do_read_v(ids[0], type, val); + H5Aclose(ids[0]); + ids.erase(ids.begin()); + //now have hdf5 ids traversed to get to desired attribute so move along to close all + //based on their object type + close_hdf_ids(ids); + return val; +} + + +bool H5OutputFile::exists_attribute(const std::string &parent, const std::string &name) { + std::string attr_name = parent+std::string("/")+name; + std::vector ids; + bool exists = true; + //groups, data spaces, etc that have been opened. + try { + get_attribute(ids, attr_name); + } + catch (const std::invalid_argument& ia) + { + exists = false; + } + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //check if present + if (exists) { + H5Aclose(ids[0]); + ids.erase(ids.begin()); + } + close_hdf_ids(ids); + return exists; +} + +bool H5OutputFile::exists_dataset(const std::string &parent, const std::string &name) { + std::string dset_name = parent+std::string("/")+name; + std::vector ids; + bool exists = true; + //groups, data spaces, etc that have been opened. + try { + get_dataset(ids, dset_name); + } + catch (const std::invalid_argument& ia) + { + exists = false; + } + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //check if present + if (exists) { + H5Dclose(ids[0]); + ids.erase(ids.begin()); + } + close_hdf_ids(ids); + return exists; +} + /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of /// the input data if not explicitly specified with the filetype_id parameter. template void H5OutputFile::write_dataset(std::string name, hsize_t len, T *data, diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h index 1cd5d3f..2180a98 100644 --- a/HDF5Wrapper.h +++ b/HDF5Wrapper.h @@ -5,7 +5,7 @@ #ifdef USEMPI #include -#endif +#endif // Overloaded function to return HDF5 type given a C type static inline hid_t hdf5_type(float dummy) {return H5T_NATIVE_FLOAT;} @@ -19,15 +19,25 @@ static inline hid_t hdf5_type(unsigned int dummy) {return H5T_NATIVE_UINT; static inline hid_t hdf5_type(unsigned long dummy) {return H5T_NATIVE_ULONG;} static inline hid_t hdf5_type(unsigned long long dummy) {return H5T_NATIVE_ULLONG;} static inline hid_t hdf5_type(std::string dummy) {return H5T_C_S1;} -static inline hid_t hdf5_type_from_string(std::string dummy) +static inline hid_t hdf5_type_from_string(std::string dummy) { if (dummy == std::string("float32")) return H5T_NATIVE_FLOAT; else if (dummy == std::string("float64")) return H5T_NATIVE_DOUBLE; - else if (dummy == std::string("float64")) return H5T_NATIVE_INT; + else if (dummy == std::string("int16")) return H5T_NATIVE_SHORT; + else if (dummy == std::string("int32")) return H5T_NATIVE_INT; else if (dummy == std::string("int64")) return H5T_NATIVE_LLONG; + else if (dummy == std::string("uint16")) return H5T_NATIVE_USHORT; + else if (dummy == std::string("uint32")) return H5T_NATIVE_UINT; + else if (dummy == std::string("uint64")) return H5T_NATIVE_ULLONG; else return H5T_C_S1; } +#if H5_VERSION_GE(1,10,1) +#endif + +#if H5_VERSION_GE(1,12,0) +#endif + ///\name HDF class to manage writing information class H5OutputFile { @@ -41,7 +51,10 @@ class H5OutputFile protected: - // Called if a HDF5 call fails (might need to MPI_Abort) + /// size of chunks when compressing + unsigned int HDFOUTPUTCHUNKSIZE = 8192; + + /// Called if a HDF5 call fails (might need to MPI_Abort) void io_error(std::string message) { std::cerr << message << std::endl; #ifdef USEMPI @@ -49,7 +62,25 @@ class H5OutputFile #endif abort(); } - unsigned int HDFOUTPUTCHUNKSIZE = 8192; + + /// tokenize a path given an input string + std::vector _tokenize(const std::string &s); + /// get attribute id + void _get_attribute(std::vector &ids, const std::string attr_name); + /// get attribute id from tokenized string + void _get_attribute(std::vector &ids, const std::vector &parts); + /// wrapper for reading scalar + template void _do_read(const hid_t &attr, const hid_t &type, T &val); + /// wrapper for reading string + void _do_read_string(const hid_t &attr, const hid_t &type, std::string &val); + /// wrapper for reading vector + template void _do_read_v(const hid_t &attr, const hid_t &type, std::vector &val); + + /// get dataset id + void _get_dataset(std::vector &ids, const std::string dset_name); + /// get attribute id from tokenized string + void _get_dataset(std::vector &ids, const std::vector &parts); + public: // Constructor @@ -60,44 +91,85 @@ class H5OutputFile #endif } - // Create a new file + // Destructor closes the file if it's open + ~H5OutputFile() + { + if(file_id >= 0) close(); + } + + /// Create a new file void create(std::string filename, hid_t flag = H5F_ACC_TRUNC, int taskID = -1, bool iparallelopen = true); - + /// Append to a file void append(std::string filename, hid_t flag = H5F_ACC_RDWR, int taskID = -1, bool iparallelopen = true); - // Close the file + /// Close the file void close(); + /// create a group hid_t create_group(std::string groupname) { hid_t group_id = H5Gcreate(file_id, groupname.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); return group_id; } + /// close group herr_t close_group(hid_t gid) { herr_t status = H5Gclose(gid); return status; } - //???? - hid_t create_dataset(std::string dsetname, hid_t file) { - hid_t dset_id H5Dcreate(file_id, datasetname, -1, -1, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - hid_t group_id = H5Dcreate(file_id, groupname.c_str(), - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - return group_id; + /// close hids stored in vector + void close_hdf_ids(std::vector &ids); + + /// create a dataspace + hid_t create_dataspace(const std::vector &dims) + { + hid_t dspace_id; + int rank = dims.size(); + dspace_id = H5Screate_simple(rank, dims.data(), NULL); + return dspace_id; } - herr_t close_dataset(hid_t did) { - herr_t status = H5Gclose(gid); + hid_t create_dataspace(int rank, hsize_t *dims) + { + hid_t dspace_id; + dspace_id = H5Screate_simple(rank, dims, NULL); + return dspace_id; + } + hid_t create_dataspace(hsize_t len) + { + int rank = 1; + hsize_t dims[1] = {len}; + hid_t dspace_id = H5Screate_simple(rank, dims, NULL); + return dspace_id; + } + /// close data set + herr_t close_dataspace(hid_t dspace_id) { + herr_t status = H5Dclose(dspace_id); return status; } - // Destructor closes the file if it's open - ~H5OutputFile() + /// create a dataset + hid_t create_dataset(std::string dsetname, hid_t type_id, hid_t dspace_id) { - if(file_id >= 0) close(); + hid_t dset_id; + dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + return dset_id; + } + template hid_t create_dataset(std::string dsetname, T *data, hid_t dspace_id) + { + hid_t dset_id; + hid_t type_id = hdf5_type(T{}); + dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + return dset_id; + } + /// close data set + herr_t close_dataset(hid_t dset_id) { + herr_t status = H5Dclose(dset_id); + return status; } - /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of /// the input data if not explicitly specified with the filetype_id parameter. template void write_dataset(std::string name, hsize_t len, T *data, @@ -117,12 +189,24 @@ class H5OutputFile bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); + /// get a dataset with full path given by name + void get_dataset(std::vector &ids, const std::string &name); + /// check if dataset exits + bool exists_dataset(const std::string &parent, const std::string &name); + + /// get an attribute with full path given by name + void get_attribute(std::vector &ids, const std::string &name); + /// read scalar attribute + template const T read_attribute(const std::string &name); + /// read vector attribute + template const std::vector read_attribute_v(const std::string &name); + /// sees if attribute exits + bool exists_attribute(const std::string &parent, const std::string &name); /// write an attribute template void write_attribute(std::string parent, std::string name, std::vector data); - template void write_attribute(std::string parent, std::string name, T data); - void write_attribute(std::string parent, std::string name, std::string data); + }; #endif diff --git a/MipMap.h b/MipMap.h index 5716c94..d9195cc 100644 --- a/MipMap.h +++ b/MipMap.h @@ -9,11 +9,11 @@ struct MipMap { MipMap() {}; MipMap(const std::vector& datasetDims, int mip); ~MipMap(); - + // void createDataset(H5::Group group, const std::vector& chunkDims); - void createDataset(H5Outputfile h5outputfile, hid_t gid, const std::vector& chunkDims); + void createDataset(H5OutputFile &h5outputfile, hid_t gid, const std::vector& chunkDims); void createBuffers(std::vector& bufferDims); - + void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) { hsize_t mipIndex = totalChannelOffset * width * height + (y / mip) * width + (x / mip); vals[mipIndex] += val; @@ -29,23 +29,23 @@ struct MipMap { } } } - + void write(hsize_t stokesOffset, hsize_t channelOffset); void resetBuffers(); - + std::vector datasetDims; int mip; - + H5::DataSet dataset; - + std::vector bufferDims; hsize_t bufferSize; - + hsize_t width; hsize_t height; hsize_t depth; hsize_t stokes; - + double* vals; int* count; }; @@ -54,14 +54,14 @@ struct MipMap { struct MipMaps { MipMaps() {}; MipMaps(std::vector standardDims, const std::vector& chunkDims); - + // We need the dataset dimensions to work out how many mipmaps we have static hsize_t size(const std::vector& standardDims, const std::vector& standardBufferDims); - + // void createDatasets(H5::Group group); - void createDatasets(H5OutputFile H5Outputfile, hid_t gid); + void createDatasets(H5OutputFile &H5outputfile, hid_t gid); void createBuffers(const std::vector& standardBufferDims); - + void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) { for (auto& mipMap : mipMaps) { mipMap.accumulate(val, x, y, totalChannelOffset); @@ -73,16 +73,16 @@ struct MipMaps { mipMap.calculate(); } } - + // TODO if we ever want a tiled mipmap calculation // we'll need to implement options to pass in custom buffer dims // and additional x and y offsets void write(hsize_t stokesOffset, hsize_t channelOffset); void resetBuffers(); - + std::vector standardDims; std::vector chunkDims; - + std::vector mipMaps; }; diff --git a/Stats.cc b/Stats.cc index f62d893..f0ff618 100644 --- a/Stats.cc +++ b/Stats.cc @@ -23,35 +23,36 @@ hsize_t Stats::size(std::vector dims, hsize_t numBins, hsize_t partialH } // void Stats::createDatasets(H5::Group group, std::string name) { -void Stats::createDatasets(H5OutputFile H5outputfile, hid_t gid, std::string name) { +void Stats::createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string name) { + // H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); // floatType.setOrder(H5T_ORDER_LE); -// +// // H5::IntType intType(H5::PredType::NATIVE_INT64); // intType.setOrder(H5T_ORDER_LE); -// +// // createHdf5Dataset(minDset, group, "Statistics/" + name + "/MIN", floatType, basicDatasetDims); // createHdf5Dataset(maxDset, group, "Statistics/" + name + "/MAX", floatType, basicDatasetDims); // createHdf5Dataset(sumDset, group, "Statistics/" + name + "/SUM", floatType, basicDatasetDims); // createHdf5Dataset(ssqDset, group, "Statistics/" + name + "/SUM_SQ", floatType, basicDatasetDims); // createHdf5Dataset(nanDset, group, "Statistics/" + name + "/NAN_COUNT", intType, basicDatasetDims); -// +// // if (numBins) { // createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins})); // } - + } void Stats::createBuffers(std::vector dims, hsize_t partialHistMultiplier) { fullBasicBufferDims = dims; auto statsSize = product(dims); - + minVals = new float[statsSize]; maxVals = new float[statsSize]; sums = new double[statsSize]; sumsSq = new double[statsSize]; nanCounts = new int64_t[statsSize]; - + if (numBins) { histograms = new int64_t[statsSize * numBins]; partialHistograms = new int64_t[statsSize * numBins * partialHistMultiplier]; @@ -67,7 +68,7 @@ void Stats::clearHistogramBuffers() { void Stats::write() { writeBasic(fullBasicBufferDims); - + if (numBins) { writeHistogram(fullBasicBufferDims); } @@ -80,13 +81,13 @@ void Stats::write(const std::vector& count, const std::vector& void Stats::write(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { auto basicN = basicDatasetDims.size(); writeBasic(basicBufferDims, trimAxes(count, basicN), trimAxes(start, basicN)); - + if (numBins) { auto histN = basicN + 1; writeHistogram(basicBufferDims, trimAxes(extend(count, {numBins}), histN), trimAxes(extend(start, {0}), histN)); } } - + // void Stats::writeBasic(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { void Stats::writeBasic(H5OutputFile H5outputfile, std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { // writeHdf5Data(minDset, minVals, basicBufferDims, count, start); diff --git a/Stats.h b/Stats.h index 50dbde8..a244a9d 100644 --- a/Stats.h +++ b/Stats.h @@ -8,7 +8,7 @@ struct StatsCounter { StatsCounter() : minVal(std::numeric_limits::max()), maxVal(-std::numeric_limits::max()), sum(0), sumSq(0), nanCount(0) { } - + void accumulateFinite(float val) { minVal = fmin(minVal, val); maxVal = fmax(maxVal, val); @@ -48,16 +48,16 @@ struct Stats { Stats(); Stats(const std::vector& basicDatasetDims, hsize_t numBins = 0); ~Stats(); - + static hsize_t size(std::vector dims, hsize_t numBins = 0, hsize_t partialHistMultiplier = 0); - + // Setup //void createDatasets(H5::Group group, std::string name); - void createDatasets(H5Outputfile H5outputfile, hid_t gid, std::string name); + void createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string name); void createBuffers(std::vector dims, hsize_t partialHistMultiplier = 0); - + // Basic stats - + void accumulateStatsToCounter(StatsCounter& counter, hsize_t index) { if (std::isfinite(maxVals[index])) { counter.sum += sums[index]; @@ -67,7 +67,7 @@ struct Stats { } counter.nanCount += nanCounts[index]; } - + void copyStatsFromCounter(hsize_t index, hsize_t totalVals, const StatsCounter& counter) { if ((hsize_t)counter.nanCount == totalVals) { minVals[index] = NAN; @@ -80,11 +80,11 @@ struct Stats { sumsSq[index] = counter.sumSq; nanCounts[index] = counter.nanCount; } - + // Histograms // Histograms - + void clearHistogramBuffers(); void accumulateHistogram(float val, double min, double range, hsize_t offset) { @@ -104,38 +104,38 @@ struct Stats { } } } - + // Writing void write(); void write(const std::vector& count, const std::vector& start); void write(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start); void writeBasic(const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); void writeHistogram(const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); - + // Dataset dimensions std::vector basicDatasetDims; hsize_t numBins; - + // Datasets // H5::DataSet minDset; // H5::DataSet maxDset; // H5::DataSet sumDset; // H5::DataSet ssqDset; // H5::DataSet nanDset; -// +// // H5::DataSet histDset; - + hid_t minDset; hid_t maxDset; hid_t sumDset; hid_t ssqDset; hid_t nanDset; - + hid_t histDset; - + // Buffer dimensions - + std::vector fullBasicBufferDims; hsize_t partialHistMultiplier; @@ -146,7 +146,7 @@ struct Stats { double* sums; double* sumsSq; int64_t* nanCounts; - + int64_t* histograms; int64_t* partialHistograms; }; diff --git a/UI.h b/UI.h new file mode 100644 index 0000000..2926b9e --- /dev/null +++ b/UI.h @@ -0,0 +1,86 @@ +#ifndef __UI_H +#define __UI_H + +#include +#include "common.h" + +bool getOptions(int argc, char** argv, std::string& inputFileName, std::string& outputFileName, bool& slow, bool& progress, bool& onlyReportMemory) { + extern int optind; + extern char *optarg; + + int opt; + bool err(false); + + std::ostringstream usage; + usage << "IDIA FITS to HDF5 converter version " << HDF5_CONVERTER_VERSION + << " using IDIA schema version " << SCHEMA_VERSION << std::endl + << "Usage: hdf_convert [-o output_filename] [-s] [-p] [-m] input_filename" << std::endl << std::endl + << "Options:" << std::endl + << "-o\tOutput filename" << std::endl + << "-s\tUse slower but less memory-intensive method (enable if memory allocation fails)" << std::endl + << "-p\tPrint progress output (by default the program is silent)" << std::endl + << "-m\tReport predicted memory usage and exit without performing the conversion" << std::endl + << "-q\tSuppress all non-error output. Deprecated; this is now the default." << std::endl; + + while ((opt = getopt(argc, argv, ":o:spqm")) != -1) { + switch (opt) { + case 'o': + outputFileName.assign(optarg); + break; + case 's': + // use slower but less memory-intensive method + slow = true; + break; + case 'p': + progress = true; + break; + case 'q': + std::cerr << "The -q flag is deprecated. The converter is quiet by default." << std::endl; + break; + case 'm': + // only print memory usage and exit + onlyReportMemory = true; + break; + case ':': + err = true; + std::cerr << "Missing argument for option " << opt << "." << std::endl; + break; + case '?': + err = true; + std::cerr << "Unknown option " << opt << "." << std::endl; + break; + } + } + + if (optind >= argc) { + err = true; + std::cerr << "Missing input filename parameter." << std::endl; + } else { + inputFileName.assign(argv[optind]); + optind++; + } + + if (argc > optind) { + err = true; + std::cerr << "Unexpected additional parameters." << std::endl; + } + + if (err) { + std::cerr << std::endl << usage.str() << std::endl; + return false; + } + + if (outputFileName.empty()) { + auto fitsIndex = inputFileName.find_last_of(".fits"); + if (fitsIndex != std::string::npos) { + outputFileName = inputFileName.substr(0, fitsIndex - 4); + outputFileName += ".hdf5"; + } else { + outputFileName = inputFileName + ".hdf5"; + } + } + + return true; +} + +#endif diff --git a/main.cc b/main.cc index f0ac7a6..d816679 100644 --- a/main.cc +++ b/main.cc @@ -1,106 +1,27 @@ -#include +#include "UI.h" #include "Converter.h" -bool getOptions(int argc, char** argv, std::string& inputFileName, std::string& outputFileName, bool& slow, bool& progress, bool& onlyReportMemory) { - extern int optind; - extern char *optarg; - - int opt; - bool err(false); - - std::ostringstream usage; - usage << "IDIA FITS to HDF5 converter version " << HDF5_CONVERTER_VERSION - << " using IDIA schema version " << SCHEMA_VERSION << std::endl - << "Usage: hdf_convert [-o output_filename] [-s] [-p] [-m] input_filename" << std::endl << std::endl - << "Options:" << std::endl - << "-o\tOutput filename" << std::endl - << "-s\tUse slower but less memory-intensive method (enable if memory allocation fails)" << std::endl - << "-p\tPrint progress output (by default the program is silent)" << std::endl - << "-m\tReport predicted memory usage and exit without performing the conversion" << std::endl - << "-q\tSuppress all non-error output. Deprecated; this is now the default." << std::endl; - - while ((opt = getopt(argc, argv, ":o:spqm")) != -1) { - switch (opt) { - case 'o': - outputFileName.assign(optarg); - break; - case 's': - // use slower but less memory-intensive method - slow = true; - break; - case 'p': - progress = true; - break; - case 'q': - std::cerr << "The -q flag is deprecated. The converter is quiet by default." << std::endl; - break; - case 'm': - // only print memory usage and exit - onlyReportMemory = true; - break; - case ':': - err = true; - std::cerr << "Missing argument for option " << opt << "." << std::endl; - break; - case '?': - err = true; - std::cerr << "Unknown option " << opt << "." << std::endl; - break; - } - } - - if (optind >= argc) { - err = true; - std::cerr << "Missing input filename parameter." << std::endl; - } else { - inputFileName.assign(argv[optind]); - optind++; - } - - if (argc > optind) { - err = true; - std::cerr << "Unexpected additional parameters." << std::endl; - } - - if (err) { - std::cerr << std::endl << usage.str() << std::endl; - return false; - } - - if (outputFileName.empty()) { - auto fitsIndex = inputFileName.find_last_of(".fits"); - if (fitsIndex != std::string::npos) { - outputFileName = inputFileName.substr(0, fitsIndex - 4); - outputFileName += ".hdf5"; - } else { - outputFileName = inputFileName + ".hdf5"; - } - } - - return true; -} - int main(int argc, char** argv) { std::string inputFileName; std::string outputFileName; bool slow(false); bool progress(false); bool onlyReportMemory(false); - + if (!getOptions(argc, argv, inputFileName, outputFileName, slow, progress, onlyReportMemory)) { return 1; } - + std::unique_ptr converter; - + try { converter = Converter::getConverter(inputFileName, outputFileName, slow, progress); - + if (onlyReportMemory) { converter->reportMemoryUsage(); return 0; } - + DEBUG(std::cout << "Converting FITS file " << inputFileName << " to HDF5 file " << outputFileName << (slow ? " using slower, memory-efficient method" : "") << std::endl;); converter->convert(); From 81d4c03612d6ee1b6356dd647caae960ab231dd2 Mon Sep 17 00:00:00 2001 From: Pascal Date: Fri, 31 Jul 2020 09:38:46 +0800 Subject: [PATCH 03/13] Update to HDF Wrapper API ensured template classes defined in header, added some functionality. Still requires updates to Converter, Stats and MinMap classes to use appropriate calls --- Converter.cc | 24 +-- Converter.h | 6 +- FastConverter.cc | 129 +++++++-------- HDF5Wrapper.cc | 415 +++++++++++++---------------------------------- HDF5Wrapper.h | 398 +++++++++++++++++++++++++++++++++++++++++---- MipMap.cc | 51 +++--- MipMap.h | 8 +- SlowConverter.cc | 174 ++++++++++---------- Stats.cc | 23 +-- Stats.h | 10 +- 10 files changed, 694 insertions(+), 544 deletions(-) diff --git a/Converter.cc b/Converter.cc index 4438c11..b81a947 100644 --- a/Converter.cc +++ b/Converter.cc @@ -56,14 +56,14 @@ std::unique_ptr Converter::getConverter(std::string inputFileName, st } } -// void Converter::copyAndCalculate() { -// // implemented in subclasses -// } -// -// void Converter::reportMemoryUsage() { -// // implemented in subclasses -// } -// +void Converter::copyAndCalculate() { + // implemented in subclasses +} + +void Converter::reportMemoryUsage() { + // implemented in subclasses +} + void Converter::convert() { // CREATE OUTPUT FILE @@ -74,7 +74,7 @@ void Converter::convert() { // outputGroup = outputFile.createGroup("0"); H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC); - std::string parentpath = "0"; + std::string parentpath("0"); hid_t gid = H5outputfile.create_group(parentpath); std::vector chunkDims; @@ -109,9 +109,9 @@ void Converter::convert() { // writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION)); // writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER)); // writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION)); - H5outputfile.write_attribute(parentpath, "SCHEMA_VERSION", std::string(SCHEMA_VERSION)); - H5outputfile.write_attribute(parentpath, "HDF5_CONVERTER", std::string(HDF5_CONVERTER)); - H5outputfile.write_attribute(parentpath, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION)); + H5outputfile.write_attribute(parentpath, std::string("SCHEMA_VERSION"), std::string(SCHEMA_VERSION)); + H5outputfile.write_attribute(parentpath, std::string("HDF5_CONVERTER"), std::string(HDF5_CONVERTER)); + H5outputfile.write_attribute(parentpath, std::string("HDF5_CONVERTER_VERSION"), std::string(HDF5_CONVERTER_VERSION)); int numAttributes; diff --git a/Converter.h b/Converter.h index a356f25..d09dafe 100644 --- a/Converter.h +++ b/Converter.h @@ -2,15 +2,15 @@ #define __IMAGE_H #include "common.h" +#include "Util.h" +#include "HDF5Wrapper.h" #include "Stats.h" #include "MipMap.h" #include "Timer.h" -#include "Util.h" -#include "HDF5Wrapper.h" class Converter { public: - Converter() {} + Converter(){}; Converter(std::string inputFileName, std::string outputFileName, bool progress); ~Converter(); diff --git a/FastConverter.cc b/FastConverter.cc index ecf4af7..f4625c7 100644 --- a/FastConverter.cc +++ b/FastConverter.cc @@ -1,7 +1,8 @@ #include "Converter.h" // TODO do we need these? -FastConverter::FastConverter(std::string inputFileName, std::string outputFileName, bool progress) : Converter(inputFileName, outputFileName, progress) {} +FastConverter::FastConverter(std::string inputFileName, std::string outputFileName, bool progress) : +Converter(inputFileName, outputFileName, progress) {} void FastConverter::reportMemoryUsage() { std::unordered_map sizes; @@ -9,22 +10,22 @@ void FastConverter::reportMemoryUsage() { sizes["Main dataset"] = depth * height * width * sizeof(float); sizes["Mipmaps"] = MipMaps::size(standardDims, {depth, height, width}); sizes["XY stats"] = Stats::size({depth}, numBins); - + if (depth > 1) { sizes["Rotation"] = sizes["Main dataset"]; sizes["XYZ stats"] = Stats::size({}, numBins, depth); sizes["Z stats"] = Stats::size({height, width}); } - + hsize_t total(0); - + std::cout << "APPROXIMATE MEMORY REQUIREMENTS:" << std::endl; - + for (auto& kv : sizes) { std::cout << kv.first << ":\t" << kv.second * 1e-9 << " GB" << std::endl; total += kv.second; } - + std::string note = ""; if (depth > 1) { total -= std::min(sizes["Mipmaps"], sizes["Rotation"]); @@ -37,22 +38,22 @@ void FastConverter::reportMemoryUsage() { void FastConverter::copyAndCalculate() { const hsize_t pixelProgressStride = std::max((hsize_t)1, (hsize_t)(width * height / 100)); const hsize_t channelProgressStride = std::max((hsize_t)1, (hsize_t)(depth / 100)); - + TIMER(timer.start("Allocate");); - + // Process one stokes at a time hsize_t cubeSize = depth * height * width; standardCube = new float[cubeSize]; - + statsXY.createBuffers({depth}); - + if (depth > 1) { statsXYZ.createBuffers({}, depth); statsZ.createBuffers({height, width}); } - + mipMaps.createBuffers({depth, height, width}); - + std::string timerLabelXYRotation = depth > 1 ? "XY statistics and rotation" : "XY statistics"; for (unsigned int currentStokes = 0; currentStokes < stokes; currentStokes++) { @@ -63,48 +64,48 @@ void FastConverter::copyAndCalculate() { TIMER(timer.start("Read");); DEBUG(std::cout << "+ Reading main dataset..." << std::flush;); readFitsData(inputFilePtr, 0, currentStokes, cubeSize, standardCube); - + // We have to allocate the swizzled cube for each stokes because we free it to make room for mipmaps if (depth > 1) { TIMER(timer.start("Allocate");); rotatedCube = new float[cubeSize]; } - + DEBUG(std::cout << " " << timerLabelXYRotation << "..." << std::flush;); PROGRESS("\tMain loop\t"); TIMER(timer.start(timerLabelXYRotation);); // First loop calculates stats for each XY slice and rotates the dataset - + #pragma omp parallel for for (hsize_t i = 0; i < depth; i++) { PROGRESS_DECIMATED(i, channelProgressStride, "|"); StatsCounter counterXY; - + auto& indexXY = i; std::function accumulate; - + auto lazy_accumulate = [&] (float val) { counterXY.accumulateFiniteLazy(val); }; - + auto first_accumulate = [&] (float val) { counterXY.accumulateFiniteLazyFirst(val); accumulate = lazy_accumulate; }; - + accumulate = first_accumulate; - + for (hsize_t j = 0; j < height; j++) { for (hsize_t k = 0; k < width; k++) { auto sourceIndex = k + width * j + (height * width) * i; auto destIndex = i + depth * j + (height * depth) * k; auto& val = standardCube[sourceIndex]; - + if (depth > 1) { rotatedCube[destIndex] = val; } - + // Accumulate XY stats if (std::isfinite(val)) { accumulate(val); @@ -113,11 +114,11 @@ void FastConverter::copyAndCalculate() { } } } - + // Final correction of XY min and max statsXY.copyStatsFromCounter(indexXY, height * width, counterXY); } - + PROGRESS(std::endl); if (depth > 1) { @@ -125,7 +126,7 @@ void FastConverter::copyAndCalculate() { DEBUG(std::cout << " XYZ statistics..." << std::flush;); PROGRESS("\tXYZ stats" << std::endl); TIMER(timer.start("XYZ statistics");); - + StatsCounter counterXYZ; for (hsize_t i = 0; i < depth; i++) { @@ -136,7 +137,7 @@ void FastConverter::copyAndCalculate() { statsXYZ.copyStatsFromCounter(0, depth * height * width, counterXYZ); // Second loop calculates stats for each Z profile (i.e. average/min/max XY slices) - + DEBUG(std::cout << " Z statistics... " << std::flush;); PROGRESS("\tZ stats\t\t"); TIMER(timer.start("Z statistics");); @@ -145,10 +146,10 @@ void FastConverter::copyAndCalculate() { for (hsize_t j = 0; j < height; j++) { for (hsize_t k = 0; k < width; k++) { StatsCounter counterZ; - + auto indexZ = k + j * width; PROGRESS_DECIMATED(indexZ, pixelProgressStride, "."); - + for (hsize_t i = 0; i < depth; i++) { auto sourceIndex = k + width * j + (height * width) * i; auto& val = standardCube[sourceIndex]; @@ -160,7 +161,7 @@ void FastConverter::copyAndCalculate() { counterZ.accumulateNonFinite(); } } - + statsZ.copyStatsFromCounter(indexZ, depth, counterZ); } } @@ -169,62 +170,62 @@ void FastConverter::copyAndCalculate() { } // Third loop handles histograms - + DEBUG(std::cout << " Histograms..." << std::flush;); PROGRESS("\tHistograms\t"); TIMER(timer.start("Histograms");); - + double cubeMin; double cubeMax; double cubeRange; bool cubeHist(false); - + if (depth > 1) { cubeMin = statsXYZ.minVals[0]; cubeMax = statsXYZ.maxVals[0]; cubeRange = cubeMax - cubeMin; cubeHist = std::isfinite(cubeMin) && std::isfinite(cubeMax) && cubeRange > 0; } - + statsXY.clearHistogramBuffers(); statsXYZ.clearHistogramBuffers(); #pragma omp parallel for for (hsize_t i = 0; i < depth; i++) { PROGRESS_DECIMATED(i, channelProgressStride, "|"); - + auto& indexXY = i; double chanMin = statsXY.minVals[indexXY]; double chanMax = statsXY.maxVals[indexXY]; double chanRange = chanMax - chanMin; - + bool chanHist(std::isfinite(chanMin) && std::isfinite(chanMax) && chanRange > 0); - + if (!chanHist && !cubeHist) { continue; // skip the loop entirely } - + auto doChannelHistogram = [&] (float val) { // XY histogram statsXY.accumulateHistogram(val, chanMin, chanRange, i); }; - + auto doCubeHistogram = [&] (float val) { // Partial XYZ histogram statsXYZ.accumulatePartialHistogram(val, cubeMin, cubeRange, i); }; - + auto doNothing = [&] (float val) { UNUSED(val); }; - + std::function channelHistogramFunc = doChannelHistogram; std::function cubeHistogramFunc = doCubeHistogram; - + if (!chanHist) { channelHistogramFunc = doNothing; } - + if (!cubeHist) { cubeHistogramFunc = doNothing; } @@ -238,23 +239,23 @@ void FastConverter::copyAndCalculate() { } } // end of XY loop } // end of parallel Z loop - + if (depth > 1) { // Consolidate partial XYZ histograms into final histogram statsXYZ.consolidatePartialHistogram(); } - + PROGRESS(std::endl); DEBUG(std::cout << " Writing main and rotated datasets... " << std::flush;); PROGRESS("\tWrite data" << std::endl); TIMER(timer.start("Write");); - + std::vector memDims = {depth, height, width}; std::vector count = trimAxes({1, depth, height, width}, N); std::vector start = trimAxes({currentStokes, 0, 0, 0}, N); // writeHdf5Data(standardDataSet, standardCube, memDims, count, start); - + if (depth > 1) { // This all technically worked if we reused the standard filespace and memspace // But it's probably not a good idea to rely on two incorrect values cancelling each other out @@ -267,17 +268,17 @@ void FastConverter::copyAndCalculate() { if (depth > 1) { DEBUG(std::cout << " Freeing memory from rotated dataset..." << std::flush;); TIMER(timer.start("Free");); - + delete[] rotatedCube; } - + // Fourth loop handles mipmaps - + // In the fast algorithm, we keep one Stokes of mipmaps in memory at once and parallelise by channel DEBUG(std::cout << " Mipmaps..." << std::endl;); PROGRESS("\tMipmaps\t\t"); TIMER(timer.start("Mipmaps");); - + #pragma omp parallel for for (hsize_t c = 0; c < depth; c++) { PROGRESS_DECIMATED(c, channelProgressStride, "|"); @@ -292,33 +293,33 @@ void FastConverter::copyAndCalculate() { } } // end of mipmap loop PROGRESS(std::endl); - + // Final mipmap calculation mipMaps.calculate(); - + TIMER(timer.start("Write");); PROGRESS("\tWrite stats & mipmaps" << std::endl); - + // Write the mipmaps - mipMaps.write(currentStokes, 0); - - // Write the statistics - statsXY.write({1, depth}, {currentStokes, 0}); - + mipMaps.write(H5outputfile, currentStokes, 0); + + // Write the statistics + statsXY.write(H5outputfile,{1, depth}, {currentStokes, 0}); + if (depth > 1) { - statsXYZ.write({1}, {currentStokes}); - statsZ.write({1, height, width}, {currentStokes, 0, 0}); + statsXYZ.write(H5outputfile, {1}, {currentStokes}); + statsZ.write(H5outputfile, {1, height, width}, {currentStokes, 0, 0}); } - + // Clear the mipmaps before the next Stokes TIMER(timer.start("Mipmaps");); mipMaps.resetBuffers(); - + } // end of Stokes loop - + // Free memory DEBUG(std::cout << "Freeing memory from main dataset... " << std::endl;); TIMER(timer.start("Free");); - + delete[] standardCube; } diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc index 25d91ca..b358ee0 100644 --- a/HDF5Wrapper.cc +++ b/HDF5Wrapper.cc @@ -1,5 +1,20 @@ #include "HDF5Wrapper.h" +// Constructor +H5OutputFile::H5OutputFile() +{ + file_id = -1; +#ifdef USEPARALLELHDF + parallel_access_id = -1; +#endif +} + +// Destructor closes the file if it's open +H5OutputFile::~H5OutputFile() +{ + if(file_id >= 0) close(); +} + void H5OutputFile::create(std::string filename, hid_t flag, int taskID, bool iparallelopen) { @@ -150,8 +165,7 @@ void H5OutputFile::_get_attribute(std::vector &ids, const std::vector &ids, const std::string &name) { - std::vector parts = _tokenize(name); - ids.push_back(file_id); + auto parts = _tokenize(name); _get_attribute(ids, parts); } @@ -164,7 +178,7 @@ void H5OutputFile::_get_dataset(std::vector &ids, const std::string dset_ } ids.push_back(dset_id); } -/// get attributes parsing list of ids and vector of strings +/// get dataset parsing list of ids and vector of strings void H5OutputFile::_get_dataset(std::vector &ids, const std::vector &parts) { // This is the attribute name, so open it and store the id @@ -194,8 +208,7 @@ void H5OutputFile::_get_dataset(std::vector &ids, const std::vector &ids, const std::string &name) { - std::vector parts = _tokenize(name); - ids.push_back(file_id); + auto parts = _tokenize(name); _get_dataset(ids, parts); } @@ -216,16 +229,13 @@ void H5OutputFile::close_hdf_ids(std::vector &ids) } } -/// read a scalar attribute -template void H5OutputFile::_do_read(const hid_t &attr, const hid_t &type, T &val) +/// close open hids stored in vector +void H5OutputFile::close_path(std::string path) { - if (hdf5_type(T{}) == H5T_C_S1) - { - _do_read_string(attr, type, val); - } - else { - H5Aread(attr, type, &val); - } + auto parts = _tokenize(path); + std::vector ids; + // _get_path(ids, parts); + close_hdf_ids(ids); } /// read a string attribute @@ -244,64 +254,6 @@ void H5OutputFile::_do_read_string(const hid_t &attr, const hid_t &type, std::st val=std::string(buf.data()); } -/// read a vector attribute -template void H5OutputFile::_do_read_v(const hid_t &attr, const hid_t &type, std::vector &val) -{ - hid_t space = H5Aget_space (attr); - int npoints = H5Sget_simple_extent_npoints(space); - val.resize(npoints); - H5Aread(attr, type, val.data()); - H5Sclose(space); -} - -template const T H5OutputFile::read_attribute(const std::string &name) { - std::string attr_name; - T val; - hid_t type; - H5O_info_t object_info; - std::vector ids; - //traverse the file to get to the attribute, storing the ids of the - //groups, data spaces, etc that have been opened. - get_attribute(ids, name); - //now reverse ids and load attribute - reverse(ids.begin(),ids.end()); - //determine hdf5 type of the array in memory - type = hdf5_type(T{}); - // read the data - _do_read(ids[0], type, val); - H5Aclose(ids[0]); - ids.erase(ids.begin()); - //now have hdf5 ids traversed to get to desired attribute so move along to close all - //based on their object type - close_hdf_ids(ids); - return val; -} - -//read vector attribute -template const std::vector H5OutputFile::read_attribute_v(const std::string &name) { - std::string attr_name; - std::vector val; - hid_t type; - H5O_info_t object_info; - std::vector ids; - //traverse the file to get to the attribute, storing the ids of the - //groups, data spaces, etc that have been opened. - get_attribute(ids, name); - //now reverse ids and load attribute - reverse(ids.begin(),ids.end()); - //determine hdf5 type of the array in memory - type = hdf5_type(T{}); - // read the data - _do_read_v(ids[0], type, val); - H5Aclose(ids[0]); - ids.erase(ids.begin()); - //now have hdf5 ids traversed to get to desired attribute so move along to close all - //based on their object type - close_hdf_ids(ids); - return val; -} - - bool H5OutputFile::exists_attribute(const std::string &parent, const std::string &name) { std::string attr_name = parent+std::string("/")+name; std::vector ids; @@ -348,95 +300,38 @@ bool H5OutputFile::exists_dataset(const std::string &parent, const std::string & return exists; } -/// Write a new 1D dataset. Data type of the new dataset is taken to be the type of -/// the input data if not explicitly specified with the filetype_id parameter. -template void H5OutputFile::write_dataset(std::string name, hsize_t len, T *data, - hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_hyperslab, bool flag_collective) -{ - int rank = 1; - hsize_t dims[1] = {len}; - if (memtype_id == -1) memtype_id = hdf5_type(T{}); - write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); -} - -void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective) +/// create a dataset +hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id, + std::vector dims, const std::vector& chunkDims, + bool flag_parallel, bool flag_hyperslab, bool flag_collective) { #ifdef USEPARALLELHDF MPI_Comm comm = mpi_comm_write; MPI_Info info = MPI_INFO_NULL; #endif - int rank = 1; - hsize_t dims[1] = {len}; - - hid_t memtype_id, filetype_id, dspace_id, dset_id, xfer_plist; - herr_t status, ret; - memtype_id = H5Tcopy (H5T_C_S1); - status = H5Tset_size (memtype_id, data.size()); - filetype_id = H5Tcopy (H5T_C_S1); - status = H5Tset_size (filetype_id, data.size()); - - // Create the dataspace - dspace_id = H5Screate_simple(rank, dims, NULL); - - // Create the dataset - dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, - H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); -#ifdef USEPARALLELHDF - if (flag_parallel) { - // set up the collective transfer properties list - xfer_plist = H5Pcreate(H5P_DATASET_XFER); - if (xfer_plist < 0) io_error(std::string("Failed to set up parallel transfer: ")+name); - if (flag_collective) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); - else ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT); - if (ret < 0) io_error(std::string("Failed to set up parallel transfer: ")+name); - // the result of above should be that all processors write to the same - // point of the hdf file. - } -#endif - // Write the data - if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0) - io_error(std::string("Failed to write dataset: ")+name); - - // Clean up (note that dtype_id is NOT a new object so don't need to close it) -#ifdef USEPARALLELHDF - if (flag_parallel) H5Pclose(xfer_plist); -#endif - H5Sclose(dspace_id); - H5Dclose(dset_id); -} -void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data, - hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective) -{ - int rank = 1; - hsize_t dims[1] = {len}; - if (memtype_id == -1) { - throw std::runtime_error("Write data set called with void pointer but no type info passed."); + auto splitPath = _tokenize(path); + auto dsetname = splitPath.back(); + splitPath.pop_back(); + hid_t curr_id = file_id; + hid_t memspace_id, dspace_id, dset_id, prop_id = H5P_DEFAULT; + auto rank = dims.size(); + std::vector chunks = chunkDims; + + /// not certain what to do with the open groups here. + for (auto& groupname : splitPath) + { + if (H5Lexists(curr_id, groupname.c_str(), H5P_DEFAULT) == 0) + { + curr_id = create_group(groupname); + } + else + { + curr_id = open_group(groupname); + } } - write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective); -} - - -/// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of -/// the input data if not explicitly specified with the filetype_id parameter. -template void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, - hid_t memtype_id, hid_t filetype_id, - bool flag_parallel, bool flag_first_dim_parallel, - bool flag_hyperslab, bool flag_collective) -{ -#ifdef USEPARALLELHDF - MPI_Comm comm = mpi_comm_write; - MPI_Info info = MPI_INFO_NULL; -#endif - hid_t dspace_id, dset_id, prop_id, memspace_id, ret; - std::vector chunks(rank); - - // Get HDF5 data type of the array in memory - if (memtype_id == -1) memtype_id = hdf5_type(T{}); - - // Determine type of the dataset to create - if(filetype_id < 0) filetype_id = memtype_id; #ifdef USEPARALLELHDF + /// will really have to update for MPI std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); if (flag_parallel) { //if parallel hdf5 get the full extent of the data @@ -475,31 +370,17 @@ template void H5OutputFile::write_dataset_nd(std::string name, int #endif } // Only chunk datasets where we would have >1 chunk - int large_dataset = 0; - for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + for(auto i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + for(auto i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; -#endif - } - if(nonzero_size && large_dataset) - { -#ifdef USEPARALLELHDF - if (flag_parallel) { - for(auto i=0; i void H5OutputFile::write_dataset_nd(std::string name, int dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL); //allocate the memory space //allocate the memory space - memspace_id = H5Screate_simple(rank, dims, NULL); + memspace_id = H5Screate_simple(rank, dims.data(), NULL); } else { - dspace_id = H5Screate_simple(rank, dims, NULL); + dspace_id = H5Screate_simple(rank, dims.data(), NULL); memspace_id = dspace_id; } } else { - dspace_id = H5Screate_simple(rank, dims, NULL); + dspace_id = H5Screate_simple(rank, dims.data(), NULL); memspace_id = dspace_id; } #else - dspace_id = H5Screate_simple(rank, dims, NULL); + dspace_id = H5Screate_simple(rank, dims.data(), NULL); memspace_id = dspace_id; #endif - // Dataset creation properties - prop_id = H5P_DEFAULT; #ifdef USEHDFCOMPRESSION - // this defines compression - if(nonzero_size && large_dataset) - { - prop_id = H5Pcreate(H5P_DATASET_CREATE); - H5Pset_layout(prop_id, H5D_CHUNKED); - H5Pset_chunk(prop_id, rank, chunks.data()); - H5Pset_deflate(prop_id, HDFDEFLATE); + if (!chunk.empty()) { + prop_id = H5Pcreate(H5P_DATASET_CREATE); + H5Pset_layout(prop_id, H5D_CHUNKED); + H5Pset_chunk(prop_id, rank, chunks.data()); + H5Pset_deflate(prop_id, HDFDEFLATE); } #endif + + dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id, + H5P_DEFAULT, prop_id, H5P_DEFAULT); + return dset_id; +} + +void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + int rank = 1; + hsize_t dims[1] = {len}; + + hid_t memtype_id, filetype_id, dspace_id, dset_id, xfer_plist; + herr_t status, ret; + memtype_id = H5Tcopy (H5T_C_S1); + status = H5Tset_size (memtype_id, data.size()); + filetype_id = H5Tcopy (H5T_C_S1); + status = H5Tset_size (filetype_id, data.size()); + + // Create the dataspace + dspace_id = H5Screate_simple(rank, dims, NULL); + // Create the dataset dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, - H5P_DEFAULT, prop_id, H5P_DEFAULT); - if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name); - H5Pclose(prop_id); - - prop_id = H5P_DEFAULT; + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); #ifdef USEPARALLELHDF if (flag_parallel) { // set up the collective transfer properties list - prop_id = H5Pcreate(H5P_DATASET_XFER); - //if all tasks are participating in the writes - if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE); - else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT); - if (flag_hyperslab) { - H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL); - if (dims[0] == 0) { - H5Sselect_none(dspace_id); - H5Sselect_none(memspace_id); - } - - } - if (mpi_hdf_dims_tot[0] > 0) { - // Write the data - ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); - if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); - } - } - else if (dims[0] > 0) - { - // Write the data - ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); - if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); - } - -#else - // Write the data - if (dims[0] > 0) { - ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); - if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + xfer_plist = H5Pcreate(H5P_DATASET_XFER); + if (xfer_plist < 0) io_error(std::string("Failed to set up parallel transfer: ")+name); + if (flag_collective) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + else ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT); + if (ret < 0) io_error(std::string("Failed to set up parallel transfer: ")+name); + // the result of above should be that all processors write to the same + // point of the hdf file. } #endif + // Write the data + if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0) + io_error(std::string("Failed to write dataset: ")+name); // Clean up (note that dtype_id is NOT a new object so don't need to close it) - H5Pclose(prop_id); #ifdef USEPARALLELHDF - if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); + if (flag_parallel) H5Pclose(xfer_plist); #endif H5Sclose(dspace_id); H5Dclose(dset_id); } + +void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data, + hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective) +{ + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective); +} + void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, hid_t memtype_id, hid_t filetype_id, bool flag_parallel, bool flag_first_dim_parallel, @@ -770,87 +659,3 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v H5Sclose(dspace_id); H5Dclose(dset_id); } - -/// write an attribute -template void H5OutputFile::write_attribute(std::string parent, std::string name, std::vector data) -{ - // Get HDF5 data type of the value to write - hid_t dtype_id = hdf5_type(data[0]); - hsize_t size = data.size(); - - // Open the parent object - hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); - if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); - - // Create dataspace - hid_t dspace_id = H5Screate(H5S_SIMPLE); - hid_t dspace_extent = H5Sset_extent_simple(dspace_id, 1, &size, NULL); - - // Create attribute - hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); - if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); - - // Write the attribute - if(H5Awrite(attr_id, dtype_id, data.data()) < 0) - io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); - - // Clean up - H5Aclose(attr_id); - H5Sclose(dspace_id); - H5Oclose(parent_id); -} - -template void H5OutputFile::write_attribute(std::string parent, std::string name, T data) -{ - // Get HDF5 data type of the value to write - hid_t dtype_id = hdf5_type(data); - - // Open the parent object - hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); - if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); - - // Create dataspace - hid_t dspace_id = H5Screate(H5S_SCALAR); - - // Create attribute - hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); - if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); - - // Write the attribute - if(H5Awrite(attr_id, dtype_id, &data) < 0) - io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); - - // Clean up - H5Aclose(attr_id); - H5Sclose(dspace_id); - H5Oclose(parent_id); -} - -void H5OutputFile::write_attribute(std::string parent, std::string name, std::string data) -{ - // Get HDF5 data type of the value to write - hid_t dtype_id = H5Tcopy(H5T_C_S1); - if (data.size() == 0) data=" "; - H5Tset_size(dtype_id, data.size()); - H5Tset_strpad(dtype_id, H5T_STR_NULLTERM); - - // Open the parent object - hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); - if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); - - // Create dataspace - hid_t dspace_id = H5Screate(H5S_SCALAR); - - // Create attribute - hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); - if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); - - // Write the attribute - if(H5Awrite(attr_id, dtype_id, data.c_str()) < 0) - io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); - - // Clean up - H5Aclose(attr_id); - H5Sclose(dspace_id); - H5Oclose(parent_id); -} diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h index 2180a98..80fb846 100644 --- a/HDF5Wrapper.h +++ b/HDF5Wrapper.h @@ -70,11 +70,27 @@ class H5OutputFile /// get attribute id from tokenized string void _get_attribute(std::vector &ids, const std::vector &parts); /// wrapper for reading scalar - template void _do_read(const hid_t &attr, const hid_t &type, T &val); + template void _do_read(const hid_t &attr, const hid_t &type, T &val) + { + if (hdf5_type(T{}) == H5T_C_S1) + { + _do_read_string(attr, type, val); + } + else { + H5Aread(attr, type, &val); + } + } /// wrapper for reading string void _do_read_string(const hid_t &attr, const hid_t &type, std::string &val); /// wrapper for reading vector - template void _do_read_v(const hid_t &attr, const hid_t &type, std::vector &val); + template void _do_read_v(const hid_t &attr, const hid_t &type, std::vector &val) + { + hid_t space = H5Aget_space (attr); + int npoints = H5Sget_simple_extent_npoints(space); + val.resize(npoints); + H5Aread(attr, type, val.data()); + H5Sclose(space); + } /// get dataset id void _get_dataset(std::vector &ids, const std::string dset_name); @@ -84,18 +100,9 @@ class H5OutputFile public: // Constructor - H5OutputFile() { - file_id = -1; -#ifdef USEPARALLELHDF - parallel_access_id = -1; -#endif - } - + H5OutputFile(); // Destructor closes the file if it's open - ~H5OutputFile() - { - if(file_id >= 0) close(); - } + ~H5OutputFile(); /// Create a new file void create(std::string filename, hid_t flag = H5F_ACC_TRUNC, @@ -113,12 +120,18 @@ class H5OutputFile H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); return group_id; } + hid_t open_group(std::string groupname) { + hid_t group_id = H5Gopen(file_id, groupname.c_str(), H5P_DEFAULT); + return group_id; + } /// close group herr_t close_group(hid_t gid) { herr_t status = H5Gclose(gid); return status; } + /// close path + void close_path(std::string path); /// close hids stored in vector void close_hdf_ids(std::vector &ids); @@ -165,25 +178,222 @@ class H5OutputFile H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); return dset_id; } + + template hid_t create_dataset(std::string path, T&data, + std::vector dims, const std::vector& chunkDims, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + return create_dataset(path, hdf5_type(T{}), dims, chunkDims, + flag_parallel, flag_hyperslab, flag_collective); + } + hid_t create_dataset(std::string path, hid_t datatype, + std::vector dims, const std::vector& chunkDims, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); /// close data set herr_t close_dataset(hid_t dset_id) { herr_t status = H5Dclose(dset_id); return status; } - /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of - /// the input data if not explicitly specified with the filetype_id parameter. - template void write_dataset(std::string name, hsize_t len, T *data, - hid_t memtype_id = -1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); - void write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel = true, bool flag_collective = true); + + + //write 1D data sets of string or input data with defined data type + void write_dataset(std::string name, hsize_t len, std::string data, + bool flag_parallel = true, bool flag_collective = true); void write_dataset(std::string name, hsize_t len, void *data, - hid_t memtype_id=-1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); + hid_t memtype_id=-1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + + /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + /// template function so defined here + template void write_dataset(std::string name, hsize_t len, T *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); + } + + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + hid_t dspace_id, dset_id, prop_id, memspace_id, ret; + std::vector chunks(rank); + + // Get HDF5 data type of the array in memory + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + if (flag_parallel) { + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + for (auto i=0;i 0) continue; + for (auto j=1;j<=ThisWriteTask;j++) { + dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; + } + } + if (flag_first_dim_parallel && rank > 1) { + for (auto i=1; i1 chunk + int large_dataset = 0; + for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } + else { + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } +#else + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; +#endif + } + if(nonzero_size && large_dataset) + { +#ifdef USEPARALLELHDF + if (flag_parallel) { + for(auto i=0; i 0) { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + } + else if (dims[0] > 0) + { + // Write the data + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + +#else + // Write the data + if (dims[0] > 0) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } +#endif + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); +#ifdef USEPARALLELHDF + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); + } - /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of - /// the input data if not explicitly specified with the filetype_id parameter. - template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, - hid_t memtype_id = -1, hid_t filetype_id = -1, - bool flag_parallel = true, bool flag_first_dim_parallel = true, - bool flag_hyperslab = true, bool flag_collective = true); void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, hid_t memtype_id = -1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, @@ -196,16 +406,142 @@ class H5OutputFile /// get an attribute with full path given by name void get_attribute(std::vector &ids, const std::string &name); + /// read scalar attribute - template const T read_attribute(const std::string &name); + template const T read_attribute(const std::string &name) + { + std::string attr_name; + T val; + hid_t type; + H5O_info_t object_info; + std::vector ids; + //traverse the file to get to the attribute, storing the ids of the + //groups, data spaces, etc that have been opened. + get_attribute(ids, name); + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //determine hdf5 type of the array in memory + type = hdf5_type(T{}); + // read the data + _do_read(ids[0], type, val); + H5Aclose(ids[0]); + ids.erase(ids.begin()); + //now have hdf5 ids traversed to get to desired attribute so move along to close all + //based on their object type + close_hdf_ids(ids); + return val; + } /// read vector attribute - template const std::vector read_attribute_v(const std::string &name); + template const std::vector read_attribute_v(const std::string &name) + { + std::string attr_name; + std::vector val; + hid_t type; + H5O_info_t object_info; + std::vector ids; + //traverse the file to get to the attribute, storing the ids of the + //groups, data spaces, etc that have been opened. + get_attribute(ids, name); + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //determine hdf5 type of the array in memory + type = hdf5_type(T{}); + // read the data + _do_read_v(ids[0], type, val); + H5Aclose(ids[0]); + ids.erase(ids.begin()); + //now have hdf5 ids traversed to get to desired attribute so move along to close all + //based on their object type + close_hdf_ids(ids); + return val; + } + /// sees if attribute exits bool exists_attribute(const std::string &parent, const std::string &name); - /// write an attribute - template void write_attribute(std::string parent, std::string name, std::vector data); - template void write_attribute(std::string parent, std::string name, T data); - void write_attribute(std::string parent, std::string name, std::string data); + + /// write an attribute, not that since these are template function, define here in the header + template void write_attribute(const std::string &parent, const std::string &name, const std::vector &data) + { + // Get HDF5 data type of the value to write + hid_t dtype_id = hdf5_type(data[0]); + hsize_t size = data.size(); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SIMPLE); + hid_t dspace_extent = H5Sset_extent_simple(dspace_id, 1, &size, NULL); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, data.data()) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); + } + + template void write_attribute(const std::string &parent, const std::string &name, const T &data) + { + // Get HDF5 data type of the value to write + hid_t dtype_id = hdf5_type(data); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SCALAR); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, &data) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); + } + + void write_attribute(const std::string parent, const std::string &name, std::string data) + { + // Get HDF5 data type of the value to write + hid_t dtype_id = H5Tcopy(H5T_C_S1); + if (data.size() == 0) data=" "; + H5Tset_size(dtype_id, data.size()); + H5Tset_strpad(dtype_id, H5T_STR_NULLTERM); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SCALAR); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, data.c_str()) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); + } }; diff --git a/MipMap.cc b/MipMap.cc index 35ffb5c..8708c12 100644 --- a/MipMap.cc +++ b/MipMap.cc @@ -11,46 +11,49 @@ MipMap::~MipMap() { } } -void MipMap::createDataset(H5::Group group, const std::vector& chunkDims) { - H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); - floatType.setOrder(H5T_ORDER_LE); - - std::ostringstream mipMapName; - mipMapName << "MipMaps/DATA/DATA_XY_" << mip; - +// void MipMap::createDataset(H5::Group group, const std::vector& chunkDims) { +// H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); +// floatType.setOrder(H5T_ORDER_LE); +// +// std::ostringstream mipMapName; +// mipMapName << "MipMaps/DATA/DATA_XY_" << mip; +// // if (useChunks(datasetDims)) { // createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims, chunkDims); // } else { // createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims); // } -//??? - +// +// } + +void MipMap::createDataset(H5OutputFile &H5outputfile, hid_t gid, const std::vector& chunkDims) { } void MipMap::createBuffers(std::vector& bufferDims) { bufferSize = product(bufferDims); - + vals = new double[bufferSize]; count = new int[bufferSize]; - + resetBuffers(); - + this->bufferDims = bufferDims; - + auto N = bufferDims.size(); - + width = bufferDims[N - 1]; height = bufferDims[N - 2]; depth = N > 2 ? bufferDims[N - 3] : 1; - stokes = N > 3 ? bufferDims[N - 4] : 1; + stokes = N > 3 ? bufferDims[N - 4] : 1; } -void MipMap::write(hsize_t stokesOffset, hsize_t channelOffset) { +void MipMap::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset) { int N = datasetDims.size(); std::vector count = trimAxes({1, depth, height, width}, N); std::vector start = trimAxes({stokesOffset, channelOffset, 0, 0}, N); - - writeHdf5Data(dataset, vals, bufferDims, count, start); + + // writeHdf5Data(dataset, vals, bufferDims, count, start); + //??? } void MipMap::resetBuffers() { @@ -64,7 +67,7 @@ MipMaps::MipMaps(std::vector standardDims, const std::vector& auto dims = standardDims; int N = dims.size(); int mip = 1; - + // We keep going until we have a mipmap which fits entirely within the minimum size while (dims[N - 1] > MIN_MIPMAP_SIZE || dims[N - 2] > MIN_MIPMAP_SIZE) { mip *= 2; @@ -79,7 +82,7 @@ hsize_t MipMaps::size(const std::vector& standardDims, const std::vecto auto datasetDims = standardDims; auto bufferDims = standardBufferDims; int N = standardDims.size(); - + while (datasetDims[N - 1] > MIN_MIPMAP_SIZE || datasetDims[N - 2] > MIN_MIPMAP_SIZE) { mip *= 2; datasetDims = mipDims(datasetDims, 2); @@ -94,10 +97,10 @@ hsize_t MipMaps::size(const std::vector& standardDims, const std::vecto // for (auto& mipMap : mipMaps) { // mipMap.createDataset(group, chunkDims); // } -// +// // } -void MipMaps::createDatasets(H5OutputFile H5outputfile , hid_t gid) { +void MipMaps::createDatasets(H5OutputFile &H5outputfile, hid_t gid) { for (auto& mipMap : mipMaps) { mipMap.createDataset(H5outputfile, gid, chunkDims); } @@ -110,9 +113,9 @@ void MipMaps::createBuffers(const std::vector& standardBufferDims) { } } -void MipMaps::write(hsize_t stokesOffset, hsize_t channelOffset) { +void MipMaps::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset) { for (auto& mipMap : mipMaps) { - mipMap.write(stokesOffset, channelOffset); + mipMap.write(H5outputfile, stokesOffset, channelOffset); } } diff --git a/MipMap.h b/MipMap.h index d9195cc..b0e776f 100644 --- a/MipMap.h +++ b/MipMap.h @@ -3,6 +3,7 @@ #include "common.h" #include "Util.h" +#include "HDF5Wrapper.h" // A single mipmap struct MipMap { @@ -30,13 +31,14 @@ struct MipMap { } } - void write(hsize_t stokesOffset, hsize_t channelOffset); + void write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset); void resetBuffers(); std::vector datasetDims; int mip; - H5::DataSet dataset; + // H5::DataSet dataset; + hid_t dset_id; std::vector bufferDims; hsize_t bufferSize; @@ -77,7 +79,7 @@ struct MipMaps { // TODO if we ever want a tiled mipmap calculation // we'll need to implement options to pass in custom buffer dims // and additional x and y offsets - void write(hsize_t stokesOffset, hsize_t channelOffset); + void write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset); void resetBuffers(); std::vector standardDims; diff --git a/SlowConverter.cc b/SlowConverter.cc index ae09122..7cd0332 100644 --- a/SlowConverter.cc +++ b/SlowConverter.cc @@ -8,22 +8,22 @@ void SlowConverter::reportMemoryUsage() { sizes["Main dataset"] = height * width * sizeof(float); sizes["Mipmaps"] = MipMaps::size(standardDims, {1, height, width}); sizes["XY stats"] = Stats::size({depth}, numBins); - + if (depth > 1) { sizes["Rotation"] = 2 * product(trimAxes({stokes, depth, TILE_SIZE, TILE_SIZE}, N)) * sizeof(float); sizes["XYZ stats"] = Stats::size({}, numBins, depth); sizes["Z stats"] = Stats::size({TILE_SIZE, TILE_SIZE}); } - + hsize_t total(0); - + std::cout << "APPROXIMATE MEMORY REQUIREMENTS:" << std::endl; - + for (auto& kv : sizes) { std::cout << kv.first << ":\t" << kv.second * 1e-9 << " GB" << std::endl; total += kv.second; } - + std::string note = ""; if (depth > 1) { total -= std::min(sizes["Main dataset"], sizes["Rotation"] + sizes["Z stats"]); @@ -37,35 +37,35 @@ void SlowConverter::copyAndCalculate() { const hsize_t channelProgressStride = std::max((hsize_t)1, (hsize_t)(depth / 100)); hsize_t numTiles = std::ceil(width / TILE_SIZE) * std::ceil(height / TILE_SIZE); const hsize_t tileProgressStride = std::max((hsize_t)1, (hsize_t)(numTiles / 100)); - + // Allocate one channel at a time, and no swizzled data hsize_t cubeSize = height * width; TIMER(timer.start("Allocate");); standardCube = new float[cubeSize]; - + // Allocate one stokes of stats at a time statsXY.createBuffers({depth}); - + if (depth > 1) { statsXYZ.createBuffers({}, depth); } - + mipMaps.createBuffers({1, height, width}); std::vector count = trimAxes({1, 1, height, width}, N); std::vector memDims = {height, width}; - + std::string timerLabelStatsMipmaps = depth > 1 ? "XY and XYZ statistics and mipmaps" : "XY statistics and mipmaps"; - + for (unsigned int s = 0; s < stokes; s++) { DEBUG(std::cout << "Processing Stokes " << s << "... " << std::endl;); PROGRESS("Stokes " << s << ":" << std::endl); - + PROGRESS("\tMain loop\t"); - + StatsCounter counterXYZ; - + for (hsize_t c = 0; c < depth; c++) { PROGRESS_DECIMATED(c, channelProgressStride, "|"); // read one channel @@ -73,80 +73,80 @@ void SlowConverter::copyAndCalculate() { DEBUG(std::cout << " Reading main dataset..." << std::flush;); TIMER(timer.start("Read");); readFitsData(inputFilePtr, c, s, cubeSize, standardCube); - + // Write the standard dataset - + DEBUG(std::cout << " Writing main dataset..." << std::flush;); TIMER(timer.start("Write");); - + std::vector start = trimAxes({s, c, 0, 0}, N); // writeHdf5Data(standardDataSet, standardCube, memDims, count, start); - + DEBUG(std::cout << " Accumulating XY stats and mipmaps..." << std::flush;); TIMER(timer.start(timerLabelStatsMipmaps);); StatsCounter counterXY; auto indexXY = c; std::function accumulate; - + auto lazy_accumulate = [&] (float val) { counterXY.accumulateFiniteLazy(val); }; - + auto first_accumulate = [&] (float val) { counterXY.accumulateFiniteLazyFirst(val); accumulate = lazy_accumulate; }; - + accumulate = first_accumulate; - + for (hsize_t y = 0; y < height; y++) { for (hsize_t x = 0; x < width; x++) { auto pos = y * width + x; // relative to channel slice auto& val = standardCube[pos]; - + if (std::isfinite(val)) { // XY statistics accumulate(val); - + // Accumulate mipmaps mipMaps.accumulate(val, x, y, 0); - + } else { counterXY.accumulateNonFinite(); } } } // end of XY loop - + // Final correction of XY min and max DEBUG(std::cout << " Final XY stats..." << std::flush;); statsXY.copyStatsFromCounter(indexXY, height * width, counterXY); - + // Accumulate XYZ statistics if (depth > 1) { DEBUG(std::cout << " Accumulating XYZ stats..." << std::flush;); statsXY.accumulateStatsToCounter(counterXYZ, indexXY); } - + // Final mipmap calculation DEBUG(std::cout << " Final mipmaps..." << std::flush;); mipMaps.calculate(); - - + + // Write the mipmaps DEBUG(std::cout << " Writing mipmaps..." << std::flush;); TIMER(timer.start("Write");); - mipMaps.write(s, c); - + mipMaps.write(H5outputfile, s, c); + // Reset mipmaps before next channel DEBUG(std::cout << " Resetting mipmap objects..." << std::endl;); TIMER(timer.start(timerLabelStatsMipmaps);); mipMaps.resetBuffers(); - + } // end of first channel loop - + PROGRESS(std::endl); - + if (depth > 1) { // Final correction of XYZ min and max DEBUG(std::cout << " Final XYZ stats..." << std::flush;); @@ -154,81 +154,81 @@ void SlowConverter::copyAndCalculate() { TIMER(timer.start(timerLabelStatsMipmaps);); statsXYZ.copyStatsFromCounter(0, depth * height * width, counterXYZ); } - + // XY and XYZ histograms // We need a second pass over all channels because we need cube min and max (and channel min and max per channel) // We do the second pass backwards to take advantage of caching DEBUG(std::cout << " Histograms..." << std::endl;); PROGRESS("\tHistograms\t"); TIMER(timer.start("Histograms");); - + double cubeMin; double cubeMax; double cubeRange; bool cubeHist(false); - + if (depth > 1) { cubeMin = statsXYZ.minVals[0]; cubeMax = statsXYZ.maxVals[0]; cubeRange = cubeMax - cubeMin; cubeHist = std::isfinite(cubeMin) && std::isfinite(cubeMax) && cubeRange > 0; } - + statsXY.clearHistogramBuffers(); statsXYZ.clearHistogramBuffers(); - + DEBUG(std::cout << "+ Will " << (cubeHist ? "" : "not ") << "calculate cube histogram." << std::endl;); - + for (hsize_t c = depth; c-- > 0; ) { DEBUG(std::cout << "+ Processing channel " << c << "... " << std::flush;); PROGRESS_DECIMATED(c, channelProgressStride, "|"); auto indexXY = c; - + double chanMin = statsXY.minVals[indexXY]; double chanMax = statsXY.maxVals[indexXY]; double chanRange = chanMax - chanMin; - + bool chanHist(std::isfinite(chanMin) && std::isfinite(chanMax) && chanRange > 0); DEBUG(std::cout << " Will " << (chanHist ? "" : "not ") << "calculate channel histogram." << std::flush;); - + if (!chanHist && !cubeHist) { continue; } - + auto doChannelHistogram = [&] (float val) { // XY histogram statsXY.accumulateHistogram(val, chanMin, chanRange, c); }; - + auto doCubeHistogram = [&] (float val) { // XYZ histogram statsXYZ.accumulateHistogram(val, cubeMin, cubeRange, 0); }; - + auto doNothing = [&] (float val) { UNUSED(val); }; - + std::function channelHistogramFunc = doChannelHistogram; std::function cubeHistogramFunc = doCubeHistogram; - + if (!chanHist) { channelHistogramFunc = doNothing; } - + if (!cubeHist) { cubeHistogramFunc = doNothing; } - + // read one channel DEBUG(std::cout << " Reading main dataset..." << std::flush;); TIMER(timer.start("Read");); - + readFitsData(inputFilePtr, c, s, cubeSize, standardCube); DEBUG(std::cout << " Calculating histogram(s)..." << std::endl;); TIMER(timer.start("Histograms");); - + for (hsize_t p = 0; p < width * height; p++) { auto& val = standardCube[p]; if (std::isfinite(val)) { @@ -237,94 +237,94 @@ void SlowConverter::copyAndCalculate() { } } // end of XY loop } // end of second channel loop (XY and XYZ histograms) - + PROGRESS(std::endl); - + // Write the statistics TIMER(timer.start("Write");); PROGRESS("\tWrite stats & mipmaps" << std::endl); - - statsXY.write({1, depth}, {s, 0}); - + + statsXY.write(H5outputfile, {1, depth}, {s, 0}); + if (depth > 1) { - statsXYZ.write({1}, {s}); + statsXYZ.write(H5outputfile, {1}, {s}); } - + } // end of stokes - + // Free memory DEBUG(std::cout << "Freeing memory from main dataset... " << std::endl;); TIMER(timer.start("Free");); - + delete[] standardCube; - + // Swizzle if (depth > 1) { DEBUG(std::cout << "Performing tiled rotation." << std::endl;); PROGRESS("Tiled rotation & Z stats" << std::endl); TIMER(timer.start("Allocate");); - + hsize_t sliceSize = product(trimAxes({stokes, depth, TILE_SIZE, TILE_SIZE}, N)); float* standardSlice = new float[sliceSize]; float* rotatedSlice = new float[sliceSize]; - + statsZ.createBuffers({TILE_SIZE, TILE_SIZE}); - + for (unsigned int s = 0; s < stokes; s++) { DEBUG(std::cout << "Processing Stokes " << s << "..." << std::endl;); PROGRESS("\tStokes " << s << "\t"); - + hsize_t tileCount(0); - + for (hsize_t xOffset = 0; xOffset < width; xOffset += TILE_SIZE) { for (hsize_t yOffset = 0; yOffset < height; yOffset += TILE_SIZE) { tileCount++; hsize_t xSize = std::min(TILE_SIZE, width - xOffset); hsize_t ySize = std::min(TILE_SIZE, height - yOffset); - + DEBUG(std::cout << "+ Processing tile slice at " << xOffset << ", " << yOffset << "..." << std::flush;); PROGRESS_DECIMATED(tileCount, tileProgressStride, "#"); - + // read tile slice DEBUG(std::cout << " Reading main dataset..." << std::flush;); TIMER(timer.start("Read");); - + auto standardMemDims = trimAxes({1, depth, ySize, xSize}, N); auto standardCount = trimAxes({1, depth, ySize, xSize}, N); auto standardStart = trimAxes({s, 0, yOffset, xOffset}, N); - + // readHdf5Data(standardDataSet, standardSlice, standardMemDims, standardCount, standardStart); - + // rotate tile slice DEBUG(std::cout << " Calculating rotation..." << std::flush;); TIMER(timer.start("Rotation");); - + for (hsize_t i = 0; i < depth; i++) { for (hsize_t j = 0; j < ySize; j++) { for (hsize_t k = 0; k < xSize; k++) { auto sourceIndex = k + xSize * j + (ySize * xSize) * i; auto& val = standardSlice[sourceIndex]; - + // rotation auto destIndex = i + depth * j + (ySize * depth) * k; rotatedSlice[destIndex] = val; } } } - - // A separate pass over the same slice depth-last + + // A separate pass over the same slice depth-last DEBUG(std::cout << " Calculating Z statistics..." << std::flush;); TIMER(timer.start("Z statistics");); - + for (hsize_t j = 0; j < ySize; j++) { for (hsize_t k = 0; k < xSize; k++) { StatsCounter counterZ; auto indexZ = k + xSize * j; - + for (hsize_t i = 0; i < depth; i++) { auto sourceIndex = k + xSize * j + (ySize * xSize) * i; auto& val = standardSlice[sourceIndex]; - + if (std::isfinite(val)) { // Not lazy; too much risk of encountering an ascending / descending sequence. counterZ.accumulateFinite(val); @@ -332,29 +332,29 @@ void SlowConverter::copyAndCalculate() { counterZ.accumulateNonFinite(); } } - + statsZ.copyStatsFromCounter(indexZ, depth, counterZ); } } - + // write tile slice DEBUG(std::cout << " Writing rotated dataset..." << std::endl;); TIMER(timer.start("Write");); - + auto swizzledMemDims = trimAxes({1, xSize, ySize, depth}, N); auto swizzledCount = trimAxes({1, xSize, ySize, depth}, N); auto swizzledStart = trimAxes({s, xOffset, yOffset, 0}, N); - + // writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart); - + DEBUG(std::cout << " Writing Z statistics..." << std::endl;); // write Z statistics - statsZ.write({ySize, xSize}, {1, ySize, xSize}, {s, yOffset, xOffset}); + statsZ.write(H5outputfile, {ySize, xSize}, {1, ySize, xSize}, {s, yOffset, xOffset}); } } PROGRESS(std::endl); } - + TIMER(timer.start("Free");); DEBUG(std::cout << "Freeing memory from main and rotated dataset slices... " << std::endl;); delete[] standardSlice; diff --git a/Stats.cc b/Stats.cc index f0ff618..a798e3b 100644 --- a/Stats.cc +++ b/Stats.cc @@ -1,5 +1,7 @@ #include "Stats.h" +Stats::Stats(){}; + Stats::Stats(const std::vector& basicDatasetDims, hsize_t numBins) : basicDatasetDims(basicDatasetDims), numBins(numBins), partialHistMultiplier(0) { } @@ -66,30 +68,31 @@ void Stats::clearHistogramBuffers() { memset(partialHistograms, 0, sizeof(int64_t) * statsSize * numBins * partialHistMultiplier); } -void Stats::write() { - writeBasic(fullBasicBufferDims); +void Stats::write(H5OutputFile &H5outputfile) { + writeBasic(H5outputfile, fullBasicBufferDims); if (numBins) { - writeHistogram(fullBasicBufferDims); + writeHistogram(H5outputfile, fullBasicBufferDims); } } -void Stats::write(const std::vector& count, const std::vector& start) { - write(fullBasicBufferDims, count, start); +void Stats::write(H5OutputFile &H5outputfile, const std::vector& count, const std::vector& start) { + write(H5outputfile, fullBasicBufferDims, count, start); } -void Stats::write(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +void Stats::write(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { auto basicN = basicDatasetDims.size(); - writeBasic(basicBufferDims, trimAxes(count, basicN), trimAxes(start, basicN)); + writeBasic(H5outputfile, basicBufferDims, trimAxes(count, basicN), trimAxes(start, basicN)); if (numBins) { auto histN = basicN + 1; - writeHistogram(basicBufferDims, trimAxes(extend(count, {numBins}), histN), trimAxes(extend(start, {0}), histN)); + writeHistogram(H5outputfile, basicBufferDims, trimAxes(extend(count, {numBins}), histN), trimAxes(extend(start, {0}), histN)); } } // void Stats::writeBasic(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { -void Stats::writeBasic(H5OutputFile H5outputfile, std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +void Stats::writeBasic(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) +{ // writeHdf5Data(minDset, minVals, basicBufferDims, count, start); // writeHdf5Data(maxDset, maxVals, basicBufferDims, count, start); // writeHdf5Data(sumDset, sums, basicBufferDims, count, start); @@ -98,6 +101,6 @@ void Stats::writeBasic(H5OutputFile H5outputfile, std::vector& basicBuf } // void Stats::writeHistogram(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { -void Stats::writeHistogram(H5OutputFile H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +void Stats::writeHistogram(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { // writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start); } diff --git a/Stats.h b/Stats.h index a244a9d..09b7528 100644 --- a/Stats.h +++ b/Stats.h @@ -106,11 +106,11 @@ struct Stats { } // Writing - void write(); - void write(const std::vector& count, const std::vector& start); - void write(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start); - void writeBasic(const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); - void writeHistogram(const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); + void write(H5OutputFile &H5outputfile); + void write(H5OutputFile &H5outputfile, const std::vector& count, const std::vector& start); + void write(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start); + void writeBasic(H5OutputFile &H5outputfile, const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); + void writeHistogram(H5OutputFile &H5outputfile, const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); // Dataset dimensions std::vector basicDatasetDims; From 3c793455f2d4e0417bcd0be929f1b7f8c7b78301 Mon Sep 17 00:00:00 2001 From: Pascal Date: Mon, 3 Aug 2020 11:59:26 +0800 Subject: [PATCH 04/13] Update to HDF5 Wrapper API Added in extra checks for creating groups, datasets, create links (which still needs work) and some code clean. Also updated dataset creation in Stats and MipMap. --- Converter.cc | 15 ++++++--- HDF5Wrapper.cc | 88 +++++++++++++++++++++++++++++++++++++++++++++++--- HDF5Wrapper.h | 27 +++++++++++++--- MipMap.cc | 14 ++++++-- MipMap.h | 4 +-- Stats.cc | 9 +++++- Stats.h | 12 +++---- 7 files changed, 142 insertions(+), 27 deletions(-) diff --git a/Converter.cc b/Converter.cc index b81a947..66767ae 100644 --- a/Converter.cc +++ b/Converter.cc @@ -74,7 +74,7 @@ void Converter::convert() { // outputGroup = outputFile.createGroup("0"); H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC); - std::string parentpath("0"); + std::string parentpath("0/"); hid_t gid = H5outputfile.create_group(parentpath); std::vector chunkDims; @@ -85,19 +85,24 @@ void Converter::convert() { // H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); // floatType.setOrder(H5T_ORDER_LE); // createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims); -//???? - - + H5outputfile.create_dataset(parentpath, H5T_NATIVE_FLOAT, + standardDims, chunkDims); // statsXY.createDatasets(outputGroup, "XY"); + statsXY.createDatasets(H5outputfile, parentpath, "XY"); if (depth > 1) { // statsXYZ.createDatasets(outputGroup, "XYZ"); // statsZ.createDatasets(outputGroup, "Z"); - // auto swizzledGroup = outputGroup.createGroup("SwizzledData"); // We use this name in papers because it sounds more serious. :) // outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData"); // createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims); + statsXYZ.createDatasets(H5outputfile, parentpath, "XYZ"); + statsZ.createDatasets(H5outputfile, parentpath, "Z"); + auto swizzledGroup = H5outputfile.create_group("0/SwizzledData"); + // We use this name in papers because it sounds more serious. :) + H5outputfile.create_link("0/SwizzledData", "0/PermutedData"); + // ??? H5outputfile.create_dataset(); } /// mipMaps.createDatasets(outputGroup); diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc index b358ee0..eac39d4 100644 --- a/HDF5Wrapper.cc +++ b/HDF5Wrapper.cc @@ -169,7 +169,7 @@ void H5OutputFile::get_attribute(std::vector &ids, const std::string &nam _get_attribute(ids, parts); } -/// get an attribute going to list of hids +/// get a dataset going to list of hids void H5OutputFile::_get_dataset(std::vector &ids, const std::string dset_name) { auto dset_id = H5Dopen(ids.back(), dset_name.c_str(), H5P_DEFAULT); @@ -205,13 +205,75 @@ void H5OutputFile::_get_dataset(std::vector &ids, const std::vector &ids, const std::string &name) { auto parts = _tokenize(name); _get_dataset(ids, parts); } +/// get a hdf5 id going to list of hids +void H5OutputFile::_get_hdf5_id(std::vector &ids, const std::string name) +{ + auto id = H5Oopen(ids.back(), name.c_str(), H5P_DEFAULT); + if (id < 0) { + throw std::invalid_argument(std::string("hdf5 object not found ") + name); + } + ids.push_back(id); +} +/// get hdf5 ids parsing list of ids and vector of strings +void H5OutputFile::_get_hdf5_id(std::vector &ids, const std::vector &parts) +{ + // This is the attribute name, so open it and store the id + if (parts.size() == 1) { + _get_hdf5_id(ids, parts[0]); + } + else + { + //otherwise enter group and recursively call funciotn + H5O_info_t object_info; + hid_t newid; + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, H5P_DEFAULT); + if (object_info.type == H5O_TYPE_GROUP) { + newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + else { + throw std::runtime_error("Incorrect path, encountered something other than group"); + } + ids.push_back(newid); + //get the substring + std::vector subparts(parts.begin() + 1, parts.end()); + //call function again + _get_dataset(ids, subparts); + } +} + +/// get hdf5 id in file, storing relevant ids in vector +void H5OutputFile::get_hdf5_id(std::vector &ids, const std::string &name) +{ + auto parts = _tokenize(name); + _get_hdf5_id(ids, parts); +} +hid_t H5OutputFile::get_hdf5_id(std::string path, std::string name, bool closeids) +{ + return get_hdf5_id(path+name, closeids); +} +hid_t H5OutputFile::get_hdf5_id(std::string fullname, bool closeids) +{ + hid_t id; + std::vector ids; + try { + get_hdf5_id(ids, fullname); + id = ids.back(); + } + catch (const std::invalid_argument& ia) + { + id = -1; + } + ///\todo question of whether I should close the objects that have been opened + if (closeids) close_hdf_ids(ids); + return id; +} /// close open hids stored in vector void H5OutputFile::close_hdf_ids(std::vector &ids) @@ -234,7 +296,7 @@ void H5OutputFile::close_path(std::string path) { auto parts = _tokenize(path); std::vector ids; - // _get_path(ids, parts); + _get_hdf5_id(ids, parts); close_hdf_ids(ids); } @@ -302,7 +364,7 @@ bool H5OutputFile::exists_dataset(const std::string &parent, const std::string & /// create a dataset hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id, - std::vector dims, const std::vector& chunkDims, + std::vector dims, std::vector chunkDims, bool flag_parallel, bool flag_hyperslab, bool flag_collective) { #ifdef USEPARALLELHDF @@ -424,6 +486,24 @@ hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id, return dset_id; } +/// create a link +herr_t H5OutputFile::create_link(std::string orgname, std::string linkname, bool ihard) { + std::vector orgids; + hid_t orgid = get_hdf5_id(orgname); + hid_t linkid; + + if (H5Lexists(linkid, linkname.c_str(), H5P_DEFAULT)) { + throw std::runtime_error("Link already exists, cannot create new link"); + } + if (ihard) { + return H5Lcreate_hard(orgid, orgname.c_str(), linkid, linkname.c_str(), H5P_DEFAULT, H5P_DEFAULT); + } + else { + return H5Lcreate_soft(orgname.c_str(), linkid, linkname.c_str(), H5P_DEFAULT, H5P_DEFAULT); + } +} + + void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective) { #ifdef USEPARALLELHDF diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h index 80fb846..4ad1f55 100644 --- a/HDF5Wrapper.h +++ b/HDF5Wrapper.h @@ -65,10 +65,12 @@ class H5OutputFile /// tokenize a path given an input string std::vector _tokenize(const std::string &s); + /// get attribute id void _get_attribute(std::vector &ids, const std::string attr_name); /// get attribute id from tokenized string void _get_attribute(std::vector &ids, const std::vector &parts); + /// wrapper for reading scalar template void _do_read(const hid_t &attr, const hid_t &type, T &val) { @@ -94,9 +96,14 @@ class H5OutputFile /// get dataset id void _get_dataset(std::vector &ids, const std::string dset_name); - /// get attribute id from tokenized string + /// get dataset id from tokenized string void _get_dataset(std::vector &ids, const std::vector &parts); + /// get dataset id + void _get_hdf5_id(std::vector &ids, const std::string name); + /// get attribute id from tokenized string + void _get_hdf5_id(std::vector &ids, const std::vector &parts); + public: // Constructor @@ -116,7 +123,11 @@ class H5OutputFile /// create a group hid_t create_group(std::string groupname) { - hid_t group_id = H5Gcreate(file_id, groupname.c_str(), + hid_t group_id; + if (H5Lexists(group_id, groupname.c_str(), H5P_DEFAULT)) { + throw std::invalid_argument("Group "+groupname+"already present, not creating group"); + } + group_id = H5Gcreate(file_id, groupname.c_str(), H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); return group_id; } @@ -180,14 +191,14 @@ class H5OutputFile } template hid_t create_dataset(std::string path, T&data, - std::vector dims, const std::vector& chunkDims, + std::vector dims, std::vector chunkDims = std::vector(0), bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) { return create_dataset(path, hdf5_type(T{}), dims, chunkDims, flag_parallel, flag_hyperslab, flag_collective); } hid_t create_dataset(std::string path, hid_t datatype, - std::vector dims, const std::vector& chunkDims, + std::vector dims, std::vector chunkDims = std::vector(0), bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); /// close data set herr_t close_dataset(hid_t dset_id) { @@ -195,6 +206,14 @@ class H5OutputFile return status; } + /// find and hdf5 object + void get_hdf5_id(std::vector &ids, const std::string &name); + hid_t get_hdf5_id(std::string path, std::string name, bool closeids = true); + hid_t get_hdf5_id(std::string fullname, bool closeids = true); + + /// create a link + ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL + herr_t create_link(std::string orgname, std::string linkname, bool ihard = true); //write 1D data sets of string or input data with defined data type void write_dataset(std::string name, hsize_t len, std::string data, diff --git a/MipMap.cc b/MipMap.cc index 8708c12..9d9e61a 100644 --- a/MipMap.cc +++ b/MipMap.cc @@ -26,7 +26,15 @@ MipMap::~MipMap() { // // } -void MipMap::createDataset(H5OutputFile &H5outputfile, hid_t gid, const std::vector& chunkDims) { +void MipMap::createDataset(H5OutputFile &H5outputfile, std::string path, const std::vector& chunkDims) { + + std::ostringstream mipMapName; + mipMapName << "MipMaps/DATA/DATA_XY_" << mip; + if (useChunks(datasetDims)) { + H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims, chunkDims); + } else { + H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims); + } } void MipMap::createBuffers(std::vector& bufferDims) { @@ -100,9 +108,9 @@ hsize_t MipMaps::size(const std::vector& standardDims, const std::vecto // // } -void MipMaps::createDatasets(H5OutputFile &H5outputfile, hid_t gid) { +void MipMaps::createDatasets(H5OutputFile &H5outputfile, std::string path) { for (auto& mipMap : mipMaps) { - mipMap.createDataset(H5outputfile, gid, chunkDims); + mipMap.createDataset(H5outputfile, path, chunkDims); } } diff --git a/MipMap.h b/MipMap.h index b0e776f..d4835de 100644 --- a/MipMap.h +++ b/MipMap.h @@ -12,7 +12,7 @@ struct MipMap { ~MipMap(); // void createDataset(H5::Group group, const std::vector& chunkDims); - void createDataset(H5OutputFile &h5outputfile, hid_t gid, const std::vector& chunkDims); + void createDataset(H5OutputFile &h5outputfile, std::string path, const std::vector& chunkDims); void createBuffers(std::vector& bufferDims); void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) { @@ -61,7 +61,7 @@ struct MipMaps { static hsize_t size(const std::vector& standardDims, const std::vector& standardBufferDims); // void createDatasets(H5::Group group); - void createDatasets(H5OutputFile &H5outputfile, hid_t gid); + void createDatasets(H5OutputFile &H5outputfile, std::string path); void createBuffers(const std::vector& standardBufferDims); void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) { diff --git a/Stats.cc b/Stats.cc index a798e3b..4774221 100644 --- a/Stats.cc +++ b/Stats.cc @@ -25,7 +25,7 @@ hsize_t Stats::size(std::vector dims, hsize_t numBins, hsize_t partialH } // void Stats::createDatasets(H5::Group group, std::string name) { -void Stats::createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string name) { +void Stats::createDatasets(H5OutputFile &H5outputfile, std::string pathname, std::string name) { // H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); // floatType.setOrder(H5T_ORDER_LE); @@ -43,6 +43,13 @@ void Stats::createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string na // createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins})); // } + for (auto i = 0; i dims, hsize_t partialHistMultiplier) { diff --git a/Stats.h b/Stats.h index 09b7528..6a33002 100644 --- a/Stats.h +++ b/Stats.h @@ -53,7 +53,7 @@ struct Stats { // Setup //void createDatasets(H5::Group group, std::string name); - void createDatasets(H5OutputFile &H5outputfile, hid_t gid, std::string name); + void createDatasets(H5OutputFile &H5outputfile, std::string path, std::string name); void createBuffers(std::vector dims, hsize_t partialHistMultiplier = 0); // Basic stats @@ -117,13 +117,9 @@ struct Stats { hsize_t numBins; // Datasets -// H5::DataSet minDset; -// H5::DataSet maxDset; -// H5::DataSet sumDset; -// H5::DataSet ssqDset; -// H5::DataSet nanDset; -// -// H5::DataSet histDset; + std::vector datasetnames = {"/MIN", "/MAX", "/SUM", "/SUM_SQ", "/NAN_COUNT"}; + std::vector datasettypes = {H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_LLONG}; + std::vector datasetids; hid_t minDset; hid_t maxDset; From de48d807f6b264f9d9df35de78957ebd97976238 Mon Sep 17 00:00:00 2001 From: Pascal Date: Mon, 3 Aug 2020 15:17:17 +0800 Subject: [PATCH 05/13] Update to HDF5 Wrapper API Update to HDF5 wrapper, some code clean up and as well, added CFITSIO cmake check --- CMakeLists.txt | 3 +- Converter.cc | 17 +- Converter.h | 2 + FastConverter.cc | 4 + HDF5Wrapper.cc | 338 +++++++++++++++++++++++++++------------- HDF5Wrapper.h | 299 ++++++++++++++++++++++++++--------- MipMap.cc | 5 +- README.md | 2 +- SlowConverter.cc | 3 + Stats.cc | 15 ++ cmake/FindCFITSIO.cmake | 74 +++++++++ 11 files changed, 572 insertions(+), 190 deletions(-) create mode 100644 cmake/FindCFITSIO.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index b8764ca..9179d76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.13) project(hdf_convert) set(default_build_type "Release") - +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") set(CMAKE_CXX_STANDARD 11) # macro(find_hdf5) @@ -27,6 +27,7 @@ set(CMAKE_CXX_STANDARD 11) # endif() # endmacro() +FIND_PACKAGE(CFITSIO 3.000 REQUIRED) FIND_PACKAGE(HDF5 REQUIRED COMPONENTS C) INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR}) find_package(OpenMP) diff --git a/Converter.cc b/Converter.cc index 66767ae..7aaa0be 100644 --- a/Converter.cc +++ b/Converter.cc @@ -74,7 +74,7 @@ void Converter::convert() { // outputGroup = outputFile.createGroup("0"); H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC); - std::string parentpath("0/"); + std::string parentpath("0"); hid_t gid = H5outputfile.create_group(parentpath); std::vector chunkDims; @@ -85,9 +85,10 @@ void Converter::convert() { // H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); // floatType.setOrder(H5T_ORDER_LE); // createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims); - H5outputfile.create_dataset(parentpath, H5T_NATIVE_FLOAT, - standardDims, chunkDims); // statsXY.createDatasets(outputGroup, "XY"); + standardDataSet = parentpath + std::string("/DATA"); + standardDataSetID = H5outputfile.create_dataset(standardDataSet, H5T_NATIVE_FLOAT, + standardDims, chunkDims); statsXY.createDatasets(H5outputfile, parentpath, "XY"); if (depth > 1) { @@ -99,13 +100,17 @@ void Converter::convert() { // createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims); statsXYZ.createDatasets(H5outputfile, parentpath, "XYZ"); statsZ.createDatasets(H5outputfile, parentpath, "Z"); - auto swizzledGroup = H5outputfile.create_group("0/SwizzledData"); + std::string swizzledpath = parentpath + "/SwizzledData"; + std::string linkpath = parentpath + "/PermutedData"; + auto swizzledGroup = H5outputfile.create_group(swizzledpath); // We use this name in papers because it sounds more serious. :) - H5outputfile.create_link("0/SwizzledData", "0/PermutedData"); - // ??? H5outputfile.create_dataset(); + H5outputfile.create_link(swizzledpath, linkpath); + swizzledDataSet = swizzledpath + "/" + swizzledName; + swizzledDataSetID = H5outputfile.create_dataset(swizzledDataSet, H5T_NATIVE_FLOAT, swizzledDims); } /// mipMaps.createDatasets(outputGroup); + mipMaps.createDatasets(H5outputfile, parentpath); // COPY HEADERS diff --git a/Converter.h b/Converter.h index d09dafe..492d9c5 100644 --- a/Converter.h +++ b/Converter.h @@ -35,6 +35,8 @@ class Converter { // H5::Group outputGroup; // H5::DataSet standardDataSet; // H5::DataSet swizzledDataSet; + std::string standardDataSet, swizzledDataSet; + hid_t standardDataSetID, swizzledDataSetID; float* standardCube; diff --git a/FastConverter.cc b/FastConverter.cc index f4625c7..7c41454 100644 --- a/FastConverter.cc +++ b/FastConverter.cc @@ -255,6 +255,8 @@ void FastConverter::copyAndCalculate() { std::vector count = trimAxes({1, depth, height, width}, N); std::vector start = trimAxes({currentStokes, 0, 0, 0}, N); // writeHdf5Data(standardDataSet, standardCube, memDims, count, start); + H5outputfile.write_dataset_nd(standardDataSet, + memDims, standardCube, count, start); if (depth > 1) { // This all technically worked if we reused the standard filespace and memspace @@ -262,6 +264,8 @@ void FastConverter::copyAndCalculate() { std::vector swizzledCount = trimAxes({1, width, height, depth}, N); std::vector swizzledMemDims = {width, height, depth}; // writeHdf5Data(swizzledDataSet, rotatedCube, swizzledMemDims, swizzledCount, start); + H5outputfile.write_dataset_nd(swizzledDataSet, + swizzledMemDims, rotatedCube, swizzledCount, start); } // After writing and before mipmaps, we free the swizzled memory. We allocate it again next Stokes. diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc index eac39d4..ca1b3ae 100644 --- a/HDF5Wrapper.cc +++ b/HDF5Wrapper.cc @@ -106,6 +106,83 @@ void H5OutputFile::close() #endif } +#ifdef USEPARALLELHDF +void H5OutputFile::_set_mpi_dim_and_offset(MPI_Comm &comm, + hsize_t rank, std::vector &dims, + std::vector &dims_single, + std::vector &dims_offset + std::vector &mpi_hdf_dims, + std::vector &mpi_hdf_dims_tot, + bool flag_parallel, bool flag_first_dim_parallel + ) +{ + if (!flag_parallel) return; + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + for (auto i=0;i 0) continue; + for (auto j=1;j<=ThisWriteTask;j++) { + dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; + } + } + if (flag_first_dim_parallel && rank > 1) { + for (auto i=1; i &dims, + std::vector &mpi_hdf_dims_tot, + bool flag_parallel, bool flag_hyperslab) +{ + if (flag_parallel) { + //then all threads create the same simple data space + //so the meta information is the same + if (flag_hyperslab) { + //allocate the space spanning the file + dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL); + //allocate the memory space + //allocate the memory space + memspace_id = H5Screate_simple(rank, dims.data(), NULL); + } + else { + dspace_id = H5Screate_simple(rank, dims.data(), NULL); + memspace_id = dspace_id; + } + } + else { + dspace_id = H5Screate_simple(rank, dims.data(), NULL); + memspace_id = dspace_id; + } +} + +void H5OutputFile::_set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite, + hid_t &dspace_id, hid_t &memspace_id, + hsize_t rank, std::vector dims, std::vector dims_offset, + bool flag_parallel, bool flag_collective, bool flag_hyperslab) +{ + if (!flag_parallel) return; + // set up the collective transfer properties list + prop_id = H5Pcreate(H5P_DATASET_XFER); + //if all tasks are participating in the writes + if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE); + else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT); + if (flag_hyperslab) { + H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL); + if (dims[0] == 0) { + H5Sselect_none(dspace_id); + H5Sselect_none(memspace_id); + } + } + iwrite = (mpi_hdf_dims_tot[0] > 0); +} + +#endif + std::vector H5OutputFile::_tokenize(const std::string &s) { std::string delims("/"); @@ -363,7 +440,7 @@ bool H5OutputFile::exists_dataset(const std::string &parent, const std::string & } /// create a dataset -hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id, +hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id, std::vector dims, std::vector chunkDims, bool flag_parallel, bool flag_hyperslab, bool flag_collective) { @@ -371,7 +448,7 @@ hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id, MPI_Comm comm = mpi_comm_write; MPI_Info info = MPI_INFO_NULL; #endif - auto splitPath = _tokenize(path); + auto splitPath = _tokenize(fullname); auto dsetname = splitPath.back(); splitPath.pop_back(); hid_t curr_id = file_id; @@ -393,26 +470,8 @@ hid_t H5OutputFile::create_dataset(std::string path, hid_t type_id, } #ifdef USEPARALLELHDF - /// will really have to update for MPI std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); - if (flag_parallel) { - //if parallel hdf5 get the full extent of the data - //this bit of code communicating information can probably be done elsewhere - //minimize number of mpi communications - for (auto i=0;i 0) continue; - for (auto j=1;j<=ThisWriteTask;j++) { - dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; - } - } - if (flag_first_dim_parallel && rank > 1) { - for (auto i=1; i mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); - //if parallel hdf5 get the full extent of the data - //this bit of code communicating information can probably be done elsewhere - //minimize number of mpi communications - if (flag_parallel) { - //if parallel hdf5 get the full extent of the data - //this bit of code communicating information can probably be done elsewhere - //minimize number of mpi communications - for (auto i=0;i 0) continue; - for (auto j=1;j<=ThisWriteTask;j++) { - dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; - } - } - if (flag_first_dim_parallel && rank > 1) { - for (auto i=1; i 0); +#ifdef USEPARALLELHDF + _set_mpi_dataset_properties(prop_id, iwrite, + dspace_id, memspace_id, + rank, dims, dims_offset, + flag_parallel, flag_collective, flag_hyperslab); +#endif + if (iwrite) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); #ifdef USEPARALLELHDF - if (flag_parallel) { - // set up the collective transfer properties list - prop_id = H5Pcreate(H5P_DATASET_XFER); - //if all tasks are participating in the writes - if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE); - else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT); - if (flag_hyperslab) { - H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL); - if (dims[0] == 0) { - H5Sselect_none(dspace_id); - H5Sselect_none(memspace_id); - } + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); +} +/// Write data set with hyperslab set by count and start +void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective) +{ + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + write_dataset_nd(name, rank, dims, data, + count, start, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective); +} + +///\todo need to update this to general mpi hyper slab selection +void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, + bool flag_hyperslab, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + hid_t dspace_id, dset_id, prop_id, memspace_id, ret; + std::vector chunks(rank); + // Get HDF5 data type of the array in memory + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + // _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); +#endif + + // Determine if going to compress data in chunks + // Only chunk non-zero size datasets + int nonzero_size = 1; + for(int i=0; i 0) { - // Write the data - ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); - if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + else { + if(dims[i]==0) nonzero_size = 0; } +#else + if(dims[i]==0) nonzero_size = 0; +#endif } - else if (dims[0] > 0) + // Only chunk datasets where we would have >1 chunk + unsigned int large_dataset = 0; + for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } + else { + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } +#else + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; +#endif } - + if(nonzero_size && large_dataset) + { +#ifdef USEPARALLELHDF + if (flag_parallel) { + for(auto i=0; i 0) { + for(auto i=0; i 0); + + //set hyperslab + if (!count.empty() && !start.empty()) { + herr_t ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL); + } + +#ifdef USEPARALLELHDF + // _set_mpi_dataset_properties(prop_id, iwrite, + // dspace_id, memspace_id, + // rank, dims, dims_offset, + // flag_parallel, flag_collective, flag_hyperslab); +#endif + if (iwrite) { ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); } -#endif // Clean up (note that dtype_id is NOT a new object so don't need to close it) H5Pclose(prop_id); #ifdef USEPARALLELHDF - if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); + // if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); #endif H5Sclose(dspace_id); H5Dclose(dset_id); diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h index 4ad1f55..c21a29c 100644 --- a/HDF5Wrapper.h +++ b/HDF5Wrapper.h @@ -63,6 +63,27 @@ class H5OutputFile abort(); } + /// parallel hdf5 mpi routines +#ifdef USEPARALLELHDF + void _set_mpi_dim_and_offset(MPI_Comm &comm, + hsize_t rank, std::vector &dims, + std::vector &dims_single, + std::vector &dims_offset, + std::vector &mpi_hdf_dims, + std::vector &mpi_hdf_dims_tot, + bool flag_parallel, bool flag_first_dim_parallel + ); + void _set_mpi_hyperslab(hid_t &dspace_id, hid_t &memspace_id, + hsize_t rank, std::vector &dims, + std::vector &mpi_hdf_dims_tot, + bool flag_parallel, bool flag_hyperslab); + + void _set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite, + hid_t &dspace_id, hid_t &memspace_id, + hsize_t rank, std::vector dims, std::vector dims_offset, + bool flag_parallel, bool flag_collective, bool flag_hyperslab) +#endif + /// tokenize a path given an input string std::vector _tokenize(const std::string &s); @@ -190,16 +211,31 @@ class H5OutputFile return dset_id; } - template hid_t create_dataset(std::string path, T&data, - std::vector dims, std::vector chunkDims = std::vector(0), - bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + template hid_t create_dataset(std::string path, std::string name, T&data, + std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + return create_dataset(path+"/"+name, hdf5_type(T{}), dims, chunkDims, + flag_parallel, flag_hyperslab, flag_collective); + } + hid_t create_dataset(std::string path, std::string name, hid_t datatype, + std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + return create_dataset(path+"/"+name, datatype, dims, chunkDims, + flag_parallel, flag_hyperslab, flag_collective); + } + template hid_t create_dataset(std::string fullname, T&data, + std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) { - return create_dataset(path, hdf5_type(T{}), dims, chunkDims, - flag_parallel, flag_hyperslab, flag_collective); + return create_dataset(fullname, hdf5_type(T{}), dims, chunkDims, + flag_parallel, flag_hyperslab, flag_collective); } - hid_t create_dataset(std::string path, hid_t datatype, + hid_t create_dataset(std::string fullname, hid_t datatype, std::vector dims, std::vector chunkDims = std::vector(0), bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); + /// close data set herr_t close_dataset(hid_t dset_id) { herr_t status = H5Dclose(dset_id); @@ -215,13 +251,23 @@ class H5OutputFile ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL herr_t create_link(std::string orgname, std::string linkname, bool ihard = true); - //write 1D data sets of string or input data with defined data type + /// write 1D data sets of string or input data with defined data type + /// without hyperslab selection void write_dataset(std::string name, hsize_t len, std::string data, - bool flag_parallel = true, bool flag_collective = true); + bool flag_parallel = true, bool flag_collective = true); void write_dataset(std::string name, hsize_t len, void *data, - hid_t memtype_id=-1, hid_t filetype_id=-1, - bool flag_parallel = true, bool flag_first_dim_parallel = true, - bool flag_hyperslab = true, bool flag_collective = true); + hid_t memtype_id=-1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + /// with a hyperslab selection defined by count, start + void write_dataset(std::string name, hsize_t len, std::string data, + const std::vector& count, const std::vector& start, + bool flag_parallel = true, bool flag_collective = true); + void write_dataset(std::string name, hsize_t len, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id=-1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of /// the input data if not explicitly specified with the filetype_id parameter. @@ -235,7 +281,6 @@ class H5OutputFile if (memtype_id == -1) memtype_id = hdf5_type(T{}); write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); } - /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of /// the input data if not explicitly specified with the filetype_id parameter. template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, @@ -258,24 +303,7 @@ class H5OutputFile #ifdef USEPARALLELHDF std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); - if (flag_parallel) { - //if parallel hdf5 get the full extent of the data - //this bit of code communicating information can probably be done elsewhere - //minimize number of mpi communications - for (auto i=0;i 0) continue; - for (auto j=1;j<=ThisWriteTask;j++) { - dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; - } - } - if (flag_first_dim_parallel && rank > 1) { - for (auto i=1; i 0); +#ifdef USEPARALLELHDF + _set_mpi_dataset_properties(prop_id, iwrite, + dspace_id, memspace_id, + rank, dims, dims_offset, + flag_parallel, flag_collective, flag_hyperslab); +#endif + if (iwrite) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); #ifdef USEPARALLELHDF - if (flag_parallel) { - // set up the collective transfer properties list - prop_id = H5Pcreate(H5P_DATASET_XFER); - //if all tasks are participating in the writes - if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE); - else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT); - if (flag_hyperslab) { - H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL); - if (dims[0] == 0) { - H5Sselect_none(dspace_id); - H5Sselect_none(memspace_id); - } + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); + } + + //write dataset with hyperslab selection + void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + + /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + /// template function so defined here + template void write_dataset(std::string name, hsize_t len, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_dataset_nd(name, rank, dims, data, + count, start, memtype_id, filetype_id, + flag_parallel, flag_hyperslab, flag_collective); + } + template void write_dataset_nd(std::string name, std::vector dims, T *data, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + write_dataset_nd(name, dims.size(), dims.data(), data, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); + } + + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_dataset_nd(std::string name, std::vector dims, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + write_dataset_nd(name, dims.size(), dims.data(), data, + count, start, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); + } + template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + hid_t dspace_id, dset_id, prop_id, memspace_id, ret; + std::vector chunks(rank); + + // Get HDF5 data type of the array in memory + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + // _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); +#endif + // Determine if going to compress data in chunks + // Only chunk non-zero size datasets + int nonzero_size = 1; + for(int i=0; i 0) { - // Write the data - ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); - if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + else { + if(dims[i]==0) nonzero_size = 0; } +#else + if(dims[i]==0) nonzero_size = 0; +#endif } - else if (dims[0] > 0) + // Only chunk datasets where we would have >1 chunk + int large_dataset = 0; + for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } + else { + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; + } +#else + if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; +#endif + } + if(nonzero_size && large_dataset) + { +#ifdef USEPARALLELHDF + if (flag_parallel) { + for(auto i=0; i 0) { + dspace_id = H5Screate_simple(rank, dims, NULL); + memspace_id = dspace_id; +#endif + + // Dataset creation properties + prop_id = H5P_DEFAULT; +#ifdef USEHDFCOMPRESSION + // this defines compression + if(nonzero_size && large_dataset) + { + prop_id = H5Pcreate(H5P_DATASET_CREATE); + H5Pset_layout(prop_id, H5D_CHUNKED); + H5Pset_chunk(prop_id, rank, chunks.data()); + H5Pset_deflate(prop_id, HDFDEFLATE); + } +#endif + // Create the dataset + dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, + H5P_DEFAULT, prop_id, H5P_DEFAULT); + if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name); + H5Pclose(prop_id); + + prop_id = H5P_DEFAULT; + bool iwrite = (dims[0] > 0); + if (!count.empty() && !start.empty()) { + herr_t ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL); + } +#ifdef USEPARALLELHDF + // _set_mpi_dataset_properties(prop_id, iwrite, + // dspace_id, memspace_id, + // rank, dims, dims_offset, + // flag_parallel, flag_collective, flag_hyperslab); +#endif + if (iwrite) { ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); } -#endif // Clean up (note that dtype_id is NOT a new object so don't need to close it) H5Pclose(prop_id); @@ -411,9 +563,10 @@ class H5OutputFile #endif H5Sclose(dspace_id); H5Dclose(dset_id); - } + } void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + const std::vector& count, const std::vector& start, hid_t memtype_id = -1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); diff --git a/MipMap.cc b/MipMap.cc index 9d9e61a..0ae092a 100644 --- a/MipMap.cc +++ b/MipMap.cc @@ -29,7 +29,7 @@ MipMap::~MipMap() { void MipMap::createDataset(H5OutputFile &H5outputfile, std::string path, const std::vector& chunkDims) { std::ostringstream mipMapName; - mipMapName << "MipMaps/DATA/DATA_XY_" << mip; + mipMapName << "/MipMaps/DATA/DATA_XY_" << mip; if (useChunks(datasetDims)) { H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims, chunkDims); } else { @@ -61,7 +61,8 @@ void MipMap::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t cha std::vector start = trimAxes({stokesOffset, channelOffset, 0, 0}, N); // writeHdf5Data(dataset, vals, bufferDims, count, start); - //??? + ///\todo need to figure out if I use the name or the id from which I get the name + // H5outputfile.write_dataset_nd(dataset, bufferDims, vals, count, start); } void MipMap::resetBuffers() { diff --git a/README.md b/README.md index 9017529..bf1a1c9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ C++ implementation of FITS to IDIA-HDF5 converter, optimised using OpenMP ## Installation -Dependencies: CFITSIO, HDF5, HDF5 C++ bindings +Dependencies: CFITSIO, HDF5 To install: diff --git a/SlowConverter.cc b/SlowConverter.cc index 7cd0332..7cd8626 100644 --- a/SlowConverter.cc +++ b/SlowConverter.cc @@ -81,6 +81,8 @@ void SlowConverter::copyAndCalculate() { std::vector start = trimAxes({s, c, 0, 0}, N); // writeHdf5Data(standardDataSet, standardCube, memDims, count, start); + H5outputfile.write_dataset_nd(standardDataSet, + memDims.size(), memDims.data(), standardCube, count, start); DEBUG(std::cout << " Accumulating XY stats and mipmaps..." << std::flush;); TIMER(timer.start(timerLabelStatsMipmaps);); @@ -346,6 +348,7 @@ void SlowConverter::copyAndCalculate() { auto swizzledStart = trimAxes({s, xOffset, yOffset, 0}, N); // writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart); + H5outputfile.write_dataset_nd(swizzledDataSet, swizzledMemDims, rotatedSlice, swizzledCount, swizzledStart); DEBUG(std::cout << " Writing Z statistics..." << std::endl;); // write Z statistics diff --git a/Stats.cc b/Stats.cc index 4774221..7c1ef68 100644 --- a/Stats.cc +++ b/Stats.cc @@ -105,9 +105,24 @@ void Stats::writeBasic(H5OutputFile &H5outputfile, const std::vector& b // writeHdf5Data(sumDset, sums, basicBufferDims, count, start); // writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start); // writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start); + + + ///\todo need to get the names of the ids stored in datasetids + ///or do I just use the names? + //??? + for (auto i = 0; i& basicBufferDims, const std::vector& count, const std::vector& start) { void Stats::writeHistogram(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { // writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start); + //??? + // H5outputfile.write_dataset_nd(histDset, extend(basicBufferDims, {numBins}, histograms, count, start); } diff --git a/cmake/FindCFITSIO.cmake b/cmake/FindCFITSIO.cmake new file mode 100644 index 0000000..a749701 --- /dev/null +++ b/cmake/FindCFITSIO.cmake @@ -0,0 +1,74 @@ +# - Try to find CFITSIO. +# Variables used by this module: +# CFITSIO_ROOT_DIR - CFITSIO root directory +# Variables defined by this module: +# CFITSIO_FOUND - system has CFITSIO +# CFITSIO_INCLUDE_DIR - the CFITSIO include directory (cached) +# CFITSIO_INCLUDE_DIRS - the CFITSIO include directories +# (identical to CFITSIO_INCLUDE_DIR) +# CFITSIO_LIBRARY - the CFITSIO library (cached) +# CFITSIO_LIBRARIES - the CFITSIO libraries +# (identical to CFITSIO_LIBRARY) +# CFITSIO_VERSION_STRING the found version of CFITSIO, padded to 3 digits + +# Copyright (C) 2009 +# ASTRON (Netherlands Institute for Radio Astronomy) +# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see . +# +# $Id$ + +if(NOT CFITSIO_FOUND) + + find_path(CFITSIO_INCLUDE_DIR fitsio.h + HINTS ${CFITSIO_ROOT_DIR} PATH_SUFFIXES include include/cfitsio + include/libcfitsio0) + + if(CFITSIO_INCLUDE_DIR) + FILE(READ "${CFITSIO_INCLUDE_DIR}/fitsio.h" CFITSIO_H) + set(CFITSIO_VERSION_REGEX ".*#define CFITSIO_VERSION[^0-9]*([0-9]+)\\.([0-9]+).*") + if ("${CFITSIO_H}" MATCHES ${CFITSIO_VERSION_REGEX}) + # Pad CFITSIO minor version to three digit because 3.181 is older than 3.35 + STRING(REGEX REPLACE ${CFITSIO_VERSION_REGEX} + "\\1.\\200" CFITSIO_VERSION_STRING "${CFITSIO_H}") + STRING(SUBSTRING ${CFITSIO_VERSION_STRING} 0 5 CFITSIO_VERSION_STRING) + STRING(REGEX REPLACE "^([0-9]+)[.]([0-9]+)" "\\1" CFITSIO_VERSION_MAJOR ${CFITSIO_VERSION_STRING}) + # CFITSIO_VERSION_MINOR will contain 80 for 3.08, 181 for 3.181 and 200 for 3.2 + STRING(REGEX REPLACE "^([0-9]+)[.]0*([0-9]+)" "\\2" CFITSIO_VERSION_MINOR ${CFITSIO_VERSION_STRING}) + else () + set(CFITSIO_VERSION_STRING "Unknown") + endif() + endif(CFITSIO_INCLUDE_DIR) + + find_library(CFITSIO_LIBRARY cfitsio + HINTS ${CFITSIO_ROOT_DIR} PATH_SUFFIXES lib) + find_library(M_LIBRARY m) + mark_as_advanced(CFITSIO_INCLUDE_DIR CFITSIO_LIBRARY M_LIBRARY) + + if(CMAKE_VERSION VERSION_LESS "2.8.3") + find_package_handle_standard_args(CFITSIO DEFAULT_MSG + CFITSIO_LIBRARY M_LIBRARY CFITSIO_INCLUDE_DIR) + else () + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(CFITSIO + REQUIRED_VARS CFITSIO_LIBRARY M_LIBRARY CFITSIO_INCLUDE_DIR + VERSION_VAR CFITSIO_VERSION_STRING) + endif () + + set(CFITSIO_INCLUDE_DIRS ${CFITSIO_INCLUDE_DIR}) + set(CFITSIO_LIBRARIES ${CFITSIO_LIBRARY} ${M_LIBRARY}) + +endif(NOT CFITSIO_FOUND) From 2783620043781d93788b7397f9a802cc3d2d29a6 Mon Sep 17 00:00:00 2001 From: Pascal Date: Mon, 3 Aug 2020 16:13:43 +0800 Subject: [PATCH 06/13] Added some comments --- Converter.h | 8 ++++++++ HDF5Wrapper.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/Converter.h b/Converter.h index 492d9c5..a24f6d8 100644 --- a/Converter.h +++ b/Converter.h @@ -64,6 +64,10 @@ class Converter { }; +/*! +Converter class that makes use of OpenMP parallelisation +when calculating statistics and histograms +*/ class FastConverter : public Converter { public: FastConverter(std::string inputFileName, std::string outputFileName, bool progress); @@ -74,6 +78,10 @@ class FastConverter : public Converter { }; +/*! +Converter class that has minimmal memory overhead but has no parallelisation +when calculating statistics and histograms +*/ class SlowConverter : public Converter { public: SlowConverter(std::string inputFileName, std::string outputFileName, bool progress); diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h index c21a29c..e769875 100644 --- a/HDF5Wrapper.h +++ b/HDF5Wrapper.h @@ -251,6 +251,10 @@ class H5OutputFile ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL herr_t create_link(std::string orgname, std::string linkname, bool ihard = true); + ///\todo add a write_to_dataset which is similar to below but provide the + ///dataset id. Does require storing the data space somewhere to close + ///it elsewhere. + /// write 1D data sets of string or input data with defined data type /// without hyperslab selection void write_dataset(std::string name, hsize_t len, std::string data, From 3e753dc8c16e1aa7e2f788d80cbabed41e005d41 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 4 Aug 2020 10:28:44 +0800 Subject: [PATCH 07/13] Update to HDF5 Wrapper --- HDF5Wrapper.cc | 248 +++++++++++++++-------------------- HDF5Wrapper.h | 350 +++++++++++++++++++++---------------------------- 2 files changed, 256 insertions(+), 342 deletions(-) diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc index ca1b3ae..c65f77c 100644 --- a/HDF5Wrapper.cc +++ b/HDF5Wrapper.cc @@ -183,6 +183,73 @@ void H5OutputFile::_set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite, #endif +void H5OutputFile::_set_chunks(std::vector &chunks, + int rank, hsize_t *dims, +#ifdef USEPARALLELHDF + std::vector &mpi_hdf_dims_tot, +#endif + bool flag_parallel +) +{ + // check if data set has non-zero size and if it large + auto nonzero_size = true; + auto large_dataset = true; + for(auto i=0; i &chunks) +{ +#ifdef USEHDFCOMPRESSION + if (chunks.empty()) return H5P_DEFAULT; + hid_t prop_id = H5Pcreate(H5P_DATASET_CREATE); + H5Pset_layout(prop_id, H5D_CHUNKED); + H5Pset_chunk(prop_id, rank, chunks.data()); + H5Pset_deflate(prop_id, HDFDEFLATE); +#else + return H5P_DEFAULT; +#endif +} + std::vector H5OutputFile::_tokenize(const std::string &s) { std::string delims("/"); @@ -442,6 +509,7 @@ bool H5OutputFile::exists_dataset(const std::string &parent, const std::string & /// create a dataset hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id, std::vector dims, std::vector chunkDims, + bool flag_closedataset, bool flag_parallel, bool flag_hyperslab, bool flag_collective) { #ifdef USEPARALLELHDF @@ -455,57 +523,31 @@ hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id, hid_t memspace_id, dspace_id, dset_id, prop_id = H5P_DEFAULT; auto rank = dims.size(); std::vector chunks = chunkDims; + std::vector ids; - /// not certain what to do with the open groups here. for (auto& groupname : splitPath) { - if (H5Lexists(curr_id, groupname.c_str(), H5P_DEFAULT) == 0) - { - curr_id = create_group(groupname); - } - else - { - curr_id = open_group(groupname); - } + if (H5Lexists(curr_id, groupname.c_str(), H5P_DEFAULT) == 0) curr_id = create_group(groupname); + else curr_id = open_group(groupname); + ids.push_back(curr_id); } #ifdef USEPARALLELHDF std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); - if (flag_parallel) _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_first_dim_parallel); + _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); #endif // Determine if going to compress data in chunks // Only chunk non-zero size datasets - int nonzero_size = 1; - for(int i=0; i1 chunk - if(nonzero_size && !chunks.empty()) - { -#ifdef USEPARALLELHDF - if (flag_parallel) { - for(auto i=0; i1 chunk - unsigned int large_dataset = 0; - for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; - } - else { - if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; - } -#else - if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; -#endif - } - if(nonzero_size && large_dataset) - { -#ifdef USEPARALLELHDF - if (flag_parallel) { - for(auto i=0; i1 chunk - unsigned int large_dataset = 0; - for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; - } - else { - if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; - } -#else - if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; -#endif - } - if(nonzero_size && large_dataset) - { + _set_chunks(chunks, rank, dims, #ifdef USEPARALLELHDF - if (flag_parallel) { - for(auto i=0; i &chunks, + int rank, hsize_t *dims, + #ifdef USEPARALLELHDF + std::vector &mpi_hdf_dims_tot, + #endif + bool flag_parallel + ); + void _set_chunks(std::vector &chunks, + std::vector &dims, +#ifdef USEPARALLELHDF + std::vector &mpi_hdf_dims_tot, +#endif + bool flag_parallel + ) + { + _set_chunks(chunks, dims.size(), dims.data(), +#ifdef USEPARALLELHDF + mpi_hdf_dims_tot, +#endif + flag_parallel + ); + } + hid_t _set_compression(int rank, std::vector &chunks); + /// tokenize a path given an input string std::vector _tokenize(const std::string &s); @@ -188,9 +213,9 @@ class H5OutputFile hid_t dspace_id = H5Screate_simple(rank, dims, NULL); return dspace_id; } - /// close data set + /// close data space herr_t close_dataspace(hid_t dspace_id) { - herr_t status = H5Dclose(dspace_id); + herr_t status = H5Sclose(dspace_id); return status; } @@ -210,30 +235,38 @@ class H5OutputFile H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); return dset_id; } - + /// create data set and return hid + /// and possibly close all groups, data spaces and data set itself after creation template hid_t create_dataset(std::string path, std::string name, T&data, std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_closedataset = true, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) { return create_dataset(path+"/"+name, hdf5_type(T{}), dims, chunkDims, + flag_closedataset, flag_parallel, flag_hyperslab, flag_collective); } hid_t create_dataset(std::string path, std::string name, hid_t datatype, std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_closedataset = true, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) { return create_dataset(path+"/"+name, datatype, dims, chunkDims, + flag_closedataset, flag_parallel, flag_hyperslab, flag_collective); } template hid_t create_dataset(std::string fullname, T&data, std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_closedataset = true, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) { return create_dataset(fullname, hdf5_type(T{}), dims, chunkDims, + flag_closedataset, flag_parallel, flag_hyperslab, flag_collective); } hid_t create_dataset(std::string fullname, hid_t datatype, std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_closedataset = true, bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); /// close data set @@ -251,164 +284,140 @@ class H5OutputFile ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL herr_t create_link(std::string orgname, std::string linkname, bool ihard = true); - ///\todo add a write_to_dataset which is similar to below but provide the - ///dataset id. Does require storing the data space somewhere to close - ///it elsewhere. - - /// write 1D data sets of string or input data with defined data type - /// without hyperslab selection - void write_dataset(std::string name, hsize_t len, std::string data, + /// writes to an existing data set + /// with a hyperslab selection defined by count, start + void write_to_dataset(std::string name, hsize_t len, std::string data, + const std::vector& count, const std::vector& start, bool flag_parallel = true, bool flag_collective = true); - void write_dataset(std::string name, hsize_t len, void *data, + void write_to_dataset(std::string name, hsize_t len, void *data, + const std::vector& count, const std::vector& start, hid_t memtype_id=-1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); - /// with a hyperslab selection defined by count, start + + /// write 1D data sets of string or input data with defined data type + /// without hyperslab selection, creates dataset, writes to it and closes void write_dataset(std::string name, hsize_t len, std::string data, - const std::vector& count, const std::vector& start, bool flag_parallel = true, bool flag_collective = true); void write_dataset(std::string name, hsize_t len, void *data, - const std::vector& count, const std::vector& start, hid_t memtype_id=-1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); - - /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of - /// the input data if not explicitly specified with the filetype_id parameter. - /// template function so defined here - template void write_dataset(std::string name, hsize_t len, T *data, - hid_t memtype_id = -1, hid_t filetype_id=-1, - bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) - { - int rank = 1; - hsize_t dims[1] = {len}; - if (memtype_id == -1) memtype_id = hdf5_type(T{}); - write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); - } - /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of - /// the input data if not explicitly specified with the filetype_id parameter. - template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, - hid_t memtype_id = -1, hid_t filetype_id = -1, - bool flag_parallel = true, bool flag_first_dim_parallel = true, - bool flag_hyperslab = true, bool flag_collective = true) - { -#ifdef USEPARALLELHDF - MPI_Comm comm = mpi_comm_write; - MPI_Info info = MPI_INFO_NULL; -#endif - hid_t dspace_id, dset_id, prop_id, memspace_id, ret; - std::vector chunks(rank); - - // Get HDF5 data type of the array in memory - if (memtype_id == -1) memtype_id = hdf5_type(T{}); - - // Determine type of the dataset to create - if(filetype_id < 0) filetype_id = memtype_id; - -#ifdef USEPARALLELHDF - std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); - _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); -#endif - - // Determine if going to compress data in chunks - // Only chunk non-zero size datasets - int nonzero_size = 1; - for(int i=0; i1 chunk - int large_dataset = 0; - for(int i=0; i void write_dataset(std::string name, hsize_t len, T *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) { -#ifdef USEPARALLELHDF - if (flag_parallel) { - if(mpi_hdf_dims_tot[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; - } - else { - if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; - } -#else - if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; -#endif + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); } - if(nonzero_size && large_dataset) + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) { -#ifdef USEPARALLELHDF - if (flag_parallel) { - for(auto i=0; i chunks; +// +// // Get HDF5 data type of the array in memory +// if (memtype_id == -1) memtype_id = hdf5_type(T{}); +// +// // Determine type of the dataset to create +// if(filetype_id < 0) filetype_id = memtype_id; +// +// #ifdef USEPARALLELHDF +// std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); +// _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); +// #endif +// +// // Determine if going to compress data in chunks +// // Only chunk non-zero size datasets +// _set_chunks(chunks, rank, dims, +// #ifdef USEPARALLELHDF +// mpi_hdf_dims_tot, +// #endif +// flag_parallel +// ); +// +// // Create the dataspace +// #ifdef USEPARALLELHDF +// _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); +// #else +// dspace_id = H5Screate_simple(rank, dims, NULL); +// memspace_id = dspace_id; +// #endif +// +// // Dataset creation properties +// prop_id = H5P_DEFAULT; +// #ifdef USEHDFCOMPRESSION +// prop_id = _set_compression(rank, chunks); +// #endif +// // Create the dataset +// dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, +// H5P_DEFAULT, prop_id, H5P_DEFAULT); +// if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name); +// H5Pclose(prop_id); +// +// prop_id = H5P_DEFAULT; +// bool iwrite = (dims[0] > 0); +// #ifdef USEPARALLELHDF +// _set_mpi_dataset_properties(prop_id, iwrite, +// dspace_id, memspace_id, +// rank, dims, dims_offset, +// flag_parallel, flag_collective, flag_hyperslab); +// #endif +// if (iwrite) { +// ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); +// if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); +// } +// // Clean up (note that dtype_id is NOT a new object so don't need to close it) +// H5Pclose(prop_id); +// #ifdef USEPARALLELHDF +// if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +// #endif +// H5Sclose(dspace_id); +// H5Dclose(dset_id); } - - // Create the dataspace -#ifdef USEPARALLELHDF - _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); -#else - dspace_id = H5Screate_simple(rank, dims, NULL); - memspace_id = dspace_id; -#endif - - // Dataset creation properties - prop_id = H5P_DEFAULT; -#ifdef USEHDFCOMPRESSION - // this defines compression - if(nonzero_size && large_dataset) + template void write_dataset_nd(std::string name, std::vector dims, T *data, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) { - prop_id = H5Pcreate(H5P_DATASET_CREATE); - H5Pset_layout(prop_id, H5D_CHUNKED); - H5Pset_chunk(prop_id, rank, chunks.data()); - H5Pset_deflate(prop_id, HDFDEFLATE); - } -#endif - // Create the dataset - dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, - H5P_DEFAULT, prop_id, H5P_DEFAULT); - if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name); - H5Pclose(prop_id); - - prop_id = H5P_DEFAULT; - bool iwrite = (dims[0] > 0); -#ifdef USEPARALLELHDF - _set_mpi_dataset_properties(prop_id, iwrite, - dspace_id, memspace_id, - rank, dims, dims_offset, - flag_parallel, flag_collective, flag_hyperslab); -#endif - if (iwrite) { - ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); - if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + write_dataset_nd(name, dims.size(), dims.data(), data, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); } - - // Clean up (note that dtype_id is NOT a new object so don't need to close it) - H5Pclose(prop_id); -#ifdef USEPARALLELHDF - if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); -#endif - H5Sclose(dspace_id); - H5Dclose(dset_id); - } - //write dataset with hyperslab selection void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, hid_t memtype_id = -1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); + /// with a hyperslab selection defined by count, start + void write_dataset(std::string name, hsize_t len, std::string data, + const std::vector& count, const std::vector& start, + bool flag_parallel = true, bool flag_collective = true); + void write_dataset(std::string name, hsize_t len, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id=-1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of /// the input data if not explicitly specified with the filetype_id parameter. /// template function so defined here @@ -424,16 +433,6 @@ class H5OutputFile count, start, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); } - template void write_dataset_nd(std::string name, std::vector dims, T *data, - hid_t memtype_id = -1, hid_t filetype_id = -1, - bool flag_parallel = true, bool flag_first_dim_parallel = true, - bool flag_hyperslab = true, bool flag_collective = true) - { - write_dataset_nd(name, dims.size(), dims.data(), data, - memtype_id, filetype_id, - flag_parallel, flag_first_dim_parallel, - flag_hyperslab, flag_collective); - } /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of /// the input data if not explicitly specified with the filetype_id parameter. @@ -475,48 +474,12 @@ class H5OutputFile // Determine if going to compress data in chunks // Only chunk non-zero size datasets - int nonzero_size = 1; - for(int i=0; i1 chunk - int large_dataset = 0; - for(int i=0; i HDFOUTPUTCHUNKSIZE) large_dataset = 1; - } - else { - if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; - } -#else - if(dims[i] > HDFOUTPUTCHUNKSIZE) large_dataset = 1; -#endif - } - if(nonzero_size && large_dataset) - { -#ifdef USEPARALLELHDF - if (flag_parallel) { - for(auto i=0; i Date: Tue, 4 Aug 2020 11:21:24 +0800 Subject: [PATCH 08/13] Update to hdf include --- common.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/common.h b/common.h index dfc21d6..3cc0b2f 100644 --- a/common.h +++ b/common.h @@ -14,7 +14,8 @@ #include #include -#include +// #include +#include #include #define SCHEMA_VERSION "0.3" From 7cbc69b7c416bfbf3214a1b21ef476fa7f4f1a18 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 4 Aug 2020 11:21:54 +0800 Subject: [PATCH 09/13] Update to cmake Added report, allowed possibility of parallel compressed hdf5 --- CMakeLists.txt | 129 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 104 insertions(+), 25 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9179d76..8b73644 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,37 +5,72 @@ set(default_build_type "Release") set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") set(CMAKE_CXX_STANDARD 11) -# macro(find_hdf5) -# find_package(HDF5 REQUIRED COMPONENTS C) -# if (HDF5_FOUND) -# include_directories(${HDF5_INCLUDE_DIRS}) -# if (HDF5_IS_PARALLEL HDF5_CONVERTER_ALLOWPARALLELHDF5) -# set (ENV{HDF5_PREFER_PARALLEL} true) -# set(HDF5_CONVERTER_HAS_PARALLEL_HDF5 Yes) -# list(APPEND HDF5_CONVERTER_DEFINES USEPARALLELHDF) -# if (HDF5_VERSION VERSION_GREATER "1.10.0" AND HDF5_CONVERTER_ALLOWCOMPRESSIONPARALLELHDF5) -# set(HDF5_CONVERTER_HAS_COMPRESSED_HDF5 Yes) -# list(APPEND HDF5_CONVERTER_DEFINES USEHDFCOMPRESSION) -# list(APPEND HDF5_CONVERTER_DEFINES PARALLELCOMPRESSIONACTIVE) -# endif() -# else() -# if (HDF5_CONVERTER_ALLOWCOMPRESSIONHDF5) -# set(HDF5_CONVERTER_HAS_COMPRESSED_HDF5 Yes) -# list(APPEND HDF5_CONVERTER_DEFINES USEHDFCOMPRESSION) -# endif() -# endif() -# endif() -# endmacro() - -FIND_PACKAGE(CFITSIO 3.000 REQUIRED) -FIND_PACKAGE(HDF5 REQUIRED COMPONENTS C) -INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR}) +# +# Options users can give on the command line via -D +# +macro(hdfconvert_option optname optdesc status) + option(HDFCONVERT_${optname} "${optdesc}" "${status}") +endmacro() + +# set default options +hdfconvert_option(USEMPI "Use MPI" OFF) +hdfconvert_option(USEOPENMP "Use OpenMP" ON) +hdfconvert_option(ALLOWCOMPRESSIONHDF5 "Attempt to include HDF5 compression support " ON) +hdfconvert_option(ALLOWPARALLELHDF5 "Attempt to include parallel HDF5 support " ON) +hdfconvert_option(ALLOWCOMPRESSIONPARALLELHDF5 "Attempt to include parallel HDF5 compression support " OFF) + +# find hdf5 macro where flags set if parallel, if particular version, if compression +macro(find_hdf5) + find_package(HDF5 REQUIRED COMPONENTS C) + if (HDF5_FOUND) + include_directories(${HDF5_INCLUDE_DIRS}) + if (HDF5_IS_PARALLEL AND HDFCONVERT_ALLOWPARALLELHDF5) + set (ENV{HDF5_PREFER_PARALLEL} true) + set(HDFCONVERT_HAS_PARALLEL_HDF5 Yes) + list(APPEND HDFCONVERT_DEFINES USEPARALLELHDF) + if (HDF5_VERSION VERSION_GREATER "1.10.0" AND HDFCONVERT_ALLOWCOMPRESSIONPARALLELHDF5) + set(HDFCONVERT_HAS_COMPRESSED_HDF5 Yes) + list(APPEND HDFCONVERT_DEFINES USEHDFCOMPRESSION) + list(APPEND HDFCONVERT_DEFINES PARALLELCOMPRESSIONACTIVE) + endif() + else() + if (HDFCONVERT_ALLOWCOMPRESSIONHDF5) + set(HDFCONVERT_HAS_COMPRESSED_HDF5 Yes) + list(APPEND HDFCONVERT_DEFINES USEHDFCOMPRESSION) + endif() + endif() + endif() +endmacro() + +# +# How we find MPI and set it up +# +macro(find_mpi) + find_package(MPI) + if (MPI_FOUND) + include_directories(${MPI_CXX_INCLUDE_PATH}) + list(APPEND HDF5_CONVERT_DEFINES USEMPI) + set(HDFCONVERT_HAS_MPI Yes) + endif() +endmacro() + +find_package(CFITSIO 3.000 REQUIRED) +set(HDFCONVERT_HAS_OPENMP No) find_package(OpenMP) +set(HDFCONVERT_HAS_COMPRESSED_HDF5 No) +set(HDFCONVERT_HAS_PARALLEL_HDF5 No) +find_hdf5() +set(HDFCONVERT_HAS_MPI No) +if (HDFCONVERT_MPI) + find_mpi() +endif() + if (OPENMP_FOUND) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + set (HDFCONVERT_USEOPENMP) endif() if (Verbose) @@ -46,6 +81,50 @@ if (Timer) ADD_DEFINITIONS(-D_TIMER_) endif() +# +# Tell the world what what we are doing +# +macro(hdf_convert_report feature) + + # Output feature name and underscore it in the next line + message("\n${feature}") + string(REGEX REPLACE "." "-" _underscores ${feature}) + message("${_underscores}\n") + + set(_args "${ARGN}") + list(LENGTH _args _nargs) + math(EXPR _nargs "${_nargs} - 1") + foreach(_idx RANGE 0 ${_nargs} 2) + + # Items in the list come with a message first, then the variable name + list(GET _args ${_idx} _msg) + math(EXPR _idx2 "${_idx} + 1") + list(GET _args ${_idx2} _varname) + + # We try to keep things up to 80 cols + string(LENGTH ${_msg} _len) + math(EXPR _nspaces "75 - ${_len}") + string(RANDOM LENGTH ${_nspaces} _spaces) + string(REGEX REPLACE "." " " _spaces "${_spaces}") + string(CONCAT _msg "${_msg}" ${_spaces}) + message(" ${_msg} ${HDFCONVERT_HAS_${_varname}}") + endforeach() +endmacro() + +message("\nHDF Converter successfully configured with the following settings:") +hdf_convert_report("HDF5 Settings" + "Compressed HDF5" COMPRESSED_HDF5 + "Parallel HDF5" PARALLEL_HDF5 +) +if (HDFCONVERT_HAS_COMPRESSED_HDF5 AND HDFCONVERT_HAS_PARALLEL_HDF5) + message("\n WARNING: Parallel Compression HDF5 active, use with caution as it is unstable!\n") +endif() +hdf_convert_report("Parallel settings" + "OpenMP" OPENMP + "MPI" MPI +) + + set(LINK_LIBS ${LINK_LIBS} cfitsio ${HDF5_LIBRARIES}) set(SOURCE_FILES From 14a956cd8c613f585a644e8ce087348d1280daf3 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 4 Aug 2020 11:40:41 +0800 Subject: [PATCH 10/13] Update for HDF5 1.12 compatibility --- HDF5Wrapper.cc | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc index c65f77c..4fe91e7 100644 --- a/HDF5Wrapper.cc +++ b/HDF5Wrapper.cc @@ -291,7 +291,13 @@ void H5OutputFile::_get_attribute(std::vector &ids, const std::vector &ids, const std::vector &ids, const std::vector &ids) H5O_info_t object_info; for (auto &id:ids) { + hid_t lapl_id = H5P_DEFAULT; +#if H5_VERSION_GE(1,12,0) + H5Oget_info(id, &object_info, lapl_id); +#else H5Oget_info(id, &object_info); +#endif if (object_info.type == H5O_TYPE_GROUP) { H5Gclose(id); } From 680ded823d2b61bd6c2abd10b572f967d906c077 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 4 Aug 2020 12:27:33 +0800 Subject: [PATCH 11/13] Update to HDF5 Wrapper Added functionality to write to existing data set --- HDF5Wrapper.cc | 80 +++++++++++++++++++++++++++++++++----------------- HDF5Wrapper.h | 54 +++++++++++++++++++++++++++++++--- 2 files changed, 103 insertions(+), 31 deletions(-) diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc index 4fe91e7..bc0e3fb 100644 --- a/HDF5Wrapper.cc +++ b/HDF5Wrapper.cc @@ -139,24 +139,16 @@ void H5OutputFile::_set_mpi_hyperslab(hid_t &dspace_id, hid_t &memspace_id, std::vector &mpi_hdf_dims_tot, bool flag_parallel, bool flag_hyperslab) { - if (flag_parallel) { - //then all threads create the same simple data space - //so the meta information is the same - if (flag_hyperslab) { - //allocate the space spanning the file - dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL); - //allocate the memory space - //allocate the memory space - memspace_id = H5Screate_simple(rank, dims.data(), NULL); - } - else { - dspace_id = H5Screate_simple(rank, dims.data(), NULL); - memspace_id = dspace_id; - } - } - else { - dspace_id = H5Screate_simple(rank, dims.data(), NULL); - memspace_id = dspace_id; + // if parallel and doing parallel hyperslab + // then all threads create the same simple data space + // so the meta information is the same + if (flag_parallel && flag_hyperslab) { + /// close the previously allocated space + H5Sclose(dspace_id); + //allocate the space spanning the file + dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL); + //allocate the memory space + memspace_id = H5Screate_simple(rank, dims.data(), NULL); } } @@ -571,11 +563,10 @@ hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id, // Create the dataspace where the data space might have a hyperslab // selection if using parallel hdf5 -#ifdef USEPARALLELHDF - _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); -#else dspace_id = H5Screate_simple(rank, dims.data(), NULL); memspace_id = dspace_id; +#ifdef USEPARALLELHDF + _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); #endif #ifdef USEHDFCOMPRESSION @@ -613,6 +604,42 @@ herr_t H5OutputFile::create_link(std::string orgname, std::string linkname, bool } } +//write dataset with hyperslab selection +void H5OutputFile::write_to_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, + bool flag_hyperslab, bool flag_collective) +{ + // Open the dataset + hid_t dspace_id, memspace_id, prop_id, dset_id; + herr_t ret; + prop_id = H5P_DEFAULT; + dset_id = H5Dopen(file_id, name.c_str(), H5P_DEFAULT); + // create simple data space + dspace_id = H5Screate_simple(rank, dims, NULL); + memspace_id = dspace_id; + ///\todo question of what to do in the case of a parallel data space regarding hyperslab + ///selection +#ifdef USEPARALLELHDF +#endif + ///\todo question regarding compression since data set could be created with + /// some compression or chunked data. Should already be set + + //set local hyperslab + if (!count.empty() && !start.empty()) { + ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL); + if (ret < 0) io_error(std::string("Failed to select hyperslab for writing to dataset: ")+name); + } + ///\todo question of what to do in terms of mpi transfer properties +#ifdef USEPARALLELHDF +#endif + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + if (memspace_id != dspace_id) H5Sclose(memspace_id); + H5Sclose(dspace_id); + H5Dclose(dset_id); +} void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, bool flag_parallel, bool flag_collective) @@ -685,6 +712,7 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v #endif hid_t dspace_id, dset_id, prop_id, memspace_id, ret; std::vector chunks(rank); + prop_id = H5P_DEFAULT; // Get HDF5 data type of the array in memory if (memtype_id == -1) { throw std::runtime_error("Write data set called with void pointer but no type info passed."); @@ -707,11 +735,10 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v ); // Create the dataspace -#ifdef USEPARALLELHDF - _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); -#else dspace_id = H5Screate_simple(rank, dims, NULL); memspace_id = dspace_id; +#ifdef USEPARALLELHDF + _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); #endif // Dataset creation properties @@ -799,12 +826,11 @@ void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, v flag_parallel ); + dspace_id = H5Screate_simple(rank, dims, NULL); + memspace_id = dspace_id; // Create the dataspace // #ifdef USEPARALLELHDF // _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); -// #else -// dspace_id = H5Screate_simple(rank, dims, NULL); -// memspace_id = dspace_id; // #endif // Dataset creation properties diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h index bbe3901..1856563 100644 --- a/HDF5Wrapper.h +++ b/HDF5Wrapper.h @@ -39,6 +39,8 @@ static inline hid_t hdf5_type_from_string(std::string dummy) #endif ///\name HDF class to manage writing information +///\todo need to look into whether one can open directly with +/// full path or must open groups explicitly. If latter, updated needed class H5OutputFile { @@ -65,6 +67,7 @@ class H5OutputFile /// parallel hdf5 mpi routines #ifdef USEPARALLELHDF + /// set the offset points for the mpi write void _set_mpi_dim_and_offset(MPI_Comm &comm, hsize_t rank, std::vector &dims, std::vector &dims_single, @@ -73,11 +76,12 @@ class H5OutputFile std::vector &mpi_hdf_dims_tot, bool flag_parallel, bool flag_first_dim_parallel ); + /// select the hyperslab void _set_mpi_hyperslab(hid_t &dspace_id, hid_t &memspace_id, hsize_t rank, std::vector &dims, std::vector &mpi_hdf_dims_tot, bool flag_parallel, bool flag_hyperslab); - + /// and set the transfer properties void _set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite, hid_t &dspace_id, hid_t &memspace_id, hsize_t rank, std::vector dims, std::vector dims_offset, @@ -286,14 +290,56 @@ class H5OutputFile /// writes to an existing data set /// with a hyperslab selection defined by count, start - void write_to_dataset(std::string name, hsize_t len, std::string data, - const std::vector& count, const std::vector& start, - bool flag_parallel = true, bool flag_collective = true); void write_to_dataset(std::string name, hsize_t len, void *data, const std::vector& count, const std::vector& start, hid_t memtype_id=-1, hid_t filetype_id=-1, bool flag_parallel = true, bool flag_first_dim_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); + template void write_to_dataset(std::string name, hsize_t len, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_to_dataset_nd(name, rank, dims, data, + count, start, + memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); + } + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_to_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + memtype_id = hdf5_type(T{}); + write_to_dataset_nd(name, rank, dims, (void*)data, + count, start, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); + } + template void write_to_dataset_nd(std::string name, std::vector dims, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + write_to_dataset_nd(name, dims.size(), dims.data(),(void*)data, + count, start, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); + } + //write dataset with hyperslab selection + void write_to_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); /// write 1D data sets of string or input data with defined data type /// without hyperslab selection, creates dataset, writes to it and closes From f50eaff18cb6909b1c0ccbadc57bb0006edc4d7b Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 4 Aug 2020 13:29:34 +0800 Subject: [PATCH 12/13] Update to use new HDF5 Wrapper --- MipMap.cc | 13 +++++-------- MipMap.h | 1 + Stats.cc | 23 ++++++++--------------- Stats.h | 1 + 4 files changed, 15 insertions(+), 23 deletions(-) diff --git a/MipMap.cc b/MipMap.cc index 0ae092a..cacb4da 100644 --- a/MipMap.cc +++ b/MipMap.cc @@ -30,11 +30,10 @@ void MipMap::createDataset(H5OutputFile &H5outputfile, std::string path, const s std::ostringstream mipMapName; mipMapName << "/MipMaps/DATA/DATA_XY_" << mip; - if (useChunks(datasetDims)) { - H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims, chunkDims); - } else { - H5outputfile.create_dataset(path+mipMapName.str(), H5T_NATIVE_FLOAT, datasetDims); - } + std::vector chunks = chunkDims; + datasetfullpath = path+mipMapName.str(); + if (~useChunks(datasetDims)) chunks.clear(); + H5outputfile.create_dataset(datasetfullpath, H5T_NATIVE_FLOAT, datasetDims, chunks); } void MipMap::createBuffers(std::vector& bufferDims) { @@ -59,10 +58,8 @@ void MipMap::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t cha int N = datasetDims.size(); std::vector count = trimAxes({1, depth, height, width}, N); std::vector start = trimAxes({stokesOffset, channelOffset, 0, 0}, N); - // writeHdf5Data(dataset, vals, bufferDims, count, start); - ///\todo need to figure out if I use the name or the id from which I get the name - // H5outputfile.write_dataset_nd(dataset, bufferDims, vals, count, start); + H5outputfile.write_to_dataset_nd(datasetfullpath, bufferDims, vals, count, start); } void MipMap::resetBuffers() { diff --git a/MipMap.h b/MipMap.h index d4835de..d949070 100644 --- a/MipMap.h +++ b/MipMap.h @@ -39,6 +39,7 @@ struct MipMap { // H5::DataSet dataset; hid_t dset_id; + std::string datasetfullpath; std::vector bufferDims; hsize_t bufferSize; diff --git a/Stats.cc b/Stats.cc index 7c1ef68..ef625c0 100644 --- a/Stats.cc +++ b/Stats.cc @@ -44,9 +44,11 @@ void Stats::createDatasets(H5OutputFile &H5outputfile, std::string pathname, std // } for (auto i = 0; i& b // writeHdf5Data(sumDset, sums, basicBufferDims, count, start); // writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start); // writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start); - - - ///\todo need to get the names of the ids stored in datasetids - ///or do I just use the names? - //??? - for (auto i = 0; i& basicBufferDims, const std::vector& count, const std::vector& start) { void Stats::writeHistogram(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { // writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start); - //??? - // H5outputfile.write_dataset_nd(histDset, extend(basicBufferDims, {numBins}, histograms, count, start); + H5outputfile.write_to_dataset_nd(datasetfullnames[5], extend(basicBufferDims, {numBins}), histograms, count, start); } diff --git a/Stats.h b/Stats.h index 6a33002..a32fc65 100644 --- a/Stats.h +++ b/Stats.h @@ -120,6 +120,7 @@ struct Stats { std::vector datasetnames = {"/MIN", "/MAX", "/SUM", "/SUM_SQ", "/NAN_COUNT"}; std::vector datasettypes = {H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_LLONG}; std::vector datasetids; + std::vector datasetfullnames; hid_t minDset; hid_t maxDset; From fbe8f57b838e38a5d24b565011207f59faaf0b30 Mon Sep 17 00:00:00 2001 From: Pascal Date: Tue, 4 Aug 2020 15:39:13 +0800 Subject: [PATCH 13/13] Update for backward compatibility --- HDF5Wrapper.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc index bc0e3fb..3dc3cca 100644 --- a/HDF5Wrapper.cc +++ b/HDF5Wrapper.cc @@ -283,9 +283,9 @@ void H5OutputFile::_get_attribute(std::vector &ids, const std::vector &ids, const std::vector &ids, const std::vector