diff --git a/.gitignore b/.gitignore index 87140a3..e62ad2f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ cmake-build-*/ .idea/ build/ +*.h.gch diff --git a/CMakeLists.txt b/CMakeLists.txt index 409fe63..8b73644 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,17 +1,76 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.13) project(hdf_convert) set(default_build_type "Release") - +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") set(CMAKE_CXX_STANDARD 11) -FIND_PACKAGE(HDF5 REQUIRED COMPONENTS CXX) -INCLUDE_DIRECTORIES(${HDF5_INCLUDE_DIR}) + +# +# Options users can give on the command line via -D +# +macro(hdfconvert_option optname optdesc status) + option(HDFCONVERT_${optname} "${optdesc}" "${status}") +endmacro() + +# set default options +hdfconvert_option(USEMPI "Use MPI" OFF) +hdfconvert_option(USEOPENMP "Use OpenMP" ON) +hdfconvert_option(ALLOWCOMPRESSIONHDF5 "Attempt to include HDF5 compression support " ON) +hdfconvert_option(ALLOWPARALLELHDF5 "Attempt to include parallel HDF5 support " ON) +hdfconvert_option(ALLOWCOMPRESSIONPARALLELHDF5 "Attempt to include parallel HDF5 compression support " OFF) + +# find hdf5 macro where flags set if parallel, if particular version, if compression +macro(find_hdf5) + find_package(HDF5 REQUIRED COMPONENTS C) + if (HDF5_FOUND) + include_directories(${HDF5_INCLUDE_DIRS}) + if (HDF5_IS_PARALLEL AND HDFCONVERT_ALLOWPARALLELHDF5) + set (ENV{HDF5_PREFER_PARALLEL} true) + set(HDFCONVERT_HAS_PARALLEL_HDF5 Yes) + list(APPEND HDFCONVERT_DEFINES USEPARALLELHDF) + if (HDF5_VERSION VERSION_GREATER "1.10.0" AND HDFCONVERT_ALLOWCOMPRESSIONPARALLELHDF5) + set(HDFCONVERT_HAS_COMPRESSED_HDF5 Yes) + list(APPEND HDFCONVERT_DEFINES USEHDFCOMPRESSION) + list(APPEND HDFCONVERT_DEFINES PARALLELCOMPRESSIONACTIVE) + endif() + else() + if (HDFCONVERT_ALLOWCOMPRESSIONHDF5) + set(HDFCONVERT_HAS_COMPRESSED_HDF5 Yes) + list(APPEND HDFCONVERT_DEFINES USEHDFCOMPRESSION) + endif() + endif() + endif() +endmacro() + +# +# How we find MPI and set it up +# +macro(find_mpi) + find_package(MPI) + if (MPI_FOUND) + include_directories(${MPI_CXX_INCLUDE_PATH}) + list(APPEND HDF5_CONVERT_DEFINES USEMPI) + set(HDFCONVERT_HAS_MPI Yes) + endif() +endmacro() + +find_package(CFITSIO 3.000 REQUIRED) +set(HDFCONVERT_HAS_OPENMP No) find_package(OpenMP) +set(HDFCONVERT_HAS_COMPRESSED_HDF5 No) +set(HDFCONVERT_HAS_PARALLEL_HDF5 No) +find_hdf5() +set(HDFCONVERT_HAS_MPI No) +if (HDFCONVERT_MPI) + find_mpi() +endif() + if (OPENMP_FOUND) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") + set (HDFCONVERT_USEOPENMP) endif() if (Verbose) @@ -22,17 +81,63 @@ if (Timer) ADD_DEFINITIONS(-D_TIMER_) endif() +# +# Tell the world what what we are doing +# +macro(hdf_convert_report feature) + + # Output feature name and underscore it in the next line + message("\n${feature}") + string(REGEX REPLACE "." "-" _underscores ${feature}) + message("${_underscores}\n") + + set(_args "${ARGN}") + list(LENGTH _args _nargs) + math(EXPR _nargs "${_nargs} - 1") + foreach(_idx RANGE 0 ${_nargs} 2) + + # Items in the list come with a message first, then the variable name + list(GET _args ${_idx} _msg) + math(EXPR _idx2 "${_idx} + 1") + list(GET _args ${_idx2} _varname) + + # We try to keep things up to 80 cols + string(LENGTH ${_msg} _len) + math(EXPR _nspaces "75 - ${_len}") + string(RANDOM LENGTH ${_nspaces} _spaces) + string(REGEX REPLACE "." " " _spaces "${_spaces}") + string(CONCAT _msg "${_msg}" ${_spaces}) + message(" ${_msg} ${HDFCONVERT_HAS_${_varname}}") + endforeach() +endmacro() + +message("\nHDF Converter successfully configured with the following settings:") +hdf_convert_report("HDF5 Settings" + "Compressed HDF5" COMPRESSED_HDF5 + "Parallel HDF5" PARALLEL_HDF5 +) +if (HDFCONVERT_HAS_COMPRESSED_HDF5 AND HDFCONVERT_HAS_PARALLEL_HDF5) + message("\n WARNING: Parallel Compression HDF5 active, use with caution as it is unstable!\n") +endif() +hdf_convert_report("Parallel settings" + "OpenMP" OPENMP + "MPI" MPI +) + + set(LINK_LIBS ${LINK_LIBS} cfitsio ${HDF5_LIBRARIES}) set(SOURCE_FILES ${SOURCE_FILES} - main.cc - Stats.cc - MipMap.cc + HDF5Wrapper.cc Converter.cc FastConverter.cc SlowConverter.cc - Util.cc) + Stats.cc + MipMap.cc + Util.cc + main.cc + ) add_executable(hdf_convert ${SOURCE_FILES}) target_link_libraries(hdf_convert ${LINK_LIBS}) diff --git a/Converter.cc b/Converter.cc index 9fca065..7aaa0be 100644 --- a/Converter.cc +++ b/Converter.cc @@ -1,49 +1,51 @@ #include "Converter.h" -Converter::Converter(std::string inputFileName, std::string outputFileName, bool progress) : timer(), progress(progress) { +Converter::Converter(std::string inputFileName, std::string outputFileName, bool progress) : + timer(), progress(progress) +{ TIMER(timer.start("Setup");); - + openFitsFile(&inputFilePtr, inputFileName); - + long dims[4]; - + getFitsDims(inputFilePtr, N, dims); - + stokes = N == 4 ? dims[3] : 1; depth = N >= 3 ? dims[2] : 1; height = dims[1]; width = dims[0]; - + swizzledName = N == 3 ? "ZYX" : "ZYXW"; - + standardDims = trimAxes({stokes, depth, height, width}, N); tileDims = trimAxes({1, 1, TILE_SIZE, TILE_SIZE}, N); - + numBins = int(std::max(std::sqrt(width * height), 2.0)); - + // STATS OBJECTS auto statsXYDims = trimAxes({stokes, depth}, N - 2); statsXY = Stats(statsXYDims, numBins); - + if (depth > 1) { swizzledDims = trimAxes({stokes, width, height, depth}, N); statsZ = Stats(trimAxes({stokes, height, width}, N - 1)); auto statsXYZDims = trimAxes({stokes}, N - 3); statsXYZ = Stats(statsXYZDims, numBins); } - + // MIPMAPS mipMaps = MipMaps(standardDims, tileDims); - + // Prepare output file this->outputFileName = outputFileName; - tempOutputFileName = outputFileName + ".tmp"; + tempOutputFileName = outputFileName + ".tmp"; } Converter::~Converter() { // TODO this is probably unnecessary; the file object destructor should close the file properly. - outputFile.close(); + H5outputfile.close(); } std::unique_ptr Converter::getConverter(std::string inputFileName, std::string outputFileName, bool slow, bool progress) { @@ -62,77 +64,102 @@ void Converter::reportMemoryUsage() { // implemented in subclasses } + void Converter::convert() { // CREATE OUTPUT FILE - + // TODO dataset variables should be local and passed into the copy function? - - outputFile = H5::H5File(tempOutputFileName, H5F_ACC_TRUNC); - outputGroup = outputFile.createGroup("0"); - + +// outputFile = H5::H5File(tempOutputFileName, H5F_ACC_TRUNC); +// outputGroup = outputFile.createGroup("0"); + + H5outputfile.create(tempOutputFileName, H5F_ACC_TRUNC); + std::string parentpath("0"); + hid_t gid = H5outputfile.create_group(parentpath); + std::vector chunkDims; if (useChunks(standardDims)) { chunkDims = tileDims; } - - H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); - floatType.setOrder(H5T_ORDER_LE); - createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims); - - statsXY.createDatasets(outputGroup, "XY"); + +// H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); +// floatType.setOrder(H5T_ORDER_LE); +// createHdf5Dataset(standardDataSet, outputGroup, "DATA", floatType, standardDims, chunkDims); +// statsXY.createDatasets(outputGroup, "XY"); + standardDataSet = parentpath + std::string("/DATA"); + standardDataSetID = H5outputfile.create_dataset(standardDataSet, H5T_NATIVE_FLOAT, + standardDims, chunkDims); + statsXY.createDatasets(H5outputfile, parentpath, "XY"); if (depth > 1) { - statsXYZ.createDatasets(outputGroup, "XYZ"); - statsZ.createDatasets(outputGroup, "Z"); - - auto swizzledGroup = outputGroup.createGroup("SwizzledData"); +// statsXYZ.createDatasets(outputGroup, "XYZ"); +// statsZ.createDatasets(outputGroup, "Z"); +// auto swizzledGroup = outputGroup.createGroup("SwizzledData"); + // We use this name in papers because it sounds more serious. :) +// outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData"); +// createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims); + statsXYZ.createDatasets(H5outputfile, parentpath, "XYZ"); + statsZ.createDatasets(H5outputfile, parentpath, "Z"); + std::string swizzledpath = parentpath + "/SwizzledData"; + std::string linkpath = parentpath + "/PermutedData"; + auto swizzledGroup = H5outputfile.create_group(swizzledpath); // We use this name in papers because it sounds more serious. :) - outputGroup.link(H5L_TYPE_HARD, "SwizzledData", "PermutedData"); - createHdf5Dataset(swizzledDataSet, swizzledGroup, swizzledName, floatType, swizzledDims); + H5outputfile.create_link(swizzledpath, linkpath); + swizzledDataSet = swizzledpath + "/" + swizzledName; + swizzledDataSetID = H5outputfile.create_dataset(swizzledDataSet, H5T_NATIVE_FLOAT, swizzledDims); } - - mipMaps.createDatasets(outputGroup); - + + /// mipMaps.createDatasets(outputGroup); + mipMaps.createDatasets(H5outputfile, parentpath); + // COPY HEADERS - + TIMER(timer.start("Headers");); - - writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION)); - writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER)); - writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION)); + +// writeHdf5Attribute(outputGroup, "SCHEMA_VERSION", std::string(SCHEMA_VERSION)); +// writeHdf5Attribute(outputGroup, "HDF5_CONVERTER", std::string(HDF5_CONVERTER)); +// writeHdf5Attribute(outputGroup, "HDF5_CONVERTER_VERSION", std::string(HDF5_CONVERTER_VERSION)); + H5outputfile.write_attribute(parentpath, std::string("SCHEMA_VERSION"), std::string(SCHEMA_VERSION)); + H5outputfile.write_attribute(parentpath, std::string("HDF5_CONVERTER"), std::string(HDF5_CONVERTER)); + H5outputfile.write_attribute(parentpath, std::string("HDF5_CONVERTER_VERSION"), std::string(HDF5_CONVERTER_VERSION)); + int numAttributes; readFitsHeader(inputFilePtr, numAttributes); - + // IMPORTANT: This is 1-indexed! - for (int i = 1; i <= numAttributes; i++) { + for (int i = 1; i <= numAttributes; i++) { std::string attributeName; std::string attributeValue; readFitsAttribute(inputFilePtr, i, attributeName, attributeValue); - + if (attributeName.empty() || attributeName.find("COMMENT") == 0 || attributeName.find("HISTORY") == 0) { // TODO we should actually do something about these } else { - if (outputGroup.attrExists(attributeName)) { + // if (outputGroup.attrExists(attributeName)) { + if (H5outputfile.exists_attribute(parentpath, attributeName)) { std::cout << "Warning: Skipping duplicate attribute '" << attributeName << "'" << std::endl; } else { bool parsingFailure(false); - + if (attributeValue.length() >= 2 && attributeValue.find('\'') == 0 && attributeValue.find_last_of('\'') == attributeValue.length() - 1) { // STRING std::string attributeValueStr; readFitsStringAttribute(inputFilePtr, attributeName, attributeValueStr); - writeHdf5Attribute(outputGroup, attributeName, attributeValueStr); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueStr); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueStr); } else if (attributeValue == "T" || attributeValue == "F") { // BOOLEAN bool attributeValueBool = (attributeValue == "T"); - writeHdf5Attribute(outputGroup, attributeName, attributeValueBool); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueBool); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueBool); } else if (attributeValue.find('.') != std::string::npos) { // TRY TO PARSE AS DOUBLE try { double attributeValueDouble = std::stod(attributeValue); - writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueDouble); } catch (const std::invalid_argument& ia) { std::cout << "Warning: could not parse attribute '" << attributeName << "' as a float." << std::endl; parsingFailure = true; @@ -140,8 +167,9 @@ void Converter::convert() { // Special handling for subnormal numbers long double attributeValueLongDouble = std::stold(attributeValue); double attributeValueDouble = (double) attributeValueLongDouble; - writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); - +// writeHdf5Attribute(outputGroup, attributeName, attributeValueDouble); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueDouble); + std::ostringstream ostream; ostream.precision(13); ostream << attributeValueDouble; @@ -149,7 +177,7 @@ void Converter::convert() { std::string round_trip(ostream.str()); transform(original.begin(), original.end(), original.begin(), ::toupper); transform(round_trip.begin(), round_trip.end(), round_trip.begin(), ::toupper); - + if (original != round_trip) { std::cout << "Warning: the value of attribute '" << attributeName << "' is not representable as a normalised double precision floating point number. Some precision has been lost.\nOriginal string representation:\n'" << original << "'\nFinal string representation:\n'" << round_trip << "'" << std::endl; } @@ -158,27 +186,29 @@ void Converter::convert() { // TRY TO PARSE AS INTEGER try { int64_t attributeValueInt = std::stoi(attributeValue); - writeHdf5Attribute(outputGroup, attributeName, attributeValueInt); +// writeHdf5Attribute(outputGroup, attributeName, attributeValueInt); + H5outputfile.write_attribute(parentpath, attributeName, attributeValueInt); } catch (const std::invalid_argument& ia) { std::cout << "Warning: could not parse attribute '" << attributeName << "' as an integer." << std::endl; parsingFailure = true; } } - + if (parsingFailure) { // FALL BACK TO STRING - writeHdf5Attribute(outputGroup, attributeName, attributeValue); +// writeHdf5Attribute(outputGroup, attributeName, attributeValue); + H5outputfile.write_attribute(parentpath, attributeName, attributeValue); } } } } - + // MAIN CONVERSION AND CALCULATION FUNCTION copyAndCalculate(); - + TIMER(timer.print(product(standardDims));); - + // Rename from temp file rename(tempOutputFileName.c_str(), outputFileName.c_str()); } diff --git a/Converter.h b/Converter.h index 223d22e..a24f6d8 100644 --- a/Converter.h +++ b/Converter.h @@ -2,77 +2,91 @@ #define __IMAGE_H #include "common.h" +#include "Util.h" +#include "HDF5Wrapper.h" #include "Stats.h" #include "MipMap.h" #include "Timer.h" -#include "Util.h" class Converter { public: - Converter() {} + Converter(){}; Converter(std::string inputFileName, std::string outputFileName, bool progress); ~Converter(); - + static std::unique_ptr getConverter(std::string inputFileName, std::string outputFileName, bool slow, bool progress); void convert(); virtual void reportMemoryUsage(); - + + H5OutputFile H5outputfile; + protected: virtual void copyAndCalculate(); - + Timer timer; bool progress; - + std::string tempOutputFileName; std::string outputFileName; fitsfile* inputFilePtr; - + // Main HDF5 objects - H5::H5File outputFile; - H5::Group outputGroup; - H5::DataSet standardDataSet; - H5::DataSet swizzledDataSet; - +// H5::H5File outputFile; +// H5::Group outputGroup; +// H5::DataSet standardDataSet; +// H5::DataSet swizzledDataSet; + std::string standardDataSet, swizzledDataSet; + hid_t standardDataSetID, swizzledDataSetID; + + float* standardCube; float* rotatedCube; - + // Stats Stats statsXY; Stats statsZ; Stats statsXYZ; - + // MipMaps MipMaps mipMaps; - + int N; hsize_t stokes, depth, height, width; hsize_t numBins; - + // Dataset dimensions - + std::vector standardDims; std::vector swizzledDims; std::vector tileDims; - + std::string swizzledName; }; +/*! +Converter class that makes use of OpenMP parallelisation +when calculating statistics and histograms +*/ class FastConverter : public Converter { public: FastConverter(std::string inputFileName, std::string outputFileName, bool progress); void reportMemoryUsage() override; - + protected: void copyAndCalculate() override; }; +/*! +Converter class that has minimmal memory overhead but has no parallelisation +when calculating statistics and histograms +*/ class SlowConverter : public Converter { public: SlowConverter(std::string inputFileName, std::string outputFileName, bool progress); void reportMemoryUsage() override; - + protected: void copyAndCalculate() override; }; diff --git a/FastConverter.cc b/FastConverter.cc index b32f1ed..7c41454 100644 --- a/FastConverter.cc +++ b/FastConverter.cc @@ -1,7 +1,8 @@ #include "Converter.h" // TODO do we need these? -FastConverter::FastConverter(std::string inputFileName, std::string outputFileName, bool progress) : Converter(inputFileName, outputFileName, progress) {} +FastConverter::FastConverter(std::string inputFileName, std::string outputFileName, bool progress) : +Converter(inputFileName, outputFileName, progress) {} void FastConverter::reportMemoryUsage() { std::unordered_map sizes; @@ -9,22 +10,22 @@ void FastConverter::reportMemoryUsage() { sizes["Main dataset"] = depth * height * width * sizeof(float); sizes["Mipmaps"] = MipMaps::size(standardDims, {depth, height, width}); sizes["XY stats"] = Stats::size({depth}, numBins); - + if (depth > 1) { sizes["Rotation"] = sizes["Main dataset"]; sizes["XYZ stats"] = Stats::size({}, numBins, depth); sizes["Z stats"] = Stats::size({height, width}); } - + hsize_t total(0); - + std::cout << "APPROXIMATE MEMORY REQUIREMENTS:" << std::endl; - + for (auto& kv : sizes) { std::cout << kv.first << ":\t" << kv.second * 1e-9 << " GB" << std::endl; total += kv.second; } - + std::string note = ""; if (depth > 1) { total -= std::min(sizes["Mipmaps"], sizes["Rotation"]); @@ -37,22 +38,22 @@ void FastConverter::reportMemoryUsage() { void FastConverter::copyAndCalculate() { const hsize_t pixelProgressStride = std::max((hsize_t)1, (hsize_t)(width * height / 100)); const hsize_t channelProgressStride = std::max((hsize_t)1, (hsize_t)(depth / 100)); - + TIMER(timer.start("Allocate");); - + // Process one stokes at a time hsize_t cubeSize = depth * height * width; standardCube = new float[cubeSize]; - + statsXY.createBuffers({depth}); - + if (depth > 1) { statsXYZ.createBuffers({}, depth); statsZ.createBuffers({height, width}); } - + mipMaps.createBuffers({depth, height, width}); - + std::string timerLabelXYRotation = depth > 1 ? "XY statistics and rotation" : "XY statistics"; for (unsigned int currentStokes = 0; currentStokes < stokes; currentStokes++) { @@ -63,48 +64,48 @@ void FastConverter::copyAndCalculate() { TIMER(timer.start("Read");); DEBUG(std::cout << "+ Reading main dataset..." << std::flush;); readFitsData(inputFilePtr, 0, currentStokes, cubeSize, standardCube); - + // We have to allocate the swizzled cube for each stokes because we free it to make room for mipmaps if (depth > 1) { TIMER(timer.start("Allocate");); rotatedCube = new float[cubeSize]; } - + DEBUG(std::cout << " " << timerLabelXYRotation << "..." << std::flush;); PROGRESS("\tMain loop\t"); TIMER(timer.start(timerLabelXYRotation);); // First loop calculates stats for each XY slice and rotates the dataset - + #pragma omp parallel for for (hsize_t i = 0; i < depth; i++) { PROGRESS_DECIMATED(i, channelProgressStride, "|"); StatsCounter counterXY; - + auto& indexXY = i; std::function accumulate; - + auto lazy_accumulate = [&] (float val) { counterXY.accumulateFiniteLazy(val); }; - + auto first_accumulate = [&] (float val) { counterXY.accumulateFiniteLazyFirst(val); accumulate = lazy_accumulate; }; - + accumulate = first_accumulate; - + for (hsize_t j = 0; j < height; j++) { for (hsize_t k = 0; k < width; k++) { auto sourceIndex = k + width * j + (height * width) * i; auto destIndex = i + depth * j + (height * depth) * k; auto& val = standardCube[sourceIndex]; - + if (depth > 1) { rotatedCube[destIndex] = val; } - + // Accumulate XY stats if (std::isfinite(val)) { accumulate(val); @@ -113,11 +114,11 @@ void FastConverter::copyAndCalculate() { } } } - + // Final correction of XY min and max statsXY.copyStatsFromCounter(indexXY, height * width, counterXY); } - + PROGRESS(std::endl); if (depth > 1) { @@ -125,7 +126,7 @@ void FastConverter::copyAndCalculate() { DEBUG(std::cout << " XYZ statistics..." << std::flush;); PROGRESS("\tXYZ stats" << std::endl); TIMER(timer.start("XYZ statistics");); - + StatsCounter counterXYZ; for (hsize_t i = 0; i < depth; i++) { @@ -136,7 +137,7 @@ void FastConverter::copyAndCalculate() { statsXYZ.copyStatsFromCounter(0, depth * height * width, counterXYZ); // Second loop calculates stats for each Z profile (i.e. average/min/max XY slices) - + DEBUG(std::cout << " Z statistics... " << std::flush;); PROGRESS("\tZ stats\t\t"); TIMER(timer.start("Z statistics");); @@ -145,10 +146,10 @@ void FastConverter::copyAndCalculate() { for (hsize_t j = 0; j < height; j++) { for (hsize_t k = 0; k < width; k++) { StatsCounter counterZ; - + auto indexZ = k + j * width; PROGRESS_DECIMATED(indexZ, pixelProgressStride, "."); - + for (hsize_t i = 0; i < depth; i++) { auto sourceIndex = k + width * j + (height * width) * i; auto& val = standardCube[sourceIndex]; @@ -160,7 +161,7 @@ void FastConverter::copyAndCalculate() { counterZ.accumulateNonFinite(); } } - + statsZ.copyStatsFromCounter(indexZ, depth, counterZ); } } @@ -169,62 +170,62 @@ void FastConverter::copyAndCalculate() { } // Third loop handles histograms - + DEBUG(std::cout << " Histograms..." << std::flush;); PROGRESS("\tHistograms\t"); TIMER(timer.start("Histograms");); - + double cubeMin; double cubeMax; double cubeRange; bool cubeHist(false); - + if (depth > 1) { cubeMin = statsXYZ.minVals[0]; cubeMax = statsXYZ.maxVals[0]; cubeRange = cubeMax - cubeMin; cubeHist = std::isfinite(cubeMin) && std::isfinite(cubeMax) && cubeRange > 0; } - + statsXY.clearHistogramBuffers(); statsXYZ.clearHistogramBuffers(); #pragma omp parallel for for (hsize_t i = 0; i < depth; i++) { PROGRESS_DECIMATED(i, channelProgressStride, "|"); - + auto& indexXY = i; double chanMin = statsXY.minVals[indexXY]; double chanMax = statsXY.maxVals[indexXY]; double chanRange = chanMax - chanMin; - + bool chanHist(std::isfinite(chanMin) && std::isfinite(chanMax) && chanRange > 0); - + if (!chanHist && !cubeHist) { continue; // skip the loop entirely } - + auto doChannelHistogram = [&] (float val) { // XY histogram statsXY.accumulateHistogram(val, chanMin, chanRange, i); }; - + auto doCubeHistogram = [&] (float val) { // Partial XYZ histogram statsXYZ.accumulatePartialHistogram(val, cubeMin, cubeRange, i); }; - + auto doNothing = [&] (float val) { UNUSED(val); }; - + std::function channelHistogramFunc = doChannelHistogram; std::function cubeHistogramFunc = doCubeHistogram; - + if (!chanHist) { channelHistogramFunc = doNothing; } - + if (!cubeHist) { cubeHistogramFunc = doNothing; } @@ -238,46 +239,50 @@ void FastConverter::copyAndCalculate() { } } // end of XY loop } // end of parallel Z loop - + if (depth > 1) { // Consolidate partial XYZ histograms into final histogram statsXYZ.consolidatePartialHistogram(); } - + PROGRESS(std::endl); DEBUG(std::cout << " Writing main and rotated datasets... " << std::flush;); PROGRESS("\tWrite data" << std::endl); TIMER(timer.start("Write");); - + std::vector memDims = {depth, height, width}; std::vector count = trimAxes({1, depth, height, width}, N); std::vector start = trimAxes({currentStokes, 0, 0, 0}, N); - writeHdf5Data(standardDataSet, standardCube, memDims, count, start); - +// writeHdf5Data(standardDataSet, standardCube, memDims, count, start); + H5outputfile.write_dataset_nd(standardDataSet, + memDims, standardCube, count, start); + if (depth > 1) { // This all technically worked if we reused the standard filespace and memspace // But it's probably not a good idea to rely on two incorrect values cancelling each other out std::vector swizzledCount = trimAxes({1, width, height, depth}, N); std::vector swizzledMemDims = {width, height, depth}; - writeHdf5Data(swizzledDataSet, rotatedCube, swizzledMemDims, swizzledCount, start); +// writeHdf5Data(swizzledDataSet, rotatedCube, swizzledMemDims, swizzledCount, start); + H5outputfile.write_dataset_nd(swizzledDataSet, + swizzledMemDims, rotatedCube, swizzledCount, start); } // After writing and before mipmaps, we free the swizzled memory. We allocate it again next Stokes. if (depth > 1) { DEBUG(std::cout << " Freeing memory from rotated dataset..." << std::flush;); TIMER(timer.start("Free");); - + delete[] rotatedCube; } - + // Fourth loop handles mipmaps - + // In the fast algorithm, we keep one Stokes of mipmaps in memory at once and parallelise by channel DEBUG(std::cout << " Mipmaps..." << std::endl;); PROGRESS("\tMipmaps\t\t"); TIMER(timer.start("Mipmaps");); - + #pragma omp parallel for for (hsize_t c = 0; c < depth; c++) { PROGRESS_DECIMATED(c, channelProgressStride, "|"); @@ -292,33 +297,33 @@ void FastConverter::copyAndCalculate() { } } // end of mipmap loop PROGRESS(std::endl); - + // Final mipmap calculation mipMaps.calculate(); - + TIMER(timer.start("Write");); PROGRESS("\tWrite stats & mipmaps" << std::endl); - + // Write the mipmaps - mipMaps.write(currentStokes, 0); - - // Write the statistics - statsXY.write({1, depth}, {currentStokes, 0}); - + mipMaps.write(H5outputfile, currentStokes, 0); + + // Write the statistics + statsXY.write(H5outputfile,{1, depth}, {currentStokes, 0}); + if (depth > 1) { - statsXYZ.write({1}, {currentStokes}); - statsZ.write({1, height, width}, {currentStokes, 0, 0}); + statsXYZ.write(H5outputfile, {1}, {currentStokes}); + statsZ.write(H5outputfile, {1, height, width}, {currentStokes, 0, 0}); } - + // Clear the mipmaps before the next Stokes TIMER(timer.start("Mipmaps");); mipMaps.resetBuffers(); - + } // end of Stokes loop - + // Free memory DEBUG(std::cout << "Freeing memory from main dataset... " << std::endl;); TIMER(timer.start("Free");); - + delete[] standardCube; } diff --git a/HDF5Wrapper.cc b/HDF5Wrapper.cc new file mode 100644 index 0000000..3dc3cca --- /dev/null +++ b/HDF5Wrapper.cc @@ -0,0 +1,872 @@ +#include "HDF5Wrapper.h" + +// Constructor +H5OutputFile::H5OutputFile() +{ + file_id = -1; +#ifdef USEPARALLELHDF + parallel_access_id = -1; +#endif +} + +// Destructor closes the file if it's open +H5OutputFile::~H5OutputFile() +{ + if(file_id >= 0) close(); +} + +void H5OutputFile::create(std::string filename, hid_t flag, + int taskID, bool iparallelopen) +{ + if(file_id >= 0) io_error("Attempted to create file when already open!"); +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; + if (iparallelopen && taskID ==-1) { + parallel_access_id = H5Pcreate (H5P_FILE_ACCESS); + if (parallel_access_id < 0) io_error("Parallel access creation failed"); + herr_t ret = H5Pset_fapl_mpio(parallel_access_id, comm, info); + if (ret < 0) io_error("Parallel access failed"); + // create the file collectively + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, parallel_access_id); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); + ret = H5Pclose(parallel_access_id); + if (ret < 0) io_error("Parallel release failed"); + parallel_access_id = -1; + } + else { + if (taskID <0 || taskID > NProcsWrite) io_error(std::string("MPI Task ID asked to create file out of range. Task ID is ")+to_std::string(taskID)); + if (ThisWriteTask == taskID) { + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, H5P_DEFAULT); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); + parallel_access_id = -1; + } + else { + parallel_access_id = -2; + } + MPI_Barrier(comm); + } +#else + file_id = H5Fcreate(filename.c_str(), flag, H5P_DEFAULT, H5P_DEFAULT); + if(file_id < 0)io_error(std::string("Failed to create output file: ")+filename); +#endif + +} + +void H5OutputFile::append(std::string filename, hid_t flag, + int taskID, bool iparallelopen) +{ + if(file_id >= 0)io_error("Attempted to open and append to file when already open!"); +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; + if (iparallelopen && taskID ==-1) { + parallel_access_id = H5Pcreate (H5P_FILE_ACCESS); + if (parallel_access_id < 0) io_error("Parallel access creation failed"); + herr_t ret = H5Pset_fapl_mpio(parallel_access_id, comm, info); + if (ret < 0) io_error("Parallel access failed"); + // create the file collectively + file_id = H5Fopen(filename.c_str(), flag, parallel_access_id); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); + ret = H5Pclose(parallel_access_id); + if (ret < 0) io_error("Parallel release failed"); + parallel_access_id = -1; + } + else { + if (taskID <0 || taskID > NProcsWrite) io_error(std::string("MPI Task ID asked to create file out of range. Task ID is ")+to_std::string(taskID)); + if (ThisWriteTask == taskID) { + file_id = H5Fopen(filename.c_str(),flag, H5P_DEFAULT); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); + parallel_access_id = -1; + } + else { + parallel_access_id = -2; + } + MPI_Barrier(comm); + } +#else + file_id = H5Fopen(filename.c_str(), flag, H5P_DEFAULT); + if (file_id < 0) io_error(std::string("Failed to create output file: ")+filename); +#endif +} + +// Close the file +void H5OutputFile::close() +{ +#ifdef USEPARALLELHDF + if(file_id < 0 && parallel_access_id == -1) io_error("Attempted to close file which is not open!"); + if (parallel_access_id == -1) H5Fclose(file_id); +#else + if(file_id < 0) io_error("Attempted to close file which is not open!"); + H5Fclose(file_id); +#endif + file_id = -1; +#ifdef USEPARALLELHDF + parallel_access_id = -1; +#endif +} + +#ifdef USEPARALLELHDF +void H5OutputFile::_set_mpi_dim_and_offset(MPI_Comm &comm, + hsize_t rank, std::vector &dims, + std::vector &dims_single, + std::vector &dims_offset + std::vector &mpi_hdf_dims, + std::vector &mpi_hdf_dims_tot, + bool flag_parallel, bool flag_first_dim_parallel + ) +{ + if (!flag_parallel) return; + //if parallel hdf5 get the full extent of the data + //this bit of code communicating information can probably be done elsewhere + //minimize number of mpi communications + for (auto i=0;i 0) continue; + for (auto j=1;j<=ThisWriteTask;j++) { + dims_offset[i] += mpi_hdf_dims[i*NProcs+j-1]; + } + } + if (flag_first_dim_parallel && rank > 1) { + for (auto i=1; i &dims, + std::vector &mpi_hdf_dims_tot, + bool flag_parallel, bool flag_hyperslab) +{ + // if parallel and doing parallel hyperslab + // then all threads create the same simple data space + // so the meta information is the same + if (flag_parallel && flag_hyperslab) { + /// close the previously allocated space + H5Sclose(dspace_id); + //allocate the space spanning the file + dspace_id = H5Screate_simple(rank, mpi_hdf_dims_tot.data(), NULL); + //allocate the memory space + memspace_id = H5Screate_simple(rank, dims.data(), NULL); + } +} + +void H5OutputFile::_set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite, + hid_t &dspace_id, hid_t &memspace_id, + hsize_t rank, std::vector dims, std::vector dims_offset, + bool flag_parallel, bool flag_collective, bool flag_hyperslab) +{ + if (!flag_parallel) return; + // set up the collective transfer properties list + prop_id = H5Pcreate(H5P_DATASET_XFER); + //if all tasks are participating in the writes + if (flag_collective) ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_COLLECTIVE); + else ret = H5Pset_dxpl_mpio(prop_id, H5FD_MPIO_INDEPENDENT); + if (flag_hyperslab) { + H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, dims_offset.data(), NULL, dims, NULL); + if (dims[0] == 0) { + H5Sselect_none(dspace_id); + H5Sselect_none(memspace_id); + } + } + iwrite = (mpi_hdf_dims_tot[0] > 0); +} + +#endif + +void H5OutputFile::_set_chunks(std::vector &chunks, + int rank, hsize_t *dims, +#ifdef USEPARALLELHDF + std::vector &mpi_hdf_dims_tot, +#endif + bool flag_parallel +) +{ + // check if data set has non-zero size and if it large + auto nonzero_size = true; + auto large_dataset = true; + for(auto i=0; i &chunks) +{ +#ifdef USEHDFCOMPRESSION + if (chunks.empty()) return H5P_DEFAULT; + hid_t prop_id = H5Pcreate(H5P_DATASET_CREATE); + H5Pset_layout(prop_id, H5D_CHUNKED); + H5Pset_chunk(prop_id, rank, chunks.data()); + H5Pset_deflate(prop_id, HDFDEFLATE); +#else + return H5P_DEFAULT; +#endif +} + +std::vector H5OutputFile::_tokenize(const std::string &s) +{ + std::string delims("/"); + std::string::size_type lastPos = s.find_first_not_of(delims, 0); + std::string::size_type pos = s.find_first_of(delims, lastPos); + + std::vector tokens; + while (std::string::npos != pos || std::string::npos != lastPos) { + tokens.push_back(s.substr(lastPos, pos - lastPos)); + lastPos = s.find_first_not_of(delims, pos); + pos = s.find_first_of(delims, lastPos); + } + return tokens; +} + +/// get an attribute going to list of hids +void H5OutputFile::_get_attribute(std::vector &ids, const std::string attr_name) +{ + //can use H5Aexists as it is the C interface but how to access it? + auto exists = H5Aexists(ids.back(), attr_name.c_str()); + if (exists == 0) { + throw std::invalid_argument(std::string("attribute not found ") + attr_name); + } + else if (exists < 0) { + throw std::runtime_error("Error on H5Aexists"); + } + auto attr = H5Aopen(ids.back(), attr_name.c_str(), H5P_DEFAULT); + ids.push_back(attr); +} +/// get attributes parsing list of ids and vector of strings +void H5OutputFile::_get_attribute(std::vector &ids, const std::vector &parts) +{ + // This is the attribute name, so open it and store the id + if (parts.size() == 1) { + _get_attribute(ids, parts[0]); + } + else + { + //otherwise enter group and recursively call funciotn + H5O_info_t object_info; + hid_t newid; + hid_t lapl_id = H5P_DEFAULT; +#if H5_VERSION_GE(1,12,0) + unsigned int fields = H5O_INFO_ALL; + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id); +#else + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id); +#endif + if (object_info.type == H5O_TYPE_GROUP) { + newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + else if (object_info.type == H5O_TYPE_DATASET) { + newid = H5Dopen(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + ids.push_back(newid); + //get the substring + std::vector subparts(parts.begin() + 1, parts.end()); + //call function again + _get_attribute(ids, subparts); + } +} + +/// get attribute in file, storing relevant ids in vector +void H5OutputFile::get_attribute(std::vector &ids, const std::string &name) +{ + auto parts = _tokenize(name); + _get_attribute(ids, parts); +} + +/// get a dataset going to list of hids +void H5OutputFile::_get_dataset(std::vector &ids, const std::string dset_name) +{ + auto dset_id = H5Dopen(ids.back(), dset_name.c_str(), H5P_DEFAULT); + if (dset_id < 0) { + throw std::invalid_argument(std::string("dataset not found ") + dset_name); + } + ids.push_back(dset_id); +} +/// get dataset parsing list of ids and vector of strings +void H5OutputFile::_get_dataset(std::vector &ids, const std::vector &parts) +{ + // This is the attribute name, so open it and store the id + if (parts.size() == 1) { + _get_dataset(ids, parts[0]); + } + else + { + //otherwise enter group and recursively call funciotn + H5O_info_t object_info; + hid_t newid; + hid_t lapl_id = H5P_DEFAULT; +#if H5_VERSION_GE(1,12,0) + unsigned int fields = H5O_INFO_ALL; + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id); +#else + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id); +#endif + if (object_info.type == H5O_TYPE_GROUP) { + newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + else if (object_info.type == H5O_TYPE_DATASET) { + throw std::runtime_error("Incorrect path to data set, encountered another data set in path"); + } + ids.push_back(newid); + //get the substring + std::vector subparts(parts.begin() + 1, parts.end()); + //call function again + _get_dataset(ids, subparts); + } +} + +/// get dataset in file, storing relevant ids in vector +void H5OutputFile::get_dataset(std::vector &ids, const std::string &name) +{ + auto parts = _tokenize(name); + _get_dataset(ids, parts); +} + +/// get a hdf5 id going to list of hids +void H5OutputFile::_get_hdf5_id(std::vector &ids, const std::string name) +{ + auto id = H5Oopen(ids.back(), name.c_str(), H5P_DEFAULT); + if (id < 0) { + throw std::invalid_argument(std::string("hdf5 object not found ") + name); + } + ids.push_back(id); +} +/// get hdf5 ids parsing list of ids and vector of strings +void H5OutputFile::_get_hdf5_id(std::vector &ids, const std::vector &parts) +{ + // This is the attribute name, so open it and store the id + if (parts.size() == 1) { + _get_hdf5_id(ids, parts[0]); + } + else + { + //otherwise enter group and recursively call funciotn + H5O_info_t object_info; + hid_t newid; + hid_t lapl_id = H5P_DEFAULT; +#if H5_VERSION_GE(1,12,0) + unsigned int fields = H5O_INFO_ALL; + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, fields, lapl_id); +#else + H5Oget_info_by_name(ids.back(), parts[0].c_str(), &object_info, lapl_id); +#endif + if (object_info.type == H5O_TYPE_GROUP) { + newid = H5Gopen(ids.back(),parts[0].c_str(),H5P_DEFAULT); + } + else { + throw std::runtime_error("Incorrect path, encountered something other than group"); + } + ids.push_back(newid); + //get the substring + std::vector subparts(parts.begin() + 1, parts.end()); + //call function again + _get_dataset(ids, subparts); + } +} + +/// get hdf5 id in file, storing relevant ids in vector +void H5OutputFile::get_hdf5_id(std::vector &ids, const std::string &name) +{ + auto parts = _tokenize(name); + _get_hdf5_id(ids, parts); +} +hid_t H5OutputFile::get_hdf5_id(std::string path, std::string name, bool closeids) +{ + return get_hdf5_id(path+name, closeids); +} +hid_t H5OutputFile::get_hdf5_id(std::string fullname, bool closeids) +{ + hid_t id; + std::vector ids; + try { + get_hdf5_id(ids, fullname); + id = ids.back(); + } + catch (const std::invalid_argument& ia) + { + id = -1; + } + ///\todo question of whether I should close the objects that have been opened + if (closeids) close_hdf_ids(ids); + return id; +} + +/// close open hids stored in vector +void H5OutputFile::close_hdf_ids(std::vector &ids) +{ + H5O_info_t object_info; + for (auto &id:ids) + { + hid_t lapl_id = H5P_DEFAULT; +#if H5_VERSION_GE(1,12,0) + H5Oget_info(id, &object_info, lapl_id); +#else + H5Oget_info(id, &object_info); +#endif + if (object_info.type == H5O_TYPE_GROUP) { + H5Gclose(id); + } + else if (object_info.type == H5O_TYPE_GROUP) { + H5Dclose(id); + } + } +} + +/// close open hids stored in vector +void H5OutputFile::close_path(std::string path) +{ + auto parts = _tokenize(path); + std::vector ids; + _get_hdf5_id(ids, parts); + close_hdf_ids(ids); +} + +/// read a string attribute +void H5OutputFile::_do_read_string(const hid_t &attr, const hid_t &type, std::string &val) +{ + std::vector buf; + hid_t type_in_file = H5Aget_type(attr); + hid_t type_in_memory = H5Tcopy(type); // copy memory type because we'll need to modify it + size_t length = H5Tget_size(type_in_file); // get length of the string in the file + buf.resize(length+1); // resize buffer in memory, allowing for null terminator + H5Tset_size(type_in_memory, length+1); // tell HDF5 the length of the buffer in memory + H5Tset_strpad(type_in_memory, H5T_STR_NULLTERM); // specify that we want a null terminated string + H5Aread(attr, type_in_memory, buf.data()); + H5Tclose(type_in_memory); + H5Tclose(type_in_file); + val=std::string(buf.data()); +} + +bool H5OutputFile::exists_attribute(const std::string &parent, const std::string &name) { + std::string attr_name = parent+std::string("/")+name; + std::vector ids; + bool exists = true; + //groups, data spaces, etc that have been opened. + try { + get_attribute(ids, attr_name); + } + catch (const std::invalid_argument& ia) + { + exists = false; + } + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //check if present + if (exists) { + H5Aclose(ids[0]); + ids.erase(ids.begin()); + } + close_hdf_ids(ids); + return exists; +} + +bool H5OutputFile::exists_dataset(const std::string &parent, const std::string &name) { + std::string dset_name = parent+std::string("/")+name; + std::vector ids; + bool exists = true; + //groups, data spaces, etc that have been opened. + try { + get_dataset(ids, dset_name); + } + catch (const std::invalid_argument& ia) + { + exists = false; + } + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //check if present + if (exists) { + H5Dclose(ids[0]); + ids.erase(ids.begin()); + } + close_hdf_ids(ids); + return exists; +} + +/// create a dataset +hid_t H5OutputFile::create_dataset(std::string fullname, hid_t type_id, + std::vector dims, std::vector chunkDims, + bool flag_closedataset, + bool flag_parallel, bool flag_hyperslab, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + auto splitPath = _tokenize(fullname); + auto dsetname = splitPath.back(); + splitPath.pop_back(); + hid_t curr_id = file_id; + hid_t memspace_id, dspace_id, dset_id, prop_id = H5P_DEFAULT; + auto rank = dims.size(); + std::vector chunks = chunkDims; + std::vector ids; + + for (auto& groupname : splitPath) + { + if (H5Lexists(curr_id, groupname.c_str(), H5P_DEFAULT) == 0) curr_id = create_group(groupname); + else curr_id = open_group(groupname); + ids.push_back(curr_id); + } + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); +#endif + + // Determine if going to compress data in chunks + // Only chunk non-zero size datasets + _set_chunks(chunks, dims, + #ifdef USEPARALLELHDF + mpi_hdf_dims_tot, + #endif + flag_parallel + ); + + // Create the dataspace where the data space might have a hyperslab + // selection if using parallel hdf5 + dspace_id = H5Screate_simple(rank, dims.data(), NULL); + memspace_id = dspace_id; +#ifdef USEPARALLELHDF + _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); +#endif + +#ifdef USEHDFCOMPRESSION + prop_id = _set_compression(rank, chunks); +#endif + + dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id, + H5P_DEFAULT, prop_id, H5P_DEFAULT); + H5Pclose(prop_id); + + if (flag_closedataset) + { + H5Sclose(dspace_id); + H5Dclose(dset_id); + reverse(ids.begin(),ids.end()); + close_hdf_ids(ids); + } + return dset_id; +} + +/// create a link +herr_t H5OutputFile::create_link(std::string orgname, std::string linkname, bool ihard) { + std::vector orgids; + hid_t orgid = get_hdf5_id(orgname); + hid_t linkid; + + if (H5Lexists(linkid, linkname.c_str(), H5P_DEFAULT)) { + throw std::runtime_error("Link already exists, cannot create new link"); + } + if (ihard) { + return H5Lcreate_hard(orgid, orgname.c_str(), linkid, linkname.c_str(), H5P_DEFAULT, H5P_DEFAULT); + } + else { + return H5Lcreate_soft(orgname.c_str(), linkid, linkname.c_str(), H5P_DEFAULT, H5P_DEFAULT); + } +} + +//write dataset with hyperslab selection +void H5OutputFile::write_to_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, + bool flag_hyperslab, bool flag_collective) +{ + // Open the dataset + hid_t dspace_id, memspace_id, prop_id, dset_id; + herr_t ret; + prop_id = H5P_DEFAULT; + dset_id = H5Dopen(file_id, name.c_str(), H5P_DEFAULT); + // create simple data space + dspace_id = H5Screate_simple(rank, dims, NULL); + memspace_id = dspace_id; + ///\todo question of what to do in the case of a parallel data space regarding hyperslab + ///selection +#ifdef USEPARALLELHDF +#endif + ///\todo question regarding compression since data set could be created with + /// some compression or chunked data. Should already be set + + //set local hyperslab + if (!count.empty() && !start.empty()) { + ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL); + if (ret < 0) io_error(std::string("Failed to select hyperslab for writing to dataset: ")+name); + } + ///\todo question of what to do in terms of mpi transfer properties +#ifdef USEPARALLELHDF +#endif + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + if (memspace_id != dspace_id) H5Sclose(memspace_id); + H5Sclose(dspace_id); + H5Dclose(dset_id); +} + +void H5OutputFile::write_dataset(std::string name, hsize_t len, std::string data, + bool flag_parallel, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + int rank = 1; + hsize_t dims[1] = {len}; + + hid_t memtype_id, filetype_id, dspace_id, dset_id, xfer_plist; + herr_t status, ret; + memtype_id = H5Tcopy (H5T_C_S1); + status = H5Tset_size (memtype_id, data.size()); + filetype_id = H5Tcopy (H5T_C_S1); + status = H5Tset_size (filetype_id, data.size()); + + // Create the dataspace + dspace_id = H5Screate_simple(rank, dims, NULL); + + // Create the dataset + dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); +#ifdef USEPARALLELHDF + if (flag_parallel) { + // set up the collective transfer properties list + xfer_plist = H5Pcreate(H5P_DATASET_XFER); + if (xfer_plist < 0) io_error(std::string("Failed to set up parallel transfer: ")+name); + if (flag_collective) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + else ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT); + if (ret < 0) io_error(std::string("Failed to set up parallel transfer: ")+name); + // the result of above should be that all processors write to the same + // point of the hdf file. + } +#endif + // Write the data + if(H5Dwrite(dset_id, memtype_id, dspace_id, H5S_ALL, H5P_DEFAULT, data.c_str()) < 0) + io_error(std::string("Failed to write dataset: ")+name); + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) +#ifdef USEPARALLELHDF + if (flag_parallel) H5Pclose(xfer_plist); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); +} + +void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective) +{ + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective); +} + +void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, + bool flag_hyperslab, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + hid_t dspace_id, dset_id, prop_id, memspace_id, ret; + std::vector chunks(rank); + prop_id = H5P_DEFAULT; + // Get HDF5 data type of the array in memory + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); +#endif + + // Determine if going to compress data in chunks + // Only chunk non-zero size datasets + _set_chunks(chunks, rank, dims, +#ifdef USEPARALLELHDF + mpi_hdf_dims_tot, +#endif + flag_parallel + ); + + // Create the dataspace + dspace_id = H5Screate_simple(rank, dims, NULL); + memspace_id = dspace_id; +#ifdef USEPARALLELHDF + _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); +#endif + + // Dataset creation properties + prop_id = H5P_DEFAULT; +#ifdef USEHDFCOMPRESSION + prop_id = _set_compression(rank, chunks); +#endif + + // Create the dataset + dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, + H5P_DEFAULT, prop_id, H5P_DEFAULT); + if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name); + H5Pclose(prop_id); + + prop_id = H5P_DEFAULT; + bool iwrite = (dims[0] > 0); +#ifdef USEPARALLELHDF + _set_mpi_dataset_properties(prop_id, iwrite, + dspace_id, memspace_id, + rank, dims, dims_offset, + flag_parallel, flag_collective, flag_hyperslab); +#endif + if (iwrite) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); +#ifdef USEPARALLELHDF + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); +} + +/// Write data set with hyperslab set by count and start +void H5OutputFile::write_dataset(std::string name, hsize_t len, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, bool flag_hyperslab, bool flag_collective) +{ + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + write_dataset_nd(name, rank, dims, data, + count, start, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, flag_hyperslab, flag_collective); +} + +///\todo need to update this to general mpi hyper slab selection +void H5OutputFile::write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id, hid_t filetype_id, + bool flag_parallel, bool flag_first_dim_parallel, + bool flag_hyperslab, bool flag_collective) +{ +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + hid_t dspace_id, dset_id, prop_id, memspace_id, ret; + std::vector chunks(rank); + // Get HDF5 data type of the array in memory + if (memtype_id == -1) { + throw std::runtime_error("Write data set called with void pointer but no type info passed."); + } + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + // _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); +#endif + + // Determine if going to compress data in chunks + // Only chunk non-zero size datasets + _set_chunks(chunks, rank, dims, +#ifdef USEPARALLELHDF + mpi_hdf_dims_tot, +#endif + flag_parallel + ); + + dspace_id = H5Screate_simple(rank, dims, NULL); + memspace_id = dspace_id; + // Create the dataspace +// #ifdef USEPARALLELHDF +// _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); +// #endif + + // Dataset creation properties + prop_id = H5P_DEFAULT; +#ifdef USEHDFCOMPRESSION + prop_id = _set_compression(rank, chunks); +#endif + // Create the dataset + dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, + H5P_DEFAULT, prop_id, H5P_DEFAULT); + if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name); + H5Pclose(prop_id); + + prop_id = H5P_DEFAULT; + bool iwrite = (dims[0] > 0); + + //set hyperslab + if (!count.empty() && !start.empty()) { + herr_t ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL); + } + +#ifdef USEPARALLELHDF + // _set_mpi_dataset_properties(prop_id, iwrite, + // dspace_id, memspace_id, + // rank, dims, dims_offset, + // flag_parallel, flag_collective, flag_hyperslab); +#endif + if (iwrite) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); +#ifdef USEPARALLELHDF + // if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); +} diff --git a/HDF5Wrapper.h b/HDF5Wrapper.h new file mode 100644 index 0000000..1856563 --- /dev/null +++ b/HDF5Wrapper.h @@ -0,0 +1,726 @@ +#ifndef __HDF5WRAPPER_H +#define __HDF5WRAPPER_H + +#include "common.h" + +#ifdef USEMPI +#include +#endif + +// Overloaded function to return HDF5 type given a C type +static inline hid_t hdf5_type(float dummy) {return H5T_NATIVE_FLOAT;} +static inline hid_t hdf5_type(double dummy) {return H5T_NATIVE_DOUBLE;} +static inline hid_t hdf5_type(short dummy) {return H5T_NATIVE_SHORT;} +static inline hid_t hdf5_type(int dummy) {return H5T_NATIVE_INT;} +static inline hid_t hdf5_type(long dummy) {return H5T_NATIVE_LONG;} +static inline hid_t hdf5_type(long long dummy) {return H5T_NATIVE_LLONG;} +static inline hid_t hdf5_type(unsigned short dummy) {return H5T_NATIVE_USHORT;} +static inline hid_t hdf5_type(unsigned int dummy) {return H5T_NATIVE_UINT;} +static inline hid_t hdf5_type(unsigned long dummy) {return H5T_NATIVE_ULONG;} +static inline hid_t hdf5_type(unsigned long long dummy) {return H5T_NATIVE_ULLONG;} +static inline hid_t hdf5_type(std::string dummy) {return H5T_C_S1;} +static inline hid_t hdf5_type_from_string(std::string dummy) +{ + if (dummy == std::string("float32")) return H5T_NATIVE_FLOAT; + else if (dummy == std::string("float64")) return H5T_NATIVE_DOUBLE; + else if (dummy == std::string("int16")) return H5T_NATIVE_SHORT; + else if (dummy == std::string("int32")) return H5T_NATIVE_INT; + else if (dummy == std::string("int64")) return H5T_NATIVE_LLONG; + else if (dummy == std::string("uint16")) return H5T_NATIVE_USHORT; + else if (dummy == std::string("uint32")) return H5T_NATIVE_UINT; + else if (dummy == std::string("uint64")) return H5T_NATIVE_ULLONG; + else return H5T_C_S1; +} + +#if H5_VERSION_GE(1,10,1) +#endif + +#if H5_VERSION_GE(1,12,0) +#endif + +///\name HDF class to manage writing information +///\todo need to look into whether one can open directly with +/// full path or must open groups explicitly. If latter, updated needed +class H5OutputFile +{ + +private: + + hid_t file_id; +#ifdef USEPARALLELHDF + hid_t parallel_access_id; +#endif + +protected: + + /// size of chunks when compressing + unsigned int HDFOUTPUTCHUNKSIZE = 8192; + + /// Called if a HDF5 call fails (might need to MPI_Abort) + void io_error(std::string message) { + std::cerr << message << std::endl; +#ifdef USEMPI + MPI_Abort(MPI_COMM_WORLD, 1); +#endif + abort(); + } + + /// parallel hdf5 mpi routines +#ifdef USEPARALLELHDF + /// set the offset points for the mpi write + void _set_mpi_dim_and_offset(MPI_Comm &comm, + hsize_t rank, std::vector &dims, + std::vector &dims_single, + std::vector &dims_offset, + std::vector &mpi_hdf_dims, + std::vector &mpi_hdf_dims_tot, + bool flag_parallel, bool flag_first_dim_parallel + ); + /// select the hyperslab + void _set_mpi_hyperslab(hid_t &dspace_id, hid_t &memspace_id, + hsize_t rank, std::vector &dims, + std::vector &mpi_hdf_dims_tot, + bool flag_parallel, bool flag_hyperslab); + /// and set the transfer properties + void _set_mpi_dataset_properties(hid_t &prop_id, bool &iwrite, + hid_t &dspace_id, hid_t &memspace_id, + hsize_t rank, std::vector dims, std::vector dims_offset, + bool flag_parallel, bool flag_collective, bool flag_hyperslab) +#endif + + /// set chunks size for a dataset + void _set_chunks(std::vector &chunks, + int rank, hsize_t *dims, + #ifdef USEPARALLELHDF + std::vector &mpi_hdf_dims_tot, + #endif + bool flag_parallel + ); + void _set_chunks(std::vector &chunks, + std::vector &dims, +#ifdef USEPARALLELHDF + std::vector &mpi_hdf_dims_tot, +#endif + bool flag_parallel + ) + { + _set_chunks(chunks, dims.size(), dims.data(), +#ifdef USEPARALLELHDF + mpi_hdf_dims_tot, +#endif + flag_parallel + ); + } + hid_t _set_compression(int rank, std::vector &chunks); + + /// tokenize a path given an input string + std::vector _tokenize(const std::string &s); + + /// get attribute id + void _get_attribute(std::vector &ids, const std::string attr_name); + /// get attribute id from tokenized string + void _get_attribute(std::vector &ids, const std::vector &parts); + + /// wrapper for reading scalar + template void _do_read(const hid_t &attr, const hid_t &type, T &val) + { + if (hdf5_type(T{}) == H5T_C_S1) + { + _do_read_string(attr, type, val); + } + else { + H5Aread(attr, type, &val); + } + } + /// wrapper for reading string + void _do_read_string(const hid_t &attr, const hid_t &type, std::string &val); + /// wrapper for reading vector + template void _do_read_v(const hid_t &attr, const hid_t &type, std::vector &val) + { + hid_t space = H5Aget_space (attr); + int npoints = H5Sget_simple_extent_npoints(space); + val.resize(npoints); + H5Aread(attr, type, val.data()); + H5Sclose(space); + } + + /// get dataset id + void _get_dataset(std::vector &ids, const std::string dset_name); + /// get dataset id from tokenized string + void _get_dataset(std::vector &ids, const std::vector &parts); + + /// get dataset id + void _get_hdf5_id(std::vector &ids, const std::string name); + /// get attribute id from tokenized string + void _get_hdf5_id(std::vector &ids, const std::vector &parts); + +public: + + // Constructor + H5OutputFile(); + // Destructor closes the file if it's open + ~H5OutputFile(); + + /// Create a new file + void create(std::string filename, hid_t flag = H5F_ACC_TRUNC, + int taskID = -1, bool iparallelopen = true); + /// Append to a file + void append(std::string filename, hid_t flag = H5F_ACC_RDWR, + int taskID = -1, bool iparallelopen = true); + + /// Close the file + void close(); + + /// create a group + hid_t create_group(std::string groupname) { + hid_t group_id; + if (H5Lexists(group_id, groupname.c_str(), H5P_DEFAULT)) { + throw std::invalid_argument("Group "+groupname+"already present, not creating group"); + } + group_id = H5Gcreate(file_id, groupname.c_str(), + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + return group_id; + } + hid_t open_group(std::string groupname) { + hid_t group_id = H5Gopen(file_id, groupname.c_str(), H5P_DEFAULT); + return group_id; + } + /// close group + herr_t close_group(hid_t gid) { + herr_t status = H5Gclose(gid); + return status; + } + + /// close path + void close_path(std::string path); + /// close hids stored in vector + void close_hdf_ids(std::vector &ids); + + /// create a dataspace + hid_t create_dataspace(const std::vector &dims) + { + hid_t dspace_id; + int rank = dims.size(); + dspace_id = H5Screate_simple(rank, dims.data(), NULL); + return dspace_id; + } + hid_t create_dataspace(int rank, hsize_t *dims) + { + hid_t dspace_id; + dspace_id = H5Screate_simple(rank, dims, NULL); + return dspace_id; + } + hid_t create_dataspace(hsize_t len) + { + int rank = 1; + hsize_t dims[1] = {len}; + hid_t dspace_id = H5Screate_simple(rank, dims, NULL); + return dspace_id; + } + /// close data space + herr_t close_dataspace(hid_t dspace_id) { + herr_t status = H5Sclose(dspace_id); + return status; + } + + /// create a dataset + hid_t create_dataset(std::string dsetname, hid_t type_id, hid_t dspace_id) + { + hid_t dset_id; + dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + return dset_id; + } + template hid_t create_dataset(std::string dsetname, T *data, hid_t dspace_id) + { + hid_t dset_id; + hid_t type_id = hdf5_type(T{}); + dset_id = H5Dcreate(file_id, dsetname.c_str(), type_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + return dset_id; + } + /// create data set and return hid + /// and possibly close all groups, data spaces and data set itself after creation + template hid_t create_dataset(std::string path, std::string name, T&data, + std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_closedataset = true, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + return create_dataset(path+"/"+name, hdf5_type(T{}), dims, chunkDims, + flag_closedataset, + flag_parallel, flag_hyperslab, flag_collective); + } + hid_t create_dataset(std::string path, std::string name, hid_t datatype, + std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_closedataset = true, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + return create_dataset(path+"/"+name, datatype, dims, chunkDims, + flag_closedataset, + flag_parallel, flag_hyperslab, flag_collective); + } + template hid_t create_dataset(std::string fullname, T&data, + std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_closedataset = true, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + return create_dataset(fullname, hdf5_type(T{}), dims, chunkDims, + flag_closedataset, + flag_parallel, flag_hyperslab, flag_collective); + } + hid_t create_dataset(std::string fullname, hid_t datatype, + std::vector dims, std::vector chunkDims = std::vector(0), + bool flag_closedataset = true, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true); + + /// close data set + herr_t close_dataset(hid_t dset_id) { + herr_t status = H5Dclose(dset_id); + return status; + } + + /// find and hdf5 object + void get_hdf5_id(std::vector &ids, const std::string &name); + hid_t get_hdf5_id(std::string path, std::string name, bool closeids = true); + hid_t get_hdf5_id(std::string fullname, bool closeids = true); + + /// create a link + ///\todo could provide HDF 1.12 viable APIs for external links handling of VOL + herr_t create_link(std::string orgname, std::string linkname, bool ihard = true); + + /// writes to an existing data set + /// with a hyperslab selection defined by count, start + void write_to_dataset(std::string name, hsize_t len, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id=-1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + template void write_to_dataset(std::string name, hsize_t len, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_to_dataset_nd(name, rank, dims, data, + count, start, + memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); + } + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_to_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + memtype_id = hdf5_type(T{}); + write_to_dataset_nd(name, rank, dims, (void*)data, + count, start, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); + } + template void write_to_dataset_nd(std::string name, std::vector dims, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + write_to_dataset_nd(name, dims.size(), dims.data(),(void*)data, + count, start, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); + } + //write dataset with hyperslab selection + void write_to_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + + /// write 1D data sets of string or input data with defined data type + /// without hyperslab selection, creates dataset, writes to it and closes + void write_dataset(std::string name, hsize_t len, std::string data, + bool flag_parallel = true, bool flag_collective = true); + void write_dataset(std::string name, hsize_t len, void *data, + hid_t memtype_id=-1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + /// template function so defined here + template void write_dataset(std::string name, hsize_t len, T *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_dataset_nd(name, rank, dims, data, memtype_id, filetype_id, flag_parallel, flag_hyperslab, flag_collective); + } + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + memtype_id = hdf5_type(T{}); + write_dataset_nd(name, rank, dims, (void*)data, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); +// #ifdef USEPARALLELHDF +// MPI_Comm comm = mpi_comm_write; +// MPI_Info info = MPI_INFO_NULL; +// #endif +// hid_t dspace_id, dset_id, prop_id, memspace_id, ret; +// std::vector chunks; +// +// // Get HDF5 data type of the array in memory +// if (memtype_id == -1) memtype_id = hdf5_type(T{}); +// +// // Determine type of the dataset to create +// if(filetype_id < 0) filetype_id = memtype_id; +// +// #ifdef USEPARALLELHDF +// std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); +// _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); +// #endif +// +// // Determine if going to compress data in chunks +// // Only chunk non-zero size datasets +// _set_chunks(chunks, rank, dims, +// #ifdef USEPARALLELHDF +// mpi_hdf_dims_tot, +// #endif +// flag_parallel +// ); +// +// // Create the dataspace +// #ifdef USEPARALLELHDF +// _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); +// #else +// dspace_id = H5Screate_simple(rank, dims, NULL); +// memspace_id = dspace_id; +// #endif +// +// // Dataset creation properties +// prop_id = H5P_DEFAULT; +// #ifdef USEHDFCOMPRESSION +// prop_id = _set_compression(rank, chunks); +// #endif +// // Create the dataset +// dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, +// H5P_DEFAULT, prop_id, H5P_DEFAULT); +// if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name); +// H5Pclose(prop_id); +// +// prop_id = H5P_DEFAULT; +// bool iwrite = (dims[0] > 0); +// #ifdef USEPARALLELHDF +// _set_mpi_dataset_properties(prop_id, iwrite, +// dspace_id, memspace_id, +// rank, dims, dims_offset, +// flag_parallel, flag_collective, flag_hyperslab); +// #endif +// if (iwrite) { +// ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); +// if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); +// } +// // Clean up (note that dtype_id is NOT a new object so don't need to close it) +// H5Pclose(prop_id); +// #ifdef USEPARALLELHDF +// if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +// #endif +// H5Sclose(dspace_id); +// H5Dclose(dset_id); + } + template void write_dataset_nd(std::string name, std::vector dims, T *data, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + write_dataset_nd(name, dims.size(), dims.data(), data, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); + } + //write dataset with hyperslab selection + void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + + /// with a hyperslab selection defined by count, start + void write_dataset(std::string name, hsize_t len, std::string data, + const std::vector& count, const std::vector& start, + bool flag_parallel = true, bool flag_collective = true); + void write_dataset(std::string name, hsize_t len, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id=-1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + /// Write a new 1D dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + /// template function so defined here + template void write_dataset(std::string name, hsize_t len, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_hyperslab = true, bool flag_collective = true) + { + int rank = 1; + hsize_t dims[1] = {len}; + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + write_dataset_nd(name, rank, dims, data, + count, start, memtype_id, filetype_id, + flag_parallel, flag_hyperslab, flag_collective); + } + + /// Write a multidimensional dataset. Data type of the new dataset is taken to be the type of + /// the input data if not explicitly specified with the filetype_id parameter. + template void write_dataset_nd(std::string name, std::vector dims, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { + write_dataset_nd(name, dims.size(), dims.data(), data, + count, start, + memtype_id, filetype_id, + flag_parallel, flag_first_dim_parallel, + flag_hyperslab, flag_collective); + } + template void write_dataset_nd(std::string name, int rank, hsize_t *dims, T *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id = -1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true) + { +#ifdef USEPARALLELHDF + MPI_Comm comm = mpi_comm_write; + MPI_Info info = MPI_INFO_NULL; +#endif + hid_t dspace_id, dset_id, prop_id, memspace_id, ret; + std::vector chunks(rank); + + // Get HDF5 data type of the array in memory + if (memtype_id == -1) memtype_id = hdf5_type(T{}); + + // Determine type of the dataset to create + if(filetype_id < 0) filetype_id = memtype_id; + +#ifdef USEPARALLELHDF + std::vector mpi_hdf_dims(rank*NProcsWrite), mpi_hdf_dims_tot(rank), dims_single(rank), dims_offset(rank); + // _set_mpi_dim_and_offset(comm, rank, dims, dims_single, dims_offset, mpi_hdf_dims, mpi_hdf_dims_tot, flag_parallel, flag_first_dim_parallel); +#endif + + // Determine if going to compress data in chunks + // Only chunk non-zero size datasets + _set_chunks(chunks, rank, dims, +#ifdef USEPARALLELHDF + mpi_hdf_dims_tot, +#endif + flag_parallel + ); + + // Create the dataspace +#ifdef USEPARALLELHDF + // _set_mpi_hyperslab(dspace_id, memspace_id, rank, dims, mpi_hdf_dims_tot, flag_parallel, flag_hyperslab); +#else + dspace_id = H5Screate_simple(rank, dims, NULL); + memspace_id = dspace_id; +#endif + + // Dataset creation properties + prop_id = H5P_DEFAULT; +#ifdef USEHDFCOMPRESSION + prop_id = _set_compression(rank, chunks); +#endif + // Create the dataset + dset_id = H5Dcreate(file_id, name.c_str(), filetype_id, dspace_id, + H5P_DEFAULT, prop_id, H5P_DEFAULT); + if(dset_id < 0) io_error(std::string("Failed to create dataset: ")+name); + H5Pclose(prop_id); + + prop_id = H5P_DEFAULT; + bool iwrite = (dims[0] > 0); + if (!count.empty() && !start.empty()) { + herr_t ret = H5Sselect_hyperslab(dspace_id, H5S_SELECT_SET, start.data(), NULL, count.data(), NULL); + } +#ifdef USEPARALLELHDF + // _set_mpi_dataset_properties(prop_id, iwrite, + // dspace_id, memspace_id, + // rank, dims, dims_offset, + // flag_parallel, flag_collective, flag_hyperslab); +#endif + if (iwrite) { + ret = H5Dwrite(dset_id, memtype_id, memspace_id, dspace_id, prop_id, data); + if (ret < 0) io_error(std::string("Failed to write dataset: ")+name); + } + + // Clean up (note that dtype_id is NOT a new object so don't need to close it) + H5Pclose(prop_id); +#ifdef USEPARALLELHDF + if (flag_hyperslab && flag_parallel) H5Sclose(memspace_id); +#endif + H5Sclose(dspace_id); + H5Dclose(dset_id); + } + + void write_dataset_nd(std::string name, int rank, hsize_t *dims, void *data, + const std::vector& count, const std::vector& start, + hid_t memtype_id = -1, hid_t filetype_id=-1, + bool flag_parallel = true, bool flag_first_dim_parallel = true, + bool flag_hyperslab = true, bool flag_collective = true); + + /// get a dataset with full path given by name + void get_dataset(std::vector &ids, const std::string &name); + /// check if dataset exits + bool exists_dataset(const std::string &parent, const std::string &name); + + /// get an attribute with full path given by name + void get_attribute(std::vector &ids, const std::string &name); + + /// read scalar attribute + template const T read_attribute(const std::string &name) + { + std::string attr_name; + T val; + hid_t type; + H5O_info_t object_info; + std::vector ids; + //traverse the file to get to the attribute, storing the ids of the + //groups, data spaces, etc that have been opened. + get_attribute(ids, name); + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //determine hdf5 type of the array in memory + type = hdf5_type(T{}); + // read the data + _do_read(ids[0], type, val); + H5Aclose(ids[0]); + ids.erase(ids.begin()); + //now have hdf5 ids traversed to get to desired attribute so move along to close all + //based on their object type + close_hdf_ids(ids); + return val; + } + /// read vector attribute + template const std::vector read_attribute_v(const std::string &name) + { + std::string attr_name; + std::vector val; + hid_t type; + H5O_info_t object_info; + std::vector ids; + //traverse the file to get to the attribute, storing the ids of the + //groups, data spaces, etc that have been opened. + get_attribute(ids, name); + //now reverse ids and load attribute + reverse(ids.begin(),ids.end()); + //determine hdf5 type of the array in memory + type = hdf5_type(T{}); + // read the data + _do_read_v(ids[0], type, val); + H5Aclose(ids[0]); + ids.erase(ids.begin()); + //now have hdf5 ids traversed to get to desired attribute so move along to close all + //based on their object type + close_hdf_ids(ids); + return val; + } + + /// sees if attribute exits + bool exists_attribute(const std::string &parent, const std::string &name); + + /// write an attribute, not that since these are template function, define here in the header + template void write_attribute(const std::string &parent, const std::string &name, const std::vector &data) + { + // Get HDF5 data type of the value to write + hid_t dtype_id = hdf5_type(data[0]); + hsize_t size = data.size(); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SIMPLE); + hid_t dspace_extent = H5Sset_extent_simple(dspace_id, 1, &size, NULL); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, data.data()) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); + } + + template void write_attribute(const std::string &parent, const std::string &name, const T &data) + { + // Get HDF5 data type of the value to write + hid_t dtype_id = hdf5_type(data); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SCALAR); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, &data) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); + } + + void write_attribute(const std::string parent, const std::string &name, std::string data) + { + // Get HDF5 data type of the value to write + hid_t dtype_id = H5Tcopy(H5T_C_S1); + if (data.size() == 0) data=" "; + H5Tset_size(dtype_id, data.size()); + H5Tset_strpad(dtype_id, H5T_STR_NULLTERM); + + // Open the parent object + hid_t parent_id = H5Oopen(file_id, parent.c_str(), H5P_DEFAULT); + if(parent_id < 0)io_error(std::string("Unable to open object to write attribute: ")+name); + + // Create dataspace + hid_t dspace_id = H5Screate(H5S_SCALAR); + + // Create attribute + hid_t attr_id = H5Acreate(parent_id, name.c_str(), dtype_id, dspace_id, H5P_DEFAULT, H5P_DEFAULT); + if(attr_id < 0)io_error(std::string("Unable to create attribute ")+name+std::string(" on object ")+parent); + + // Write the attribute + if(H5Awrite(attr_id, dtype_id, data.c_str()) < 0) + io_error(std::string("Unable to write attribute ")+name+std::string(" on object ")+parent); + + // Clean up + H5Aclose(attr_id); + H5Sclose(dspace_id); + H5Oclose(parent_id); + } + +}; + +#endif diff --git a/MipMap.cc b/MipMap.cc index fea92bc..cacb4da 100644 --- a/MipMap.cc +++ b/MipMap.cc @@ -11,44 +11,55 @@ MipMap::~MipMap() { } } -void MipMap::createDataset(H5::Group group, const std::vector& chunkDims) { - H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); - floatType.setOrder(H5T_ORDER_LE); - +// void MipMap::createDataset(H5::Group group, const std::vector& chunkDims) { +// H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); +// floatType.setOrder(H5T_ORDER_LE); +// +// std::ostringstream mipMapName; +// mipMapName << "MipMaps/DATA/DATA_XY_" << mip; +// +// if (useChunks(datasetDims)) { +// createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims, chunkDims); +// } else { +// createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims); +// } +// +// } + +void MipMap::createDataset(H5OutputFile &H5outputfile, std::string path, const std::vector& chunkDims) { + std::ostringstream mipMapName; - mipMapName << "MipMaps/DATA/DATA_XY_" << mip; - - if (useChunks(datasetDims)) { - createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims, chunkDims); - } else { - createHdf5Dataset(dataset, group, mipMapName.str(), floatType, datasetDims); - } + mipMapName << "/MipMaps/DATA/DATA_XY_" << mip; + std::vector chunks = chunkDims; + datasetfullpath = path+mipMapName.str(); + if (~useChunks(datasetDims)) chunks.clear(); + H5outputfile.create_dataset(datasetfullpath, H5T_NATIVE_FLOAT, datasetDims, chunks); } void MipMap::createBuffers(std::vector& bufferDims) { bufferSize = product(bufferDims); - + vals = new double[bufferSize]; count = new int[bufferSize]; - + resetBuffers(); - + this->bufferDims = bufferDims; - + auto N = bufferDims.size(); - + width = bufferDims[N - 1]; height = bufferDims[N - 2]; depth = N > 2 ? bufferDims[N - 3] : 1; - stokes = N > 3 ? bufferDims[N - 4] : 1; + stokes = N > 3 ? bufferDims[N - 4] : 1; } -void MipMap::write(hsize_t stokesOffset, hsize_t channelOffset) { +void MipMap::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset) { int N = datasetDims.size(); std::vector count = trimAxes({1, depth, height, width}, N); std::vector start = trimAxes({stokesOffset, channelOffset, 0, 0}, N); - - writeHdf5Data(dataset, vals, bufferDims, count, start); + // writeHdf5Data(dataset, vals, bufferDims, count, start); + H5outputfile.write_to_dataset_nd(datasetfullpath, bufferDims, vals, count, start); } void MipMap::resetBuffers() { @@ -62,7 +73,7 @@ MipMaps::MipMaps(std::vector standardDims, const std::vector& auto dims = standardDims; int N = dims.size(); int mip = 1; - + // We keep going until we have a mipmap which fits entirely within the minimum size while (dims[N - 1] > MIN_MIPMAP_SIZE || dims[N - 2] > MIN_MIPMAP_SIZE) { mip *= 2; @@ -77,7 +88,7 @@ hsize_t MipMaps::size(const std::vector& standardDims, const std::vecto auto datasetDims = standardDims; auto bufferDims = standardBufferDims; int N = standardDims.size(); - + while (datasetDims[N - 1] > MIN_MIPMAP_SIZE || datasetDims[N - 2] > MIN_MIPMAP_SIZE) { mip *= 2; datasetDims = mipDims(datasetDims, 2); @@ -88,11 +99,17 @@ hsize_t MipMaps::size(const std::vector& standardDims, const std::vecto return size; } -void MipMaps::createDatasets(H5::Group group) { +// void MipMaps::createDatasets(H5::Group group) { +// for (auto& mipMap : mipMaps) { +// mipMap.createDataset(group, chunkDims); +// } +// +// } + +void MipMaps::createDatasets(H5OutputFile &H5outputfile, std::string path) { for (auto& mipMap : mipMaps) { - mipMap.createDataset(group, chunkDims); + mipMap.createDataset(H5outputfile, path, chunkDims); } - } void MipMaps::createBuffers(const std::vector& standardBufferDims) { @@ -102,9 +119,9 @@ void MipMaps::createBuffers(const std::vector& standardBufferDims) { } } -void MipMaps::write(hsize_t stokesOffset, hsize_t channelOffset) { +void MipMaps::write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset) { for (auto& mipMap : mipMaps) { - mipMap.write(stokesOffset, channelOffset); + mipMap.write(H5outputfile, stokesOffset, channelOffset); } } diff --git a/MipMap.h b/MipMap.h index ee2c7a8..d949070 100644 --- a/MipMap.h +++ b/MipMap.h @@ -3,16 +3,18 @@ #include "common.h" #include "Util.h" +#include "HDF5Wrapper.h" // A single mipmap struct MipMap { MipMap() {}; MipMap(const std::vector& datasetDims, int mip); ~MipMap(); - - void createDataset(H5::Group group, const std::vector& chunkDims); + +// void createDataset(H5::Group group, const std::vector& chunkDims); + void createDataset(H5OutputFile &h5outputfile, std::string path, const std::vector& chunkDims); void createBuffers(std::vector& bufferDims); - + void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) { hsize_t mipIndex = totalChannelOffset * width * height + (y / mip) * width + (x / mip); vals[mipIndex] += val; @@ -28,23 +30,25 @@ struct MipMap { } } } - - void write(hsize_t stokesOffset, hsize_t channelOffset); + + void write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset); void resetBuffers(); - + std::vector datasetDims; int mip; - - H5::DataSet dataset; - + + // H5::DataSet dataset; + hid_t dset_id; + std::string datasetfullpath; + std::vector bufferDims; hsize_t bufferSize; - + hsize_t width; hsize_t height; hsize_t depth; hsize_t stokes; - + double* vals; int* count; }; @@ -53,13 +57,14 @@ struct MipMap { struct MipMaps { MipMaps() {}; MipMaps(std::vector standardDims, const std::vector& chunkDims); - + // We need the dataset dimensions to work out how many mipmaps we have static hsize_t size(const std::vector& standardDims, const std::vector& standardBufferDims); - - void createDatasets(H5::Group group); + +// void createDatasets(H5::Group group); + void createDatasets(H5OutputFile &H5outputfile, std::string path); void createBuffers(const std::vector& standardBufferDims); - + void accumulate(double val, hsize_t x, hsize_t y, hsize_t totalChannelOffset) { for (auto& mipMap : mipMaps) { mipMap.accumulate(val, x, y, totalChannelOffset); @@ -71,16 +76,16 @@ struct MipMaps { mipMap.calculate(); } } - + // TODO if we ever want a tiled mipmap calculation // we'll need to implement options to pass in custom buffer dims // and additional x and y offsets - void write(hsize_t stokesOffset, hsize_t channelOffset); + void write(H5OutputFile &H5outputfile, hsize_t stokesOffset, hsize_t channelOffset); void resetBuffers(); - + std::vector standardDims; std::vector chunkDims; - + std::vector mipMaps; }; diff --git a/README.md b/README.md index 9017529..bf1a1c9 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ C++ implementation of FITS to IDIA-HDF5 converter, optimised using OpenMP ## Installation -Dependencies: CFITSIO, HDF5, HDF5 C++ bindings +Dependencies: CFITSIO, HDF5 To install: diff --git a/SlowConverter.cc b/SlowConverter.cc index bbc75d9..7cd8626 100644 --- a/SlowConverter.cc +++ b/SlowConverter.cc @@ -8,22 +8,22 @@ void SlowConverter::reportMemoryUsage() { sizes["Main dataset"] = height * width * sizeof(float); sizes["Mipmaps"] = MipMaps::size(standardDims, {1, height, width}); sizes["XY stats"] = Stats::size({depth}, numBins); - + if (depth > 1) { sizes["Rotation"] = 2 * product(trimAxes({stokes, depth, TILE_SIZE, TILE_SIZE}, N)) * sizeof(float); sizes["XYZ stats"] = Stats::size({}, numBins, depth); sizes["Z stats"] = Stats::size({TILE_SIZE, TILE_SIZE}); } - + hsize_t total(0); - + std::cout << "APPROXIMATE MEMORY REQUIREMENTS:" << std::endl; - + for (auto& kv : sizes) { std::cout << kv.first << ":\t" << kv.second * 1e-9 << " GB" << std::endl; total += kv.second; } - + std::string note = ""; if (depth > 1) { total -= std::min(sizes["Main dataset"], sizes["Rotation"] + sizes["Z stats"]); @@ -37,35 +37,35 @@ void SlowConverter::copyAndCalculate() { const hsize_t channelProgressStride = std::max((hsize_t)1, (hsize_t)(depth / 100)); hsize_t numTiles = std::ceil(width / TILE_SIZE) * std::ceil(height / TILE_SIZE); const hsize_t tileProgressStride = std::max((hsize_t)1, (hsize_t)(numTiles / 100)); - + // Allocate one channel at a time, and no swizzled data hsize_t cubeSize = height * width; TIMER(timer.start("Allocate");); standardCube = new float[cubeSize]; - + // Allocate one stokes of stats at a time statsXY.createBuffers({depth}); - + if (depth > 1) { statsXYZ.createBuffers({}, depth); } - + mipMaps.createBuffers({1, height, width}); std::vector count = trimAxes({1, 1, height, width}, N); std::vector memDims = {height, width}; - + std::string timerLabelStatsMipmaps = depth > 1 ? "XY and XYZ statistics and mipmaps" : "XY statistics and mipmaps"; - + for (unsigned int s = 0; s < stokes; s++) { DEBUG(std::cout << "Processing Stokes " << s << "... " << std::endl;); PROGRESS("Stokes " << s << ":" << std::endl); - + PROGRESS("\tMain loop\t"); - + StatsCounter counterXYZ; - + for (hsize_t c = 0; c < depth; c++) { PROGRESS_DECIMATED(c, channelProgressStride, "|"); // read one channel @@ -73,80 +73,82 @@ void SlowConverter::copyAndCalculate() { DEBUG(std::cout << " Reading main dataset..." << std::flush;); TIMER(timer.start("Read");); readFitsData(inputFilePtr, c, s, cubeSize, standardCube); - + // Write the standard dataset - + DEBUG(std::cout << " Writing main dataset..." << std::flush;); TIMER(timer.start("Write");); - + std::vector start = trimAxes({s, c, 0, 0}, N); - writeHdf5Data(standardDataSet, standardCube, memDims, count, start); - +// writeHdf5Data(standardDataSet, standardCube, memDims, count, start); + H5outputfile.write_dataset_nd(standardDataSet, + memDims.size(), memDims.data(), standardCube, count, start); + DEBUG(std::cout << " Accumulating XY stats and mipmaps..." << std::flush;); TIMER(timer.start(timerLabelStatsMipmaps);); StatsCounter counterXY; auto indexXY = c; std::function accumulate; - + auto lazy_accumulate = [&] (float val) { counterXY.accumulateFiniteLazy(val); }; - + auto first_accumulate = [&] (float val) { counterXY.accumulateFiniteLazyFirst(val); accumulate = lazy_accumulate; }; - + accumulate = first_accumulate; - + for (hsize_t y = 0; y < height; y++) { for (hsize_t x = 0; x < width; x++) { auto pos = y * width + x; // relative to channel slice auto& val = standardCube[pos]; - + if (std::isfinite(val)) { // XY statistics accumulate(val); - + // Accumulate mipmaps mipMaps.accumulate(val, x, y, 0); - + } else { counterXY.accumulateNonFinite(); } } } // end of XY loop - + // Final correction of XY min and max DEBUG(std::cout << " Final XY stats..." << std::flush;); statsXY.copyStatsFromCounter(indexXY, height * width, counterXY); - + // Accumulate XYZ statistics if (depth > 1) { DEBUG(std::cout << " Accumulating XYZ stats..." << std::flush;); statsXY.accumulateStatsToCounter(counterXYZ, indexXY); } - + // Final mipmap calculation DEBUG(std::cout << " Final mipmaps..." << std::flush;); mipMaps.calculate(); - - + + // Write the mipmaps DEBUG(std::cout << " Writing mipmaps..." << std::flush;); TIMER(timer.start("Write");); - mipMaps.write(s, c); - + mipMaps.write(H5outputfile, s, c); + // Reset mipmaps before next channel DEBUG(std::cout << " Resetting mipmap objects..." << std::endl;); TIMER(timer.start(timerLabelStatsMipmaps);); mipMaps.resetBuffers(); - + } // end of first channel loop - + PROGRESS(std::endl); - + if (depth > 1) { // Final correction of XYZ min and max DEBUG(std::cout << " Final XYZ stats..." << std::flush;); @@ -154,81 +156,81 @@ void SlowConverter::copyAndCalculate() { TIMER(timer.start(timerLabelStatsMipmaps);); statsXYZ.copyStatsFromCounter(0, depth * height * width, counterXYZ); } - + // XY and XYZ histograms // We need a second pass over all channels because we need cube min and max (and channel min and max per channel) // We do the second pass backwards to take advantage of caching DEBUG(std::cout << " Histograms..." << std::endl;); PROGRESS("\tHistograms\t"); TIMER(timer.start("Histograms");); - + double cubeMin; double cubeMax; double cubeRange; bool cubeHist(false); - + if (depth > 1) { cubeMin = statsXYZ.minVals[0]; cubeMax = statsXYZ.maxVals[0]; cubeRange = cubeMax - cubeMin; cubeHist = std::isfinite(cubeMin) && std::isfinite(cubeMax) && cubeRange > 0; } - + statsXY.clearHistogramBuffers(); statsXYZ.clearHistogramBuffers(); - + DEBUG(std::cout << "+ Will " << (cubeHist ? "" : "not ") << "calculate cube histogram." << std::endl;); - + for (hsize_t c = depth; c-- > 0; ) { DEBUG(std::cout << "+ Processing channel " << c << "... " << std::flush;); PROGRESS_DECIMATED(c, channelProgressStride, "|"); auto indexXY = c; - + double chanMin = statsXY.minVals[indexXY]; double chanMax = statsXY.maxVals[indexXY]; double chanRange = chanMax - chanMin; - + bool chanHist(std::isfinite(chanMin) && std::isfinite(chanMax) && chanRange > 0); DEBUG(std::cout << " Will " << (chanHist ? "" : "not ") << "calculate channel histogram." << std::flush;); - + if (!chanHist && !cubeHist) { continue; } - + auto doChannelHistogram = [&] (float val) { // XY histogram statsXY.accumulateHistogram(val, chanMin, chanRange, c); }; - + auto doCubeHistogram = [&] (float val) { // XYZ histogram statsXYZ.accumulateHistogram(val, cubeMin, cubeRange, 0); }; - + auto doNothing = [&] (float val) { UNUSED(val); }; - + std::function channelHistogramFunc = doChannelHistogram; std::function cubeHistogramFunc = doCubeHistogram; - + if (!chanHist) { channelHistogramFunc = doNothing; } - + if (!cubeHist) { cubeHistogramFunc = doNothing; } - + // read one channel DEBUG(std::cout << " Reading main dataset..." << std::flush;); TIMER(timer.start("Read");); - + readFitsData(inputFilePtr, c, s, cubeSize, standardCube); DEBUG(std::cout << " Calculating histogram(s)..." << std::endl;); TIMER(timer.start("Histograms");); - + for (hsize_t p = 0; p < width * height; p++) { auto& val = standardCube[p]; if (std::isfinite(val)) { @@ -237,94 +239,94 @@ void SlowConverter::copyAndCalculate() { } } // end of XY loop } // end of second channel loop (XY and XYZ histograms) - + PROGRESS(std::endl); - + // Write the statistics TIMER(timer.start("Write");); PROGRESS("\tWrite stats & mipmaps" << std::endl); - - statsXY.write({1, depth}, {s, 0}); - + + statsXY.write(H5outputfile, {1, depth}, {s, 0}); + if (depth > 1) { - statsXYZ.write({1}, {s}); + statsXYZ.write(H5outputfile, {1}, {s}); } - + } // end of stokes - + // Free memory DEBUG(std::cout << "Freeing memory from main dataset... " << std::endl;); TIMER(timer.start("Free");); - + delete[] standardCube; - + // Swizzle if (depth > 1) { DEBUG(std::cout << "Performing tiled rotation." << std::endl;); PROGRESS("Tiled rotation & Z stats" << std::endl); TIMER(timer.start("Allocate");); - + hsize_t sliceSize = product(trimAxes({stokes, depth, TILE_SIZE, TILE_SIZE}, N)); float* standardSlice = new float[sliceSize]; float* rotatedSlice = new float[sliceSize]; - + statsZ.createBuffers({TILE_SIZE, TILE_SIZE}); - + for (unsigned int s = 0; s < stokes; s++) { DEBUG(std::cout << "Processing Stokes " << s << "..." << std::endl;); PROGRESS("\tStokes " << s << "\t"); - + hsize_t tileCount(0); - + for (hsize_t xOffset = 0; xOffset < width; xOffset += TILE_SIZE) { for (hsize_t yOffset = 0; yOffset < height; yOffset += TILE_SIZE) { tileCount++; hsize_t xSize = std::min(TILE_SIZE, width - xOffset); hsize_t ySize = std::min(TILE_SIZE, height - yOffset); - + DEBUG(std::cout << "+ Processing tile slice at " << xOffset << ", " << yOffset << "..." << std::flush;); PROGRESS_DECIMATED(tileCount, tileProgressStride, "#"); - + // read tile slice DEBUG(std::cout << " Reading main dataset..." << std::flush;); TIMER(timer.start("Read");); - + auto standardMemDims = trimAxes({1, depth, ySize, xSize}, N); auto standardCount = trimAxes({1, depth, ySize, xSize}, N); auto standardStart = trimAxes({s, 0, yOffset, xOffset}, N); - - readHdf5Data(standardDataSet, standardSlice, standardMemDims, standardCount, standardStart); - + +// readHdf5Data(standardDataSet, standardSlice, standardMemDims, standardCount, standardStart); + // rotate tile slice DEBUG(std::cout << " Calculating rotation..." << std::flush;); TIMER(timer.start("Rotation");); - + for (hsize_t i = 0; i < depth; i++) { for (hsize_t j = 0; j < ySize; j++) { for (hsize_t k = 0; k < xSize; k++) { auto sourceIndex = k + xSize * j + (ySize * xSize) * i; auto& val = standardSlice[sourceIndex]; - + // rotation auto destIndex = i + depth * j + (ySize * depth) * k; rotatedSlice[destIndex] = val; } } } - - // A separate pass over the same slice depth-last + + // A separate pass over the same slice depth-last DEBUG(std::cout << " Calculating Z statistics..." << std::flush;); TIMER(timer.start("Z statistics");); - + for (hsize_t j = 0; j < ySize; j++) { for (hsize_t k = 0; k < xSize; k++) { StatsCounter counterZ; auto indexZ = k + xSize * j; - + for (hsize_t i = 0; i < depth; i++) { auto sourceIndex = k + xSize * j + (ySize * xSize) * i; auto& val = standardSlice[sourceIndex]; - + if (std::isfinite(val)) { // Not lazy; too much risk of encountering an ascending / descending sequence. counterZ.accumulateFinite(val); @@ -332,29 +334,30 @@ void SlowConverter::copyAndCalculate() { counterZ.accumulateNonFinite(); } } - + statsZ.copyStatsFromCounter(indexZ, depth, counterZ); } } - + // write tile slice DEBUG(std::cout << " Writing rotated dataset..." << std::endl;); TIMER(timer.start("Write");); - + auto swizzledMemDims = trimAxes({1, xSize, ySize, depth}, N); auto swizzledCount = trimAxes({1, xSize, ySize, depth}, N); auto swizzledStart = trimAxes({s, xOffset, yOffset, 0}, N); - - writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart); - + +// writeHdf5Data(swizzledDataSet, rotatedSlice, swizzledMemDims, swizzledCount, swizzledStart); + H5outputfile.write_dataset_nd(swizzledDataSet, swizzledMemDims, rotatedSlice, swizzledCount, swizzledStart); + DEBUG(std::cout << " Writing Z statistics..." << std::endl;); // write Z statistics - statsZ.write({ySize, xSize}, {1, ySize, xSize}, {s, yOffset, xOffset}); + statsZ.write(H5outputfile, {ySize, xSize}, {1, ySize, xSize}, {s, yOffset, xOffset}); } } PROGRESS(std::endl); } - + TIMER(timer.start("Free");); DEBUG(std::cout << "Freeing memory from main and rotated dataset slices... " << std::endl;); delete[] standardSlice; diff --git a/Stats.cc b/Stats.cc index 17ed207..ef625c0 100644 --- a/Stats.cc +++ b/Stats.cc @@ -1,6 +1,9 @@ #include "Stats.h" -Stats::Stats(const std::vector& basicDatasetDims, hsize_t numBins) : basicDatasetDims(basicDatasetDims), numBins(numBins), partialHistMultiplier(0) {} +Stats::Stats(){}; + +Stats::Stats(const std::vector& basicDatasetDims, hsize_t numBins) : basicDatasetDims(basicDatasetDims), numBins(numBins), partialHistMultiplier(0) { +} Stats::~Stats() { if (!fullBasicBufferDims.empty()) { @@ -21,34 +24,46 @@ hsize_t Stats::size(std::vector dims, hsize_t numBins, hsize_t partialH return (2 * sizeof(float) + 2 * sizeof(double) + sizeof(int64_t)) * statsSize + sizeof(int64_t) * (statsSize * numBins + statsSize * numBins * partialHistMultiplier); } -void Stats::createDatasets(H5::Group group, std::string name) { - H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); - floatType.setOrder(H5T_ORDER_LE); - - H5::IntType intType(H5::PredType::NATIVE_INT64); - intType.setOrder(H5T_ORDER_LE); - - createHdf5Dataset(minDset, group, "Statistics/" + name + "/MIN", floatType, basicDatasetDims); - createHdf5Dataset(maxDset, group, "Statistics/" + name + "/MAX", floatType, basicDatasetDims); - createHdf5Dataset(sumDset, group, "Statistics/" + name + "/SUM", floatType, basicDatasetDims); - createHdf5Dataset(ssqDset, group, "Statistics/" + name + "/SUM_SQ", floatType, basicDatasetDims); - createHdf5Dataset(nanDset, group, "Statistics/" + name + "/NAN_COUNT", intType, basicDatasetDims); - +// void Stats::createDatasets(H5::Group group, std::string name) { +void Stats::createDatasets(H5OutputFile &H5outputfile, std::string pathname, std::string name) { + +// H5::FloatType floatType(H5::PredType::NATIVE_FLOAT); +// floatType.setOrder(H5T_ORDER_LE); +// +// H5::IntType intType(H5::PredType::NATIVE_INT64); +// intType.setOrder(H5T_ORDER_LE); +// +// createHdf5Dataset(minDset, group, "Statistics/" + name + "/MIN", floatType, basicDatasetDims); +// createHdf5Dataset(maxDset, group, "Statistics/" + name + "/MAX", floatType, basicDatasetDims); +// createHdf5Dataset(sumDset, group, "Statistics/" + name + "/SUM", floatType, basicDatasetDims); +// createHdf5Dataset(ssqDset, group, "Statistics/" + name + "/SUM_SQ", floatType, basicDatasetDims); +// createHdf5Dataset(nanDset, group, "Statistics/" + name + "/NAN_COUNT", intType, basicDatasetDims); +// +// if (numBins) { +// createHdf5Dataset(histDset, group, "Statistics/" + name + "/HISTOGRAM", intType, extend(basicDatasetDims, {numBins})); +// } + + for (auto i = 0; i dims, hsize_t partialHistMultiplier) { fullBasicBufferDims = dims; auto statsSize = product(dims); - + minVals = new float[statsSize]; maxVals = new float[statsSize]; sums = new double[statsSize]; sumsSq = new double[statsSize]; nanCounts = new int64_t[statsSize]; - + if (numBins) { histograms = new int64_t[statsSize * numBins]; partialHistograms = new int64_t[statsSize * numBins * partialHistMultiplier]; @@ -62,36 +77,45 @@ void Stats::clearHistogramBuffers() { memset(partialHistograms, 0, sizeof(int64_t) * statsSize * numBins * partialHistMultiplier); } -void Stats::write() { - writeBasic(fullBasicBufferDims); - +void Stats::write(H5OutputFile &H5outputfile) { + writeBasic(H5outputfile, fullBasicBufferDims); + if (numBins) { - writeHistogram(fullBasicBufferDims); + writeHistogram(H5outputfile, fullBasicBufferDims); } } -void Stats::write(const std::vector& count, const std::vector& start) { - write(fullBasicBufferDims, count, start); +void Stats::write(H5OutputFile &H5outputfile, const std::vector& count, const std::vector& start) { + write(H5outputfile, fullBasicBufferDims, count, start); } -void Stats::write(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +void Stats::write(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { auto basicN = basicDatasetDims.size(); - writeBasic(basicBufferDims, trimAxes(count, basicN), trimAxes(start, basicN)); - + writeBasic(H5outputfile, basicBufferDims, trimAxes(count, basicN), trimAxes(start, basicN)); + if (numBins) { auto histN = basicN + 1; - writeHistogram(basicBufferDims, trimAxes(extend(count, {numBins}), histN), trimAxes(extend(start, {0}), histN)); + writeHistogram(H5outputfile, basicBufferDims, trimAxes(extend(count, {numBins}), histN), trimAxes(extend(start, {0}), histN)); } } - -void Stats::writeBasic(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { - writeHdf5Data(minDset, minVals, basicBufferDims, count, start); - writeHdf5Data(maxDset, maxVals, basicBufferDims, count, start); - writeHdf5Data(sumDset, sums, basicBufferDims, count, start); - writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start); - writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start); + +// void Stats::writeBasic(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +void Stats::writeBasic(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) +{ +// writeHdf5Data(minDset, minVals, basicBufferDims, count, start); +// writeHdf5Data(maxDset, maxVals, basicBufferDims, count, start); +// writeHdf5Data(sumDset, sums, basicBufferDims, count, start); +// writeHdf5Data(ssqDset, sumsSq, basicBufferDims, count, start); +// writeHdf5Data(nanDset, nanCounts, basicBufferDims, count, start); + H5outputfile.write_to_dataset_nd(datasetfullnames[0], basicBufferDims, minVals, count, start); + H5outputfile.write_to_dataset_nd(datasetfullnames[1], basicBufferDims, maxVals, count, start); + H5outputfile.write_to_dataset_nd(datasetfullnames[2], basicBufferDims, sums, count, start); + H5outputfile.write_to_dataset_nd(datasetfullnames[3], basicBufferDims, sumsSq, count, start); + H5outputfile.write_to_dataset_nd(datasetfullnames[4], basicBufferDims, nanCounts, count, start); } -void Stats::writeHistogram(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { - writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start); +// void Stats::writeHistogram(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +void Stats::writeHistogram(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start) { +// writeHdf5Data(histDset, histograms, extend(basicBufferDims, {numBins}), count, start); + H5outputfile.write_to_dataset_nd(datasetfullnames[5], extend(basicBufferDims, {numBins}), histograms, count, start); } diff --git a/Stats.h b/Stats.h index 50abda2..a32fc65 100644 --- a/Stats.h +++ b/Stats.h @@ -3,11 +3,12 @@ #include "common.h" #include "Util.h" +#include "HDF5Wrapper.h" struct StatsCounter { StatsCounter() : minVal(std::numeric_limits::max()), maxVal(-std::numeric_limits::max()), sum(0), sumSq(0), nanCount(0) { } - + void accumulateFinite(float val) { minVal = fmin(minVal, val); maxVal = fmax(maxVal, val); @@ -44,18 +45,19 @@ struct StatsCounter { }; struct Stats { - Stats() {} + Stats(); Stats(const std::vector& basicDatasetDims, hsize_t numBins = 0); ~Stats(); - + static hsize_t size(std::vector dims, hsize_t numBins = 0, hsize_t partialHistMultiplier = 0); - + // Setup - void createDatasets(H5::Group group, std::string name); + //void createDatasets(H5::Group group, std::string name); + void createDatasets(H5OutputFile &H5outputfile, std::string path, std::string name); void createBuffers(std::vector dims, hsize_t partialHistMultiplier = 0); - + // Basic stats - + void accumulateStatsToCounter(StatsCounter& counter, hsize_t index) { if (std::isfinite(maxVals[index])) { counter.sum += sums[index]; @@ -65,7 +67,7 @@ struct Stats { } counter.nanCount += nanCounts[index]; } - + void copyStatsFromCounter(hsize_t index, hsize_t totalVals, const StatsCounter& counter) { if ((hsize_t)counter.nanCount == totalVals) { minVals[index] = NAN; @@ -78,11 +80,11 @@ struct Stats { sumsSq[index] = counter.sumSq; nanCounts[index] = counter.nanCount; } - + // Histograms // Histograms - + void clearHistogramBuffers(); void accumulateHistogram(float val, double min, double range, hsize_t offset) { @@ -102,39 +104,46 @@ struct Stats { } } } - + // Writing - void write(); - void write(const std::vector& count, const std::vector& start); - void write(const std::vector& basicBufferDims, const std::vector& count, const std::vector& start); - void writeBasic(const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); - void writeHistogram(const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); - + void write(H5OutputFile &H5outputfile); + void write(H5OutputFile &H5outputfile, const std::vector& count, const std::vector& start); + void write(H5OutputFile &H5outputfile, const std::vector& basicBufferDims, const std::vector& count, const std::vector& start); + void writeBasic(H5OutputFile &H5outputfile, const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); + void writeHistogram(H5OutputFile &H5outputfile, const std::vector& basicBufferDims = EMPTY_DIMS, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); + // Dataset dimensions std::vector basicDatasetDims; hsize_t numBins; - + // Datasets - H5::DataSet minDset; - H5::DataSet maxDset; - H5::DataSet sumDset; - H5::DataSet ssqDset; - H5::DataSet nanDset; - - H5::DataSet histDset; - + std::vector datasetnames = {"/MIN", "/MAX", "/SUM", "/SUM_SQ", "/NAN_COUNT"}; + std::vector datasettypes = {H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_FLOAT, H5T_NATIVE_LLONG}; + std::vector datasetids; + std::vector datasetfullnames; + + hid_t minDset; + hid_t maxDset; + hid_t sumDset; + hid_t ssqDset; + hid_t nanDset; + + hid_t histDset; + + // Buffer dimensions - + std::vector fullBasicBufferDims; hsize_t partialHistMultiplier; // Buffers + //why not change this to vectors ??? float* minVals; float* maxVals; double* sums; double* sumsSq; int64_t* nanCounts; - + int64_t* histograms; int64_t* partialHistograms; }; diff --git a/UI.h b/UI.h new file mode 100644 index 0000000..2926b9e --- /dev/null +++ b/UI.h @@ -0,0 +1,86 @@ +#ifndef __UI_H +#define __UI_H + +#include +#include "common.h" + +bool getOptions(int argc, char** argv, std::string& inputFileName, std::string& outputFileName, bool& slow, bool& progress, bool& onlyReportMemory) { + extern int optind; + extern char *optarg; + + int opt; + bool err(false); + + std::ostringstream usage; + usage << "IDIA FITS to HDF5 converter version " << HDF5_CONVERTER_VERSION + << " using IDIA schema version " << SCHEMA_VERSION << std::endl + << "Usage: hdf_convert [-o output_filename] [-s] [-p] [-m] input_filename" << std::endl << std::endl + << "Options:" << std::endl + << "-o\tOutput filename" << std::endl + << "-s\tUse slower but less memory-intensive method (enable if memory allocation fails)" << std::endl + << "-p\tPrint progress output (by default the program is silent)" << std::endl + << "-m\tReport predicted memory usage and exit without performing the conversion" << std::endl + << "-q\tSuppress all non-error output. Deprecated; this is now the default." << std::endl; + + while ((opt = getopt(argc, argv, ":o:spqm")) != -1) { + switch (opt) { + case 'o': + outputFileName.assign(optarg); + break; + case 's': + // use slower but less memory-intensive method + slow = true; + break; + case 'p': + progress = true; + break; + case 'q': + std::cerr << "The -q flag is deprecated. The converter is quiet by default." << std::endl; + break; + case 'm': + // only print memory usage and exit + onlyReportMemory = true; + break; + case ':': + err = true; + std::cerr << "Missing argument for option " << opt << "." << std::endl; + break; + case '?': + err = true; + std::cerr << "Unknown option " << opt << "." << std::endl; + break; + } + } + + if (optind >= argc) { + err = true; + std::cerr << "Missing input filename parameter." << std::endl; + } else { + inputFileName.assign(argv[optind]); + optind++; + } + + if (argc > optind) { + err = true; + std::cerr << "Unexpected additional parameters." << std::endl; + } + + if (err) { + std::cerr << std::endl << usage.str() << std::endl; + return false; + } + + if (outputFileName.empty()) { + auto fitsIndex = inputFileName.find_last_of(".fits"); + if (fitsIndex != std::string::npos) { + outputFileName = inputFileName.substr(0, fitsIndex - 4); + outputFileName += ".hdf5"; + } else { + outputFileName = inputFileName + ".hdf5"; + } + } + + return true; +} + +#endif diff --git a/Util.cc b/Util.cc index d5f42b5..2197ee5 100644 --- a/Util.cc +++ b/Util.cc @@ -145,95 +145,95 @@ void readFitsData(fitsfile* filePtr, hsize_t channel, unsigned int stokes, hsize } // Only available in C++ API from 1.10.1 -bool hdf5Exists(H5::H5Location& location, const std::string& name) { - return H5Lexists(location.getId(), name.c_str(), H5P_DEFAULT) > 0; -} - -void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector dims, const std::vector& chunkDims) { - auto splitPath = split(path, '/'); - - auto name = splitPath.back(); - splitPath.pop_back(); - - for (auto& groupname : splitPath) { - if (!hdf5Exists(group, groupname)) { - group = group.createGroup(groupname); - } else { - group = group.openGroup(groupname); - } - } - - H5::DSetCreatPropList propList; - if (!chunkDims.empty()) { - propList.setChunk(chunkDims.size(), chunkDims.data()); - } - - auto dataSpace = H5::DataSpace(dims.size(), dims.data()); - dataset = group.createDataSet(name, dataType, dataSpace, propList); -} - -void writeHdf5Attribute(H5::Group group, std::string name, std::string value) { - H5::StrType strType(H5::PredType::C_S1, 256); - H5::DataSpace dataSpace(H5S_SCALAR); - auto attribute = group.createAttribute(name, strType, dataSpace); - attribute.write(strType, value); -} - -void writeHdf5Attribute(H5::Group group, std::string name, int64_t value) { - H5::IntType intType(H5::PredType::NATIVE_INT64); - intType.setOrder(H5T_ORDER_LE); - H5::DataSpace dataSpace(H5S_SCALAR); - auto attribute = group.createAttribute(name, intType, dataSpace); - attribute.write(intType, &value); -} - -void writeHdf5Attribute(H5::Group group, std::string name, double value) { - H5::FloatType doubleType(H5::PredType::NATIVE_DOUBLE); - doubleType.setOrder(H5T_ORDER_LE); - H5::DataSpace dataSpace(H5S_SCALAR); - auto attribute = group.createAttribute(name, doubleType, dataSpace); - attribute.write(doubleType, &value); -} - -void writeHdf5Attribute(H5::Group group, std::string name, bool value) { - H5::IntType boolType(H5::PredType::NATIVE_HBOOL); - H5::DataSpace dataSpace(H5S_SCALAR); - auto attribute = group.createAttribute(name, boolType, dataSpace); - attribute.write(boolType, &value); -} - -void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count, const std::vector& start) { - H5::DataSpace memSpace(dims.size(), dims.data()); - auto fileSpace = dataset.getSpace(); - if (!count.empty() && !start.empty()) { - fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); - } - dataset.write(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace); -} - -void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector& dims, const std::vector& count, const std::vector& start) { - H5::DataSpace memSpace(dims.size(), dims.data()); - auto fileSpace = dataset.getSpace(); - if (!count.empty() && !start.empty()) { - fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); - } - dataset.write(data, H5::PredType::NATIVE_DOUBLE, memSpace, fileSpace); -} - -void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector& dims, const std::vector& count, const std::vector& start) { - H5::DataSpace memSpace(dims.size(), dims.data()); - auto fileSpace = dataset.getSpace(); - if (!count.empty() && !start.empty()) { - fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); - } - dataset.write(data, H5::PredType::NATIVE_INT64, memSpace, fileSpace); -} - -void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count, const std::vector& start) { - H5::DataSpace memSpace(dims.size(), dims.data()); - auto fileSpace = dataset.getSpace(); - if (!count.empty() && !start.empty()) { - fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); - } - dataset.read(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace); -} +// bool hdf5Exists(H5::H5Location& location, const std::string& name) { +// return H5Lexists(location.getId(), name.c_str(), H5P_DEFAULT) > 0; +// } +// +// void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector dims, const std::vector& chunkDims) { +// auto splitPath = split(path, '/'); +// +// auto name = splitPath.back(); +// splitPath.pop_back(); +// +// for (auto& groupname : splitPath) { +// if (!hdf5Exists(group, groupname)) { +// group = group.createGroup(groupname); +// } else { +// group = group.openGroup(groupname); +// } +// } +// +// H5::DSetCreatPropList propList; +// if (!chunkDims.empty()) { +// propList.setChunk(chunkDims.size(), chunkDims.data()); +// } +// +// auto dataSpace = H5::DataSpace(dims.size(), dims.data()); +// dataset = group.createDataSet(name, dataType, dataSpace, propList); +// } +// +// void writeHdf5Attribute(H5::Group group, std::string name, std::string value) { +// H5::StrType strType(H5::PredType::C_S1, 256); +// H5::DataSpace dataSpace(H5S_SCALAR); +// auto attribute = group.createAttribute(name, strType, dataSpace); +// attribute.write(strType, value); +// } +// +// void writeHdf5Attribute(H5::Group group, std::string name, int64_t value) { +// H5::IntType intType(H5::PredType::NATIVE_INT64); +// intType.setOrder(H5T_ORDER_LE); +// H5::DataSpace dataSpace(H5S_SCALAR); +// auto attribute = group.createAttribute(name, intType, dataSpace); +// attribute.write(intType, &value); +// } +// +// void writeHdf5Attribute(H5::Group group, std::string name, double value) { +// H5::FloatType doubleType(H5::PredType::NATIVE_DOUBLE); +// doubleType.setOrder(H5T_ORDER_LE); +// H5::DataSpace dataSpace(H5S_SCALAR); +// auto attribute = group.createAttribute(name, doubleType, dataSpace); +// attribute.write(doubleType, &value); +// } +// +// void writeHdf5Attribute(H5::Group group, std::string name, bool value) { +// H5::IntType boolType(H5::PredType::NATIVE_HBOOL); +// H5::DataSpace dataSpace(H5S_SCALAR); +// auto attribute = group.createAttribute(name, boolType, dataSpace); +// attribute.write(boolType, &value); +// } +// +// void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count, const std::vector& start) { +// H5::DataSpace memSpace(dims.size(), dims.data()); +// auto fileSpace = dataset.getSpace(); +// if (!count.empty() && !start.empty()) { +// fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); +// } +// dataset.write(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace); +// } +// +// void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector& dims, const std::vector& count, const std::vector& start) { +// H5::DataSpace memSpace(dims.size(), dims.data()); +// auto fileSpace = dataset.getSpace(); +// if (!count.empty() && !start.empty()) { +// fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); +// } +// dataset.write(data, H5::PredType::NATIVE_DOUBLE, memSpace, fileSpace); +// } +// +// void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector& dims, const std::vector& count, const std::vector& start) { +// H5::DataSpace memSpace(dims.size(), dims.data()); +// auto fileSpace = dataset.getSpace(); +// if (!count.empty() && !start.empty()) { +// fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); +// } +// dataset.write(data, H5::PredType::NATIVE_INT64, memSpace, fileSpace); +// } +// +// void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count, const std::vector& start) { +// H5::DataSpace memSpace(dims.size(), dims.data()); +// auto fileSpace = dataset.getSpace(); +// if (!count.empty() && !start.empty()) { +// fileSpace.selectHyperslab(H5S_SELECT_SET, count.data(), start.data()); +// } +// dataset.read(data, H5::PredType::NATIVE_FLOAT, memSpace, fileSpace); +// } diff --git a/Util.h b/Util.h index dbd119a..51f7773 100644 --- a/Util.h +++ b/Util.h @@ -29,17 +29,17 @@ void readFitsStringAttribute(fitsfile* filePtr, const std::string& name, std::st void readFitsData(fitsfile* filePtr, hsize_t channel, unsigned int stokes, hsize_t size, float* destination); // Only available in C++ API from 1.10.1 -bool hdf5Exists(H5::H5Location& location, const std::string& name); -void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector dims, const std::vector& chunkDims = EMPTY_DIMS); -void writeHdf5Attribute(H5::Group group, std::string name, std::string value); -void writeHdf5Attribute(H5::Group group, std::string name, int64_t value); -void writeHdf5Attribute(H5::Group group, std::string name, double value); -void writeHdf5Attribute(H5::Group group, std::string name, bool value); - -void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); -void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); -void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); - -void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); +// bool hdf5Exists(H5::H5Location& location, const std::string& name); +// void createHdf5Dataset(H5::DataSet& dataset, H5::Group group, std::string path, H5::DataType dataType, std::vector dims, const std::vector& chunkDims = EMPTY_DIMS); +// void writeHdf5Attribute(H5::Group group, std::string name, std::string value); +// void writeHdf5Attribute(H5::Group group, std::string name, int64_t value); +// void writeHdf5Attribute(H5::Group group, std::string name, double value); +// void writeHdf5Attribute(H5::Group group, std::string name, bool value); + +// void writeHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); +// void writeHdf5Data(H5::DataSet& dataset, double* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); +// void writeHdf5Data(H5::DataSet& dataset, int64_t* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); + +// void readHdf5Data(H5::DataSet& dataset, float* data, const std::vector& dims, const std::vector& count = EMPTY_DIMS, const std::vector& start = EMPTY_DIMS); #endif diff --git a/cmake/FindCFITSIO.cmake b/cmake/FindCFITSIO.cmake new file mode 100644 index 0000000..a749701 --- /dev/null +++ b/cmake/FindCFITSIO.cmake @@ -0,0 +1,74 @@ +# - Try to find CFITSIO. +# Variables used by this module: +# CFITSIO_ROOT_DIR - CFITSIO root directory +# Variables defined by this module: +# CFITSIO_FOUND - system has CFITSIO +# CFITSIO_INCLUDE_DIR - the CFITSIO include directory (cached) +# CFITSIO_INCLUDE_DIRS - the CFITSIO include directories +# (identical to CFITSIO_INCLUDE_DIR) +# CFITSIO_LIBRARY - the CFITSIO library (cached) +# CFITSIO_LIBRARIES - the CFITSIO libraries +# (identical to CFITSIO_LIBRARY) +# CFITSIO_VERSION_STRING the found version of CFITSIO, padded to 3 digits + +# Copyright (C) 2009 +# ASTRON (Netherlands Institute for Radio Astronomy) +# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This file is part of the LOFAR software suite. +# The LOFAR software suite is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# The LOFAR software suite is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with the LOFAR software suite. If not, see . +# +# $Id$ + +if(NOT CFITSIO_FOUND) + + find_path(CFITSIO_INCLUDE_DIR fitsio.h + HINTS ${CFITSIO_ROOT_DIR} PATH_SUFFIXES include include/cfitsio + include/libcfitsio0) + + if(CFITSIO_INCLUDE_DIR) + FILE(READ "${CFITSIO_INCLUDE_DIR}/fitsio.h" CFITSIO_H) + set(CFITSIO_VERSION_REGEX ".*#define CFITSIO_VERSION[^0-9]*([0-9]+)\\.([0-9]+).*") + if ("${CFITSIO_H}" MATCHES ${CFITSIO_VERSION_REGEX}) + # Pad CFITSIO minor version to three digit because 3.181 is older than 3.35 + STRING(REGEX REPLACE ${CFITSIO_VERSION_REGEX} + "\\1.\\200" CFITSIO_VERSION_STRING "${CFITSIO_H}") + STRING(SUBSTRING ${CFITSIO_VERSION_STRING} 0 5 CFITSIO_VERSION_STRING) + STRING(REGEX REPLACE "^([0-9]+)[.]([0-9]+)" "\\1" CFITSIO_VERSION_MAJOR ${CFITSIO_VERSION_STRING}) + # CFITSIO_VERSION_MINOR will contain 80 for 3.08, 181 for 3.181 and 200 for 3.2 + STRING(REGEX REPLACE "^([0-9]+)[.]0*([0-9]+)" "\\2" CFITSIO_VERSION_MINOR ${CFITSIO_VERSION_STRING}) + else () + set(CFITSIO_VERSION_STRING "Unknown") + endif() + endif(CFITSIO_INCLUDE_DIR) + + find_library(CFITSIO_LIBRARY cfitsio + HINTS ${CFITSIO_ROOT_DIR} PATH_SUFFIXES lib) + find_library(M_LIBRARY m) + mark_as_advanced(CFITSIO_INCLUDE_DIR CFITSIO_LIBRARY M_LIBRARY) + + if(CMAKE_VERSION VERSION_LESS "2.8.3") + find_package_handle_standard_args(CFITSIO DEFAULT_MSG + CFITSIO_LIBRARY M_LIBRARY CFITSIO_INCLUDE_DIR) + else () + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(CFITSIO + REQUIRED_VARS CFITSIO_LIBRARY M_LIBRARY CFITSIO_INCLUDE_DIR + VERSION_VAR CFITSIO_VERSION_STRING) + endif () + + set(CFITSIO_INCLUDE_DIRS ${CFITSIO_INCLUDE_DIR}) + set(CFITSIO_LIBRARIES ${CFITSIO_LIBRARY} ${M_LIBRARY}) + +endif(NOT CFITSIO_FOUND) diff --git a/common.h b/common.h index dfc21d6..3cc0b2f 100644 --- a/common.h +++ b/common.h @@ -14,7 +14,8 @@ #include #include -#include +// #include +#include #include #define SCHEMA_VERSION "0.3" diff --git a/main.cc b/main.cc index f0ac7a6..d816679 100644 --- a/main.cc +++ b/main.cc @@ -1,106 +1,27 @@ -#include +#include "UI.h" #include "Converter.h" -bool getOptions(int argc, char** argv, std::string& inputFileName, std::string& outputFileName, bool& slow, bool& progress, bool& onlyReportMemory) { - extern int optind; - extern char *optarg; - - int opt; - bool err(false); - - std::ostringstream usage; - usage << "IDIA FITS to HDF5 converter version " << HDF5_CONVERTER_VERSION - << " using IDIA schema version " << SCHEMA_VERSION << std::endl - << "Usage: hdf_convert [-o output_filename] [-s] [-p] [-m] input_filename" << std::endl << std::endl - << "Options:" << std::endl - << "-o\tOutput filename" << std::endl - << "-s\tUse slower but less memory-intensive method (enable if memory allocation fails)" << std::endl - << "-p\tPrint progress output (by default the program is silent)" << std::endl - << "-m\tReport predicted memory usage and exit without performing the conversion" << std::endl - << "-q\tSuppress all non-error output. Deprecated; this is now the default." << std::endl; - - while ((opt = getopt(argc, argv, ":o:spqm")) != -1) { - switch (opt) { - case 'o': - outputFileName.assign(optarg); - break; - case 's': - // use slower but less memory-intensive method - slow = true; - break; - case 'p': - progress = true; - break; - case 'q': - std::cerr << "The -q flag is deprecated. The converter is quiet by default." << std::endl; - break; - case 'm': - // only print memory usage and exit - onlyReportMemory = true; - break; - case ':': - err = true; - std::cerr << "Missing argument for option " << opt << "." << std::endl; - break; - case '?': - err = true; - std::cerr << "Unknown option " << opt << "." << std::endl; - break; - } - } - - if (optind >= argc) { - err = true; - std::cerr << "Missing input filename parameter." << std::endl; - } else { - inputFileName.assign(argv[optind]); - optind++; - } - - if (argc > optind) { - err = true; - std::cerr << "Unexpected additional parameters." << std::endl; - } - - if (err) { - std::cerr << std::endl << usage.str() << std::endl; - return false; - } - - if (outputFileName.empty()) { - auto fitsIndex = inputFileName.find_last_of(".fits"); - if (fitsIndex != std::string::npos) { - outputFileName = inputFileName.substr(0, fitsIndex - 4); - outputFileName += ".hdf5"; - } else { - outputFileName = inputFileName + ".hdf5"; - } - } - - return true; -} - int main(int argc, char** argv) { std::string inputFileName; std::string outputFileName; bool slow(false); bool progress(false); bool onlyReportMemory(false); - + if (!getOptions(argc, argv, inputFileName, outputFileName, slow, progress, onlyReportMemory)) { return 1; } - + std::unique_ptr converter; - + try { converter = Converter::getConverter(inputFileName, outputFileName, slow, progress); - + if (onlyReportMemory) { converter->reportMemoryUsage(); return 0; } - + DEBUG(std::cout << "Converting FITS file " << inputFileName << " to HDF5 file " << outputFileName << (slow ? " using slower, memory-efficient method" : "") << std::endl;); converter->convert();