diff --git a/src/coreComponents/common/initializeEnvironment.hpp b/src/coreComponents/common/initializeEnvironment.hpp
index f01c1e7df2b..d268fa1b70a 100644
--- a/src/coreComponents/common/initializeEnvironment.hpp
+++ b/src/coreComponents/common/initializeEnvironment.hpp
@@ -93,6 +93,9 @@ struct CommandLineOptions
 
   /// Print memory usage in data repository
   real64 printMemoryUsage = -1.0;
+
+  /// Input deck archiving level (0 = no archiving, 1 = archive the XML inputs)
+  integer archiveInputDeck = 1;
 };
 
 /**
diff --git a/src/coreComponents/dataRepository/unitTests/testXmlWrapper.cpp b/src/coreComponents/dataRepository/unitTests/testXmlWrapper.cpp
index 21c80c890a8..e0e6aef1b63 100644
--- a/src/coreComponents/dataRepository/unitTests/testXmlWrapper.cpp
+++ b/src/coreComponents/dataRepository/unitTests/testXmlWrapper.cpp
@@ -18,6 +18,10 @@
 #include "dataRepository/xmlWrapper.hpp"
 #include "common/format/EnumStrings.hpp"
 
+#include <filesystem>
+#include <fstream>
+#include <random>
+
 using namespace geos;
 
 TEST( testXmlWrapper, array3d_errors )
@@ -501,6 +505,230 @@ TEST( testXmlWrapper, testGroupNamesArrayFormats )
   }
 }
 
+/**
+ * @brief Fixture providing a scratch directory in which the tests write small XML files.
+ *
+ * NOTE(review): the XML payload literals passed to writeXML() read as empty strings
+ * in this view; presumably the markup was stripped when the patch was extracted.
+ * Confirm against the original patch before merging.
+ */
+class CollectIncludedTest : public ::testing::Test
+{
+protected:
+  /// Scratch directory owned by the running test, removed in TearDown()
+  std::filesystem::path m_tempDir;
+
+  void SetUp() override
+  {
+    // Use a directory unique to this test and this process: a fixed name would be
+    // shared by tests running concurrently, and the TearDown() of one of them
+    // would delete the files of the others.
+    string const testName = ::testing::UnitTest::GetInstance()->current_test_info()->name();
+    m_tempDir = std::filesystem::temp_directory_path() /
+                ( "geos_collectIncluded_test_" + testName + "_" + std::to_string( std::random_device{}() ) );
+    std::filesystem::create_directories( m_tempDir );
+  }
+
+  void TearDown() override
+  {
+    std::filesystem::remove_all( m_tempDir );
+  }
+
+  /// @return the path of @p filename inside the scratch directory
+  string filePath( string const & filename )
+  {
+    return ( m_tempDir / filename ).string();
+  }
+
+  /// Write @p content to @p filename inside the scratch directory
+  void writeXML( string const & filename, string const & content )
+  {
+    std::ofstream f( filePath( filename ) );
+    ASSERT_TRUE( f.is_open() ) << "Could not create " << filePath( filename );
+    f << content;
+  }
+};
+
+
+TEST_F( CollectIncludedTest, collectIncluded_noIncludes )
+{
+  writeXML( "base.xml", ""
+            "" );
+
+  auto result = xmlWrapper::collectIncluded( filePath( "base.xml" ) );
+
+  EXPECT_TRUE( result.empty() );
+}
+
+TEST_F( CollectIncludedTest, collectIncluded_singleInclude )
+{
+  writeXML( "child.xml", ""
+            "" );
+  writeXML( "base.xml", ""
+            " "
+            " "
+            " "
+            "" );
+
+  auto result = xmlWrapper::collectIncluded( filePath( "base.xml" ) );
+
+  EXPECT_NE( result.find( filePath( "child.xml" ) ), result.end() );
+}
+
+TEST_F( CollectIncludedTest, collectIncluded_multipleIncludes )
+{
+  writeXML( "child1.xml", ""
+            "" );
+  writeXML( "child2.xml", ""
+            "" );
+  writeXML( "base.xml", ""
+            " "
+            " "
+            " "
+            " "
+            "" );
+
+  auto result = xmlWrapper::collectIncluded( filePath( "base.xml" ) );
+
+  EXPECT_NE( result.find( filePath( "child1.xml" ) ), result.end() );
+  EXPECT_NE( result.find( filePath( "child2.xml" ) ), result.end() );
+}
+
+TEST_F( CollectIncludedTest, collectIncluded_emptyNameAttribute )
+{
+  writeXML( "base.xml", ""
+            " "
+            " "
+            " "
+            "" );
+
+  std::set< string > result;
+
+  EXPECT_ANY_THROW( xmlWrapper::collectIncluded( filePath( "base.xml" ), result ) );
+
+  EXPECT_TRUE( result.empty() );
+}
+
+TEST_F( CollectIncludedTest, collectIncluded_existingEntriesKept )
+{
+  std::set< string > existingCollection;
+  existingCollection.insert( "/somewhere/thereisanalreadyexistingxmlfile.xml" );
+  writeXML( "base.xml", ""
+            "" );
+
+  xmlWrapper::collectIncluded( filePath( "base.xml" ), existingCollection );
+
+  EXPECT_NE( existingCollection.find( "/somewhere/thereisanalreadyexistingxmlfile.xml" ),
+             existingCollection.end() );
+}
+
+
+TEST_F( CollectIncludedTest, collectIncludedRecursive_noIncludes )
+{
+  writeXML( "base.xml", ""
+            "" );
+
+  auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) );
+
+  EXPECT_EQ( result.size(), 1u ); // size 1 because collectIncludedRecursive collects the base file
+}
+
+TEST_F( CollectIncludedTest, collectIncludedRecursive_singleInclude )
+{
+  writeXML( "child.xml", ""
+            "" );
+  writeXML( "base.xml", ""
+            " "
+            " "
+            " "
+            "" );
+
+  auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) );
+
+  EXPECT_NE( result.find( filePath( "child.xml" ) ), result.end() );
+}
+
+TEST_F( CollectIncludedTest, collectIncludedRecursive_multipleIncludes )
+{
+  writeXML( "child1.xml", ""
+            "" );
+  writeXML( "child2.xml", ""
+            "" );
+  writeXML( "base.xml", ""
+            " "
+            " "
+            " "
+            " "
+            "" );
+
+  auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) );
+
+  EXPECT_NE( result.find( filePath( "child1.xml" ) ), result.end() );
+  EXPECT_NE( result.find( filePath( "child2.xml" ) ), result.end() );
+}
+
+TEST_F( CollectIncludedTest, collectIncludedRecursive_simpleRecursive )
+{
+  writeXML( "child.xml", ""
+            "" );
+
+  writeXML( "middle.xml", ""
+            " "
+            " "
+            " "
+            "" );
+
+  writeXML( "base.xml", ""
+            " "
+            " "
+            " "
+            "" );
+
+  auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) );
+
+  EXPECT_NE( result.find( filePath( "middle.xml" ) ), result.end() );
+  EXPECT_NE( result.find( filePath( "child.xml" ) ), result.end() );
+}
+
+TEST_F( CollectIncludedTest, collectIncludedRecursive_cyclePrevention )
+{
+  writeXML( "cycle.xml", ""
+            " "
+            " "
+            " "
+            "" );
+
+  auto result = xmlWrapper::collectIncludedRecursive( filePath( "cycle.xml" ) );
+
+  EXPECT_NE( result.find( filePath( "cycle.xml" ) ), result.end() );
+}
+
+TEST_F( CollectIncludedTest, collectIncludedRecursive_noDuplicates )
+{
+  writeXML( "base.xml", ""
+            " "
+            " "
+            " "
+            "" );
+
+  auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) );
+
+  EXPECT_EQ( result.size(), 1u ); // collectIncludedRecursive collects the base file
+}
+
+TEST_F( CollectIncludedTest, collectIncludedRecursive_existingEntriesKept )
+{
+  std::set< string > existingCollection;
+  existingCollection.insert( "/somewhere/thereisanalreadyexistingxmlfile.xml" );
+  writeXML( "base.xml", ""
+            "" );
+
+  xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ), existingCollection );
+
+  EXPECT_NE( existingCollection.find( "/somewhere/thereisanalreadyexistingxmlfile.xml" ),
+             existingCollection.end() );
+}
+
 
 int main( int argc, char * argv[] )
 {
diff --git a/src/coreComponents/dataRepository/xmlWrapper.cpp b/src/coreComponents/dataRepository/xmlWrapper.cpp
index 
0c5a469a570..46e502a90cf 100644
--- a/src/coreComponents/dataRepository/xmlWrapper.cpp
+++ b/src/coreComponents/dataRepository/xmlWrapper.cpp
@@ -24,6 +24,7 @@
 #include "common/format/StringUtilities.hpp"
 #include "common/MpiWrapper.hpp"
 #include "dataRepository/KeyNames.hpp"
+#include "common/Path.hpp"
 
 namespace geos
 {
@@ -268,6 +269,97 @@ string buildMultipleInputXML( string_array const & inputFileList,
   return inputFileName;
 }
 
+/**
+ * @brief Resolve the path of a file listed in an Included block
+ * @param[in] currentDir directory of the XML file declaring the inclusion
+ * @param[in] fileName path read from the "name" attribute of the File node
+ * @return the absolute path of the included file; a relative @p fileName
+ *         is interpreted relatively to @p currentDir
+ */
+static string resolveIncludedPath( string const & currentDir,
+                                   string const & fileName )
+{
+  return isAbsolutePath( fileName )
+         ? getAbsolutePath( fileName )
+         : getAbsolutePath( joinPath( currentDir, fileName ) );
+}
+
+void collectIncluded( string const & filePath,
+                      std::set< string > & collection )
+{
+  xmlDocument doc;
+  xmlResult result = doc.loadFile( filePath );
+  GEOS_THROW_IF( !result,
+                 GEOS_FMT( "Could not load XML file '{}': {}", filePath, result.description() ),
+                 InputError );
+  xmlNode rootNode = doc.getFirstChild();
+
+  // Relative inclusions are resolved from the directory of the including file
+  string const currentDir = splitPath( filePath ).first;
+
+  for( auto & includedNode : rootNode.children( includedListTag ) )
+  {
+    for( auto & fileNode : includedNode.children( includedFileTag ) )
+    {
+      string const fileName = fileNode.attribute( "name" ).value();
+
+      GEOS_THROW_IF( fileName.empty(),
+                     GEOS_FMT( "An included file entry in '{}' has an empty or missing 'name' attribute.", filePath ),
+                     InputError );
+
+      collection.insert( resolveIncludedPath( currentDir, fileName ) );
+    }
+  }
+}
+
+std::set< string > collectIncluded( string const & filePath )
+{
+  std::set< string > collection;
+  collectIncluded( filePath, collection );
+  return collection;
+}
+
+void collectIncludedRecursive( string const & filePath,
+                               std::set< string > & collection )
+{
+  // Visited files are identified by their absolute path
+  string const absFilePath = getAbsolutePath( filePath );
+
+  // insert() tells whether the file was already visited, which ends the
+  // recursion on files included several times and on cyclic inclusions
+  if( !collection.insert( absFilePath ).second )
+  {
+    return;
+  }
+
+  xmlDocument doc;
+  xmlResult result = doc.loadFile( absFilePath );
+  GEOS_THROW_IF( !result,
+                 GEOS_FMT( "Could not load XML file '{}': {}", absFilePath, result.description() ),
+                 InputError );
+  xmlNode rootNode = doc.getFirstChild();
+
+  string const currentDir = splitPath( filePath ).first;
+
+  for( auto & includedNode : rootNode.children( includedListTag ) )
+  {
+    for( auto & fileNode : includedNode.children( includedFileTag ) )
+    {
+      string const includedFilePath = fileNode.attribute( "name" ).value();
+
+      // Unlike collectIncluded(), entries without a name are skipped
+      if( includedFilePath.empty() )
+      {
+        continue;
+      }
+
+      collectIncludedRecursive( resolveIncludedPath( currentDir, includedFilePath ),
+                                collection );
+    }
+  }
+}
+
+std::set< string > collectIncludedRecursive( string const & filePath )
+{
+  std::set< string > collection;
+  collectIncludedRecursive( filePath, collection );
+  return collection;
+}
+
 bool isFileMetadataAttribute( string const & name )
 {
   static const std::set< string > fileMetadataAttributes {
diff --git a/src/coreComponents/dataRepository/xmlWrapper.hpp b/src/coreComponents/dataRepository/xmlWrapper.hpp
index 494eb2db119..cbd04536b5d 100644
--- a/src/coreComponents/dataRepository/xmlWrapper.hpp
+++ b/src/coreComponents/dataRepository/xmlWrapper.hpp
@@ -301,6 +301,52 @@ constexpr char const includedFileTag[] = "File";
 string buildMultipleInputXML( string_array const & inputFileList,
                               string const & outputDir = {} );
 
+/**
+ * @brief Collect the absolute paths of XML files directly included
+ *        by a given xml file
+ * @param[in] filePath absolute path of the xml file to inspect
+ * @param[inout] collection collection to append with absolute file paths
+ * @throw InputError if @p filePath cannot be loaded or if a File entry has an empty name
+ *
+ * Only one level of inclusion is collected (files included by the included
+ * files are not added). Use collectIncludedRecursive to also collect those.
+ * Duplicate entries are not inserted in @p collection
+ */
+void collectIncluded( string const & filePath,
+                      std::set< string > & collection );
+
+/**
+ * @brief Collect the absolute paths of XML files directly included
+ *        by a given xml file
+ * @param[in] filePath absolute path of the xml file to inspect
+ * @return a collection of absolute paths
+ * @throw InputError if @p filePath cannot be loaded or if a File entry has an empty name
+ *
+ * Only one level of inclusion is collected (files included by the included
+ * files are not added). Use collectIncludedRecursive to also collect those.
+ * Duplicate entries are not inserted in the returned collection
+ */
+std::set< string > collectIncluded( string const & filePath );
+
+/**
+ * @brief Recursively collect the absolute paths of an XML file and all XML
+ *        files it includes
+ * @param[in] filePath absolute path of the root XML file
+ * @param[inout] collection collection to append with absolute file paths
+ *                          of every visited file (including @p filePath itself)
+ */
+void collectIncludedRecursive( string const & filePath,
+                               std::set< string > & collection );
+
+/**
+ * @brief Recursively collect the absolute paths of an XML file and all XML
+ *        files it includes
+ * @param[in] filePath absolute path of the root XML file
+ * @return a collection of absolute paths of every visited file (including
+ *         @p filePath itself)
+ */
+std::set< string > collectIncludedRecursive( string const & filePath );
+
 /**
  * @return true if the attribute with the specified name declares metadata relative to the xml
  * @param name the name of an attribute
diff --git a/src/coreComponents/fileIO/CMakeLists.txt b/src/coreComponents/fileIO/CMakeLists.txt
index eedca99b65c..88177d8a2fe 100644
--- a/src/coreComponents/fileIO/CMakeLists.txt
+++ b/src/coreComponents/fileIO/CMakeLists.txt
@@ -24,6 +24,7 @@ Contains:
 #
 set( fileIO_headers
      LogLevelsInfo.hpp
+     Outputs/ArchiveInputDeck.hpp
      Outputs/BlueprintOutput.hpp
      Outputs/MemoryStatsOutput.hpp
      Outputs/OutputBase.hpp
@@ -43,6 +44,7 @@ set( fileIO_headers
 # Specify all sources
 #
 set( fileIO_sources
+     Outputs/ArchiveInputDeck.cpp
      Outputs/BlueprintOutput.cpp
      Outputs/MemoryStatsOutput.cpp
      Outputs/OutputBase.cpp
diff --git a/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.cpp b/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.cpp
new file mode 100644
index 00000000000..0de455d0fc4
--- /dev/null
+++ b/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.cpp
@@ -0,0 +1,254 @@
+/*
+ * ------------------------------------------------------------------------------------------------------------
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Copyright (c) 2016-2024 Lawrence Livermore National Security LLC
+ * Copyright (c) 2018-2024 TotalEnergies
+ * Copyright (c) 2018-2024 The Board of Trustees of the Leland Stanford Junior University
+ * Copyright (c) 2023-2024 Chevron
+ * Copyright (c) 2019- GEOS/GEOSX Contributors
+ * All rights reserved
+ *
+ * See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details.
+ * ------------------------------------------------------------------------------------------------------------
+ */
+
+/**
+ * @file ArchiveInputDeck.cpp
+ */
+
+#include "ArchiveInputDeck.hpp"
+
+#include "common/Path.hpp"
+#include "common/format/Format.hpp"
+#include "common/logger/Logger.hpp"
+#include "dataRepository/xmlWrapper.hpp"
+
+#include <algorithm>
+#include <chrono>
+#include <ctime>
+#include <iomanip>
+#include <iterator>
+#include <sstream>
+#include <utility>
+
+namespace geos
+{
+
+using namespace dataRepository;
+
+namespace archiveInputDeck
+{
+
+namespace
+{
+
+/**
+ * @brief Build a timestamp from the current local time
+ * @return the timestamp, formatted as YYYYMMDD_HHMMSS
+ */
+string makeTimestamp()
+{
+  std::time_t const now = std::chrono::system_clock::to_time_t( std::chrono::system_clock::now() );
+
+  // localtime_r fills a caller-owned buffer, whereas std::localtime returns a
+  // pointer to storage shared by the whole process
+  std::tm localTime{};
+  localtime_r( &now, &localTime );
+
+  std::ostringstream timestampStream;
+  timestampStream << std::put_time( &localTime, "%Y%m%d_%H%M%S" );
+  return timestampStream.str();
+}
+
+/**
+ * @brief Remove the xmlWrapper::filePathString and xmlWrapper::charOffsetString
+ *        attributes from a node and from all its descendants
+ * @param node the root of the subtree to clean
+ */
+void stripMetadataAttributes( xmlWrapper::xmlNode node )
+{
+  node.remove_attribute( xmlWrapper::filePathString );
+  node.remove_attribute( xmlWrapper::charOffsetString );
+
+  for( xmlWrapper::xmlNode child : node.children() )
+  {
+    stripMetadataAttributes( child );
+  }
+}
+
+/**
+ * @brief Reorder the named children of a node
+ * @param rootNode the node whose children are reordered
+ * @param tagOrder the tag names, in the order they must appear
+ *
+ * The tags listed in @p tagOrder are placed first, in that order, followed by
+ * the other tags sorted alphabetically. Tags sharing a name (e.g. coming from
+ * different input files) are kept together, in their original relative order.
+ */
+void reorderTags( xmlWrapper::xmlNode rootNode, string_array const & tagOrder )
+{
+  // Position of a tag in tagOrder (tagOrder.size() for the tags it does not list)
+  auto const rank = [&tagOrder]( xmlWrapper::xmlNode const & tag )
+  {
+    return std::distance( tagOrder.begin(),
+                          std::find( tagOrder.begin(), tagOrder.end(), tag.name() ) );
+  };
+
+  // Work on a snapshot of the named children, as moving nodes while iterating
+  // on rootNode.children() would disturb the traversal
+  stdVector< xmlWrapper::xmlNode > tags;
+  for( xmlWrapper::xmlNode const & tag : rootNode.children() )
+  {
+    if( !string( tag.name() ).empty() )
+    {
+      tags.push_back( tag );
+    }
+  }
+
+  // ProblemManager's order list doesn't provide every XML tag available in GEOS,
+  // so the missing ones are put below the ones it provides, sorted alphabetically.
+  // The sort is stable, so tags with the same name keep their relative order.
+  std::stable_sort( tags.begin(),
+                    tags.end(),
+                    [&rank]( xmlWrapper::xmlNode const & a,
+                             xmlWrapper::xmlNode const & b )
+  {
+    auto const rankA = rank( a );
+    auto const rankB = rank( b );
+    if( rankA != rankB )
+    {
+      return rankA < rankB;
+    }
+
+    // same rank: either the same listed tag, or two tags missing from tagOrder
+    return string( a.name() ) < string( b.name() );
+  } );
+
+  // Moving every tag to the end of rootNode, in sorted order, applies that order
+  for( xmlWrapper::xmlNode const & tag : tags )
+  {
+    rootNode.append_move( tag );
+  }
+}
+
+/**
+ * @brief Sort the attributes of a node and of all its descendants
+ * @param node the root of the subtree to process
+ *
+ * The "name" attribute is placed first, the others are sorted alphabetically.
+ */
+void sortAttributes( xmlWrapper::xmlNode node )
+{
+  stdVector< std::pair< string, string > > attributes;
+  for( xmlWrapper::xmlAttribute attr = node.first_attribute();
+       attr;
+       attr = attr.next_attribute() )
+  {
+    attributes.emplace_back( attr.name(), attr.value() );
+  }
+
+  std::sort( attributes.begin(),
+             attributes.end(),
+             []( std::pair< string, string > const & a,
+                 std::pair< string, string > const & b )
+  {
+    // name attribute should be the first attribute, and not sorted alphabetically
+    bool const aIsName = ( a.first == "name" );
+    bool const bIsName = ( b.first == "name" );
+    if( aIsName != bIsName )
+    {
+      return aIsName;
+    }
+
+    // other attributes are sorted alphabetically
+    return a.first < b.first;
+  } );
+
+  // pugi doesn't have any move_attribute method yet, so we have to
+  // copy and remove attributes
+  while( node.remove_attribute( node.first_attribute() ) )
+  {}
+  for( auto const & attr : attributes )
+  {
+    node.append_attribute( attr.first.c_str() ).set_value( attr.second.c_str() );
+  }
+
+  for( xmlWrapper::xmlNode child : node.children() )
+  {
+    sortAttributes( child );
+  }
+}
+
+/**
+ * @brief Gather the content of several XML files into a single document
+ * @param fileNames the XML files to load
+ * @return a document whose "Problem" root node holds a copy of the children of
+ *         the root node of every file, once addIncludedXML() was applied to it
+ * @throw InputError if one of the files cannot be loaded
+ */
+xmlWrapper::xmlDocument flattenXMLs( string_array const & fileNames )
+{
+  xmlWrapper::xmlDocument flatDoc;
+  xmlWrapper::xmlNode root = flatDoc.appendChild( "Problem" );
+
+  for( string const & fileName : fileNames )
+  {
+    xmlWrapper::xmlDocument doc;
+    xmlWrapper::xmlResult const result = doc.loadFile( fileName, true );
+    GEOS_THROW_IF( !result,
+                   GEOS_FMT( "Could not load XML file '{}': {}", fileName, result.description() ),
+                   InputError );
+    xmlWrapper::xmlNode docRoot = doc.getFirstChild();
+
+    doc.addIncludedXML( docRoot );
+
+    for( xmlWrapper::xmlNode & node : docRoot.children() )
+    {
+      root.append_copy( node );
+    }
+  }
+
+  return flatDoc;
+}
+
+
+}
+
+
+void archiveInputDeck( string_array const & inputFileNames,
+                       string const & outputDirectory,
+                       string_array const & xmlTagOrder )
+{
+  if( inputFileNames.empty() || outputDirectory.empty() )
+  {
+    return;
+  }
+
+  string const timestamp = makeTimestamp();
+  string const archiveDir = joinPath( outputDirectory, "archive_inputFiles", timestamp );
+  makeDirsForPath( archiveDir + "/" );
+
+  xmlWrapper::xmlDocument flatDoc = flattenXMLs( inputFileNames );
+  xmlWrapper::xmlNode root = flatDoc.getFirstChild();
+
+  stripMetadataAttributes( root );
+  reorderTags( root, xmlTagOrder );
+  sortAttributes( root );
+
+  string const inputArchiveFile = joinPath( archiveDir, "input.xml" );
+  if( !flatDoc.saveFile( inputArchiveFile ) )
+  {
+    // Do not announce an archive that could not be written
+    GEOS_LOG_RANK_0( GEOS_FMT( "Could not write the XML inputs archive: {}", inputArchiveFile ) );
+    return;
+  }
+
+  GEOS_LOG_RANK_0( GEOS_FMT( "Archived XML inputs: {}",
+                             getAbsolutePath( inputArchiveFile ) ) );
+}
+
+
+} /* namespace archiveInputDeck */
+
+} /* namespace geos */
diff --git a/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.hpp b/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.hpp
new file mode 100644
index 00000000000..17ac785ab6f
--- /dev/null
+++ b/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.hpp
@@ -0,0 +1,49 @@
+/*
+ * ------------------------------------------------------------------------------------------------------------
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Copyright (c) 2016-2024 Lawrence Livermore National Security LLC
+ * 
Copyright (c) 2018-2024 TotalEnergies
+ * Copyright (c) 2018-2024 The Board of Trustees of the Leland Stanford Junior University
+ * Copyright (c) 2023-2024 Chevron
+ * Copyright (c) 2019- GEOS/GEOSX Contributors
+ * All rights reserved
+ *
+ * See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details.
+ * ------------------------------------------------------------------------------------------------------------
+ */
+
+/**
+ * @file ArchiveInputDeck.hpp
+ */
+
+#ifndef GEOS_FILEIO_OUTPUTS_ARCHIVEINPUTDECK_HPP_
+#define GEOS_FILEIO_OUTPUTS_ARCHIVEINPUTDECK_HPP_
+
+#include "common/DataTypes.hpp"
+
+namespace geos
+{
+
+namespace archiveInputDeck
+{
+
+/**
+ * @brief Copy XML input files as a flat XML file into the output directory
+ * @param inputFileNames Container of XML file names to start the copy from
+ * @param outputDirectory The output directory, in which archive_inputFiles/<timestamp>/input.xml is written
+ * @param xmlTagOrder The order of the XML tags in the XML archive file
+ *
+ * Copy the XML input files and every file they include (specified in
+ * the Included tag) into a single flat file. Does nothing if @p inputFileNames or @p outputDirectory is empty.
+ */
+void archiveInputDeck( string_array const & inputFileNames,
+                       string const & outputDirectory,
+                       string_array const & xmlTagOrder );
+
+} /* namespace archiveInputDeck */
+
+} /* namespace geos */
+
+
+#endif // GEOS_FILEIO_OUTPUTS_ARCHIVEINPUTDECK_HPP_
diff --git a/src/coreComponents/mainInterface/ProblemManager.cpp b/src/coreComponents/mainInterface/ProblemManager.cpp
index 8f824fa7b04..1e52308a8df 100644
--- a/src/coreComponents/mainInterface/ProblemManager.cpp
+++ b/src/coreComponents/mainInterface/ProblemManager.cpp
@@ -37,6 +37,7 @@
 #include "finiteVolume/FluxApproximationBase.hpp"
 #include "finiteVolume/HybridMimeticDiscretization.hpp"
 #include "fieldSpecification/FieldSpecificationManager.hpp"
+#include "fileIO/Outputs/ArchiveInputDeck.hpp"
 #include "fileIO/Outputs/OutputBase.hpp"
 #include "fileIO/Outputs/OutputManager.hpp"
 #include "functions/FunctionManager.hpp"
@@ -323,6 +324,14 @@ void ProblemManager::parseCommandLineInput()
     GEOS_LOG_RANK_0( "Opened XML file: " << absPath );
   }
 
+  // Level 0 (or any non-positive value) disables archiving; only rank 0 writes the archive
+  if( opts.archiveInputDeck > 0 && MpiWrapper::commRank() == 0 )
+  {
+    string_array xmlTagOrder;
+    initializationOrder( xmlTagOrder );
+    archiveInputDeck::archiveInputDeck( opts.inputFileNames, outputDirectory, xmlTagOrder );
+  }
+
   inputFileName = xmlWrapper::buildMultipleInputXML( opts.inputFileNames, outputDirectory );
 
   string & schemaName = commandLine.getReference< string >( viewKeys.schemaFileName );
diff --git a/src/coreComponents/mainInterface/initialization.cpp b/src/coreComponents/mainInterface/initialization.cpp
index d371259f990..0a27527e220 100644
--- a/src/coreComponents/mainInterface/initialization.cpp
+++ b/src/coreComponents/mainInterface/initialization.cpp
@@ -106,6 +106,7 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char *
     MEMORY_USAGE,
     PAUSE_FOR,
     ERRORSOUTPUT,
+    ARCHIVE,
   };
 
   const option::Descriptor usage[] =
@@ -130,6 +131,7 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char *
     { MEMORY_USAGE, 0, "m", "memory-usage", Arg::nonEmpty, "\t-m, --memory-usage, \t Minimum threshold for printing out memory allocations in a member of the data repository." },
     { PAUSE_FOR, 0, "", "pause-for", Arg::numeric, "\t--pause-for, \t Pause geosx for a given number of seconds before starting execution" },
     { ERRORSOUTPUT, 0, "e", "errorsOutput", Arg::nonEmpty, "\t-e, --errors-output, \t Output path for the errors file (\".yaml\" supported)" },
+    { ARCHIVE, 0, "a", "archive", Arg::numeric, "\t-a, --archive, \t Set the archiving strategy level (0 = no archiving, 1 = only XML inputs (default))" },
     { 0, 0, nullptr, nullptr, nullptr, nullptr }
   };
 
@@ -266,6 +268,12 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char *
         ErrorLogger::global().createFile();
       }
       break;
+      case ARCHIVE:
+      {
+        integer const level = std::stoi( opt.arg );
+        commandLineOptions->archiveInputDeck = level;
+      }
+      break;
     }
   }