diff --git a/src/coreComponents/common/initializeEnvironment.hpp b/src/coreComponents/common/initializeEnvironment.hpp index 1072e8147a0..93261ac5694 100644 --- a/src/coreComponents/common/initializeEnvironment.hpp +++ b/src/coreComponents/common/initializeEnvironment.hpp @@ -93,6 +93,9 @@ struct CommandLineOptions /// Print memory usage in data repository real64 printMemoryUsage = -1.0; + + /// Set the archiving level + integer archiveInputDeck = 0; }; /** diff --git a/src/coreComponents/dataRepository/unitTests/testXmlWrapper.cpp b/src/coreComponents/dataRepository/unitTests/testXmlWrapper.cpp index 21c80c890a8..e0e6aef1b63 100644 --- a/src/coreComponents/dataRepository/unitTests/testXmlWrapper.cpp +++ b/src/coreComponents/dataRepository/unitTests/testXmlWrapper.cpp @@ -18,6 +18,9 @@ #include "dataRepository/xmlWrapper.hpp" #include "common/format/EnumStrings.hpp" +#include +#include + using namespace geos; TEST( testXmlWrapper, array3d_errors ) @@ -501,6 +504,214 @@ TEST( testXmlWrapper, testGroupNamesArrayFormats ) } } +class CollectIncludedTest : public ::testing::Test +{ +protected: + std::filesystem::path m_tempDir; + + void SetUp() override + { + m_tempDir = std::filesystem::temp_directory_path() / "geos_collectIncluded_test"; + std::filesystem::create_directories( m_tempDir ); + } + + void TearDown() override + { + std::filesystem::remove_all( m_tempDir ); + } + + string filePath( string const & filename ) + { + return ( m_tempDir / filename ).string(); + } + + void writeXML( string const & filename, string const & content ) + { + std::ofstream f( filePath( filename ) ); + f << content; + } +}; + + +TEST_F( CollectIncludedTest, collectIncluded_noIncludes ) +{ + writeXML( "base.xml", "" + "" ); + + auto result = xmlWrapper::collectIncluded( filePath( "base.xml" ) ); + + EXPECT_TRUE( result.empty() ); +} + +TEST_F( CollectIncludedTest, collectIncluded_singleInclude ) +{ + writeXML( "child.xml", "" + "" ); + writeXML( "base.xml", "" + " " + " " + " " + "" ); 
+ + auto result = xmlWrapper::collectIncluded( filePath( "base.xml" ) ); + + EXPECT_NE( result.find( filePath( "child.xml" ) ), result.end() ); +} + +TEST_F( CollectIncludedTest, collectIncluded_multipleIncludes ) +{ + writeXML( "child1.xml", "" + "" ); + writeXML( "child2.xml", "" + "" ); + writeXML( "base.xml", "" + " " + " " + " " + " " + "" ); + + auto result = xmlWrapper::collectIncluded( filePath( "base.xml" ) ); + + EXPECT_NE( result.find( filePath( "child1.xml" ) ), result.end() ); + EXPECT_NE( result.find( filePath( "child2.xml" ) ), result.end() ); +} + +TEST_F( CollectIncludedTest, collectIncluded_emptyNameAttribute ) +{ + writeXML( "base.xml", "" + " " + " " + " " + "" ); + + std::set< string > result; + + EXPECT_ANY_THROW( xmlWrapper::collectIncluded( filePath( "base.xml" ), result ) ); + + EXPECT_TRUE( result.empty() ); +} + +TEST_F( CollectIncludedTest, collectIncluded_existingEntriesKept ) +{ + std::set< string > existingCollection; + existingCollection.insert( "/somewhere/thereisanalreadyexistingxmlfile.xml" ); + writeXML( "base.xml", "" + "" ); + + xmlWrapper::collectIncluded( filePath( "base.xml" ), existingCollection ); + + EXPECT_NE( existingCollection.find( "/somewhere/thereisanalreadyexistingxmlfile.xml" ), + existingCollection.end() ); +} + + +TEST_F( CollectIncludedTest, collectIncludedRecursive_noIncludes ) +{ + writeXML( "base.xml", "" + "" ); + + auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) ); + + EXPECT_EQ( result.size(), 1 ); // size 1 because collectIncludedRecursive collects the base file +} + +TEST_F( CollectIncludedTest, collectIncludedRecursive_singleInclude ) +{ + writeXML( "child.xml", "" + "" ); + writeXML( "base.xml", "" + " " + " " + " " + "" ); + + auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) ); + + EXPECT_NE( result.find( filePath( "child.xml" ) ), result.end() ); +} + +TEST_F( CollectIncludedTest, collectIncludedRecursive_multipleIncludes ) +{ + writeXML( 
"child1.xml", "" + "" ); + writeXML( "child2.xml", "" + "" ); + writeXML( "base.xml", "" + " " + " " + " " + " " + "" ); + + auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) ); + + EXPECT_NE( result.find( filePath( "child1.xml" ) ), result.end() ); + EXPECT_NE( result.find( filePath( "child2.xml" ) ), result.end() ); +} + +TEST_F( CollectIncludedTest, collectIncludedRecursive_simpleRecursive ) +{ + writeXML( "child.xml", "" + "" ); + + writeXML( "middle.xml", "" + " " + " " + " " + "" ); + + writeXML( "base.xml", "" + " " + " " + " " + "" ); + + auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) ); + + EXPECT_NE( result.find( filePath( "middle.xml" ) ), result.end() ); + EXPECT_NE( result.find( filePath( "child.xml" ) ), result.end() ); +} + +TEST_F( CollectIncludedTest, collectIncludedRecursive_cyclePrevention ) +{ + writeXML( "cycle.xml", "" + " " + " " + " " + "" ); + + auto result = xmlWrapper::collectIncludedRecursive( filePath( "cycle.xml" ) ); + + EXPECT_NE( result.find( filePath( "cycle.xml" ) ), result.end() ); +} + +TEST_F( CollectIncludedTest, collectIncludedRecursive_noDuplicates ) +{ + writeXML( "base.xml", "" + " " + " " + " " + "" ); + + auto result = xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ) ); + + EXPECT_EQ( result.size(), 1 ); // collectIncludedRecursive collects the base file +} + +TEST_F( CollectIncludedTest, collectIncludedRecursive_existingEntriesKept ) +{ + std::set< string > existingCollection; + existingCollection.insert( "/somewhere/thereisanalreadyexistingxmlfile.xml" ); + writeXML( "base.xml", "" + "" ); + + xmlWrapper::collectIncludedRecursive( filePath( "base.xml" ), existingCollection ); + + EXPECT_NE( existingCollection.find( "/somewhere/thereisanalreadyexistingxmlfile.xml" ), + existingCollection.end() ); +} + int main( int argc, char * argv[] ) { diff --git a/src/coreComponents/dataRepository/xmlWrapper.cpp b/src/coreComponents/dataRepository/xmlWrapper.cpp index 
0c5a469a570..46e502a90cf 100644 --- a/src/coreComponents/dataRepository/xmlWrapper.cpp +++ b/src/coreComponents/dataRepository/xmlWrapper.cpp @@ -24,6 +24,7 @@ #include "common/format/StringUtilities.hpp" #include "common/MpiWrapper.hpp" #include "dataRepository/KeyNames.hpp" +#include "common/Path.hpp" namespace geos { @@ -268,6 +269,91 @@ string buildMultipleInputXML( string_array const & inputFileList, return inputFileName; } +void collectIncluded( string const & filePath, + std::set< string > & collection ) +{ + xmlDocument doc; + xmlResult result = doc.loadFile( filePath ); + GEOS_THROW_IF( !result, + GEOS_FMT( "Could not load XML file '{}': {}", filePath, result.description() ), + InputError ); + xmlNode rootNode = doc.getFirstChild(); + + string const currentDir = splitPath( filePath ).first; + + for( auto & includedNode : rootNode.children( includedListTag ) ) + { + for( auto & fileNode : includedNode.children( includedFileTag ) ) + { + string const fileName = fileNode.attribute( "name" ).value(); + + GEOS_THROW_IF( fileName.empty(), + GEOS_FMT( "An included file entry in '{}' has an empty or missing 'name' attribute.", filePath ), + InputError ); + + string absolutePath = isAbsolutePath( fileName ) + ? 
getAbsolutePath( fileName ) + : getAbsolutePath( joinPath( currentDir, fileName ) ); + collection.insert( absolutePath ); + } + } +} + +std::set< string > collectIncluded( string const & filePath ) +{ + std::set< string > collection; + collectIncluded( filePath, collection ); + return collection; +} + +void collectIncludedRecursive( string const & filePath, + std::set< string > & collection ) +{ + // We want absolute paths + string const absFilePath = getAbsolutePath( filePath ); + + if( collection.count( absFilePath ) > 0 ) + { + return; + } + collection.insert( absFilePath ); + + xmlDocument doc; + xmlResult result = doc.loadFile( absFilePath ); + GEOS_THROW_IF( !result, + GEOS_FMT( "Could not load XML file '{}': {}", filePath, result.description() ), + InputError ); + xmlNode rootNode = doc.getFirstChild(); + + string const currentDir = splitPath( filePath ).first; + + for( auto & includedNode : rootNode.children( includedListTag ) ) + { + for( auto & fileNode : includedNode.children( includedFileTag ) ) + { + string const includedFilePath = fileNode.attribute( "name" ).value(); + + if( includedFilePath.empty() ) + { + continue; + } + + string includedAbsPath = isAbsolutePath( includedFilePath ) + ? 
getAbsolutePath( includedFilePath ) + : getAbsolutePath( joinPath( currentDir, includedFilePath ) ); + collectIncludedRecursive( includedAbsPath, + collection ); + } + } +} + +std::set< string > collectIncludedRecursive( string const & filePath ) +{ + std::set< string > collection; + collectIncludedRecursive( filePath, collection ); + return collection; +} + bool isFileMetadataAttribute( string const & name ) { static const std::set< string > fileMetadataAttributes { diff --git a/src/coreComponents/dataRepository/xmlWrapper.hpp b/src/coreComponents/dataRepository/xmlWrapper.hpp index 494eb2db119..cbd04536b5d 100644 --- a/src/coreComponents/dataRepository/xmlWrapper.hpp +++ b/src/coreComponents/dataRepository/xmlWrapper.hpp @@ -301,6 +301,50 @@ constexpr char const includedFileTag[] = "File"; string buildMultipleInputXML( string_array const & inputFileList, string const & outputDir = {} ); +/** + * @brief Collect the absolute paths of XML files directly included + * by a given xml file + * @param[in] filePath absolute path of the xml file to inspect + * @param[inout] collection collection to append with absolute file paths + * + * Only one level of inclusion is collected (files included by the included + * files are not added). See collectIncludedRecursive if you want this behavior. + * Duplicate entries are not inserted in @p collection + */ +void collectIncluded( string const & filePath, + std::set< string > & collection ); + +/** + * @brief Collect the absolute paths of XML files directly included + * by a given xml file + * @param[in] filePath absolute path of the xml file to inspect + * @return a collection of absolute paths + * + * Only one level of inclusion is collected (files included by the included + * files are not added). See collectIncludedRecursive if you want this behavior. 
+ * Duplicate entries are not inserted in the returned collection + */ +std::set< string > collectIncluded( string const & filePath ); + +/** + * @brief Recursively collect the absolute paths of an XML file and all XML + * files it includes + * @param[in] filePath absolute path of the root XML file + * @param[inout] collection collection to append with absolute file paths + * of every visited file (including @p filePath itself) + */ +void collectIncludedRecursive( string const & filePath, + std::set< string > & collection ); + +/** + * @brief Recursively collect the absolute paths of an XML file and all XML + * files it includes + * @param[in] filePath absolute path of the root XML file + * @return a collection of absolute paths of every visited file (including + * @p filePath itself) + */ +std::set< string > collectIncludedRecursive( string const & filePath ); + /** * @return true if the attribute with the specified name declares metadata relative to the xml * @param name the name of an attribute diff --git a/src/coreComponents/fileIO/CMakeLists.txt b/src/coreComponents/fileIO/CMakeLists.txt index eedca99b65c..88177d8a2fe 100644 --- a/src/coreComponents/fileIO/CMakeLists.txt +++ b/src/coreComponents/fileIO/CMakeLists.txt @@ -24,6 +24,7 @@ Contains: # set( fileIO_headers LogLevelsInfo.hpp + Outputs/ArchiveInputDeck.hpp Outputs/BlueprintOutput.hpp Outputs/MemoryStatsOutput.hpp Outputs/OutputBase.hpp @@ -43,6 +44,7 @@ set( fileIO_headers # Specify all sources # set( fileIO_sources + Outputs/ArchiveInputDeck.cpp Outputs/BlueprintOutput.cpp Outputs/MemoryStatsOutput.cpp Outputs/OutputBase.cpp diff --git a/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.cpp b/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.cpp new file mode 100644 index 00000000000..2fd952aee49 --- /dev/null +++ b/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.cpp @@ -0,0 +1,235 @@ +/* + * ------------------------------------------------------------------------------------------------------------ + * 
SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (c) 2016-2024 Lawrence Livermore National Security LLC + * Copyright (c) 2018-2024 TotalEnergies + * Copyright (c) 2018-2024 The Board of Trustees of the Leland Stanford Junior University + * Copyright (c) 2023-2024 Chevron + * Copyright (c) 2019- GEOS/GEOSX Contributors + * All rights reserved + * + * See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details. + * ------------------------------------------------------------------------------------------------------------ + */ + +/** + * @file ArchiveInputDeck.cpp + */ + +#include "ArchiveInputDeck.hpp" + +#include "common/MpiWrapper.hpp" +#include "common/Path.hpp" +#include "common/format/Format.hpp" +#include "common/initializeEnvironment.hpp" +#include "common/logger/Logger.hpp" +#include "dataRepository/Group.hpp" +#include "dataRepository/xmlWrapper.hpp" +#include "mainInterface/ProblemManager.hpp" + +#include + +#include +#include +#include +#include + +namespace geos +{ + +using namespace dataRepository; + +namespace archiveInputDeck +{ + +namespace +{ + +string makeTimestamp() +{ + auto const now = std::chrono::system_clock::now(); + auto const time_t_now = std::chrono::system_clock::to_time_t( now ); + std::ostringstream timestampStream; + timestampStream << std::put_time( std::localtime( &time_t_now ), "%Y%m%d_%H%M%S" ); + return timestampStream.str(); +} + +void stripMetadataAttributes( xmlWrapper::xmlNode node ) +{ + node.remove_attribute( xmlWrapper::filePathString ); + node.remove_attribute( xmlWrapper::charOffsetString ); + + for( xmlWrapper::xmlNode child : node.children() ) + { + stripMetadataAttributes( child ); + } +} + +void reorderTags( xmlWrapper::xmlNode rootNode, string_array const & tagOrder ) +{ + xmlWrapper::xmlNode lastInserted; + for( string const & tagName : tagOrder ) + { + xmlWrapper::xmlNode tag = rootNode.child( tagName.c_str() ); + if( !tag ) + { + continue; + } + + lastInserted ? 
rootNode.insert_move_after( tag, lastInserted ) + : rootNode.append_move( tag ); + + lastInserted = tag; + } + + // ProblemManager's order list doesn't provide every XML tags available in GEOS + // so we put the missing ones below the ones it provides. + // And sort them alphabetically + stdVector< string > missingTags; + + for( xmlWrapper::xmlNode const & tag : rootNode.children() ) + { + string const & tagName = tag.name(); + + if( std::find( tagOrder.begin(), tagOrder.end(), tag.name() ) == tagOrder.end() ) + { + missingTags.push_back( tagName ); + } + } + + std::sort( missingTags.begin(), missingTags.end() ); + + for( string const & tagName : missingTags ) + { + xmlWrapper::xmlNode tag = rootNode.child( tagName.c_str() ); + + if( tag ) + { + rootNode.append_move( tag ); + } + } +} + +void sortAttributes( xmlWrapper::xmlNode node ) +{ + stdVector< std::pair< string, string > > attributes; + for( xmlWrapper::xmlAttribute attr = node.first_attribute(); + attr; + attr = attr.next_attribute() ) + { + attributes.emplace_back( attr.name(), attr.value() ); + } + + std::sort( attributes.begin(), + attributes.end(), + []( std::pair< string, string > const & a, + std::pair< string, string > const & b ) + { + // name attribute should be the first attribute, and not sorted alphabetically + bool const aIsName = ( a.first == "name" ); + bool const bIsName = ( b.first == "name" ); + if( aIsName != bIsName ) + { + return aIsName; + } + + // other attributes are sorted alphabetically + return a.first < b.first; + } ); + + // pugi doesn't have any move_attribute method yet, so we have to + // copy and remove attributes + while( node.remove_attribute( node.first_attribute() ) ) + {} + for( auto const & attr : attributes ) + { + node.append_attribute( attr.first.c_str() ).set_value( attr.second.c_str() ); + } + + for( xmlWrapper::xmlNode child : node.children() ) + { + sortAttributes( child ); + } +} + +xmlWrapper::xmlDocument flattenXMLs( string_array const & fileNames ) +{ + 
xmlWrapper::xmlDocument flatDoc; + xmlWrapper::xmlNode root = flatDoc.appendChild( "Problem" ); + + for( string const & fileName : fileNames ) + { + xmlWrapper::xmlDocument doc; + xmlWrapper::xmlResult const result = doc.loadFile( fileName, true ); + GEOS_THROW_IF( !result, + GEOS_FMT( "Could not load XML file '{}': {}", fileName, result.description() ), + InputError ); + xmlWrapper::xmlNode docRoot = doc.getFirstChild(); + + doc.addIncludedXML( docRoot ); + + for( xmlWrapper::xmlNode & node : docRoot.children() ) + { + root.append_copy( node ); + } + } + + return flatDoc; +} + + +} + + +void archiveInputDeck( CommandLineOptions const & opts ) +{ + if( opts.inputFileNames.empty() || opts.outputDirectory.empty() ) + { + return; + } + + if( MpiWrapper::commRank() != 0 ) + { + return; + } + + // Creates a temporary and isolated ProblemManager to generate the schema.xsd + // because ProblemManager::generateDocumentation() has unwanted side effects + conduit::Node tempRoot; + ProblemManager tempPM( tempRoot ); + + string_array xmlTagOrder; + tempPM.initializationOrder( xmlTagOrder ); + + string const timestamp = makeTimestamp(); + string const archiveDir = joinPath( opts.outputDirectory, "archive_inputFiles", timestamp ); + makeDirsForPath( archiveDir + "/" ); + + xmlWrapper::xmlDocument flatDoc = flattenXMLs( opts.inputFileNames ); + xmlWrapper::xmlNode root = flatDoc.getFirstChild(); + + stripMetadataAttributes( root ); + reorderTags( root, xmlTagOrder ); + sortAttributes( root ); + + flatDoc.saveFile( joinPath( archiveDir, "input.xml" ) ); + + string const schemaPath = joinPath( archiveDir, "schema.xsd" ); + + dataRepository::Group & commandLine = tempPM.getGroup< dataRepository::Group >( tempPM.groupKeys.commandLine ); + commandLine.getReference< string >( tempPM.viewKeys.schemaFileName ) = schemaPath; + + if( opts.archiveInputDeck >= 2 ) + { + tempPM.generateDocumentation(); + } + + std::error_code ec; + std::filesystem::remove( std::filesystem::path( schemaPath + 
".other" ), ec ); +} + + +} /* namespace archiveInputDeck */ + +} /* namespace geos */ diff --git a/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.hpp b/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.hpp new file mode 100644 index 00000000000..a106e66558f --- /dev/null +++ b/src/coreComponents/fileIO/Outputs/ArchiveInputDeck.hpp @@ -0,0 +1,48 @@ +/* + * ------------------------------------------------------------------------------------------------------------ + * SPDX-License-Identifier: LGPL-2.1-only + * + * Copyright (c) 2016-2024 Lawrence Livermore National Security LLC + * Copyright (c) 2018-2024 TotalEnergies + * Copyright (c) 2018-2024 The Board of Trustees of the Leland Stanford Junior University + * Copyright (c) 2023-2024 Chevron + * Copyright (c) 2019- GEOS/GEOSX Contributors + * All rights reserved + * + * See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details. + * ------------------------------------------------------------------------------------------------------------ + */ + +/** + * @file ArchiveInputDeck.hpp + */ + +#ifndef GEOS_FILEIO_OUTPUTS_ARCHIVEINPUTDECK_HPP_ +#define GEOS_FILEIO_OUTPUTS_ARCHIVEINPUTDECK_HPP_ + +#include "common/DataTypes.hpp" + +namespace geos +{ + +struct CommandLineOptions; + +namespace archiveInputDeck +{ + +/** + * @brief Copy the XML input files as a flat XML file into the output directory + * @param opts A reference to the command line options, used to retrieve the input + * file names and the output directory to store the archive + * + * Copy XML input files and every included files they contain (specified in + * the tag) into a single flat file. 
+ */ +void archiveInputDeck( CommandLineOptions const & opts ); + +} /* namespace archiveInputDeck */ + +} /* namespace geos */ + + +#endif // GEOS_FILEIO_OUTPUTS_ARCHIVEINPUTDECK_HPP_ diff --git a/src/coreComponents/mainInterface/initialization.cpp b/src/coreComponents/mainInterface/initialization.cpp index 39367cbb263..e5ea6cf519f 100644 --- a/src/coreComponents/mainInterface/initialization.cpp +++ b/src/coreComponents/mainInterface/initialization.cpp @@ -106,6 +106,7 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char * MEMORY_USAGE, PAUSE_FOR, ERRORSOUTPUT, + ARCHIVE, }; const option::Descriptor usage[] = @@ -130,6 +131,7 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char * { MEMORY_USAGE, 0, "m", "memory-usage", Arg::nonEmpty, "\t-m, --memory-usage, \t Minimum threshold for printing out memory allocations in a member of the data repository." }, { PAUSE_FOR, 0, "", "pause-for", Arg::numeric, "\t--pause-for, \t Pause geosx for a given number of seconds before starting execution" }, { ERRORSOUTPUT, 0, "e", "errorsOutput", Arg::nonEmpty, "\t-e, --errors-output, \t Output path for the errors file (\".yaml\" supported)" }, + { ARCHIVE, 0, "a", "archive", Arg::numeric, "\t-a, --archive, \t Set the archiving strategy level (0 = no archiving, 1 = only XML inputs, 2 = XML inputs and the XSD schema)" }, { 0, 0, nullptr, nullptr, nullptr, nullptr } }; @@ -266,6 +268,12 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char * ErrorLogger::global().createFile(); } break; + case ARCHIVE: + { + integer const level = std::stoi( opt.arg ); + commandLineOptions->archiveInputDeck = level; + } + break; } } diff --git a/src/main/main.cpp b/src/main/main.cpp index 7bfa23a2b02..081ea6203fe 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -18,6 +18,7 @@ #include "common/logger/Logger.hpp" #include "common/TimingMacros.hpp" #include "common/Units.hpp" +#include 
"fileIO/Outputs/ArchiveInputDeck.hpp" #include "mainInterface/initialization.hpp" #include "mainInterface/ProblemManager.hpp" #include "mainInterface/GeosxState.hpp" @@ -37,6 +38,11 @@ int main( int argc, char *argv[] ) outputVersionInfo(); + if( commandLineOptions->archiveInputDeck ) + { + archiveInputDeck::archiveInputDeck( *commandLineOptions ); + } + GEOS_LOG_RANK_0( GEOS_FMT( "Started at {:%Y-%m-%d %H:%M:%S}", startTime ) ); std::chrono::system_clock::duration initTime;