diff --git a/bindings/python_bindings.cpp b/bindings/python_bindings.cpp
index fe3dd8f..e10999d 100644
--- a/bindings/python_bindings.cpp
+++ b/bindings/python_bindings.cpp
@@ -99,5 +99,6 @@ PYBIND11_MODULE(_py_capio_cl, m) {
         });
 
     m.def("serialize", &capiocl::serializer::Serializer::dump, py::arg("engine"),
-          py::arg("filename"), py::arg("version") = capiocl::CAPIO_CL_VERSION::V1);
+          py::arg("filename"), py::arg("compress") = false,
+          py::arg("version") = capiocl::CAPIO_CL_VERSION::V1);
 }
\ No newline at end of file
diff --git a/capiocl/serializer.h b/capiocl/serializer.h
index 1c20ef7..63f3a18 100644
--- a/capiocl/serializer.h
+++ b/capiocl/serializer.h
@@ -28,6 +28,22 @@ class SerializerException final : public std::exception {
 /// @brief Dump the current loaded CAPIO-CL configuration from class Engine to a CAPIO-CL
 /// configuration file.
 class Serializer final {
+    /**
+     * Tell whether an entry can be left out of the serialized output because its parent entry
+     * already carries the same commit and fire rules (no other attribute is compared).
+     * @param compress compression switch; when false the function always returns false
+     * @param path entry to test; entries the engine reports as directories are never omitted
+     * @param engine engine queried for the rules of path and of path.parent_path()
+     * @return true when the entry may be skipped by the serializer, false otherwise
+     */
+    static bool entryCanBeCompressed(bool compress, const std::filesystem::path &path,
+                                     const engine::Engine &engine);
+
+    /**
+     * Sort paths in place from longest to shortest; equal lengths use descending string order
+     * @param paths paths to reorder
+     */
+    static void sortPathsByDecreasingLength(std::vector<std::string> &paths);
 
     /// @brief Available serializers for CAPIO-CL
     struct available_serializers {
@@ -37,10 +53,11 @@ class Serializer final {
          *
          * @param engine instance of Engine to dump
          * @param filename path of output file
+         * @param compress omit entries that entryCanBeCompressed() reports as redundant
          * @throws SerializerException
          */
         static void serialize_v1(const engine::Engine &engine,
-                                 const std::filesystem::path &filename);
+                                 const std::filesystem::path &filename, bool compress = false);
 
         /**
          * @brief Dump the current configuration loaded into an instance of Engine to a CAPIO-CL
@@ -48,10 +65,11 @@ class Serializer final {
          *
          * @param engine instance of Engine to dump
          * @param filename path of output file
+         * @param compress omit entries that entryCanBeCompressed() reports as redundant
          * @throws SerializerException
          */
         static void serialize_v1_1(const engine::Engine &engine,
-                                   const std::filesystem::path &filename);
+                                   const std::filesystem::path &filename, bool compress = false);
     };
 
   public:
@@ -61,10 +79,13 @@ class Serializer final {
      *
      * @param engine instance of Engine to dump
      * @param filename path of output file
+     * @param compress Omit non-directory entries whose parent entry has the same commit and
+     *                 fire rules. Declared before version: a string literal passed in this
+     *                 position converts to bool, so pass compress whenever version is given.
      * @param version Version of CAPIO-CL used to generate configuration files.
      */
     static void dump(const engine::Engine &engine, const std::filesystem::path &filename,
-                     const std::string &version = CAPIO_CL_VERSION::V1);
+                     bool compress = false, const std::string &version = CAPIO_CL_VERSION::V1);
 };
 } // namespace capiocl::serializer
 #endif // CAPIO_CL_SERIALIZER_H
\ No newline at end of file
diff --git a/src/Serializer.cpp b/src/Serializer.cpp
index 3af869e..c211e95 100644
--- a/src/Serializer.cpp
+++ b/src/Serializer.cpp
@@ -9,13 +9,13 @@
 
 void capiocl::serializer::Serializer::dump(const engine::Engine &engine,
                                            const std::filesystem::path &filename,
-                                           const std::string &version) {
+                                           const bool compress, const std::string &version) {
     if (version == CAPIO_CL_VERSION::V1) {
         printer::print(printer::CLI_LEVEL_INFO, "Serializing engine with V1 specification");
-        available_serializers::serialize_v1(engine, filename);
+        available_serializers::serialize_v1(engine, filename, compress);
     } else if (version == CAPIO_CL_VERSION::V1_1) {
         printer::print(printer::CLI_LEVEL_INFO, "Serializing engine with V1.1 specification");
-        available_serializers::serialize_v1_1(engine, filename);
+        available_serializers::serialize_v1_1(engine, filename, compress);
     } else {
         const auto message = "No serializer available for CAPIO-CL version: " + version;
         throw SerializerException(message);
@@ -25,4 +25,36 @@ void capiocl::serializer::Serializer::dump(const engine::Engine &engine,
 capiocl::serializer::SerializerException::SerializerException(const std::string &msg)
     : message(msg) {
     printer::print(printer::CLI_LEVEL_ERROR, msg);
+}
+
+bool capiocl::serializer::Serializer::entryCanBeCompressed(const bool compress,
+                                                           const std::filesystem::path &path,
+                                                           const engine::Engine &engine) {
+
+    if (!compress) {
+        return false;
+    }
+
+    if (engine.isDirectory(path)) {
+        return false;
+    }
+
+    const auto parent_path = path.parent_path();
+
+    if (!engine.contains(parent_path)) {
+        return false;
+    }
+
+    return engine.getCommitRule(path) == engine.getCommitRule(parent_path) &&
+           engine.getFireRule(path) == engine.getFireRule(parent_path);
+}
+
+
+void capiocl::serializer::Serializer::sortPathsByDecreasingLength(std::vector<std::string> &paths) {
+    std::sort(paths.begin(), paths.end(), [](const std::string &a, const std::string &b) {
+        if (a.length() != b.length()) {
+            return a.length() > b.length();
+        }
+        return a > b;
+    });
 }
\ No newline at end of file
diff --git a/src/serializers/v1.1.cpp b/src/serializers/v1.1.cpp
index b862711..0767050 100644
--- a/src/serializers/v1.1.cpp
+++ b/src/serializers/v1.1.cpp
@@ -6,7 +6,13 @@
 #include "capiocl/serializer.h"
 
 void capiocl::serializer::Serializer::available_serializers::serialize_v1_1(
-    const engine::Engine &engine, const std::filesystem::path &filename) {
+    const engine::Engine &engine, const std::filesystem::path &filename, const bool compress) {
+
+    if (compress) {
+        printer::print(printer::CLI_LEVEL_WARNING,
+                       "Using configuration compression to directories!");
+    }
+
     jsoncons::json doc;
     doc["version"] = 1.1;
     doc["name"] = engine.getWorkflowName();
@@ -24,7 +30,22 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1_1(
     jsoncons::json storage = jsoncons::json::object();
     jsoncons::json io_graph = jsoncons::json::array();
 
-    for (const auto &[path, entry] : files) {
+    std::vector<std::string> keys;
+    keys.reserve(files.size());
+    for (const auto &[k, v] : files) {
+        keys.push_back(k);
+    }
+
+    sortPathsByDecreasingLength(keys);
+
+    for (const auto &path : keys) {
+        const auto &entry = files.at(path);
+
+        if (entryCanBeCompressed(compress, path, engine)) {
+            printer::print(printer::CLI_LEVEL_WARNING, "Compressing entry " + path);
+            continue;
+        }
+
         if (entry.permanent) {
             permanent.push_back(path);
         }
@@ -41,13 +62,22 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1_1(
         }
     }
 
-    for (const auto &[app_name, outputs] : app_outputs) {
+    for (auto &[app_name, outputs] : app_outputs) {
         jsoncons::json app = jsoncons::json::object();
         jsoncons::json streaming = jsoncons::json::array();
+        std::vector<std::string> filtered_outputs;
+
+        sortPathsByDecreasingLength(outputs);
 
         for (const auto &path : outputs) {
             const auto &entry = files.at(path);
 
+            if (entryCanBeCompressed(compress, path, engine)) {
+                continue;
+            }
+
+            filtered_outputs.push_back(path);
+
             jsoncons::json streaming_item = jsoncons::json::object();
             std::string committed = entry.commit_rule;
             const char *name_kind = entry.is_file ? "name" : "dirname";
@@ -87,7 +117,7 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1_1(
 
         app["name"] = app_name;
         app["input_stream"] = app_inputs[app_name];
-        app["output_stream"] = outputs;
+        app["output_stream"] = filtered_outputs;
         app["streaming"] = streaming;
 
         io_graph.push_back(app);
diff --git a/src/serializers/v1.cpp b/src/serializers/v1.cpp
index f43a09d..d911d58 100644
--- a/src/serializers/v1.cpp
+++ b/src/serializers/v1.cpp
@@ -6,7 +6,13 @@
 #include "capiocl/serializer.h"
 
 void capiocl::serializer::Serializer::available_serializers::serialize_v1(
-    const engine::Engine &engine, const std::filesystem::path &filename) {
+    const engine::Engine &engine, const std::filesystem::path &filename, const bool compress) {
+
+    if (compress) {
+        printer::print(printer::CLI_LEVEL_WARNING,
+                       "Using configuration compression to directories!");
+    }
+
     jsoncons::json doc;
     doc["name"] = engine.getWorkflowName();
 
@@ -23,7 +29,22 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1(
     jsoncons::json storage = jsoncons::json::object();
     jsoncons::json io_graph = jsoncons::json::array();
 
-    for (const auto &[path, entry] : files) {
+    std::vector<std::string> keys;
+    keys.reserve(files.size());
+    for (const auto &[k, v] : files) {
+        keys.push_back(k);
+    }
+
+    sortPathsByDecreasingLength(keys);
+
+    for (const auto &path : keys) {
+        const auto &entry = files.at(path);
+
+        if (entryCanBeCompressed(compress, path, engine)) {
+            printer::print(printer::CLI_LEVEL_WARNING, "Compressing entry " + path);
+            continue;
+        }
+
         if (entry.permanent) {
             permanent.push_back(path);
         }
@@ -40,13 +61,22 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1(
         }
     }
 
-    for (const auto &[app_name, outputs] : app_outputs) {
+    for (auto &[app_name, outputs] : app_outputs) {
         jsoncons::json app = jsoncons::json::object();
         jsoncons::json streaming = jsoncons::json::array();
+        std::vector<std::string> filtered_outputs;
+
+        sortPathsByDecreasingLength(outputs);
 
         for (const auto &path : outputs) {
             const auto &entry = files.at(path);
 
+            if (entryCanBeCompressed(compress, path, engine)) {
+                continue;
+            }
+
+            filtered_outputs.push_back(path);
+
             jsoncons::json streaming_item = jsoncons::json::object();
             std::string committed = entry.commit_rule;
             const char *name_kind = entry.is_file ? "name" : "dirname";
@@ -86,7 +116,7 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1(
 
         app["name"] = app_name;
         app["input_stream"] = app_inputs[app_name];
-        app["output_stream"] = outputs;
+        app["output_stream"] = filtered_outputs;
         app["streaming"] = streaming;
 
         io_graph.push_back(app);
diff --git a/tests/cpp/test_exceptions.hpp b/tests/cpp/test_exceptions.hpp
index 9356939..60b3a1a 100644
--- a/tests/cpp/test_exceptions.hpp
+++ b/tests/cpp/test_exceptions.hpp
@@ -36,8 +36,9 @@ TEST(EXCEPTION_SUITE_NAME, testFailedserializeVersion) {
         const std::filesystem::path source = "/tmp/capio_cl_jsons/V" + version + "/test24.json";
         auto engine = capiocl::parser::Parser::parse(source, "/tmp");
 
-        EXPECT_THROW(capiocl::serializer::Serializer::dump(*engine, "test.json", "1234.5678"),
-                     capiocl::serializer::SerializerException);
+        EXPECT_THROW(
+            capiocl::serializer::Serializer::dump(*engine, "test.json", false, "1234.5678"),
+            capiocl::serializer::SerializerException);
     }
 }
 
diff --git a/tests/cpp/test_serialize_deserialize.hpp b/tests/cpp/test_serialize_deserialize.hpp
index 8c0cd1a..c381304 100644
--- a/tests/cpp/test_serialize_deserialize.hpp
+++ b/tests/cpp/test_serialize_deserialize.hpp
@@ -42,7 +42,7 @@ TEST(SERIALIZE_DESERIALIZE_SUITE_NAME, testSerializeParseCAPIOCLV1) {
 
         engine.print();
 
-        capiocl::serializer::Serializer::dump(engine, path, _cl_version);
+        capiocl::serializer::Serializer::dump(engine, path, false, _cl_version);
 
         std::filesystem::path resolve = "";
         auto new_engine = capiocl::parser::Parser::parse(path, resolve);
@@ -74,7 +74,7 @@ TEST(SERIALIZE_DESERIALIZE_SUITE_NAME, testSerializeParseCAPIOCLV1NcloseNfiles)
         engine.addProducer(file_1_name, producer_name);
         engine.addConsumer(file_1_name, consumer_name);
 
-        capiocl::serializer::Serializer::dump(engine, path, _cl_version);
+        capiocl::serializer::Serializer::dump(engine, path, false, _cl_version);
 
         std::filesystem::path resolve = "";
         auto new_engine = capiocl::parser::Parser::parse(path, resolve);
@@ -110,7 +110,7 @@ TEST(SERIALIZE_DESERIALIZE_SUITE_NAME, testSerializeParseCAPIOCLV1FileDeps) {
         engine.setFileDeps(file_3_name, {file_1_name, file_2_name});
         engine.print();
 
-        capiocl::serializer::Serializer::dump(engine, path, _cl_version);
+        capiocl::serializer::Serializer::dump(engine, path, false, _cl_version);
 
         std::filesystem::path resolve = "";
         auto new_engine = capiocl::parser::Parser::parse(path, resolve);
@@ -139,7 +139,7 @@ TEST(SERIALIZE_DESERIALIZE_SUITE_NAME, testSerializeCommitOnCloseCountNoCommitRu
         engine.setCommitedCloseNumber(file_1_name, 10);
         engine.print();
 
-        capiocl::serializer::Serializer::dump(engine, path, _cl_version);
+        capiocl::serializer::Serializer::dump(engine, path, false, _cl_version);
 
         std::filesystem::path resolve = "";
         auto new_engine = capiocl::parser::Parser::parse(path, resolve);