From 66842968dea0e7a17057c034fb4901781b9d8fb9 Mon Sep 17 00:00:00 2001 From: Marco Edoardo Santimaria Date: Thu, 11 Dec 2025 13:29:12 +0100 Subject: [PATCH 1/2] Added serialization compression flag --- bindings/python_bindings.cpp | 3 +- capiocl/serializer.h | 9 ++-- src/Serializer.cpp | 6 +-- src/serializers/v1.1.cpp | 28 ++++++++++- src/serializers/v1.cpp | 64 ++++++++++++++++++++++-- tests/cpp/test_exceptions.hpp | 5 +- tests/cpp/test_serialize_deserialize.hpp | 8 +-- 7 files changed, 104 insertions(+), 19 deletions(-) diff --git a/bindings/python_bindings.cpp b/bindings/python_bindings.cpp index fe3dd8f..e10999d 100644 --- a/bindings/python_bindings.cpp +++ b/bindings/python_bindings.cpp @@ -99,5 +99,6 @@ PYBIND11_MODULE(_py_capio_cl, m) { }); m.def("serialize", &capiocl::serializer::Serializer::dump, py::arg("engine"), - py::arg("filename"), py::arg("version") = capiocl::CAPIO_CL_VERSION::V1); + py::arg("filename"), py::arg("compress") = false, + py::arg("version") = capiocl::CAPIO_CL_VERSION::V1); } \ No newline at end of file diff --git a/capiocl/serializer.h b/capiocl/serializer.h index 1c20ef7..d9b48ec 100644 --- a/capiocl/serializer.h +++ b/capiocl/serializer.h @@ -37,10 +37,11 @@ class Serializer final { * * @param engine instance of Engine to dump * @param filename path of output file + * @param compress Compress the serialized output * @throws SerializerException */ static void serialize_v1(const engine::Engine &engine, - const std::filesystem::path &filename); + const std::filesystem::path &filename, bool compress = false); /** * @brief Dump the current configuration loaded into an instance of Engine to a CAPIO-CL @@ -48,10 +49,11 @@ class Serializer final { * * @param engine instance of Engine to dump * @param filename path of output file + * @param compress Compress the serialized output * @throws SerializerException */ static void serialize_v1_1(const engine::Engine &engine, - const std::filesystem::path &filename); + const std::filesystem::path 
&filename, bool compress = false); }; public: @@ -61,10 +63,11 @@ class Serializer final { * * @param engine instance of Engine to dump * @param filename path of output file + * @param compress When true, omit file entries whose parent directory entry has the same fire and commit rules * @param version Version of CAPIO-CL used to generate configuration files. */ static void dump(const engine::Engine &engine, const std::filesystem::path &filename, - const std::string &version = CAPIO_CL_VERSION::V1); + bool compress = false, const std::string &version = CAPIO_CL_VERSION::V1); }; } // namespace capiocl::serializer #endif // CAPIO_CL_SERIALIZER_H \ No newline at end of file diff --git a/src/Serializer.cpp b/src/Serializer.cpp index 3af869e..531f873 100644 --- a/src/Serializer.cpp +++ b/src/Serializer.cpp @@ -9,13 +9,13 @@ void capiocl::serializer::Serializer::dump(const engine::Engine &engine, const std::filesystem::path &filename, - const std::string &version) { + const bool compress, const std::string &version) { if (version == CAPIO_CL_VERSION::V1) { printer::print(printer::CLI_LEVEL_INFO, "Serializing engine with V1 specification"); - available_serializers::serialize_v1(engine, filename); + available_serializers::serialize_v1(engine, filename, compress); } else if (version == CAPIO_CL_VERSION::V1_1) { printer::print(printer::CLI_LEVEL_INFO, "Serializing engine with V1.1 specification"); - available_serializers::serialize_v1_1(engine, filename); + available_serializers::serialize_v1_1(engine, filename, compress); } else { const auto message = "No serializer available for CAPIO-CL version: " + version; throw SerializerException(message); diff --git a/src/serializers/v1.1.cpp b/src/serializers/v1.1.cpp index b862711..6bdb943 100644 --- a/src/serializers/v1.1.cpp +++ b/src/serializers/v1.1.cpp @@ -6,13 +6,25 @@ #include "capiocl/serializer.h" void capiocl::serializer::Serializer::available_serializers::serialize_v1_1( - const engine::Engine &engine, const std::filesystem::path &filename) { + const engine::Engine 
&engine, const std::filesystem::path &filename, const bool compress) { jsoncons::json doc; doc["version"] = 1.1; doc["name"] = engine.getWorkflowName(); const auto files = engine._capio_cl_entries; + std::vector keys; + keys.reserve(files.size()); + for (const auto &[k, v] : files) { + keys.push_back(k); + } + std::sort(keys.begin(), keys.end(), [](const std::string &a, const std::string &b) { + if (a.length() != b.length()) { + return a.length() < b.length(); + } + return a < b; + }); + std::unordered_map> app_inputs; std::unordered_map> app_outputs; @@ -24,7 +36,19 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1_1( jsoncons::json storage = jsoncons::json::object(); jsoncons::json io_graph = jsoncons::json::array(); - for (const auto &[path, entry] : files) { + for (const auto &path : keys) { + const auto entry = files.at(path); + + if (compress) { + if (const std::filesystem::path p(path); files.find(p.parent_path()) != files.end()) { + if (const auto &parent = files.at(p.parent_path()); + parent.fire_rule == entry.fire_rule && + parent.commit_rule == entry.commit_rule && entry.is_file) { + continue; + } + } + } + if (entry.permanent) { permanent.push_back(path); } diff --git a/src/serializers/v1.cpp b/src/serializers/v1.cpp index f43a09d..70b1042 100644 --- a/src/serializers/v1.cpp +++ b/src/serializers/v1.cpp @@ -5,13 +5,33 @@ #include "capiocl/printer.h" #include "capiocl/serializer.h" +#include + void capiocl::serializer::Serializer::available_serializers::serialize_v1( - const engine::Engine &engine, const std::filesystem::path &filename) { + const engine::Engine &engine, const std::filesystem::path &filename, const bool compress) { + + if (compress) { + printer::print(printer::CLI_LEVEL_WARNING, + "Using configuration compression to directories!"); + } + jsoncons::json doc; doc["name"] = engine.getWorkflowName(); const auto files = engine._capio_cl_entries; + std::vector keys; + keys.reserve(files.size()); + for (const auto &[k, 
v] : files) { + keys.push_back(k); + } + std::sort(keys.begin(), keys.end(), [](const std::string &a, const std::string &b) { + if (a.length() != b.length()) { + return a.length() > b.length(); + } + return a > b; + }); + std::unordered_map> app_inputs; std::unordered_map> app_outputs; @@ -23,7 +43,20 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1( jsoncons::json storage = jsoncons::json::object(); jsoncons::json io_graph = jsoncons::json::array(); - for (const auto &[path, entry] : files) { + for (const auto &path : keys) { + const auto entry = files.at(path); + + if (compress) { + if (const std::filesystem::path p(path); files.find(p.parent_path()) != files.end()) { + if (const auto &parent = files.at(p.parent_path()); + parent.fire_rule == entry.fire_rule && + parent.commit_rule == entry.commit_rule && entry.is_file) { + printer::print(printer::CLI_LEVEL_WARNING, "Compressing entry " + path); + continue; + } + } + } + if (entry.permanent) { permanent.push_back(path); } @@ -40,13 +73,36 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1( } } - for (const auto &[app_name, outputs] : app_outputs) { + for (auto &[app_name, outputs] : app_outputs) { jsoncons::json app = jsoncons::json::object(); jsoncons::json streaming = jsoncons::json::array(); + std::sort(outputs.begin(), outputs.end(), [](const std::string &a, const std::string &b) { + if (a.length() != b.length()) { + return a.length() > b.length(); + } + return a > b; + }); + + std::vector filtered_outputs; + for (const auto &path : outputs) { const auto &entry = files.at(path); + if (compress) { + if (const std::filesystem::path p(path); + files.find(p.parent_path()) != files.end()) { + if (const auto &parent = files.at(p.parent_path()); + parent.fire_rule == entry.fire_rule && + parent.commit_rule == entry.commit_rule && entry.is_file) { + printer::print(printer::CLI_LEVEL_WARNING, "Compressing entry " + path); + continue; + } + } + } + + 
filtered_outputs.push_back(path); + jsoncons::json streaming_item = jsoncons::json::object(); std::string committed = entry.commit_rule; const char *name_kind = entry.is_file ? "name" : "dirname"; @@ -86,7 +142,7 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1( app["name"] = app_name; app["input_stream"] = app_inputs[app_name]; - app["output_stream"] = outputs; + app["output_stream"] = filtered_outputs; app["streaming"] = streaming; io_graph.push_back(app); diff --git a/tests/cpp/test_exceptions.hpp b/tests/cpp/test_exceptions.hpp index 9356939..60b3a1a 100644 --- a/tests/cpp/test_exceptions.hpp +++ b/tests/cpp/test_exceptions.hpp @@ -36,8 +36,9 @@ TEST(EXCEPTION_SUITE_NAME, testFailedserializeVersion) { const std::filesystem::path source = "/tmp/capio_cl_jsons/V" + version + "/test24.json"; auto engine = capiocl::parser::Parser::parse(source, "/tmp"); - EXPECT_THROW(capiocl::serializer::Serializer::dump(*engine, "test.json", "1234.5678"), - capiocl::serializer::SerializerException); + EXPECT_THROW( + capiocl::serializer::Serializer::dump(*engine, "test.json", false, "1234.5678"), + capiocl::serializer::SerializerException); } } diff --git a/tests/cpp/test_serialize_deserialize.hpp b/tests/cpp/test_serialize_deserialize.hpp index 8c0cd1a..c381304 100644 --- a/tests/cpp/test_serialize_deserialize.hpp +++ b/tests/cpp/test_serialize_deserialize.hpp @@ -42,7 +42,7 @@ TEST(SERIALIZE_DESERIALIZE_SUITE_NAME, testSerializeParseCAPIOCLV1) { engine.print(); - capiocl::serializer::Serializer::dump(engine, path, _cl_version); + capiocl::serializer::Serializer::dump(engine, path, false, _cl_version); std::filesystem::path resolve = ""; auto new_engine = capiocl::parser::Parser::parse(path, resolve); @@ -74,7 +74,7 @@ TEST(SERIALIZE_DESERIALIZE_SUITE_NAME, testSerializeParseCAPIOCLV1NcloseNfiles) engine.addProducer(file_1_name, producer_name); engine.addConsumer(file_1_name, consumer_name); - capiocl::serializer::Serializer::dump(engine, path, 
_cl_version); + capiocl::serializer::Serializer::dump(engine, path, false, _cl_version); std::filesystem::path resolve = ""; auto new_engine = capiocl::parser::Parser::parse(path, resolve); @@ -110,7 +110,7 @@ TEST(SERIALIZE_DESERIALIZE_SUITE_NAME, testSerializeParseCAPIOCLV1FileDeps) { engine.setFileDeps(file_3_name, {file_1_name, file_2_name}); engine.print(); - capiocl::serializer::Serializer::dump(engine, path, _cl_version); + capiocl::serializer::Serializer::dump(engine, path, false, _cl_version); std::filesystem::path resolve = ""; auto new_engine = capiocl::parser::Parser::parse(path, resolve); @@ -139,7 +139,7 @@ TEST(SERIALIZE_DESERIALIZE_SUITE_NAME, testSerializeCommitOnCloseCountNoCommitRu engine.setCommitedCloseNumber(file_1_name, 10); engine.print(); - capiocl::serializer::Serializer::dump(engine, path, _cl_version); + capiocl::serializer::Serializer::dump(engine, path, false, _cl_version); std::filesystem::path resolve = ""; auto new_engine = capiocl::parser::Parser::parse(path, resolve); From 396ed0fc5d0cb6339048c28c61e331a8258d1422 Mon Sep 17 00:00:00 2001 From: Marco Edoardo Santimaria Date: Thu, 11 Dec 2025 17:21:37 +0100 Subject: [PATCH 2/2] Code refactor --- capiocl/serializer.h | 16 ++++++++++++ src/Serializer.cpp | 32 +++++++++++++++++++++++ src/serializers/v1.1.cpp | 50 +++++++++++++++++++---------------- src/serializers/v1.cpp | 56 +++++++++++----------------------------- 4 files changed, 91 insertions(+), 63 deletions(-) diff --git a/capiocl/serializer.h b/capiocl/serializer.h index d9b48ec..63f3a18 100644 --- a/capiocl/serializer.h +++ b/capiocl/serializer.h @@ -28,6 +28,22 @@ class SerializerException final : public std::exception { /// @brief Dump the current loaded CAPIO-CL configuration from class Engine to a CAPIO-CL /// configuration file. class Serializer final { + /** + * Check whether a CAPIO-CL entry has a parent entry for which the same rules apply, and tell + * whether this entry can be omitted by using rule inheritance.
+ * @param compress whether compression was requested; when false the entry is never reported as compressible + * @param path path of the entry to check + * @param engine instance of Engine that holds the entry and its parent + * @return true when the entry is not a directory and its parent entry exists with the same commit and fire rules + */ + static bool entryCanBeCompressed(bool compress, const std::filesystem::path &path, + const engine::Engine &engine); + + /** + * Sort path entries from longest to shortest; equal-length paths are placed in descending lexicographic order + * @param paths paths to sort in place + */ + static void sortPathsByDecreasingLength(std::vector &paths); /// @brief Available serializers for CAPIO-CL struct available_serializers { diff --git a/src/Serializer.cpp b/src/Serializer.cpp index 531f873..c211e95 100644 --- a/src/Serializer.cpp +++ b/src/Serializer.cpp @@ -25,4 +25,36 @@ void capiocl::serializer::Serializer::dump(const engine::Engine &engine, capiocl::serializer::SerializerException::SerializerException(const std::string &msg) : message(msg) { printer::print(printer::CLI_LEVEL_ERROR, msg); +} + +bool capiocl::serializer::Serializer::entryCanBeCompressed(const bool compress, + const std::filesystem::path &path, + const engine::Engine &engine) { + + if (!compress) { + return false; + } + + if (engine.isDirectory(path)) { + return false; + } + + const auto parent_path = path.parent_path(); + + if (!engine.contains(parent_path)) { + return false; + } + + return engine.getCommitRule(path) == engine.getCommitRule(parent_path) && + engine.getFireRule(path) == engine.getFireRule(parent_path); +} + + +void capiocl::serializer::Serializer::sortPathsByDecreasingLength(std::vector &paths) { + std::sort(paths.begin(), paths.end(), [](const std::string &a, const std::string &b) { + if (a.length() != b.length()) { + return a.length() > b.length(); + } + return a > b; + }); } \ No newline at end of file diff --git a/src/serializers/v1.1.cpp b/src/serializers/v1.1.cpp index 6bdb943..0767050 100644 --- a/src/serializers/v1.1.cpp +++ b/src/serializers/v1.1.cpp @@ -7,24 +7,18 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1_1( const engine::Engine &engine, const std::filesystem::path &filename, const bool compress) { + + if (compress) { + 
printer::print(printer::CLI_LEVEL_WARNING, + "Using configuration compression to directories!"); + } + jsoncons::json doc; doc["version"] = 1.1; doc["name"] = engine.getWorkflowName(); const auto files = engine._capio_cl_entries; - std::vector keys; - keys.reserve(files.size()); - for (const auto &[k, v] : files) { - keys.push_back(k); - } - std::sort(keys.begin(), keys.end(), [](const std::string &a, const std::string &b) { - if (a.length() != b.length()) { - return a.length() < b.length(); - } - return a < b; - }); - std::unordered_map> app_inputs; std::unordered_map> app_outputs; @@ -36,17 +30,20 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1_1( jsoncons::json storage = jsoncons::json::object(); jsoncons::json io_graph = jsoncons::json::array(); + std::vector keys; + keys.reserve(files.size()); + for (const auto &[k, v] : files) { + keys.push_back(k); + } + + sortPathsByDecreasingLength(keys); + for (const auto &path : keys) { const auto entry = files.at(path); - if (compress) { - if (const std::filesystem::path p(path); files.find(p.parent_path()) != files.end()) { - if (const auto &parent = files.at(p.parent_path()); - parent.fire_rule == entry.fire_rule && - parent.commit_rule == entry.commit_rule && entry.is_file) { - continue; - } - } + if (entryCanBeCompressed(compress, path, engine)) { + printer::print(printer::CLI_LEVEL_WARNING, "Compressing entry " + path); + continue; } if (entry.permanent) { @@ -65,13 +62,22 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1_1( } } - for (const auto &[app_name, outputs] : app_outputs) { + for (auto &[app_name, outputs] : app_outputs) { jsoncons::json app = jsoncons::json::object(); jsoncons::json streaming = jsoncons::json::array(); + std::vector filtered_outputs; + + sortPathsByDecreasingLength(outputs); for (const auto &path : outputs) { const auto &entry = files.at(path); + if (entryCanBeCompressed(compress, path, engine)) { + continue; + } + + 
filtered_outputs.push_back(path); + jsoncons::json streaming_item = jsoncons::json::object(); std::string committed = entry.commit_rule; const char *name_kind = entry.is_file ? "name" : "dirname"; @@ -111,7 +117,7 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1_1( app["name"] = app_name; app["input_stream"] = app_inputs[app_name]; - app["output_stream"] = outputs; + app["output_stream"] = filtered_outputs; app["streaming"] = streaming; io_graph.push_back(app); diff --git a/src/serializers/v1.cpp b/src/serializers/v1.cpp index 70b1042..d911d58 100644 --- a/src/serializers/v1.cpp +++ b/src/serializers/v1.cpp @@ -5,8 +5,6 @@ #include "capiocl/printer.h" #include "capiocl/serializer.h" -#include - void capiocl::serializer::Serializer::available_serializers::serialize_v1( const engine::Engine &engine, const std::filesystem::path &filename, const bool compress) { @@ -20,18 +18,6 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1( const auto files = engine._capio_cl_entries; - std::vector keys; - keys.reserve(files.size()); - for (const auto &[k, v] : files) { - keys.push_back(k); - } - std::sort(keys.begin(), keys.end(), [](const std::string &a, const std::string &b) { - if (a.length() != b.length()) { - return a.length() > b.length(); - } - return a > b; - }); - std::unordered_map> app_inputs; std::unordered_map> app_outputs; @@ -43,18 +29,20 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1( jsoncons::json storage = jsoncons::json::object(); jsoncons::json io_graph = jsoncons::json::array(); + std::vector keys; + keys.reserve(files.size()); + for (const auto &[k, v] : files) { + keys.push_back(k); + } + + sortPathsByDecreasingLength(keys); + for (const auto &path : keys) { const auto entry = files.at(path); - if (compress) { - if (const std::filesystem::path p(path); files.find(p.parent_path()) != files.end()) { - if (const auto &parent = files.at(p.parent_path()); - parent.fire_rule == 
entry.fire_rule && - parent.commit_rule == entry.commit_rule && entry.is_file) { - printer::print(printer::CLI_LEVEL_WARNING, "Compressing entry " + path); - continue; - } - } + if (entryCanBeCompressed(compress, path, engine)) { + printer::print(printer::CLI_LEVEL_WARNING, "Compressing entry " + path); + continue; } if (entry.permanent) { @@ -76,29 +64,15 @@ void capiocl::serializer::Serializer::available_serializers::serialize_v1( for (auto &[app_name, outputs] : app_outputs) { jsoncons::json app = jsoncons::json::object(); jsoncons::json streaming = jsoncons::json::array(); - - std::sort(outputs.begin(), outputs.end(), [](const std::string &a, const std::string &b) { - if (a.length() != b.length()) { - return a.length() > b.length(); - } - return a > b; - }); - std::vector filtered_outputs; + sortPathsByDecreasingLength(outputs); + for (const auto &path : outputs) { const auto &entry = files.at(path); - if (compress) { - if (const std::filesystem::path p(path); - files.find(p.parent_path()) != files.end()) { - if (const auto &parent = files.at(p.parent_path()); - parent.fire_rule == entry.fire_rule && - parent.commit_rule == entry.commit_rule && entry.is_file) { - printer::print(printer::CLI_LEVEL_WARNING, "Compressing entry " + path); - continue; - } - } + if (entryCanBeCompressed(compress, path, engine)) { + continue; } filtered_outputs.push_back(path);