From c0d211d060726201515cd1044a8128623ae345a9 Mon Sep 17 00:00:00 2001 From: Jakub Havlik Date: Sat, 1 Feb 2025 00:49:29 +0100 Subject: [PATCH 1/3] Initial commit integrating L* and NL* into mata Added new file learning.cc for L*, NL* Added example program - use of learning edited CMakeLists accordingly Fixed a bug with rfsa_closure -- property violated when it shouldn't because of prime rows evaluating of prime rows was fixed --- examples/example07-learning.cc | 34 +++ include/mata/nfa/learning.hh | 293 ++++++++++++++++++++++ src/CMakeLists.txt | 1 + src/nfa/learning.cc | 440 +++++++++++++++++++++++++++++++++ 4 files changed, 768 insertions(+) create mode 100644 examples/example07-learning.cc create mode 100644 include/mata/nfa/learning.hh create mode 100644 src/nfa/learning.cc diff --git a/examples/example07-learning.cc b/examples/example07-learning.cc new file mode 100644 index 000000000..e332b0959 --- /dev/null +++ b/examples/example07-learning.cc @@ -0,0 +1,34 @@ +#include "mata/nfa/learning.hh" +#include "mata/nfa/nfa.hh" + +int main(){ + Nfa aut; + aut.initial = {0}; + aut.final = {3}; + aut.delta.add(0, 'a', 0); + aut.delta.add(0, 'b', 0); + aut.delta.add(0, 'a', 1); + aut.delta.add(1, 'a', 1); + aut.delta.add(1, 'a', 0); + aut.delta.add(1, 'b', 0); + aut.delta.add(1, 'a', 2); + aut.delta.add(1, 'b', 2); + aut.delta.add(2, 'a', 1); + aut.delta.add(2, 'a', 0); + aut.delta.add(2, 'b', 0); + aut.delta.add(2, 'a', 3); + aut.delta.add(2, 'b', 3); + aut.delta.add(3, 'a', 1); + aut.delta.add(3, 'a', 0); + aut.delta.add(3, 'b', 0); + + ParameterMap params; + params["algorithm"] = "nlstar"; + Nfa res1 = learn(aut, params); + res1.print_to_dot(std::cout); + + params["algorithm"] = "lstar"; + Nfa res2 = learn(aut, params); + res2.print_to_dot(std::cout); + +} \ No newline at end of file diff --git a/include/mata/nfa/learning.hh b/include/mata/nfa/learning.hh new file mode 100644 index 000000000..e5ae789bd --- /dev/null +++ b/include/mata/nfa/learning.hh @@ -0,0 +1,293 
@@ +#ifndef __MATA_NFA_LEARNING_HH__ +#define __MATA_NFA_LEARNING_HH__ + +#include "mata/nfa/nfa.hh" +#include "mata/nfa/algorithms.hh" +#include "mata/alphabet.hh" +#include "mata/nfa/builder.hh" +#include "mata/nfa/learning.hh" + +#include +#include +#include +using namespace mata::nfa; +using namespace mata; +using namespace std; +using namespace mata::utils; +using namespace mata::nfa::algorithms; + +/** + * @brief Structure of one row in the OT + * stores its binary vector, access string + * and a set of row in cover relation + * + */ +struct Row{ + BoolVector T = BoolVector(); + Word value = Word(); + //needed only for NLstar + vector>covering = vector>(); +}; + +/** + * @brief Structure used for constructing + * conjectures + */ +struct State_construction{ + Row row; + long unsigned int index; + State_construction(Row row, long unsigned int index) : row(row), index(index) {} +}; + +struct unclosed{ + Word unclosed_word = Word(); + shared_ptrunclosed_row = nullptr; +}; + +/** + * @brief Used to save information about + * consistency and closure checks + * + */ +struct const_check_res{ + bool consistent; + bool closed; + Word c; + unclosed not_closed; + const_check_res() : consistent(false), closed(false), c(), not_closed() {} +}; + +/** + * @brief OT for NL* algorithm inferring RFSA/DFA + * stores all rows in all, distinguishing to top and + * bottom parts is done with vectorsz of pointer (S, Splus) + * E stores experiment strings + * + */ +class OT{ + public: + vector> S = vector>(); // top part + vector> Splus = vector>(); // bottom part + vector> all = vector>(); // stores all rows + vector E = vector(); // experiment strings + map> all_map = map>(); // map for faster access to rows + + OT(OrdVector alphabet, Nfa teacher, const ParameterMap& params); + ~OT(); + + /** + * @brief Join operator defined for two rows -- basically binary or + * + */ + inline BoolVector join_op(BoolVector r1, BoolVector r2); + + /** + * @brief if r1[i] = +, it implies that r2[i] 
= + + * + * @param r1 first binary vector + * @param r2 second binary vector + * @return true if r1 is covered by r2 + * @return false if not covered + */ + inline bool covers(BoolVector r1, BoolVector r2); + + /** + * @brief Returns a membership query + * for a given access string concatenated with a + * given experiment string + * + * @param aut oracle automaton answering the query + * @param e experiment string + * @param value access string + * @return true if accepted + * @return false if not accepted + */ + bool get_t(Nfa aut, Word e, Word value); + + /** + * @brief Evaluates if a certain row from the upper part + * is prime, meaning that it shouldnt be joined by + * rows it covers + * + * @param row evaluated row + * @return true if prime + * @return false if composed (not prime) + */ + bool is_prime(shared_ptr row); + + /** + * @brief RFSA Closure property -- checks if + * all rows in bottom part are join of prime rows + * from the top part (have to also be in cover relation) + * + * @param not_closed returned unclosed row access string, ptr to row + * @return true if closed + * @return false if not closed + */ + bool rfsa_closure(unclosed& not_closed); + + /** + * @brief DFA Closure property -- checks if all + * rows in the bottom part also appear in the top part + * of the table + * + * @param not_closed returned unclosed row access string, ptr to row + * @return true if closed + * @return false not closed + */ + bool dfa_closure(unclosed& not_closed); + + /** + * @brief RFSA Consistency property -- checks if all + * pairs of rows in cover relation imply that their extensions are in + * cover relation + * + * @param c word that will be added to experiment strings if not consistent + * @param alphabet input alphabet + * @return true if consistent + * @return false if not consistent + */ + bool rfsa_consistency(Word& c, OrdVector alphabet); + + /** + * @brief DFA Consistency property -- checks if all + * rows in the top part that are equal are also equal + 
* when extended by a symbol from the alphabet + * + * @param c word that will be added to experiment strings if not consistent + * @param alphabet input alphabet + * @return true if consistent + * @return false if not consistent + */ + bool dfa_consistency(Word& c, OrdVector alphabet); + + /** + * @brief When a row is moved to the top part + * its covering relation vector has to be filled + * from the scratch + * Only for NLstar + * + * @param row row to evaluate cover relation for + */ + void get_covering(shared_ptr row); + + /** + * @brief If a new experiment string is added, + * cover relation of rows can change, if some coverage is not + * valid anymore, it is removed + * Only for NLstar + * + */ + void update_covering_new_e(); + + /** + * @brief If a new row is added to the bottom part + * rows in the top part check if they are not in cover relation with it + * Only for NLstar + * + * @param row row that was added + */ + void update_covering_new_row(shared_ptr row); + + /** + * @brief adds a new row to the bottom part of the OT + * + * @param row row to be added + */ + inline void add_to_Splus(shared_ptr row); + + /** + * @brief When the table is not closed, this moves + * the unclosed row to the top part of the table and adds new + * rows to the bottom part + * + * @param not_closed input string of the unclosed row + * @param alphabet input alphabet + * @param teacher oracle automaton + */ + void state_not_closed(unclosed not_closed, OrdVector alphabet, Nfa teacher); + + /** + * @brief Adds new columns to the table by adding experiment words + * and extending the table by membership queries + * + * @param teacher oracle answering queries + * @param c word to get added to experiment strings + * @param params parameters to distinguish L* and NL* approaches + */ + void update_consistency(Nfa teacher, Word c, const ParameterMap& params); + + /** + * @brief Adds all suffixes from cex to the OT as experiment strings (to E) + * this adds new collumns to the table to 
distinguish states of the automaton + * same as updating consistency + * + * @param cex counter-example word + * @param teacher oracle automaton + * @param params parameters to distinguish L* and NL* approaches + */ + void update_after_cex(Word cex, Nfa teacher, const ParameterMap& params); + + /** + * @brief Checks whether OT is consistent a closed + * -> if we can construct a conjecture automaton + * + * @param res structure saving information about closure, consistency properties + * @param alphabet input alphabet + * @param params parameters to distinguish L* and NL* approaches + * @return true if can be constructed + * @return false if cant be constructed + * + */ + bool mata_can_be_constructed(const_check_res& res, OrdVector alphabet, const ParameterMap& params); +}; + +/** + * @brief Membership query for OT - checks whether + * given input word is accepted by the oracle or not + * + * @param teacher automaton to get the query from + * @param word input string + * @return true if the word is accepted by teacher + * @return false if its not accepted + */ +bool membership_query(Nfa teacher, Word word); + +/** + * @brief Constructs a conjecture automaton + * first defines all the states of the conjecture, + * then computes transition function + * + * @param table OT to get the conjecture from + * @param alphabet input alphabet + * @param params parameters to distinguish L* and NL* approaches + * @return Nfa hypothesis automaton + */ +Nfa construct_conjecture(OT table, OrdVector alphabet, const ParameterMap& params); + +/** + * @brief Gets all suffixes from counter-example, that + * will get added to the OT + * + * @param cex word to get suffixes from + * @return set set of suffixes + */ +inline set get_all_suffixes(Word cex); + +/** + * @brief Equivalence query for OT, checks whether + * constructed conjecture's language is equivalent to the oracle + * if not, returns a counter-example + * + * @param teacher oracle automaton + * @param conjecture constructed 
hypothesis
+ * @param cex in case they are not equivalent - counter-example word
+ * @return true if equivalent
+ * @return false if not equivalent
+ */
+bool equivalence_query(Nfa teacher, Nfa conjecture, Word& cex);
+
+Nfa learn(Nfa teacher, const ParameterMap& params);
+
+
+#endif
\ No newline at end of file
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0bfc68a4b..1ace50b05 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(libmata STATIC
     nfa/delta.cc
     nfa/operations.cc
     nfa/builder.cc
+    nfa/learning.cc
 
     nft/nft.cc
     nft/inclusion.cc
diff --git a/src/nfa/learning.cc b/src/nfa/learning.cc
new file mode 100644
index 000000000..e83ab4821
--- /dev/null
+++ b/src/nfa/learning.cc
@@ -0,0 +1,440 @@
+/* nfa-learn.cc -- NFA learning algorithms
+*/
+
+//MATA headers
+#include "mata/nfa/learning.hh"
+
+Nfa learning(Nfa teacher, const ParameterMap& params){
+    if(teacher.num_of_states() == 0){
+        throw std::runtime_error("Invalid teacher automaton!\n");
+    }
+    const Alphabet& alph = create_alphabet(teacher);
+    OrdVector<Symbol> alphabet = alph.get_alphabet_symbols();
+    // NOTE(review): this outer `conjecture` is dead — it is shadowed by the
+    // declaration inside the loop below; consider removing one of the two.
+    Nfa conjecture;
+    OT table(alphabet, teacher, params);
+    while(1){
+        const_check_res res;
+        if(table.mata_can_be_constructed(res, alphabet, params)){
+            Nfa conjecture = construct_conjecture(table, alphabet, params);
+            Word cex;
+            // NOTE(review): argument order fixed to match the declaration
+            // bool equivalence_query(Nfa teacher, Nfa conjecture, Word& cex);
+            // the original call passed (conjecture, teacher, cex), so the
+            // implementation used the conjecture's alphabet as the teacher's.
+            bool equivalent = equivalence_query(teacher, conjecture, cex);
+            if(equivalent){
+                return conjecture;
+            }
+            else{
+                table.update_after_cex(cex, teacher, params);
+            }
+        }
+        //cant be constructed
+        else{
+            if(!res.consistent){
+                table.update_consistency(teacher, res.c, params);
+            }
+            if(!res.closed){
+                table.state_not_closed(res.not_closed, alphabet, teacher);
+            }
+        }
+    }
+}
+
+Nfa learn(Nfa teacher, const ParameterMap& params){
+    if (!haskey(params, "algorithm")) {
+        throw std::runtime_error(std::to_string(__func__) +
+            " requires setting the \"algorithm\" key in the \"params\" argument; "
+            "received: " + std::to_string(params));
+    }
+    const
std::string& algo = params.at("algorithm"); + if(algo == "nlstar" || algo == "lstar"){ + return learning(teacher, params); + } + else{ + throw std::runtime_error(std::to_string(__func__) + + " received an unknown value of the \"algorithm\" key: " + algo); + } +} + +// very inefficient query for long words +bool membership_query(Nfa teacher, Word word){ + return teacher.is_in_lang(word); +} + +bool equivalence_query(Nfa teacher, Nfa conjecture, Word& cex){ + Run counter_example_run; + is_included_naive(conjecture, teacher, teacher.alphabet, &counter_example_run); + if(counter_example_run.word.size() == 0) + is_included_naive(teacher, conjecture, teacher.alphabet, &counter_example_run); + ParameterMap params; + params["algorithm"] = "naive"; + bool equivalent = are_equivalent(teacher, conjecture, teacher.alphabet, params); + cex = counter_example_run.word; + return equivalent; +}//equivalence_query + +Nfa construct_conjecture(OT table, OrdVector alphabet, const ParameterMap& params){ + Nfa conjecture; + vectorstates; + long unsigned int num_of_states = 0; + // first get all states possible + if(params.at("algorithm") == "nlstar"){ + for(auto row : table.S){ + if(table.is_prime(row)){ + if(table.covers(row->T, table.S.front()->T)) conjecture.initial.insert(num_of_states); + if(row->T[0]) conjecture.final.insert(num_of_states); + conjecture.add_state(num_of_states); + states.emplace_back(State_construction({(*row), num_of_states})); + num_of_states++; + } + } + // go through all acquired states and compute the delta function + for(State_construction state : states){ + for(Symbol symbol : alphabet){ + Word new_input = state.row.value; + new_input.emplace_back(symbol); + //we have to check if the new input is located in the table + //if its in, it means that it represents some state and we have a delta + auto res = table.all_map.find(new_input); + if(res != table.all_map.end()){ + for(State_construction state2 : states){ + if(table.covers(state2.row.T, res->second->T)){ 
+ Transition new_transition; + new_transition.symbol = symbol; + new_transition.source = state.index; + new_transition.target = state2.index; + conjecture.delta.add(new_transition); + } + } + } + } + } + } + if(params.at("algorithm") == "lstar"){ + for(auto row : table.S){ + if(row->value.empty()) conjecture.initial.insert(num_of_states); + if(row->T[0]) conjecture.final.insert(num_of_states); + states.push_back({(*row), num_of_states}); + conjecture.add_state(num_of_states); + num_of_states++; + } + for(State_construction state : states){ + for(Symbol symbol : alphabet){ + Word new_input = state.row.value; + new_input.push_back(symbol); + auto res = table.all_map.find(new_input); + if(res != table.all_map.end()){ + for(State_construction state2 : states){ + if(res->second->T == state2.row.T){ + Transition new_transition; + new_transition.symbol = symbol; + new_transition.source = state.index; + new_transition.target = state2.index; + conjecture.delta.add(new_transition); + } + } + } + } + } + } + + //might remove this, ensure there is no trap state - not needed + conjecture.trim(); + return conjecture; +} + +OT::OT(OrdVector alphabet, Nfa teacher, const ParameterMap& params){ + // essentially epsilon, but Mata would try to find + // eps transitions and explicitly add it to Words + E.emplace_back(Word({})); + auto eps_ptr = make_shared(Row{{membership_query(teacher, {})}, {}, {}}); + all.emplace_back(eps_ptr); + S.emplace_back(eps_ptr); + all_map.insert({{}, eps_ptr}); + + for(Symbol sym : alphabet){ + auto row_ptr = make_shared(Row{{membership_query(teacher, {sym})}, {sym}, {}}); + all.emplace_back(row_ptr); + Splus.emplace_back(row_ptr); + all_map.insert({{sym}, row_ptr}); + } + //initialize cover relation + if(params.at("algorithm") == "nlstar"){ + get_covering(eps_ptr); + } +} + +OT::~OT(){ + S.clear(); + Splus.clear(); + all.clear(); + E.clear(); +} + +inline BoolVector OT::join_op(BoolVector r1, BoolVector r2){ + for(auto it1 = r1.begin(), it2 = r2.begin(); 
it1 != r1.end(); it1++, it2++){ + *it1 = *it1 || *it2; + } + return r1; +} + +inline bool OT::covers(BoolVector r1, BoolVector r2){ + for(auto it1 = r1.begin(), it2 = r2.begin(); it1 != r1.end(); it1++, it2++){ + if(*it1 && !*it2){ // if r1(E) is true, it implies that r2(E) is true + return false; + } + } + return true; +} + +bool OT::get_t(Nfa aut, Word e, Word value){ + Word input = value; + for(Symbol symbol : e){ + input.emplace_back(symbol); + } + return membership_query(aut, input); +} + +bool OT::is_prime(shared_ptr row){ + BoolVector join(E.size(), false); + bool not_joined = true; + for(auto it = all.begin(); it != all.end(); it++){ + if((*it)->T != row->T && covers((*it)->T, row->T)){ + join = join_op(join, (*it)->T); + not_joined = false; + } + } + if(not_joined || join != row->T) return true; + + return false; +} + +bool OT::rfsa_closure(unclosed& not_closed) { + vector> primes_upp; + vector Ts; + for (auto row : S) { + if (is_prime(row)) { + primes_upp.emplace_back(row); + } + Ts.emplace_back(row->T); + } + + for (auto it = Splus.begin(); it != Splus.end(); it++) { + BoolVector join(E.size(), false); + + bool not_joined = true; + for (auto prime : primes_upp) { + if ((*it)->T != prime->T && covers(prime->T, (*it)->T)) { + join = join_op(join, prime->T); + not_joined = false; + } + } + bool closed = true; + for(auto it1 = join.begin(), it2 = (*it)->T.begin(); it1 != join.end(); it1++, it2++){ + if((*it1) != (*it2)){ + closed = false; + break; + } + } + bool not_in = find(Ts.begin(), Ts.end(), (*it)->T) == Ts.end(); + + if ((not_joined || !closed) && not_in) { + not_closed.unclosed_word = (*it)->value; + not_closed.unclosed_row = (*it); + //move the row to the top part + S.emplace_back(*it); + get_covering(*it); + update_covering_new_row(*it); + Splus.erase(it); + return false; + } + } + + return true; +} +bool OT::dfa_closure(unclosed& not_closed){ + set S_set; + for(auto S_row : S){ + S_set.insert(S_row->T); + } + for(auto it = Splus.begin(); it != 
Splus.end(); it++){ + if(S_set.find((*it)->T) == S_set.end()){ + not_closed.unclosed_word = (*it)->value; + not_closed.unclosed_row = (*it); + S.emplace_back(*it); + Splus.erase(it); + return false; + } + } + return true; +} + +bool OT::rfsa_consistency(Word& c, OrdVectoralphabet){ + for(auto row : S){ + for(auto covering : row->covering){ + for(Symbol sym : alphabet){ + Word input1 = row->value; + Word input2 = covering->value; + input1.emplace_back(sym); + input2.emplace_back(sym); + + auto res1 = all_map.find(input1); + auto res2 = all_map.find(input2); + for(long unsigned int i = 0; i < res1->second->T.size(); i++){ + if(res1->second->T[i] && !res2->second->T[i]){ + c = {sym}; + for(Symbol s : E[i]){ + c.emplace_back(s); + } + return false; + } + } + } + } + } + return true; +} + +bool OT::dfa_consistency(Word& c, OrdVector alphabet){ + multimap map; // one bool vector can be mapped to multiple access strings + std::map map2; + set T_in_table; // all bool vectors in the table + for(auto S_row : S){ + map.insert({S_row->T, S_row->value}); + map2.insert({S_row->value, S_row->T}); + T_in_table.insert(S_row->T); + } + for(auto Splus_row : Splus){ + map2.insert({Splus_row->value, Splus_row->T}); + } + for(auto T : T_in_table){ + auto equal_range = map.equal_range(T); + vector inputs; // all access string for this exact bool vector + if(equal_range.first != equal_range.second){ + for(auto it = equal_range.first; it != equal_range.second; it++){ + inputs.push_back(it->second); + } + for(auto Symbol : alphabet){ + setTs; + for(long unsigned int i = 0; i < inputs.size(); i++){ + Word new_input = inputs[i]; + new_input.push_back(Symbol); + Ts.insert(map2.find(new_input)->second); + } + for(long unsigned int e = 0; e < E.size(); e++){ + settest; + for(auto T : Ts){ + test.insert(T[e]); + } + if(test.size() > 1){ + c = {Symbol}; + for(long unsigned int sym = 0; sym < E[e].size(); sym++){ + c.push_back(E[e][sym]); + } + return false; + } + } + } + } + } + return true; +} + 
+
+void OT::get_covering(shared_ptr<Row> row){
+    for(auto r : S){
+        if(r->value == row->value) continue;
+        if(covers(row->T, r->T)){
+            // NOTE(review): append the covering row `r`, not `row` itself;
+            // the original appended `row`, so every covering list contained
+            // only the row itself and rfsa_consistency became vacuous
+            // (cf. update_covering_new_row below, which appends the other row).
+            row->covering.emplace_back(r);
+        }
+    }
+}
+
+void OT::update_covering_new_e() {
+    // if we added a new True membership query to a row
+    // it means that all rows in cover relation need to have true as well
+    // if they dont, remove them from the cover relation vector
+    for (auto row : S) {
+        if (row->T.back()){
+            for (auto it = row->covering.begin(); it != row->covering.end();){
+                if (!((*it)->T.back())) {
+                    it = row->covering.erase(it);
+                }
+                else {
+                    it++;
+                }
+            }
+        }
+    }
+}
+
+void OT::update_covering_new_row(shared_ptr<Row> row){
+    for(auto r : S){
+        if(r->value == row->value) continue;
+        if(covers(r->T, row->T)){
+            r->covering.emplace_back(row);
+        }
+    }
+}
+
+inline void OT::add_to_Splus(shared_ptr<Row> row){
+    Splus.emplace_back(row);
+    all.emplace_back(row);
+    all_map.insert({row->value, row});
+}
+
+void OT::state_not_closed(unclosed not_closed, OrdVector<Symbol> alphabet, Nfa teacher){
+    //unclosed row is already moved to the top part
+    for(Symbol symbol : alphabet){
+        shared_ptr<Row> new_row = make_shared<Row>();
+        new_row->value = not_closed.unclosed_word;
+        new_row->value.emplace_back(symbol);
+        for(Word e : E){
+            Word input = new_row->value;
+            for(Symbol sym : e){
+                input.emplace_back(sym);
+            }
+            bool value = membership_query(teacher, input);
+            new_row->T.emplace_back(value);
+        }
+
+        add_to_Splus(new_row);
+    }
+}
+
+
+void OT::update_consistency(Nfa teacher, Word c, const ParameterMap& params){
+    E.emplace_back(c);
+    for(auto row : all){
+        row->T.emplace_back(get_t(teacher, c, row->value));
+    }
+    if(params.at("algorithm") == "nlstar"){
+        update_covering_new_e();
+    }
+}
+
+inline set<Word> get_all_suffixes(Word cex){
+    set<Word> suffixes;
+    for(auto it = cex.begin(); it != cex.end(); it++){
+        auto suffix = Word({it, cex.end()});
+        suffixes.insert(suffix);
+    }
+    return suffixes;
+}
+
+void OT::update_after_cex(Word cex, Nfa teacher, const ParameterMap& params){
+    set<Word>
suffixes = get_all_suffixes(cex); + for(Word suffix : suffixes){ + if(find(E.begin(), E.end(), suffix) == E.end()){ // dont need to add suffixes that are already in + update_consistency(teacher, suffix, params); + } + } +} + +bool OT::mata_can_be_constructed(const_check_res& res, OrdVector alphabet, const ParameterMap& params){ + if(params.at("algorithm") == "nlstar"){ + res.consistent = rfsa_consistency(res.c, alphabet); + res.closed = rfsa_closure(res.not_closed); + } + if(params.at("algorithm") == "lstar"){ + res.consistent = dfa_consistency(res.c, alphabet); + res.closed = dfa_closure(res.not_closed); + } + return res.consistent && res.closed; +} \ No newline at end of file From 0a03012362bf5a7a5c31a7846a0094fc6cf0f668 Mon Sep 17 00:00:00 2001 From: Jakub Havlik Date: Wed, 5 Feb 2025 11:38:39 +0100 Subject: [PATCH 2/3] Added a hand-made test that was failing before the fix of rfsa_closure --- tests/CMakeLists.txt | 1 + tests/nfa/learning.cc | 51 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 tests/nfa/learning.cc diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index eb695cff6..2cccac89b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -13,6 +13,7 @@ add_executable(tests nfa/nfa-product.cc nfa/nfa-profiling.cc nfa/nfa-plumbing.cc + nfa/learning.cc nft/delta.cc nft/nft.cc nft/builder.cc diff --git a/tests/nfa/learning.cc b/tests/nfa/learning.cc new file mode 100644 index 000000000..2c4f6409f --- /dev/null +++ b/tests/nfa/learning.cc @@ -0,0 +1,51 @@ +#include "mata/nfa/nfa.hh" +#include "mata/nfa/learning.hh" +#include +#include + +TEST_CASE("mata::nfa::residual_automaton_learning"){ + Nfa aut; + aut.initial = {0}; + aut.final = {3}; + aut.delta.add(0, 'a', 0); + aut.delta.add(0, 'b', 0); + aut.delta.add(0, 'a', 1); + aut.delta.add(1, 'a', 1); + aut.delta.add(1, 'a', 0); + aut.delta.add(1, 'b', 0); + aut.delta.add(1, 'a', 2); + aut.delta.add(1, 'b', 2); + aut.delta.add(2, 'a', 1); + 
aut.delta.add(2, 'a', 0); + aut.delta.add(2, 'b', 0); + aut.delta.add(2, 'a', 3); + aut.delta.add(2, 'b', 3); + aut.delta.add(3, 'a', 1); + aut.delta.add(3, 'a', 0); + aut.delta.add(3, 'b', 0); + + StateRenaming* state_renaming = new StateRenaming(); + ParameterMap params_nlstar; + params_nlstar["algorithm"] = "nlstar"; + ParameterMap params_residuals; + params_residuals["algorithm"] = "residual"; + params_residuals["type"] = "with"; + params_residuals["direction"] = "forward"; + Nfa conjecture1 = learn(aut, params_nlstar); + Nfa conjecture2 = reduce(aut, state_renaming, params_residuals); + // should have only 4 states, while minimal DFA has 8 + // when rfsa_consistency was not fixed, this wasnt equal + REQUIRE(conjecture1.num_of_states() == conjecture2.num_of_states()); + REQUIRE(are_equivalent(conjecture1, conjecture2)); + + ParameterMap params_lstar; + params_lstar["algorithm"] = "lstar"; + Nfa conjecture3 = learn(aut, params_lstar); + Nfa conjecture4 = minimize_brzozowski(aut); + + REQUIRE(are_equivalent(conjecture3, conjecture4)); + REQUIRE(are_equivalent(conjecture1, conjecture3)); + REQUIRE(conjecture3.num_of_states() == conjecture4.num_of_states()); + REQUIRE(conjecture3.num_of_states() == 8); + REQUIRE(conjecture1.num_of_states() == 4); +} \ No newline at end of file From c2c2dfc92f8dfb12bfe08102f58e5a1cd756334c Mon Sep 17 00:00:00 2001 From: Jakub Havlik Date: Wed, 5 Feb 2025 12:19:45 +0100 Subject: [PATCH 3/3] added integration tests for learning algorithms --- tests-integration/jobs/learning.yaml.in | 2 ++ tests-integration/src/test-learning.cc | 44 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 tests-integration/jobs/learning.yaml.in create mode 100644 tests-integration/src/test-learning.cc diff --git a/tests-integration/jobs/learning.yaml.in b/tests-integration/jobs/learning.yaml.in new file mode 100644 index 000000000..2209f4aa3 --- /dev/null +++ b/tests-integration/jobs/learning.yaml.in @@ -0,0 +1,2 @@ +learn: + cmd: 
@CMAKE_CURRENT_BINARY_DIR@/templates/test-learning $1 \ No newline at end of file diff --git a/tests-integration/src/test-learning.cc b/tests-integration/src/test-learning.cc new file mode 100644 index 000000000..14a33cec9 --- /dev/null +++ b/tests-integration/src/test-learning.cc @@ -0,0 +1,44 @@ +#include "utils/utils.hh" +#include "mata/nfa/learning.hh" + +constexpr bool MINTERMIZE_AUTOMATA{false}; + +int main(int argc, char *argv[]) { + if (argc != 2) { + std::cerr << "Input files or algorithm missing\n"; + return EXIT_FAILURE; + } + + std::string filename = argv[1]; + Nfa test_automaton; + mata::OnTheFlyAlphabet alphabet; + if (load_automaton(filename, test_automaton, alphabet, MINTERMIZE_AUTOMATA) != EXIT_SUCCESS) { + return EXIT_FAILURE; + } + StateRenaming* state_renaming = new StateRenaming(); + ParameterMap params_learning; + params_learning["algorithm"] = "nlstar"; + ParameterMap params_residuals; + params_residuals["algorithm"] = "residual"; + params_residuals["type"] = "with"; + params_residuals["direction"] = "forward"; + Nfa conjecture_nlstar = learn(test_automaton, params_learning); + Nfa conjecture_residuals = reduce(test_automaton, state_renaming, params_residuals); + if(conjecture_nlstar.num_of_states() != conjecture_residuals.num_of_states()){ + return EXIT_FAILURE; + } + if(!are_equivalent(conjecture_nlstar, conjecture_residuals)){ + return EXIT_FAILURE; + } + params_learning["algorithm"] = "lstar"; + Nfa conjecture_lstar = learn(test_automaton, params_learning); + Nfa conjecture_minimize = minimize_brzozowski(test_automaton); + if(!are_equivalent(conjecture_lstar, conjecture_minimize)){ + return EXIT_FAILURE; + } + if(conjecture_lstar.num_of_states() != conjecture_minimize.num_of_states()){ + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} \ No newline at end of file