From 03a2177fe24b2f5b921db1ec34194f0e32fd8501 Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 09:53:42 -0400 Subject: [PATCH 01/12] b - use the pre-existing internal header for including problematic old Boost program_options, which disables some warnings in a pragma block, instead of the raw Boost header directly. Suppresses many warnings with clang on my Mac. --- include/options.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/options.h b/include/options.h index 4381d14b..ee49e9c8 100644 --- a/include/options.h +++ b/include/options.h @@ -25,7 +25,7 @@ #include #include -#include +#include "boost_program_options.h" namespace po = boost::program_options; class Options { From 7df63a26dc6ad7c7dd2239d9c489e211e3043a87 Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 10:12:36 -0400 Subject: [PATCH 02/12] b - fix a warning about an unused function parameter. It's a good warning because it uncovered a function in need of attention, to be done on a separate. --- src/cmd/options.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/cmd/options.cpp b/src/cmd/options.cpp index d91664be..0bca839d 100644 --- a/src/cmd/options.cpp +++ b/src/cmd/options.cpp @@ -197,9 +197,10 @@ void Options::validateAndPopulateSearchOptions(const po::variables_map& optsMap, } } -void Options::populateSampleOptions(const po::variables_map& optsMap, std::vector& pargs) { - SampleLimit = - std::numeric_limits::size_type>::max(); +void Options::populateSampleOptions(const po::variables_map&, std::vector& pargs) { + // FIXME: the map's no longer used and this function needs a rethink with tests. + // For example, SampleLimit is set to the max value of size_t, so it's a bad default. + SampleLimit = std::numeric_limits::size_type>::max(); LoopLimit = 1; if (!pargs.empty()) { @@ -212,3 +213,4 @@ void Options::populateSampleOptions(const po::variables_map& optsMap, std::vecto } } } + From ee82cb5fdeb9441fae19a748de5f71fefa05479a Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 11:04:15 -0400 Subject: [PATCH 03/12] b - lg_create_pattern() was missing a void for its function args, resulting in a warning --- include/lightgrep/api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/lightgrep/api.h b/include/lightgrep/api.h index 79d70212..26f027df 100644 --- a/include/lightgrep/api.h +++ b/include/lightgrep/api.h @@ -119,7 +119,7 @@ extern "C" { // Create and destory an LG_HPATTERN. // This can be reused when parsing pattern strings to avoid re-allocating memory. - LG_HPATTERN lg_create_pattern(); + LG_HPATTERN lg_create_pattern(void); void lg_destroy_pattern(LG_HPATTERN hPattern); From 8c3206394ea7c315a3b0281c0a6586c6a51589d1 Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 11:09:45 -0400 Subject: [PATCH 04/12] r - Change line breaking to be more consistent, the way I like it. --- include/lightgrep/api.h | 54 ++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/include/lightgrep/api.h b/include/lightgrep/api.h index 26f027df..ab4f41cd 100644 --- a/include/lightgrep/api.h +++ b/include/lightgrep/api.h @@ -135,10 +135,8 @@ extern "C" { // and fragmentation. A good rule of thumb is to pass the total number of // characters in all of the keywords. Everything will work fine with 0, // though. - LG_HFSM lg_create_fsm( - unsigned int patternCountHint, - unsigned int numFsmStateSizeHint - ); + LG_HFSM lg_create_fsm(unsigned int patternCountHint, + unsigned int numFsmStateSizeHint); void lg_destroy_fsm(LG_HFSM hFsm); @@ -149,13 +147,11 @@ extern "C" { // are no constraints on the value of userIndex. Set it for each pattern // to whatever you want to see as the LG_PatternInfo::UserIndex for that // pattern. - int lg_add_pattern( - LG_HFSM hFsm, - LG_HPATTERN hPattern, - const char* encoding, - uint64_t userIndex, - LG_Error** err - ); + int lg_add_pattern(LG_HFSM hFsm, + LG_HPATTERN hPattern, + const char* encoding, + uint64_t userIndex, + LG_Error** err); // Adds patterns to the FSM and Program. Each line of the pattern string // shall be formatted as tab separated columns: @@ -170,15 +166,13 @@ extern "C" { // The values of defaultEncodings and defaultOptions are used in case of // omitted columns. The "source" parameter indicates the source of the // patterns string (e.g., a path) and is used in error messages. - int lg_add_pattern_list( - LG_HFSM hFsm, - const char* patterns, - const char* source, - const char** defaultEncodings, - unsigned int defaultEncodingsNum, - const LG_KeyOptions* defaultOptions, - LG_Error** err - ); + int lg_add_pattern_list(LG_HFSM hFsm, + const char* patterns, + const char* source, + const char** defaultEncodings, + unsigned int defaultEncodingsNum, + const LG_KeyOptions* defaultOptions, + LG_Error** err); // The number of pattern-encoding pairs recognized by the FSM. This // will be one greater than the maximum pattern index accepted by @@ -260,11 +254,11 @@ extern "C" { // In particular, it may not be possible to determine the full length of a // hit until the entire byte stream has been searched... uint64_t lg_search(LG_HCONTEXT hCtx, - const char* bufStart, - const char* bufEnd, // pointer past the end of the buffer, i.e. bufEnd - bufStart == length of buffer - const uint64_t startOffset, // Increment this with each call, by the length of the previous buffer. i.e., startOffset += bufEnd - bufStart; - void* userData, // pass in what you like, it will be passed through to the callback function - LG_HITCALLBACK_FN callbackFn); + const char* bufStart, + const char* bufEnd, // pointer past the end of the buffer, i.e. bufEnd - bufStart == length of buffer + const uint64_t startOffset, // Increment this with each call, by the length of the previous buffer. i.e., startOffset += bufEnd - bufStart; + void* userData, // pass in what you like, it will be passed through to the callback function + LG_HITCALLBACK_FN callbackFn); // ...which is why it's important you call lg_closeout_search() when finished // searching a byte stream. This will flush out any remaining search hits. @@ -275,11 +269,11 @@ extern "C" { // Return value is the least offset for which a hit could still be returned // by further searching. uint64_t lg_search_resolve(LG_HCONTEXT hCtx, - const char* bufStart, - const char* bufEnd, - const uint64_t startOffset, - void* userData, - LG_HITCALLBACK_FN callbackFn); + const char* bufStart, + const char* bufEnd, + const uint64_t startOffset, + void* userData, + LG_HITCALLBACK_FN callbackFn); #ifdef __cplusplus } From bcfdde154f86a0c1fcc6d7a79a34030062624015 Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 11:59:17 -0400 Subject: [PATCH 05/12] r - C complains about typed function args without variable names (where it's useful in C++ to do this to suppress unused argument warnings). But C is also quite happy for main(void) instead of main(int argc, char *arg[]), so let's use that instead in the C example to be rid of a warning. --- examples/c_example/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/c_example/main.c b/examples/c_example/main.c index 4cacaee8..4bd4b6d8 100644 --- a/examples/c_example/main.c +++ b/examples/c_example/main.c @@ -48,7 +48,7 @@ void searchText(const char** textArray, unsigned int numStrings, LG_HCONTEXT sea lg_closeout_search(searcher, (void*)17, getHit); } -int main(int, char**) { +int main(void) { const char* keys[] = {"mary", "lamb", "[a-z]+"}; const unsigned int kcount = sizeof(keys)/sizeof(keys[0]); From 9b8ce1b1787ff99c911743d36878aedbe217e74b Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 12:11:27 -0400 Subject: [PATCH 06/12] r - kill off a warning related to comparison of an int vs a size_type (a long) by refactoring this for-loop into a while loop. Initializing i to zero in the initializer of the for-loop, with auto, caused the type to be an int. Initializing it directly from std::string::find_first_of coerces the type to a size_type, and then... this just suits a while loop a bit better. --- src/lib/parsetree.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/lib/parsetree.cpp b/src/lib/parsetree.cpp index 7ead82bf..eb17853c 100644 --- a/src/lib/parsetree.cpp +++ b/src/lib/parsetree.cpp @@ -48,7 +48,8 @@ uint32_t nodesUpperBound(const std::string& expr) { uint32_t ub = 2*expr.length(); // add extra nodes for ^, $, \b, \B - for (auto i = 0; (i = expr.find_first_of("^$bB", i)) != std::string::npos; ++i) { + auto i = expr.find_first_of("^$bB"); + while (i != std::string::npos) { switch (expr[i]) { case '^': case '$': @@ -56,13 +57,13 @@ uint32_t nodesUpperBound(const std::string& expr) { break; case 'B': case 'b': - if (i > 0 && expr[i-1] == '\\') { + if (i > 0 && expr[i - 1] == '\\') { ub += 6; } break; } + i = expr.find_first_of("^$bB", i + 1); } - return ub; } From 898c55c05954f97d8181360365a7da84d385267a Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 12:47:23 -0400 Subject: [PATCH 07/12] b - suppress unused variable warnings in bison autogen code. It was complaining about yynerrs being set to 0, but never being used. --- src/lib/re_grammar.ypp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/lib/re_grammar.ypp b/src/lib/re_grammar.ypp index 56b84321..a1365c09 100644 --- a/src/lib/re_grammar.ypp +++ b/src/lib/re_grammar.ypp @@ -80,6 +80,9 @@ namespace { void throwError(ParseError e, Context* ctx); } +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" + #define YYLTYPE_IS_DECLARED 1 #define YYLTYPE_IS_TRIVIAL 1 @@ -1087,6 +1090,8 @@ cc_named: %% +#pragma GCC diagnostic pop + namespace { template From 47b29ad1c2967663e25b272a3d9e7406ba53e644 Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 13:11:45 -0400 Subject: [PATCH 08/12] r - remove commented out #includes from lightgrep_c_util.cpp --- src/lib/lightgrep_c_util.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/lib/lightgrep_c_util.cpp b/src/lib/lightgrep_c_util.cpp index ee81d443..8282f2ba 100644 --- a/src/lib/lightgrep_c_util.cpp +++ b/src/lib/lightgrep_c_util.cpp @@ -15,11 +15,6 @@ * */ -/* -#include "container_out.h" -#include -*/ - #include #include #include From 3cca40d78ba6ff4b5cec6c713f167def178b2e9d Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 13:42:26 -0400 Subject: [PATCH 09/12] R - remove include/container_out.h. It's used only in test translation units and is not necessary there as Catch2 now supplies stream output for containers. --- Makefile.am | 1 - include/container_out.h | 40 ---------------------------------------- src/lib/vm.cpp | 1 - test/test_icu.cpp | 1 - test/test_oceencoder.cpp | 1 - test/test_rotencoder.cpp | 1 - test/test_utf16.cpp | 1 - test/test_utf32.cpp | 1 - test/test_utf8.cpp | 1 - test/test_xorencoder.cpp | 1 - 10 files changed, 49 deletions(-) delete mode 100644 include/container_out.h diff --git a/Makefile.am b/Makefile.am index 8988eba4..9a2d507a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -31,7 +31,6 @@ src_lib_liblightgrepint_la_SOURCES = \ include/chain.h \ include/codegen.h \ include/compiler.h \ - include/container_out.h \ include/decoders/asciidecoder.h \ include/decoders/bytesource.h \ include/decoders/decoder.h \ diff --git a/include/container_out.h b/include/container_out.h deleted file mode 100644 index 77facdcc..00000000 --- a/include/container_out.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2024 Aon Cyber Solutions - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#pragma once - -#include "ostream_join_iterator.h" - -#include -#include -#include - -template > class C> -std::ostream& operator<<(std::ostream& out, const C& con) { - out << '['; - std::copy(con.begin(), con.end(), ostream_join_iterator(out, ",")); - return out << ']'; -} - -template , typename A = std::allocator> class C> -std::ostream& operator<<(std::ostream& out, const C& con) { - out << '['; - std::copy(con.begin(), con.end(), ostream_join_iterator(out, ",")); - return out << ']'; -} diff --git a/src/lib/vm.cpp b/src/lib/vm.cpp index 26c95bf5..7688743f 100644 --- a/src/lib/vm.cpp +++ b/src/lib/vm.cpp @@ -16,7 +16,6 @@ */ #include "byteset.h" -#include "container_out.h" #include "vm.h" #include "program.h" diff --git a/test/test_icu.cpp b/test/test_icu.cpp index c3419a76..49522a2d 100644 --- a/test/test_icu.cpp +++ b/test/test_icu.cpp @@ -16,7 +16,6 @@ */ #include "basic.h" -#include "container_out.h" #include diff --git a/test/test_oceencoder.cpp b/test/test_oceencoder.cpp index 8ed9cfaa..15bcaa72 100644 --- a/test/test_oceencoder.cpp +++ b/test/test_oceencoder.cpp @@ -19,7 +19,6 @@ #include -#include "container_out.h" #include "encoders/ascii.h" #include "encoders/oceencoder.h" diff --git a/test/test_rotencoder.cpp b/test/test_rotencoder.cpp index 2aa3bc25..6b766e0a 100644 --- a/test/test_rotencoder.cpp +++ b/test/test_rotencoder.cpp @@ -19,7 +19,6 @@ #include -#include "container_out.h" #include "encoders/ascii.h" #include "encoders/rotencoder.h" diff --git a/test/test_utf16.cpp b/test/test_utf16.cpp index 453be99a..033f1461 100644 --- a/test/test_utf16.cpp +++ b/test/test_utf16.cpp @@ -17,7 +17,6 @@ #include -#include "container_out.h" #include "encoders/utf16.h" template diff --git a/test/test_utf32.cpp b/test/test_utf32.cpp index e8c337d3..3359088c 100644 --- a/test/test_utf32.cpp +++ b/test/test_utf32.cpp @@ -17,7 +17,6 @@ #include -#include "container_out.h" #include "encoders/utf32.h" template diff --git a/test/test_utf8.cpp b/test/test_utf8.cpp index b455df78..f0785f01 100644 --- a/test/test_utf8.cpp +++ b/test/test_utf8.cpp @@ -18,7 +18,6 @@ #include //#include "automata.h" -#include "container_out.h" //#include "fragment.h" //#include "utility.h" #include "encoders/utf8.h" diff --git a/test/test_xorencoder.cpp b/test/test_xorencoder.cpp index 6b35106a..7d4c76f8 100644 --- a/test/test_xorencoder.cpp +++ b/test/test_xorencoder.cpp @@ -19,7 +19,6 @@ #include -#include "container_out.h" #include "encoders/ascii.h" #include "encoders/xorencoder.h" From 1a71740930dc8cd0ff6c31a15ea87431f80d1c9f Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 14:03:28 -0400 Subject: [PATCH 10/12] R - Removed warning due to ostream_join_iterator's use of deprecated old-style C++ iterators, by removing it entirely. It was only used in one place for real, and a for-loop is a slightly simpler and smaller (in executable size for the translation unit) than using std::copy with the iterator. In C++23, there's std::ranges::join_with_view/std::ranges::views::join_with so the lack of this iterator can be replaced by a more modern version in STL with an upgrade to C++23. --- Makefile.am | 1 - include/ostream_join_iterator.h | 51 ----------------------------- src/cmd/options.cpp | 8 ++--- test/test_ostream_join_iterator.cpp | 44 ------------------------- 4 files changed, 3 insertions(+), 101 deletions(-) delete mode 100644 include/ostream_join_iterator.h delete mode 100644 test/test_ostream_join_iterator.cpp diff --git a/Makefile.am b/Makefile.am index 9a2d507a..43fd5f2c 100644 --- a/Makefile.am +++ b/Makefile.am @@ -298,7 +298,6 @@ test_test_SOURCES = \ test/test_oceencoder.cpp \ test/test_options.cpp \ test/test_optparser.cpp \ - test/test_ostream_join_iterator.cpp \ test/test_parser.cpp \ test/test_parsetree.cpp \ test/test_parseutil.cpp \ diff --git a/include/ostream_join_iterator.h b/include/ostream_join_iterator.h deleted file mode 100644 index 702be3a3..00000000 --- a/include/ostream_join_iterator.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2024 Aon Cyber Solutions - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#pragma once - -#include -#include - -template> -class ostream_join_iterator : - public std::iterator -{ -public: - typedef CharT char_type; - typedef Traits traits_type; - typedef std::basic_ostream ostream_type; - - ostream_join_iterator(ostream_type& s, const char_type* c = nullptr) : - stream(&s), string(c), print_string(nullptr) {} - - ostream_join_iterator& operator=(const T& val) { - if (print_string) *stream << print_string; - print_string = string; - *stream << val; - return *this; - } - - ostream_join_iterator& operator*() { return *this; } - ostream_join_iterator& operator++() { return *this; } - ostream_join_iterator& operator++(int) { return *this; } - -private: - ostream_type* stream; - const char_type* string; - const char_type* print_string; -}; diff --git a/src/cmd/options.cpp b/src/cmd/options.cpp index 0bca839d..7eeac50f 100644 --- a/src/cmd/options.cpp +++ b/src/cmd/options.cpp @@ -21,7 +21,6 @@ #include -#include "ostream_join_iterator.h" #include "util.h" std::ostream& Options::openOutput() const { @@ -54,10 +53,9 @@ std::vector> Options::getPatternLines() const os << p << '\t'; // encodings - std::copy( - Encodings.begin(), Encodings.end(), - ostream_join_iterator(os, ",") - ); + for (const auto& e : Encodings) { + os << e << ','; + } os << '\t'; diff --git a/test/test_ostream_join_iterator.cpp b/test/test_ostream_join_iterator.cpp deleted file mode 100644 index 7e2e6539..00000000 --- a/test/test_ostream_join_iterator.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2024 Aon Cyber Solutions - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -#include - -#include "ostream_join_iterator.h" - -#include -#include - -TEST_CASE("joinEmpty") { - const int *a = 0; - std::ostringstream ss; - std::copy(a, a, ostream_join_iterator(ss, ", ")); - REQUIRE("" == ss.str()); -} - -TEST_CASE("joinSingleton") { - const int a[] = { 1 }; - std::ostringstream ss; - std::copy(a, a + 1, ostream_join_iterator(ss, ", ")); - REQUIRE("1" == ss.str()); -} - -TEST_CASE("joinMultiple") { - const int a[] = { 1, 2, 3 }; - std::ostringstream ss; - std::copy(a, a + 3, ostream_join_iterator(ss, ", ")); - REQUIRE("1, 2, 3" == ss.str()); -} From afd89cc239436d34450ef891b53efdd160d31b60 Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 16:00:16 -0400 Subject: [PATCH 11/12] B - lg_add_pattern_list() was always returning -1 unless it was passed a null pointer for err. This is still gross and needs some refactoring. lg_add_pattern_list() now apes lg_add_pattern() in returning -1 on error and 0 on success... but this is _not_ how lg_parse_pattern() works, and it leads to some impedance mismatch throughout. --- src/lib/lightgrep_c_api.cpp | 32 ++++++++++++++++++++++---------- test/test_c_api.cpp | 21 ++++++++++++--------- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/lib/lightgrep_c_api.cpp b/src/lib/lightgrep_c_api.cpp index 0394b673..ee89ce1a 100644 --- a/src/lib/lightgrep_c_api.cpp +++ b/src/lib/lightgrep_c_api.cpp @@ -187,7 +187,7 @@ int lg_add_pattern(LG_HFSM hFsm, namespace { template - void addPatternHelper(LG_HFSM hFsm, + int addPatternHelper(LG_HFSM hFsm, LG_HPATTERN hPat, const std::string& pat, LG_KeyOptions* keyOpts, @@ -195,21 +195,28 @@ namespace { int lnum, LG_Error**& err) { - lg_parse_pattern(hPat, pat.c_str(), keyOpts, err); + auto ret = lg_parse_pattern(hPat, pat.c_str(), keyOpts, err); if (*err) { (*err)->Index = lnum; err = &((*err)->Next); - return; + return -1; + } + else if (ret == 0) { + return -1; } + bool hadError = false; for (const std::string& enc : encodings) { - lg_add_pattern(hFsm, hPat, enc.c_str(), lnum, err); + ret = lg_add_pattern(hFsm, hPat, enc.c_str(), lnum, err); if (*err) { (*err)->Index = lnum; err = &((*err)->Next); - continue; + } + if (ret == -1) { + hadError = true; } } + return hadError ? -1 : 0; } int addPatternList(LG_HFSM hFsm, @@ -220,6 +227,7 @@ namespace { const LG_KeyOptions* defaultOptions, LG_Error** err) { + bool hadError = false; std::unique_ptr ph( lg_create_pattern(), lg_destroy_pattern @@ -248,6 +256,7 @@ namespace { if (ccur == cend) { // FIXME: is this possible? if (err) { + hadError = true; *err = makeError("no pattern", nullptr, nullptr, source, lnum); err = &((*err)->Next); } @@ -266,6 +275,7 @@ namespace { if (etok.begin() == etok.end()) { if (err) { + hadError = true; *err = makeError( "no encoding list", pat.c_str(), nullptr, source, lnum @@ -285,16 +295,18 @@ namespace { } } } - - addPatternHelper(hFsm, ph.get(), pat, &opts, etok, lnum, err); + if (addPatternHelper(hFsm, ph.get(), pat, &opts, etok, lnum, err) != 0) { + hadError = true; + } } else { // use default encodings and options - addPatternHelper(hFsm, ph.get(), pat, &opts, defEncs, lnum, err); + if (addPatternHelper(hFsm, ph.get(), pat, &opts, defEncs, lnum, err) != 0) { + hadError = true; + } } } - - return err ? -1 : 0; + return hadError ? -1 : 0; } } diff --git a/test/test_c_api.cpp b/test/test_c_api.cpp index d25defee..aeef881f 100644 --- a/test/test_c_api.cpp +++ b/test/test_c_api.cpp @@ -244,10 +244,11 @@ TEST_CASE("testLgAddPatternList") { LG_Error* err = nullptr; - lg_add_pattern_list( + auto ret = lg_add_pattern_list( fsm.get(), pats, "testLgAddPatternList", defEncs, defEncsNum, &defOpts, &err ); + REQUIRE(ret == 0); std::unique_ptr e{err, lg_free_error}; REQUIRE(!err); @@ -272,10 +273,11 @@ TEST_CASE("testLgAddPatternListFixedString") { LG_Error* err = nullptr; - lg_add_pattern_list( + auto ret = lg_add_pattern_list( fsm.get(), pats, "testLgAddPatternListFixedString", defEncs, defEncsNum, &defOpts, &err ); + REQUIRE(ret == 0); std::unique_ptr e{err, lg_free_error}; REQUIRE(!err); @@ -298,11 +300,11 @@ TEST_CASE("testLgAddPatternListCRLFHeck") { LG_Error* err = nullptr; - lg_add_pattern_list( + auto ret = lg_add_pattern_list( fsm.get(), pats.c_str(), "testLgAddPatternListCRLFHeck", defEncs, 1, &opts, &err ); - + REQUIRE(ret == 0); REQUIRE(!err); const char* exp_pats[] = { "foo", "bar", "\baz", "quux", "xyzzy" }; @@ -357,10 +359,11 @@ TEST_CASE("testLgAddPatternListBadEncoding") { LG_Error* err = nullptr; - lg_add_pattern_list( + auto ret = lg_add_pattern_list( fsm.get(), pats, "testLgAddPatternListBadEncoding", defEncs, defEncsNum, &defOpts, &err ); + REQUIRE(ret == -1); std::unique_ptr e{err, lg_free_error}; @@ -404,11 +407,11 @@ TEST_CASE("testLgAddPatternListCopyOnWritePatternMap") { std::unique_ptr e{err, lg_free_error}; // put some patterns into the fsm - lg_add_pattern_list( + auto ret = lg_add_pattern_list( fsm.get(), pats1, "whatever", defEncs, defEncsNum, &defOpts, &err ); - + REQUIRE(ret == 0); REQUIRE(!err); REQUIRE(lg_fsm_pattern_count(fsm.get()) == 1); @@ -425,11 +428,11 @@ TEST_CASE("testLgAddPatternListCopyOnWritePatternMap") { // put more patterns into the fsm const char pats2[] = "bar\tUTF-8\t0\t0\n"; - lg_add_pattern_list( + ret = lg_add_pattern_list( fsm.get(), pats2, "whatever", defEncs, defEncsNum, &defOpts, &err ); - + REQUIRE(ret == 0); REQUIRE(!err); REQUIRE(lg_fsm_pattern_count(fsm.get()) == 2); From 4408545701710c1112ba439d129c2e622be04d89 Mon Sep 17 00:00:00 2001 From: Jon Stewart Date: Wed, 19 Mar 2025 16:23:13 -0400 Subject: [PATCH 12/12] R - only copy the default encodings if we're in the branch of them being specified. Pass the encodings into addPatternHelper() as const. Rename 'el' to 'encList'. --- src/lib/lightgrep_c_api.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/lib/lightgrep_c_api.cpp b/src/lib/lightgrep_c_api.cpp index ee89ce1a..252e8f94 100644 --- a/src/lib/lightgrep_c_api.cpp +++ b/src/lib/lightgrep_c_api.cpp @@ -190,7 +190,7 @@ namespace { int addPatternHelper(LG_HFSM hFsm, LG_HPATTERN hPat, const std::string& pat, - LG_KeyOptions* keyOpts, + const LG_KeyOptions* keyOpts, const E& encodings, int lnum, LG_Error**& err) @@ -266,26 +266,25 @@ namespace { // read the pattern const std::string pat(*ccur); - LG_KeyOptions opts(*defaultOptions); - - if (++ccur != cend) { + if (++ccur != cend) { // has encodings & maybe options // read the encoding list - const std::string el(*ccur); - const tokenizer etok(el, char_separator(",")); + const std::string encList(*ccur); + const tokenizer etok(encList, char_separator(",")); if (etok.begin() == etok.end()) { if (err) { - hadError = true; *err = makeError( "no encoding list", pat.c_str(), nullptr, source, lnum ); err = &((*err)->Next); } + hadError = true; continue; } // read the options + LG_KeyOptions opts(*defaultOptions); if (++ccur != cend) { opts.FixedString = boost::lexical_cast(*ccur); if (++ccur != cend) { @@ -299,9 +298,8 @@ namespace { hadError = true; } } - else { - // use default encodings and options - if (addPatternHelper(hFsm, ph.get(), pat, &opts, defEncs, lnum, err) != 0) { + else { // use default encodings and options + if (addPatternHelper(hFsm, ph.get(), pat, defaultOptions, defEncs, lnum, err) != 0) { hadError = true; } }