From 889b135a44d0723652d4345d701997ab9c208d85 Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Tue, 24 Mar 2026 11:18:24 -0400 Subject: [PATCH 01/14] Reintroduced Python language bindings --- DEVELOPMENT.md | 39 +- README.md | 25 +- SConstruct | 5 +- ruff.toml | 1 + src/SConscript | 8 +- src/bindings/python/README.md | 158 +++++++ src/bindings/python/SConscript | 55 +++ src/bindings/python/hammer_tests.py | 646 ++++++++++++++++++++++++++++ src/bindings/python/setup.py | 38 ++ src/bindings/swig/hammer.i | 372 ++++++++++++++++ 10 files changed, 1336 insertions(+), 11 deletions(-) create mode 100644 src/bindings/python/README.md create mode 100644 src/bindings/python/SConscript create mode 100644 src/bindings/python/hammer_tests.py create mode 100644 src/bindings/python/setup.py create mode 100644 src/bindings/swig/hammer.i diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 3e5dfc9..197969c 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -18,10 +18,13 @@ Use `clang-format` to keep C code consistent. Examples: - Format all C sources and headers: + ```bash clang-format -i **/*.c **/*.h ``` + - Format a single file: + ```bash clang-format -i path/to/file.c ``` @@ -39,49 +42,81 @@ The repository centralizes the semantic version in the `VERSION` file. 
Update th ### Test coverage Install coverage tools: + ```bash sudo apt install lcov xdg-utils ``` Generate coverage and open the HTML report: + ```bash scons -c --variant=debug && scons --coverage --variant=debug test && mkdir -p coverage && lcov --directory build/debug/src --zerocounters && lcov --ignore-errors gcov --capture --initial --directory build/debug/src --output-file coverage/base.info && scons --coverage --variant=debug test && lcov --ignore-errors gcov --capture --directory build/debug/src --output-file coverage/test.info && lcov --add-tracefile coverage/base.info --add-tracefile coverage/test.info --output-file coverage/coverage.info && genhtml coverage/coverage.info --output-directory coverage/html && xdg-open coverage/html/index.html ``` Notes: + - On WSL, replace `xdg-open` with `wslview` (from the `wslu` package). - Coverage and object files (`.gcov`, `.gcno`, `.gcda`, `.o`) are generated under `build/debug/` or `build/opt/`. - To generate `.gcov` files manually: `scons --coverage --variant=debug gcov`. +### Building and testing language bindings + +#### Python + +Install the required tools: + +```bash +sudo apt install swig +pip install setuptools +``` + +Build/run the Python bindings only: + +```bash +scons bindings=python testpython +``` + +Run the full test suite including Python bindings: + +```bash +scons bindings=python test +``` + ### Linting Python and SCons files Install `ruff`: + ```bash sudo apt install pipx pipx install ruff ``` Lint all Python and SCons files with: + ```bash -ruff check $(find . -name "*.py" -o -name "SConstruct" -o -name "SConscript") +ruff check $(find . -path ./build -prune -o \( -name "*.py" -o -name "SConstruct" -o -name "SConscript" \) -print) ``` Notes: + - `ruff` configuration lives in `ruff.toml`. ### Generating documentation (Doxygen) Install Doxygen: + ```bash sudo apt install doxygen ``` Build the documentation: + ```bash doxygen Doxyfile ``` Output will be in `docs/html/`. 
Open the main page: + ```bash xdg-open docs/html/index.html ``` @@ -133,10 +168,12 @@ In other words, don't let the (memory manager) streams cross. ### Parse Result Behavior **Regarding parse_result_t:** + - If a parse fails, the parse_result_t will be NULL. - If a parse is successful but there's nothing there (i.e., if end_p succeeds), then there's a parse_result_t but its ast is NULL. **Regarding input location:** + - If parse is successful, input is left at beginning of next thing to be read. - If parse fails, location is UNPREDICTABLE. diff --git a/README.md b/README.md index 0029a2a..a20d336 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ Hammer is a parsing library. Like many modern parsing libraries, it provides a p Hammer is written in C and provides a packrat parsing backend. ## MicroHammer + MicroHammer is a slimmed-down version of Hammer with the goal of providing a lightweight, Linux-focused version of Hammer with a minimal, clean codebase. [Link to public release.](https://github.com/riversideresearch/hammer/releases/) The main feature of MicroHammer is its significantly smaller codebase, allowing for easier maintenance and onboarding. Key differences from the full Hammer library include: @@ -13,8 +14,7 @@ The main feature of MicroHammer is its significantly smaller codebase, allowing - More thorough and consistent documentation - Windows / macOS not supported - Packrat parsing backend only -- No bindings for other languages - +- Language bindings for Python (see [Python Bindings](src/bindings/python/README.md)) ## Features @@ -70,6 +70,21 @@ To learn about hammer, check: - [Hammer Primer](https://github.com/sergeybratus/HammerPrimer) (outdated in terms of code, but good to get the general thinking) - [Try Hammer](https://github.com/sboesen/TryHammer) +## Language Bindings + +Hammer provides bindings for use from languages other than C. + +### Python + +Requires [SWIG](https://www.swig.org/) 4.x, Python 3.8+, and `setuptools`. 
+ +```bash +sudo apt install swig +scons bindings=python +``` + +See [src/bindings/python/README.md](src/bindings/python/README.md) for the full API reference and usage guide. + ## Examples The `examples/` directory contains some simple examples, currently including: @@ -83,12 +98,12 @@ For information on contributing to Hammer, including development setup, code for ## Contact -Send an email to parsing@riversideresearch.org +Send an email to <parsing@riversideresearch.org> ## Acknowledgment This material is based upon work supported by the Defense Advanced Research Projects Agency (DARPA) under Prime Contract No. HR001119C0077. Any opinions, findings, and conclusions or recommendations expressed in this material are those of the author(s) and do not necessarily reflect the views of the Defense Advanced Research Projects Agency (DARPA). -This work is a fork of the repository found at: https://gitlab.special-circumstanc.es/hammer/hammer +This work is a fork of the repository found at: <https://gitlab.special-circumstanc.es/hammer/hammer> -Distribution A: Approved for Public Release \ No newline at end of file +Distribution A: Approved for Public Release diff --git a/SConstruct b/SConstruct index 4e80348..3d0a1bc 100644 --- a/SConstruct +++ b/SConstruct @@ -21,7 +21,10 @@ vars.Add( vars.Add( PathVariable("prefix", "Where to install in the FHS", "/usr/local", PathVariable.PathAccept) ) -vars.Add("python", "Python interpreter", "python") +vars.Add( + ListVariable("bindings", "Language bindings to build", "none", ["python"]) +) +vars.Add("python", "Python interpreter", "python3") tools = ["default", "scanreplace"] diff --git a/ruff.toml b/ruff.toml index 5a810e3..724b67a 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,6 +1,7 @@ # https://docs.astral.sh/ruff/configuration/ line-length = 100 +exclude = ["build/"] [lint] ignore = ["F821"] diff --git a/src/SConscript b/src/SConscript index 0e4681c..29a2d94 100644 --- a/src/SConscript +++ b/src/SConscript @@ -222,7 +222,7 @@ if GetOption("with_tests"): AlwaysBuild(ctest) testruns.append(ctest) -if
libhammer_shared is not None: - Export("libhammer_static libhammer_shared") -else: - Export("libhammer_static") +Export("libhammer_static libhammer_shared") + +for b in env.get("bindings", []): + env.SConscript(["bindings/%s/SConscript" % b]) diff --git a/src/bindings/python/README.md b/src/bindings/python/README.md new file mode 100644 index 0000000..2f2fe71 --- /dev/null +++ b/src/bindings/python/README.md @@ -0,0 +1,158 @@ +# Hammer Python Bindings + +Python bindings for the Hammer parser combinator library, generated with [SWIG](https://www.swig.org/). + +## Prerequisites + +- [SWIG](https://www.swig.org/) 4.x +- Python 3.8+ +- `setuptools` (`pip install setuptools`) +- Hammer shared library built (see top-level README) + +```bash +sudo apt install swig +pip install setuptools +``` + +## Building + +From the repository root, pass `bindings=python` to SCons: + +```bash +scons bindings=python +``` + +This copies the SWIG interface into the build tree, runs `setup.py build_ext --inplace`, and produces `hammer.py` and `_hammer.so` under `build/opt/src/bindings/python/`. + +The default interpreter is `python3`. To use a specific version: + +```bash +scons bindings=python python=python3.12 +``` + +## Running the Tests + +```bash +scons bindings=python testpython +``` + +Or run the test suite directly after building: + +```bash +cd build/opt/src/bindings/python +LD_LIBRARY_PATH=../.. python3 -m unittest hammer_tests +``` + +## Usage + +Add the build directory to your Python path, then import `hammer`: + +```python +import sys +sys.path.insert(0, "build/opt/src/bindings/python") +import hammer as h +``` + +If you ran `scons install` (or `scons bindings=python installpython`), the package is available system-wide without any path manipulation.
+ +### Basic Example + +```python +import hammer as h + +# Parse the literal bytes "GET " +method = h.token(b"GET ") + +# Parse one or more printable ASCII characters +printable = h.many1(h.ch_range(b"\x21", b"\x7e")) + +# Sequence: method followed by the path +request_line = h.sequence(method, printable) + +result = request_line.parse(b"GET /index.html") +print(result) # (b'GET ', (b'/', b'i', b'n', ...)) +``` + +### Parser Combinators + +| Python function | Description | +|----------------------|----------------------------------------------------------| +| `h.token(b"...")` | Match a literal byte string | +| `h.ch(byte)` | Match a single byte (integer or `bytes` of length 1) | +| `h.ch_range(lo, hi)` | Match any byte in `[lo, hi]` | +| `h.in_(charset)` | Match any byte in the given `bytes` charset | +| `h.not_in(charset)` | Match any byte not in the given `bytes` charset | +| `h.sequence(*ps)` | Match each parser in order; return tuple of results | +| `h.choice(*ps)` | Try each parser in order; return first success | +| `h.many(p)` | Match `p` zero or more times; return tuple | +| `h.many1(p)` | Match `p` one or more times; return tuple | +| `h.repeat_n(p, n)` | Match `p` exactly `n` times; return tuple | +| `h.optional(p)` | Match `p` or produce a `Placeholder` on failure | +| `h.ignore(p)` | Match `p` but suppress its result from sequences | +| `h.sepBy(p, sep)` | Match `p` separated by `sep`, zero or more times | +| `h.sepBy1(p, sep)` | Match `p` separated by `sep`, one or more times | +| `h.left(p1, p2)` | Match both; return result of `p1` | +| `h.right(p1, p2)` | Match both; return result of `p2` | +| `h.middle(p1,p2,p3)` | Match all three; return result of `p2` | +| `h.butnot(p1, p2)` | Match `p1` only if `p2` does not also match | +| `h.difference(p1,p2)`| Match `p1` only when `p2` matches less input | +| `h.xor(p1, p2)` | Match exactly one of `p1` or `p2`, not both | +| `h.and_(p)` | Succeed if `p` would match, but consume no input | +| `h.not_(p)` | 
Succeed if `p` would not match, consuming no input | +| `h.whitespace(p)` | Skip leading whitespace, then match `p` | +| `h.action(p, fn)` | Apply `fn` to the result of `p` | +| `h.attr_bool(p, fn)` | Match `p` only if predicate `fn` returns `True` | +| `h.int_range(p,lo,hi)`| Match `p` only if the integer result is in `[lo, hi]` | +| `h.indirect()` | Create a forward-declared parser for recursive grammars | +| `h.epsilon_p()` | Always succeed, consuming no input | +| `h.end_p()` | Succeed only at end of input | +| `h.nothing_p()` | Always fail | + +### Integer Parsers + +```python +h.uint8() # unsigned 8-bit +h.uint16() # unsigned 16-bit, big-endian +h.uint32() # unsigned 32-bit, big-endian +h.uint64() # unsigned 64-bit, big-endian +h.int8() # signed 8-bit +h.int16() # signed 16-bit, big-endian +h.int32() # signed 32-bit, big-endian +h.int64() # signed 64-bit, big-endian +``` + +### Actions and Predicates + +```python +# Transform a parse result +digits = h.action(h.many1(h.ch_range(b"0", b"9")), + lambda bs: int(b"".join(bs))) + +# Reject a result based on a condition +even_byte = h.attr_bool(h.uint8(), lambda n: n % 2 == 0) +``` + +### Recursive Grammars + +Use `h.indirect()` and `.bind()` to define recursive parsers: + +```python +expr = h.indirect() +atom = h.ch_range(b"a", b"z") +expr.bind(h.choice(h.sequence(atom, expr), h.epsilon_p())) + +result = expr.parse(b"abc") # (b'a', (b'b', (b'c',))) +``` + +### Parse Results + +- A successful parse returns the parsed value (bytes, int, tuple, or a value returned by an action). +- A failed parse returns `None`. +- `h.optional()` uses `h.Placeholder()` to represent a missing optional element. + +## Notes + +- All byte-oriented parsers expect and return `bytes` objects. Pass input as `b"..."`. +- `ch()` accepts either an integer byte value or a single-byte `bytes` object. +- `ch_range()`, `in_()`, and `not_in()` accept `bytes` arguments only. +- The `sequence()` and `many()` family return Python `tuple` objects. 
diff --git a/src/bindings/python/SConscript b/src/bindings/python/SConscript new file mode 100644 index 0000000..3e9b8c8 --- /dev/null +++ b/src/bindings/python/SConscript @@ -0,0 +1,55 @@ +# Copyright (c) 2026 Riverside Research +# -*- python -*- + +from __future__ import absolute_import, division, print_function + +import os +import shutil + +Import("env libhammer_shared testruns targets") + +if libhammer_shared is None: + print("Warning: Python bindings require the shared library (not available in coverage/gprof builds). Skipping.") + Return() + +if not shutil.which("swig"): + print("Warning: SWIG not found. Install swig to build Python bindings. Skipping.") + Return() + +pythonenv = env.Clone(IMPLICIT_COMMAND_DEPENDENCIES=0) + +swig = pythonenv.Command("hammer.i", "#src/bindings/swig/hammer.i", Copy("$TARGET", "$SOURCE")) +setup = ["setup.py"] +pydir = os.path.join(env["BUILD_BASE"], "src/bindings/python") +pysetup = os.path.join(pydir, "setup.py") +libhammer_python = pythonenv.Command( + ["hammer.py", "hammer_wrap.c"], + [swig, setup], + "%s %s build_ext --inplace" % (env["python"], pysetup), +) +Default(libhammer_python) + +pytestenv = pythonenv.Clone() +pytestenv["ENV"]["LD_LIBRARY_PATH"] = os.path.dirname(str(libhammer_shared[0])) +pytestenv["ENV"]["PYTHONPATH"] = pydir +pytests = ["hammer_tests.py"] +pytestexec = pytestenv.Command( + "hammer_tests.stamp", + pytests + list(libhammer_python), + "LD_LIBRARY_PATH=%s PYTHONPATH=%s %s -m unittest discover -s %s -p 'hammer_tests.py' && touch $TARGET" + % ( + os.path.dirname(str(libhammer_shared[0])), + pydir, + env["python"], + pydir, + ), +) +pytest = Alias("testpython", [pytestexec], pytestexec) +AlwaysBuild(pytestexec) +testruns.append(pytest) + +pyinstallexec = pythonenv.Command( + None, libhammer_python, "%s %s install" % (env["python"], pysetup) +) +pyinstall = Alias("installpython", [pyinstallexec], pyinstallexec) +targets.append(pyinstall) diff --git a/src/bindings/python/hammer_tests.py 
b/src/bindings/python/hammer_tests.py new file mode 100644 index 0000000..0380bfd --- /dev/null +++ b/src/bindings/python/hammer_tests.py @@ -0,0 +1,646 @@ +from __future__ import absolute_import, division, print_function + +import unittest + +import hammer as h + + +class TestTokenParser(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.token(b"95\xa2") + + def test_success(self): + self.assertEqual(self.parser.parse(b"95\xa2"), b"95\xa2") + + def test_partial_fails(self): + self.assertEqual(self.parser.parse(b"95"), None) + + +class TestChParser(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser_int = h.ch(0xa2) + cls.parser_chr = h.ch(b"\xa2") + + def test_success(self): + self.assertEqual(self.parser_int.parse(b"\xa2"), 0xa2) + self.assertEqual(self.parser_chr.parse(b"\xa2"), b"\xa2") + + def test_failure(self): + self.assertEqual(self.parser_int.parse(b"\xa3"), None) + self.assertEqual(self.parser_chr.parse(b"\xa3"), None) + + +class TestChRange(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.ch_range(b"a", b"c") + + def test_success(self): + self.assertEqual(self.parser.parse(b"b"), b"b") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"d"), None) + + +class TestInt64(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int64() + + def test_success(self): + self.assertEqual(self.parser.parse(b"\xff\xff\xff\xfe\x00\x00\x00\x00"), -0x200000000) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"\xff\xff\xff\xfe\x00\x00\x00"), None) + + +class TestInt32(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int32() + + def test_success(self): + self.assertEqual(self.parser.parse(b"\xff\xfe\x00\x00"), -0x20000) + self.assertEqual(self.parser.parse(b"\x00\x02\x00\x00"), 0x20000) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"\xff\xfe\x00"), None) + 
self.assertEqual(self.parser.parse(b"\x00\x02\x00"), None) + + +class TestInt16(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int16() + + def test_success(self): + self.assertEqual(self.parser.parse(b"\xfe\x00"), -0x200) + self.assertEqual(self.parser.parse(b"\x02\x00"), 0x200) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"\xfe"), None) + self.assertEqual(self.parser.parse(b"\x02"), None) + + +class TestInt8(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int8() + + def test_success(self): + self.assertEqual(self.parser.parse(b"\x88"), -0x78) + + def test_failure(self): + self.assertEqual(self.parser.parse(b""), None) + + +class TestUint64(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.uint64() + + def test_success(self): + self.assertEqual(self.parser.parse(b"\x00\x00\x00\x02\x00\x00\x00\x00"), 0x200000000) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"\x00\x00\x00\x02\x00\x00\x00"), None) + + +class TestUint32(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.uint32() + + def test_success(self): + self.assertEqual(self.parser.parse(b"\x00\x02\x00\x00"), 0x20000) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"\x00\x02\x00"), None) + + +class TestUint16(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.uint16() + + def test_success(self): + self.assertEqual(self.parser.parse(b"\x02\x00"), 0x200) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"\x02"), None) + + +class TestUint8(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.uint8() + + def test_success(self): + self.assertEqual(self.parser.parse(b"\x78"), 0x78) + + def test_failure(self): + self.assertEqual(self.parser.parse(b""), None) + + +class TestIntRange(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int_range(h.uint8(), 3, 10) + + def 
test_success(self): + self.assertEqual(self.parser.parse(b"\x05"), 5) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"\x0b"), None) + + +class TestWhitespace(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.whitespace(h.ch(b"a")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a"), b"a") + self.assertEqual(self.parser.parse(b" a"), b"a") + self.assertEqual(self.parser.parse(b" a"), b"a") + self.assertEqual(self.parser.parse(b"\ta"), b"a") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"_a"), None) + + +class TestWhitespaceEnd(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.whitespace(h.end_p()) + + def test_success(self): + self.assertEqual(self.parser.parse(b""), None) + self.assertEqual(self.parser.parse(b" "), None) + + def test_failure(self): + self.assertEqual(self.parser.parse(b" x"), None) + + +class TestLeft(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.left(h.ch(b"a"), h.ch(b" ")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a "), b"a") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a"), None) + self.assertEqual(self.parser.parse(b" "), None) + self.assertEqual(self.parser.parse(b"ab"), None) + + +class TestRight(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.right(h.ch(b" "), h.ch(b"a")) + + def test_success(self): + self.assertEqual(self.parser.parse(b" a"), b"a") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a"), None) + self.assertEqual(self.parser.parse(b" "), None) + self.assertEqual(self.parser.parse(b"ba"), None) + + +class TestMiddle(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.middle(h.ch(b" "), h.ch(b"a"), h.ch(b" ")) + + def test_success(self): + self.assertEqual(self.parser.parse(b" a "), b"a") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a"), None) + 
self.assertEqual(self.parser.parse(b" "), None) + self.assertEqual(self.parser.parse(b" a"), None) + self.assertEqual(self.parser.parse(b"a "), None) + self.assertEqual(self.parser.parse(b" b "), None) + self.assertEqual(self.parser.parse(b"ba "), None) + self.assertEqual(self.parser.parse(b" ab"), None) + + +class TestAction(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.action( + h.sequence( + h.choice(h.ch(b"a"), h.ch(b"A")), + h.choice(h.ch(b"b"), h.ch(b"B")), + ), + lambda x: [y.upper() for y in x], + ) + + def test_success(self): + self.assertEqual(self.parser.parse(b"ab"), [b"A", b"B"]) + self.assertEqual(self.parser.parse(b"AB"), [b"A", b"B"]) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"XX"), None) + + +class TestIn(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.in_(b"abc") + + def test_success(self): + self.assertEqual(self.parser.parse(b"b"), b"b") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"d"), None) + + +class TestNotIn(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.not_in(b"abc") + + def test_success(self): + self.assertEqual(self.parser.parse(b"d"), b"d") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a"), None) + + +class TestEndP(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch(b"a"), h.end_p()) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a"), (b"a",)) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"aa"), None) + + +class TestNothingP(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.nothing_p() + + def test_success(self): + pass + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a"), None) + + +class TestSequence(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch(b"a"), h.ch(b"b")) + + def test_success(self): + 
self.assertEqual(self.parser.parse(b"ab"), (b"a", b"b")) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a"), None) + self.assertEqual(self.parser.parse(b"b"), None) + + +class TestSequenceWhitespace(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch(b"a"), h.whitespace(h.ch(b"b"))) + + def test_success(self): + self.assertEqual(self.parser.parse(b"ab"), (b"a", b"b")) + self.assertEqual(self.parser.parse(b"a b"), (b"a", b"b")) + self.assertEqual(self.parser.parse(b"a b"), (b"a", b"b")) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a c"), None) + + +class TestChoice(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.choice(h.ch(b"a"), h.ch(b"b")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a"), b"a") + self.assertEqual(self.parser.parse(b"b"), b"b") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"c"), None) + + +class TestButNot(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.butnot(h.ch(b"a"), h.token(b"ab")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a"), b"a") + self.assertEqual(self.parser.parse(b"aa"), b"a") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"ab"), None) + + +class TestButNotRange(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.butnot(h.ch_range(b"0", b"9"), h.ch(b"6")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"4"), b"4") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"6"), None) + + +class TestDifference(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.difference(h.token(b"ab"), h.ch(b"a")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"ab"), b"ab") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a"), None) + + +class TestXor(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = 
h.xor(h.ch_range(b"0", b"6"), h.ch_range(b"5", b"9")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"0"), b"0") + self.assertEqual(self.parser.parse(b"9"), b"9") + + def test_failure(self): + self.assertEqual(self.parser.parse(b"5"), None) + self.assertEqual(self.parser.parse(b"a"), None) + + +class TestMany(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.many(h.choice(h.ch(b"a"), h.ch(b"b"))) + + def test_success(self): + self.assertEqual(self.parser.parse(b""), ()) + self.assertEqual(self.parser.parse(b"a"), (b"a",)) + self.assertEqual(self.parser.parse(b"b"), (b"b",)) + self.assertEqual( + self.parser.parse(b"aabbaba"), (b"a", b"a", b"b", b"b", b"a", b"b", b"a") + ) + + def test_failure(self): + pass + + +class TestMany1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.many1(h.choice(h.ch(b"a"), h.ch(b"b"))) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a"), (b"a",)) + self.assertEqual(self.parser.parse(b"b"), (b"b",)) + self.assertEqual( + self.parser.parse(b"aabbaba"), (b"a", b"a", b"b", b"b", b"a", b"b", b"a") + ) + + def test_failure(self): + self.assertEqual(self.parser.parse(b""), None) + self.assertEqual(self.parser.parse(b"daabbabadef"), None) + + +class TestRepeatN(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.repeat_n(h.choice(h.ch(b"a"), h.ch(b"b")), 2) + + def test_success(self): + self.assertEqual(self.parser.parse(b"abdef"), (b"a", b"b")) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"adef"), None) + self.assertEqual(self.parser.parse(b"dabdef"), None) + + +class TestOptional(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence( + h.ch(b"a"), + h.optional(h.choice(h.ch(b"b"), h.ch(b"c"))), + h.ch(b"d"), + ) + + def test_success(self): + self.assertEqual(self.parser.parse(b"abd"), (b"a", b"b", b"d")) + self.assertEqual(self.parser.parse(b"acd"), (b"a", b"c", b"d")) + 
self.assertEqual(self.parser.parse(b"ad"), (b"a", h.Placeholder(), b"d")) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"aed"), None) + self.assertEqual(self.parser.parse(b"ab"), None) + self.assertEqual(self.parser.parse(b"ac"), None) + + +class TestIgnore(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch(b"a"), h.ignore(h.ch(b"b")), h.ch(b"c")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"abc"), (b"a", b"c")) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"ac"), None) + + +class TestSepBy(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sepBy( + h.choice(h.ch(b"1"), h.ch(b"2"), h.ch(b"3")), h.ch(b",") + ) + + def test_success(self): + self.assertEqual(self.parser.parse(b"1,2,3"), (b"1", b"2", b"3")) + self.assertEqual(self.parser.parse(b"1,3,2"), (b"1", b"3", b"2")) + self.assertEqual(self.parser.parse(b"1,3"), (b"1", b"3")) + self.assertEqual(self.parser.parse(b"3"), (b"3",)) + self.assertEqual(self.parser.parse(b""), ()) + + def test_failure(self): + pass + + +class TestSepBy1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sepBy1( + h.choice(h.ch(b"1"), h.ch(b"2"), h.ch(b"3")), h.ch(b",") + ) + + def test_success(self): + self.assertEqual(self.parser.parse(b"1,2,3"), (b"1", b"2", b"3")) + self.assertEqual(self.parser.parse(b"1,3,2"), (b"1", b"3", b"2")) + self.assertEqual(self.parser.parse(b"1,3"), (b"1", b"3")) + self.assertEqual(self.parser.parse(b"3"), (b"3",)) + + def test_failure(self): + self.assertEqual(self.parser.parse(b""), None) + + +class TestEpsilonP1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch(b"a"), h.epsilon_p(), h.ch(b"b")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"ab"), (b"a", b"b")) + + def test_failure(self): + pass + + +class TestEpsilonP2(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = 
h.sequence(h.epsilon_p(), h.ch(b"a")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a"), (b"a",)) + + def test_failure(self): + pass + + +class TestEpsilonP3(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch(b"a"), h.epsilon_p()) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a"), (b"a",)) + + def test_failure(self): + pass + + +class TestAttrBool(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.attr_bool( + h.many1(h.choice(h.ch(b"a"), h.ch(b"b"))), + lambda x: x[0] == x[1], + ) + + def test_success(self): + self.assertEqual(self.parser.parse(b"aa"), (b"a", b"a")) + self.assertEqual(self.parser.parse(b"bb"), (b"b", b"b")) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"ab"), None) + + +class TestAnd1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.and_(h.ch(b"0")), h.ch(b"0")) + + def test_success(self): + self.assertEqual(self.parser.parse(b"0"), (b"0",)) + + def test_failure(self): + pass + + +class TestAnd2(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.and_(h.ch(b"0")), h.ch(b"1")) + + def test_success(self): + pass + + def test_failure(self): + self.assertEqual(self.parser.parse(b"0"), None) + + +class TestAnd3(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch(b"1"), h.and_(h.ch(b"2"))) + + def test_success(self): + self.assertEqual(self.parser.parse(b"12"), (b"1",)) + + def test_failure(self): + pass + + +class TestNot1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence( + h.ch(b"a"), + h.choice(h.ch(b"+"), h.token(b"++")), + h.ch(b"b"), + ) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a+b"), (b"a", b"+", b"b")) + + def test_failure(self): + self.assertEqual(self.parser.parse(b"a++b"), None) + + +class TestNot2(unittest.TestCase): + @classmethod + def setUpClass(cls): 
+ cls.parser = h.sequence( + h.ch(b"a"), + h.choice( + h.sequence(h.ch(b"+"), h.not_(h.ch(b"+"))), + h.token(b"++"), + ), + h.ch(b"b"), + ) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a+b"), (b"a", (b"+",), b"b")) + self.assertEqual(self.parser.parse(b"a++b"), (b"a", b"++", b"b")) + + def test_failure(self): + pass + + +class TestRightrec(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.indirect() + a = h.ch(b"a") + cls.parser.bind(h.choice(h.sequence(a, cls.parser), h.epsilon_p())) + + def test_success(self): + self.assertEqual(self.parser.parse(b"a"), (b"a",)) + self.assertEqual(self.parser.parse(b"aa"), (b"a", (b"a",))) + self.assertEqual(self.parser.parse(b"aaa"), (b"a", (b"a", (b"a",)))) + + def test_failure(self): + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/src/bindings/python/setup.py b/src/bindings/python/setup.py new file mode 100644 index 0000000..be9d6ed --- /dev/null +++ b/src/bindings/python/setup.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python +import os +import os.path +import sys + +from setuptools import Extension, setup + +invoked = os.getcwd() +if os.path.dirname(sys.argv[0]) != "": + os.chdir(os.path.dirname(sys.argv[0])) + +setup( + name="hammer", + version="1.1.1", + author="Riverside Research", + description="The Hammer parser combinator library", + ext_modules=[ + Extension( + "_hammer", + ["hammer.i"], + swig_opts=["-DHAMMER_INTERNAL__NO_STDARG_H", "-I../../"], + define_macros=[("SWIG", None)], + depends=[ + "allocator.h", + "glue.h", + "hammer.h", + "internal.h", + ], + extra_compile_args=["-fPIC", "-std=gnu99"], + include_dirs=["../../"], + library_dirs=["../../"], + libraries=["hammer"], + ) + ], + py_modules=["hammer"], +) + +os.chdir(invoked) diff --git a/src/bindings/swig/hammer.i b/src/bindings/swig/hammer.i new file mode 100644 index 0000000..9cda0dd --- /dev/null +++ b/src/bindings/swig/hammer.i @@ -0,0 +1,372 @@ +%module hammer +%begin %{ +#define 
SWIG_PYTHON_STRICT_BYTE_CHAR +#include +%} + +%nodefaultctor; + +%include "stdint.i" + +#if defined(SWIGPYTHON) +%ignore HCountedArray_; +%apply (char *STRING, size_t LENGTH) {(uint8_t* str, size_t len)} +%apply (uint8_t* str, size_t len) {(const uint8_t* input, size_t length)} +%apply (uint8_t* str, size_t len) {(const uint8_t* str, const size_t len)} +%apply (uint8_t* str, size_t len) {(const uint8_t* charset, size_t length)} + + +%rename("_%s") ""; +// %rename(_h_ch) h_ch; + +%inline { + static PyObject *_helper_Placeholder = NULL, *_helper_ParseError = NULL; + + static void register_helpers(PyObject* parse_error, PyObject *placeholder) { + _helper_ParseError = parse_error; + _helper_Placeholder = placeholder; + } + } + +%pythoncode %{ + try: + INTEGER_TYPES = (int, long) + except NameError: + INTEGER_TYPES = (int,) + + try: + TEXT_TYPE = unicode + def bchr(i): + return chr(i) + except NameError: + TEXT_TYPE = str + def bchr(i): + return bytes([i]) + + class Placeholder(object): + """The python equivalent of TT_NONE""" + def __str__(self): + return "Placeholder" + def __repr__(self): + return "Placeholder" + def __eq__(self, other): + return type(self) == type(other) + class ParseError(Exception): + """The parse failed; the message may have more information""" + pass + + _hammer._register_helpers(ParseError, + Placeholder) + %} + +%typemap(in) void*[] { + if (PyList_Check($input)) { + Py_INCREF($input); + int size = PyList_Size($input); + int i = 0; + int res = 0; + $1 = (void**)malloc((size+1)*sizeof(HParser*)); + for (i=0; itoken, $1->len); + } +%typemap(out) struct HCountedArray_* { + int i; + $result = PyList_New($1->used); + for (i=0; i<$1->used; i++) { + HParsedToken *t = $1->elements[i]; + PyObject *o = SWIG_NewPointerObj(SWIG_as_voidptr(t), SWIGTYPE_p_HParsedToken_, 0 | 0); + PyList_SetItem($result, i, o); + } + } +%typemap(out) struct HParseResult_* { + if ($1 == NULL) { + // TODO: raise parse failure + Py_INCREF(Py_None); + $result = Py_None; + } else 
{ + $result = hpt_to_python($1->ast); + } + } +%typemap(newfree) struct HParseResult_* { + h_parse_result_free($input); + } +%inline %{ + static int h_tt_python; + %} +%init %{ + h_tt_python = h_allocate_token_type("com.upstandinghackers.hammer.python"); + %} + + + + +%typemap(in) (HPredicate pred, void* user_data) { + Py_INCREF($input); + $2 = $input; + $1 = call_predicate; + } + +%typemap(in) (const HAction a, void* user_data) { + Py_INCREF($input); + $2 = $input; + $1 = call_action; + } + +%inline %{ + + struct HParsedToken_; + struct HParseResult_; + static PyObject* hpt_to_python(const struct HParsedToken_ *token); + + static struct HParsedToken_* call_action(const struct HParseResult_ *p, void* user_data); + static bool call_predicate(struct HParseResult_ *p, void* user_data); + %} +#else + #warning no uint8_t* typemaps defined +#endif + + // All the include paths are relative to the build, i.e., ../../. If you need to build these manually (i.e., not with scons), keep that in mind. +// Suppress GCC attributes that SWIG cannot parse. +#define __attribute__(x) + +// Ignore va_list variants — SWIG cannot generate correct wrappers for va_list parameters. +%ignore h_sequence__v; +%ignore h_sequence__mv; +%ignore h_drop_from___v; +%ignore h_drop_from___mv; +%ignore h_choice__v; +%ignore h_choice__mv; +%ignore h_permutation__v; +%ignore h_permutation__mv; + +// Ignore functions declared in hammer.h but not present in the library. +%ignore h_get_backend_with_params_by_name__m; + +%{ +#include "allocator.h" +#include "hammer.h" +#ifndef SWIGPERL +// Perl's embed.h conflicts with err.h, which internal.h includes. Ugh. 
+#include "internal.h" +#endif +#include "glue.h" +%} +%include "allocator.h" +%include "hammer.h" + +%extend HArena_ { + ~HArena_() { + h_delete_arena($self); + } + }; +%extend HParseResult_ { + ~HParseResult_() { + h_parse_result_free($self); + } +}; + +%newobject h_parse; +%delobject h_parse_result_free; +%newobject h_new_arena; +%delobject h_delete_arena; + +#ifdef SWIGPYTHON +%inline { + static PyObject* hpt_to_python(const HParsedToken *token) { + // Caller holds a reference to returned object + PyObject *ret; + if (token == NULL) { + Py_RETURN_NONE; + } + switch (token->token_type) { + case TT_NONE: + return PyObject_CallFunctionObjArgs(_helper_Placeholder, NULL); + break; + case TT_BYTES: + return PyBytes_FromStringAndSize((char*)token->token_data.bytes.token, token->token_data.bytes.len); + case TT_SINT: + return PyLong_FromLong(token->token_data.sint); + case TT_UINT: + return PyLong_FromUnsignedLong(token->token_data.uint); + case TT_SEQUENCE: + ret = PyTuple_New(token->token_data.seq->used); + for (int i = 0; i < token->token_data.seq->used; i++) { + PyTuple_SET_ITEM(ret, i, hpt_to_python(token->token_data.seq->elements[i])); + } + return ret; + default: + if (token->token_type == h_tt_python) { + ret = (PyObject*)token->token_data.user; + Py_INCREF(ret); + return ret; + } else { + return SWIG_NewPointerObj((void*)token, SWIGTYPE_p_HParsedToken_, 0 | 0); + } + + } + } + static struct HParsedToken_* call_action(const struct HParseResult_ *p, void* user_data) { + PyObject *callable = user_data; + PyObject *ret = PyObject_CallFunctionObjArgs(callable, + hpt_to_python(p->ast), + NULL); + if (ret == NULL) { + PyErr_Print(); + assert(ret != NULL); + } + HParsedToken *tok = h_make(p->arena, h_tt_python, ret); + return tok; + } + + static bool call_predicate(struct HParseResult_ *p, void* user_data) { + PyObject *callable = user_data; + PyObject *ret = PyObject_CallFunctionObjArgs(callable, + hpt_to_python(p->ast), + NULL); + bool rret = false; + if (ret == 
NULL) { + PyErr_Print(); + assert(ret != NULL); + } + rret = (bool)PyObject_IsTrue(ret); + Py_DECREF(ret); + return rret; + } + + } + +%rename("%s") ""; + +%extend HParser_ { + HParseResult* parse(const uint8_t* input, size_t length) { + return h_parse($self, input, length); + } + bool compile(HParserBackend backend) { + return h_compile($self, backend, NULL) == 0; + } + PyObject* __dir__() { + PyObject* ret = PyList_New(2); + PyList_SET_ITEM(ret, 0, PyUnicode_FromString("parse")); + PyList_SET_ITEM(ret, 1, PyUnicode_FromString("compile")); + return ret; + } +} + +%pythoncode %{ +def action(p, act): + return _h_action(p, act) +def attr_bool(p, pred): + return _h_attr_bool(p, pred) + +def ch(ch): + if isinstance(ch, (bytes, TEXT_TYPE)): + return token(ch) + else: + return _h_ch(ch) + +def ch_range(c1, c2): + dostr = isinstance(c1, bytes) + dostr2 = isinstance(c2, bytes) + if isinstance(c1, TEXT_TYPE) or isinstance(c2, TEXT_TYPE): + raise TypeError("ch_range only works on bytes") + if dostr != dostr2: + raise TypeError("Both arguments to ch_range must be the same type") + if dostr: + return action(_h_ch_range(c1, c2), bchr) + else: + return _h_ch_range(c1, c2) +def epsilon_p(): return _h_epsilon_p() +def end_p(): + return _h_end_p() +def in_(charset): + return action(_h_in(charset), bchr) +def not_in(charset): + return action(_h_not_in(charset), bchr) +def not_(p): return _h_not(p) +def int_range(p, i1, i2): + return _h_int_range(p, i1, i2) +def token(string): + return _h_token(string) +def whitespace(p): + return _h_whitespace(p) +def xor(p1, p2): + return _h_xor(p1, p2) +def butnot(p1, p2): + return _h_butnot(p1, p2) +def and_(p1): + return _h_and(p1) +def difference(p1, p2): + return _h_difference(p1, p2) + +def sepBy(p, sep): return _h_sepBy(p, sep) +def sepBy1(p, sep): return _h_sepBy1(p, sep) +def many(p): return _h_many(p) +def many1(p): return _h_many1(p) +def repeat_n(p, n): return _h_repeat_n(p, n) +def choice(*args): return _h_choice__a(list(args)) +def 
sequence(*args): return _h_sequence__a(list(args)) + +def optional(p): return _h_optional(p) +def nothing_p(): return _h_nothing_p() +def ignore(p): return _h_ignore(p) + +def left(p1, p2): return _h_left(p1, p2) +def middle(p1, p2, p3): return _h_middle(p1, p2, p3) +def right(p1, p2): return _h_right(p1, p2) + + +class HIndirectParser(_HParser_): + def __init__(self): + # Shoves the guts of an _HParser_ into a HIndirectParser. + tret = _h_indirect() + self.__dict__.clear() + self.__dict__.update(tret.__dict__) + + def __dir__(self): + return super(HIndirectParser, self).__dir__() + ['bind'] + def bind(self, parser): + _h_bind_indirect(self, parser) + +def indirect(): + return HIndirectParser() + +def bind_indirect(indirect, new_parser): + indirect.bind(new_parser) + +def uint8(): return _h_uint8() +def uint16(): return _h_uint16() +def uint32(): return _h_uint32() +def uint64(): return _h_uint64() +def int8(): return _h_int8() +def int16(): return _h_int16() +def int32(): return _h_int32() +def int64(): return _h_int64() + + +%} + +#endif From 895fac3e51b6b88eb594effb8d39f4ffe25beb6f Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Tue, 24 Mar 2026 11:28:25 -0400 Subject: [PATCH 02/14] Fix scons warnings when building Python language bindings --- src/bindings/swig/hammer.i | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/bindings/swig/hammer.i b/src/bindings/swig/hammer.i index 9cda0dd..323e43a 100644 --- a/src/bindings/swig/hammer.i +++ b/src/bindings/swig/hammer.i @@ -19,6 +19,7 @@ %rename("_%s") ""; // %rename(_h_ch) h_ch; +%warnfilter(454) register_helpers; %inline { static PyObject *_helper_Placeholder = NULL, *_helper_ParseError = NULL; @@ -56,7 +57,7 @@ pass _hammer._register_helpers(ParseError, - Placeholder) + Placeholder) %} %typemap(in) void*[] { @@ -94,7 +95,7 @@ $result = PyBytes_FromStringAndSize((char*)$1->token, $1->len); } %typemap(out) struct HCountedArray_* { - int i; + size_t i; $result = 
PyList_New($1->used); for (i=0; i<$1->used; i++) { HParsedToken *t = $1->elements[i]; @@ -163,6 +164,16 @@ %ignore h_permutation__v; %ignore h_permutation__mv; +// Ignore varargs variants — Python uses the __a (array) variants instead. +// Without this SWIG generates wrappers that call these sentinel-terminated functions +// without the required NULL terminator, causing -Wmissing-sentinel warnings. +%ignore h_sequence; +%ignore h_sequence__m; +%ignore h_choice; +%ignore h_choice__m; +%ignore h_permutation; +%ignore h_permutation__m; + // Ignore functions declared in hammer.h but not present in the library. %ignore h_get_backend_with_params_by_name__m; @@ -176,8 +187,13 @@ #include "glue.h" %} %include "allocator.h" +%warnfilter(451) hammer_h; %include "hammer.h" +// HArena_ is an opaque type (forward declaration only in allocator.h). +// Provide a body so SWIG can process the %extend below. +struct HArena_ {}; + %extend HArena_ { ~HArena_() { h_delete_arena($self); @@ -214,12 +230,12 @@ return PyLong_FromUnsignedLong(token->token_data.uint); case TT_SEQUENCE: ret = PyTuple_New(token->token_data.seq->used); - for (int i = 0; i < token->token_data.seq->used; i++) { + for (size_t i = 0; i < token->token_data.seq->used; i++) { PyTuple_SET_ITEM(ret, i, hpt_to_python(token->token_data.seq->elements[i])); } return ret; default: - if (token->token_type == h_tt_python) { + if (token->token_type == (HTokenType)h_tt_python) { ret = (PyObject*)token->token_data.user; Py_INCREF(ret); return ret; From cc9d25b50603531392bd9b1e3061ee175804c64a Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Tue, 24 Mar 2026 16:01:22 -0400 Subject: [PATCH 03/14] Readd Java/JNI language bindings and fix outdated references --- DEVELOPMENT.md | 17 +- README.md | 13 +- SConstruct | 2 +- src/bindings/java/HammerTests.java | 284 +++++++++++++++++++++++++++++ src/bindings/java/README.md | 197 ++++++++++++++++++++ src/bindings/java/SConscript | 124 +++++++++++++ src/bindings/swig/hammer.i | 113 
+++++++++++- src/t_misc.c | 14 +- tests/t_misc.c | 14 +- 9 files changed, 753 insertions(+), 25 deletions(-) create mode 100644 src/bindings/java/HammerTests.java create mode 100644 src/bindings/java/README.md create mode 100644 src/bindings/java/SConscript diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 197969c..20d912f 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -61,25 +61,30 @@ Notes: ### Building and testing language bindings -#### Python - Install the required tools: ```bash -sudo apt install swig +sudo apt install swig default-jdk pip install setuptools ``` -Build/run the Python bindings only: +Build and test all language bindings: + +```bash +scons bindings=python,java test +``` + +To target a specific binding, pass it individually and use its alias (`testpython` or `testjava`): ```bash scons bindings=python testpython +scons bindings=java testjava ``` -Run the full test suite including Python bindings: +The build locates `javac` via `PATH` and consults `JAVA_HOME` only when no `javac` is found there. To build with a JDK that is not on `PATH`: ```bash -scons bindings=python test +JAVA_HOME=/usr/lib/jvm/java-21-openjdk-amd64 scons bindings=java ``` ### Linting Python and SCons files diff --git a/README.md b/README.md index a20d336..c93560b 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The main feature of MicroHammer is its significantly smaller codebase, allowing - More thorough and consistent documentation - Windows / macOS not supported - Packrat parsing backend only -- Language bindings for Python (see [Python Bindings](src/bindings/python/README.md)) +- Language bindings for Python and Java (see [Python Bindings](src/bindings/python/README.md), [Java Bindings](src/bindings/java/README.md)) ## Features @@ -85,6 +85,17 @@ scons bindings=python See [src/bindings/python/README.md](src/bindings/python/README.md) for the full API reference and usage guide. +### Java + +Requires [SWIG](https://www.swig.org/) 4.x and JDK 11+.
+ +```bash +sudo apt install swig default-jdk +scons bindings=java +``` + +See [src/bindings/java/README.md](src/bindings/java/README.md) for the full API reference and usage guide. + ## Examples The `examples/` directory contains some simple examples, currently including: diff --git a/SConstruct b/SConstruct index 3d0a1bc..2fe7a36 100644 --- a/SConstruct +++ b/SConstruct @@ -22,7 +22,7 @@ vars.Add( PathVariable("prefix", "Where to install in the FHS", "/usr/local", PathVariable.PathAccept) ) vars.Add( - ListVariable("bindings", "Language bindings to build", "none", ["python"]) + ListVariable("bindings", "Language bindings to build", "none", ["python", "java"]) ) vars.Add("python", "Python interpreter", "python3") diff --git a/src/bindings/java/HammerTests.java b/src/bindings/java/HammerTests.java new file mode 100644 index 0000000..fbc54a2 --- /dev/null +++ b/src/bindings/java/HammerTests.java @@ -0,0 +1,284 @@ +import com.riversideresearch.hammer.*; + +/** + * Basic smoke tests for the Hammer Java bindings, mirroring hammer_tests.py. + * + * The JNI library must be loadable via java.library.path. The SConscript sets + * this up automatically when running through the build system. + */ +public class HammerTests { + + static { + System.loadLibrary("hammer_jni"); + } + + // Token type constants — mirror of HTokenType_ in hammer.h. 
+ static final int TT_NONE = 1; + static final int TT_BYTES = 2; + static final int TT_SINT = 4; + static final int TT_UINT = 8; + static final int TT_SEQUENCE = 16; + + static int passed = 0; + static int failed = 0; + + static void assertTrue(String name, boolean cond) { + if (cond) { + passed++; + } else { + failed++; + System.err.println("FAIL: " + name); + } + } + + static void assertNull(String name, Object obj) { + assertTrue(name + " (expected null)", obj == null); + } + + static void assertNotNull(String name, Object obj) { + assertTrue(name + " (expected non-null)", obj != null); + } + + static void assertEqual(String name, long expected, long actual) { + if (expected != actual) { + failed++; + System.err.println("FAIL: " + name + " — expected " + expected + ", got " + actual); + } else { + passed++; + } + } + + // ------------------------------------------------------------------------- + + static void testToken() { + byte[] input = {(byte)0x39, (byte)0x35, (byte)0xa2}; + HParser p = hammer.h_token(input); + + HParseResult r = p.parse(input); + assertNotNull("token:success", r); + assertTrue("token:type", r.getAst().tokenType() == TT_BYTES); + assertEqual("token:length", 3, r.getAst().bytesLength()); + assertEqual("token:byte0", 0x39, r.getAst().byteAt(0)); + assertEqual("token:byte1", 0x35, r.getAst().byteAt(1)); + assertEqual("token:byte2", 0xa2, r.getAst().byteAt(2) & 0xff); + + assertNull("token:partial_fail", p.parse(new byte[]{(byte)0x39, (byte)0x35})); + } + + static void testCh() { + HParser p = hammer.h_ch((short)0xa2); + + HParseResult r = p.parse(new byte[]{(byte)0xa2}); + assertNotNull("ch:success", r); + assertTrue("ch:type", r.getAst().tokenType() == TT_UINT); + assertEqual("ch:value", 0xa2L, r.getAst().uintValue()); + + assertNull("ch:fail", p.parse(new byte[]{(byte)0xa3})); + } + + static void testChRange() { + HParser p = hammer.h_ch_range((short)'a', (short)'c'); + + assertNotNull("ch_range:success", p.parse(new byte[]{(byte)'b'})); + 
assertNull("ch_range:fail", p.parse(new byte[]{(byte)'d'})); + } + + static void testInt64() { + HParser p = hammer.h_int64(); + byte[] input = {(byte)0xff,(byte)0xff,(byte)0xff,(byte)0xfe, + (byte)0x00,(byte)0x00,(byte)0x00,(byte)0x00}; + + HParseResult r = p.parse(input); + assertNotNull("int64:success", r); + assertTrue("int64:type", r.getAst().tokenType() == TT_SINT); + assertEqual("int64:value", -0x200000000L, r.getAst().sintValue()); + + assertNull("int64:fail", p.parse(new byte[]{ + (byte)0xff,(byte)0xff,(byte)0xff,(byte)0xfe, + (byte)0x00,(byte)0x00,(byte)0x00})); + } + + static void testInt32() { + HParser p = hammer.h_int32(); + + HParseResult r = p.parse(new byte[]{(byte)0xff,(byte)0xfe,(byte)0x00,(byte)0x00}); + assertNotNull("int32:success", r); + assertEqual("int32:value", -0x20000L, r.getAst().sintValue()); + + assertNull("int32:fail", p.parse(new byte[]{(byte)0xff,(byte)0xfe,(byte)0x00})); + } + + static void testUint64() { + HParser p = hammer.h_uint64(); + byte[] input = {(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x02, + (byte)0x00,(byte)0x00,(byte)0x00,(byte)0x00}; + + HParseResult r = p.parse(input); + assertNotNull("uint64:success", r); + assertTrue("uint64:type", r.getAst().tokenType() == TT_UINT); + assertEqual("uint64:value", 0x200000000L, r.getAst().uintValue()); + + assertNull("uint64:fail", p.parse(new byte[]{ + (byte)0x00,(byte)0x00,(byte)0x00,(byte)0x02, + (byte)0x00,(byte)0x00,(byte)0x00})); + } + + static void testUint32() { + HParser p = hammer.h_uint32(); + + HParseResult r = p.parse(new byte[]{(byte)0x00,(byte)0x02,(byte)0x00,(byte)0x00}); + assertNotNull("uint32:success", r); + assertEqual("uint32:value", 0x20000L, r.getAst().uintValue()); + + assertNull("uint32:fail", p.parse(new byte[]{(byte)0x00,(byte)0x02,(byte)0x00})); + } + + static void testUint8() { + HParser p = hammer.h_uint8(); + + HParseResult r = p.parse(new byte[]{(byte)0x78}); + assertNotNull("uint8:success", r); + assertEqual("uint8:value", 0x78L, 
r.getAst().uintValue()); + + assertNull("uint8:fail", p.parse(new byte[]{})); + } + + static void testIntRange() { + HParser p = hammer.h_int_range(hammer.h_uint8(), 3, 10); + + HParseResult r = p.parse(new byte[]{5}); + assertNotNull("int_range:success", r); + assertEqual("int_range:value", 5L, r.getAst().uintValue()); + + assertNull("int_range:fail", p.parse(new byte[]{11})); + } + + static void testSequence() { + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_ch((short)'b'), + }); + + HParseResult r = p.parse(new byte[]{(byte)'a', (byte)'b'}); + assertNotNull("sequence:success", r); + assertTrue("sequence:type", r.getAst().tokenType() == TT_SEQUENCE); + assertEqual("sequence:length", 2L, r.getAst().seqLength()); + assertEqual("sequence:elem0", 'a', r.getAst().seqElement(0).uintValue()); + assertEqual("sequence:elem1", 'b', r.getAst().seqElement(1).uintValue()); + + assertNull("sequence:fail_partial", p.parse(new byte[]{(byte)'a'})); + assertNull("sequence:fail_wrong", p.parse(new byte[]{(byte)'b'})); + } + + static void testChoice() { + HParser p = hammer.h_choice__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_ch((short)'b'), + }); + + HParseResult r1 = p.parse(new byte[]{(byte)'a'}); + assertNotNull("choice:a_success", r1); + assertEqual("choice:a_value", 'a', r1.getAst().uintValue()); + + HParseResult r2 = p.parse(new byte[]{(byte)'b'}); + assertNotNull("choice:b_success", r2); + assertEqual("choice:b_value", 'b', r2.getAst().uintValue()); + + assertNull("choice:fail", p.parse(new byte[]{(byte)'c'})); + } + + static void testMany() { + HParser p = hammer.h_many(hammer.h_ch((short)'a')); + + HParseResult r = p.parse(new byte[]{(byte)'a',(byte)'a',(byte)'a'}); + assertNotNull("many:success", r); + assertEqual("many:count", 3L, r.getAst().seqLength()); + + HParseResult r0 = p.parse(new byte[]{}); + assertNotNull("many:empty", r0); + assertEqual("many:empty_count", 0L, r0.getAst().seqLength()); + } + + static void 
testMany1() { + HParser p = hammer.h_many1(hammer.h_ch((short)'a')); + + HParseResult r = p.parse(new byte[]{(byte)'a',(byte)'a'}); + assertNotNull("many1:success", r); + assertEqual("many1:count", 2L, r.getAst().seqLength()); + + assertNull("many1:fail_empty", p.parse(new byte[]{})); + } + + static void testEndP() { + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_end_p(), + }); + + assertNotNull("end_p:success", p.parse(new byte[]{(byte)'a'})); + assertNull("end_p:fail_trailing", p.parse(new byte[]{(byte)'a', (byte)'a'})); + } + + static void testOptional() { + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_optional(hammer.h_ch((short)'b')), + hammer.h_ch((short)'c'), + }); + + HParseResult r1 = p.parse(new byte[]{(byte)'a',(byte)'b',(byte)'c'}); + assertNotNull("optional:abc_success", r1); + assertEqual("optional:abc_length", 3L, r1.getAst().seqLength()); + + HParseResult r2 = p.parse(new byte[]{(byte)'a',(byte)'c'}); + assertNotNull("optional:ac_success", r2); + assertEqual("optional:ac_length", 3L, r2.getAst().seqLength()); + assertTrue("optional:ac_middle_none", + r2.getAst().seqElement(1).tokenType() == TT_NONE); + + assertNull("optional:fail", p.parse(new byte[]{(byte)'a',(byte)'e',(byte)'c'})); + } + + static void testSepBy() { + HParser p = hammer.h_sepBy( + hammer.h_choice__a(new HParser[]{ + hammer.h_ch((short)'1'), + hammer.h_ch((short)'2'), + hammer.h_ch((short)'3'), + }), + hammer.h_ch((short)',') + ); + + HParseResult r = p.parse(new byte[]{(byte)'1',(byte)',',(byte)'2',(byte)',',(byte)'3'}); + assertNotNull("sepBy:success", r); + assertEqual("sepBy:count", 3L, r.getAst().seqLength()); + + HParseResult r0 = p.parse(new byte[]{}); + assertNotNull("sepBy:empty", r0); + assertEqual("sepBy:empty_count", 0L, r0.getAst().seqLength()); + } + + // ------------------------------------------------------------------------- + + public static void main(String[] args) { + testToken(); + 
testCh(); + testChRange(); + testInt64(); + testInt32(); + testUint64(); + testUint32(); + testUint8(); + testIntRange(); + testSequence(); + testChoice(); + testMany(); + testMany1(); + testEndP(); + testOptional(); + testSepBy(); + + System.out.printf("Results: %d passed, %d failed%n", passed, failed); + if (failed > 0) System.exit(1); + } +} diff --git a/src/bindings/java/README.md b/src/bindings/java/README.md new file mode 100644 index 0000000..8d95ee2 --- /dev/null +++ b/src/bindings/java/README.md @@ -0,0 +1,197 @@ +# Hammer Java Bindings + +Java bindings for the Hammer parser combinator library, generated with [SWIG](https://www.swig.org/). + +## Prerequisites + +- [SWIG](https://www.swig.org/) 4.x +- JDK 11+ (`javac`, `jar`) +- Hammer shared library built (see top-level README) + +```bash +sudo apt install swig default-jdk +``` + +## Building + +From the repository root, pass `bindings=java` to SCons: + +```bash +scons bindings=java +``` + +This generates `hammer_wrap.c` and a set of Java source files via SWIG, compiles them into the JNI shared library `libhammer_jni.so`, and packages the Java classes into `hammer.jar`. Both artifacts are placed under `build/opt/src/bindings/java/`. + +To build both Python and Java bindings at once: + +```bash +scons bindings=python,java +``` + +## Running the Tests + +```bash +scons bindings=java testjava +``` + +Or run the full test suite including all language bindings: + +```bash +scons bindings=python,java test +``` + +## Usage + +Add `hammer.jar` to your classpath and ensure `libhammer_jni.so` (and `libhammer.so`) are on the library path at runtime. 
+ +```bash +javac -cp build/opt/src/bindings/java/hammer.jar MyParser.java +LD_LIBRARY_PATH=build/opt/src:build/opt/src/bindings/java \ + java -Djava.library.path=build/opt/src/bindings/java \ + -cp .:build/opt/src/bindings/java/hammer.jar \ + MyParser +``` + +If you ran `scons install` (or `scons bindings=java installjava`), `hammer.jar` is installed to `/usr/local/share/java/`; `libhammer_jni.so` is not installed by that step and must be copied to a directory on `java.library.path` yourself. + +### Loading the Native Library + +Your application must load the JNI library once at startup, typically in a static initializer: + +```java +static { + System.loadLibrary("hammer_jni"); +} +``` + +### Basic Example + +```java +import com.riversideresearch.hammer.*; + +public class Example { + static { + System.loadLibrary("hammer_jni"); + } + + public static void main(String[] args) { + // Parse the literal bytes "GET " + HParser method = hammer.h_token(new byte[]{'G','E','T',' '}); + + // Parse one or more printable ASCII characters + HParser printable = hammer.h_many1( + hammer.h_ch_range((short)0x21, (short)0x7e) + ); + + // Sequence: method followed by the path + HParser requestLine = hammer.h_sequence__a(new HParser[]{method, printable}); + + HParseResult result = requestLine.parse("GET /index.html".getBytes()); + if (result == null) { + System.out.println("Parse failed"); + } else { + HParsedToken ast = result.getAst(); + System.out.println("Parsed sequence of length " + ast.seqLength()); + } + } +} +``` + +### Parser Combinators + +All Hammer functions are exposed as static methods of the `hammer` class in the `com.riversideresearch.hammer` package.
+ +| Java call | Description | +|-------------------------------------|-----------------------------------------------------------| +| `hammer.h_token(byte[])` | Match a literal byte array | +| `hammer.h_ch((short)b)` | Match a single byte value (0–255) | +| `hammer.h_ch_range((short)lo, (short)hi)` | Match any byte in `[lo, hi]` | +| `hammer.h_in(byte[])` | Match any byte in the given charset | +| `hammer.h_not_in(byte[])` | Match any byte not in the given charset | +| `hammer.h_sequence__a(HParser[])` | Match each parser in order; result is `TT_SEQUENCE` | +| `hammer.h_choice__a(HParser[])` | Try each parser in order; return first success | +| `hammer.h_many(p)` | Match `p` zero or more times; result is `TT_SEQUENCE` | +| `hammer.h_many1(p)` | Match `p` one or more times; result is `TT_SEQUENCE` | +| `hammer.h_repeat_n(p, n)` | Match `p` exactly `n` times; result is `TT_SEQUENCE` | +| `hammer.h_optional(p)` | Match `p` or produce a `TT_NONE` token on failure | +| `hammer.h_ignore(p)` | Match `p` but suppress its result from sequences | +| `hammer.h_sepBy(p, sep)` | Match `p` separated by `sep`, zero or more times | +| `hammer.h_sepBy1(p, sep)` | Match `p` separated by `sep`, one or more times | +| `hammer.h_left(p1, p2)` | Match both; return result of `p1` | +| `hammer.h_right(p1, p2)` | Match both; return result of `p2` | +| `hammer.h_middle(p1, p2, p3)` | Match all three; return result of `p2` | +| `hammer.h_butnot(p1, p2)` | Match `p1` only if `p2` does not also match | +| `hammer.h_difference(p1, p2)` | Match `p1` only when `p2` matches less input | +| `hammer.h_xor(p1, p2)` | Match exactly one of `p1` or `p2`, not both | +| `hammer.h_and(p)` | Succeed if `p` would match, but consume no input | +| `hammer.h_not(p)` | Succeed if `p` would not match, consuming no input | +| `hammer.h_whitespace(p)` | Skip leading whitespace, then match `p` | +| `hammer.h_int_range(p, lo, hi)` | Match `p` only if the integer result is in `[lo, hi]` | +| `hammer.h_epsilon_p()` | 
Always succeed, consuming no input | +| `hammer.h_end_p()` | Succeed only at end of input | +| `hammer.h_nothing_p()` | Always fail | + +### Integer Parsers + +```java +hammer.h_uint8() // unsigned 8-bit +hammer.h_uint16() // unsigned 16-bit, big-endian +hammer.h_uint32() // unsigned 32-bit, big-endian +hammer.h_uint64() // unsigned 64-bit, big-endian +hammer.h_int8() // signed 8-bit +hammer.h_int16() // signed 16-bit, big-endian +hammer.h_int32() // signed 32-bit, big-endian +hammer.h_int64() // signed 64-bit, big-endian +``` + +### Inspecting Parse Results + +A successful parse returns an `HParseResult`; a failed parse returns `null`. Call `result.getAst()` to retrieve the `HParsedToken` and inspect it using the methods below. + +`HParseResult` owns the memory for the entire parse tree. Do not hold references to tokens returned by `getAst()` or `seqElement()` after the `HParseResult` has been garbage-collected or explicitly deleted. + +| Method | Returns | Description | +|-------------------------------|----------|------------------------------------------------------| +| `token.tokenType()` | `int` | One of the `TT_*` constants below | +| `token.sintValue()` | `long` | Signed integer value (`TT_SINT` tokens) | +| `token.uintValue()` | `long` | Unsigned integer value (`TT_UINT` tokens); treat as unsigned with `Long.toUnsignedString()` | +| `token.seqLength()` | `long` | Number of elements (`TT_SEQUENCE` tokens) | +| `token.seqElement(i)` | `HParsedToken` | The `i`-th sequence element | +| `token.bytesLength()` | `long` | Byte count (`TT_BYTES` tokens) | +| `token.byteAt(i)` | `short` | Byte value at index `i` (0–255), or -1 if out of range | + +### Token Type Constants + +Compare `token.tokenType()` against these values: + +| Constant | Value | Produced by | +|----------------|-------|----------------------------------------------------| +| `TT_NONE` | 1 | `h_optional()` on failure, `h_end_p()`, `h_and()` | +| `TT_BYTES` | 2 | `h_token()`, `h_in()`, `h_not_in()` 
| +| `TT_SINT` | 4 | `h_int8/16/32/64()` | +| `TT_UINT` | 8 | `h_uint8/16/32/64()`, `h_ch()`, `h_ch_range()` | +| `TT_SEQUENCE` | 16 | `h_sequence__a()`, `h_many()`, `h_sepBy()`, etc. | + +### Recursive Grammars + +Use `h_indirect()` and `h_bind_indirect()` to define recursive parsers: + +```java +HParser expr = hammer.h_indirect(); +HParser atom = hammer.h_ch_range((short)'a', (short)'z'); +hammer.h_bind_indirect(expr, + hammer.h_choice__a(new HParser[]{ + hammer.h_sequence__a(new HParser[]{atom, expr}), + hammer.h_epsilon_p(), + }) +); + +HParseResult result = expr.parse("abc".getBytes()); +``` + +## Notes + +- Java's `byte` type is signed (-128 to 127). Single byte values passed to `h_ch()` and `h_ch_range()` use `short` (0–255) to avoid sign confusion. +- `h_sequence__a()` and `h_choice__a()` accept `HParser[]` arrays and are the recommended way to build combinators in Java. The sentinel-terminated varargs versions (`h_sequence`, `h_choice`) are not exposed. +- `uintValue()` returns a Java `long`. For values above `Long.MAX_VALUE` use `Long.toUnsignedString(token.uintValue())`. +- `HParseResult` implements a finalizer that calls `h_parse_result_free`, so explicit cleanup is not required, but calling `result.delete()` eagerly is good practice in tight loops. diff --git a/src/bindings/java/SConscript b/src/bindings/java/SConscript new file mode 100644 index 0000000..daec1b0 --- /dev/null +++ b/src/bindings/java/SConscript @@ -0,0 +1,124 @@ +# Copyright (c) 2026 Riverside Research +# -*- python -*- + +from __future__ import absolute_import, division, print_function + +import os +import sys +import shutil + +Import("env libhammer_shared testruns targets") + +if libhammer_shared is None: + print("Warning: Java bindings require the shared library (not available in coverage/gprof builds). Skipping.") + Return() + +if not shutil.which("swig"): + print("Warning: SWIG not found. Install swig to build Java bindings. 
Skipping.") + Return() + +# Locate javac, falling back to JAVA_HOME if not on PATH. +javac = shutil.which("javac") +if not javac: + java_home_env = os.environ.get("JAVA_HOME", "") + if java_home_env: + candidate = os.path.join(java_home_env, "bin", "javac") + if os.path.isfile(candidate): + javac = candidate +if not javac: + print("Warning: javac not found. Install a JDK to build Java bindings. Skipping.") + Return() + +java_home = os.path.dirname(os.path.dirname(os.path.realpath(javac))) +jni_include = os.path.join(java_home, "include") +if sys.platform.startswith("linux"): + jni_platform_include = os.path.join(jni_include, "linux") +elif sys.platform.startswith("darwin"): + jni_platform_include = os.path.join(jni_include, "darwin") +else: + jni_platform_include = jni_include + +if not os.path.isdir(jni_include): + print("Warning: JNI headers not found at %s. Skipping Java bindings." % jni_include) + Return() + +javaenv = env.Clone(IMPLICIT_COMMAND_DEPENDENCIES=0) + +project_root = Dir("#").abspath +javadir = os.path.join(project_root, env["BUILD_BASE"], "src/bindings/java") +java_src_dir = os.path.join(javadir, "javasrc") +java_classes_dir = os.path.join(javadir, "classes") +src_build_dir = os.path.join(project_root, env["BUILD_BASE"], "src") +hammer_lib_dir = os.path.dirname(str(libhammer_shared[0])) + +# Copy hammer.i to the build directory so SWIG can be invoked from there. +swig_iface = javaenv.Command( + "hammer.i", "#src/bindings/swig/hammer.i", Copy("$TARGET", "$SOURCE") +) + +# Run SWIG: generates hammer_wrap.c (primary tracked output) and Java sources in java_src_dir. +swig_out = javaenv.Command( + "hammer_wrap.c", + swig_iface, + [ + "mkdir -p " + java_src_dir, + "swig -java -package com.riversideresearch.hammer" + " -DHAMMER_INTERNAL__NO_STDARG_H" + " -I" + src_build_dir + + " -outdir " + java_src_dir + + " -o $TARGET $SOURCE", + ], +) + +# Compile the JNI shared library from the SWIG-generated wrapper. 
+jnienv = javaenv.Clone() +jnienv.Append(CPPPATH=[jni_include, jni_platform_include, src_build_dir]) +jnienv.Append(CCFLAGS=["-fPIC", "-DSWIG", "-std=gnu99", "-Wno-strict-aliasing"]) +jni_lib = jnienv.SharedLibrary( + "hammer_jni", + swig_out, + LIBS=["hammer"], + LIBPATH=[hammer_lib_dir], +) +Default(jni_lib) + +# Compile all generated Java sources and package them into a jar. +jar_file = javaenv.Command( + "hammer.jar", + swig_out, + [ + "mkdir -p " + java_classes_dir, + "javac -d " + java_classes_dir + " " + java_src_dir + "/*.java", + "jar cf $TARGET -C " + java_classes_dir + " .", + ], +) +javaenv.Depends(jar_file, jni_lib) +Default(jar_file) + +# Run the Java test suite. +javatestenv = javaenv.Clone() +javatestenv["ENV"]["LD_LIBRARY_PATH"] = hammer_lib_dir + ":" + javadir +javatestexec = javatestenv.Command( + "hammer_tests.stamp", + ["#src/bindings/java/HammerTests.java"] + list(jar_file), + [ + "javac -cp " + os.path.join(javadir, "hammer.jar") + + " -d " + java_classes_dir + " $SOURCE", + "LD_LIBRARY_PATH=" + hammer_lib_dir + ":" + javadir + + " java" + " -Djava.library.path=" + javadir + + " -cp " + java_classes_dir + ":" + os.path.join(javadir, "hammer.jar") + + " HammerTests", + "touch $TARGET", + ], +) +javatest = Alias("testjava", [javatestexec], javatestexec) +AlwaysBuild(javatestexec) +testruns.append(javatest) + +javainstallexec = javaenv.Command( + None, jar_file, + "install -m 644 " + os.path.join(javadir, "hammer.jar") + " /usr/local/share/java/", +) +javainstall = Alias("installjava", [javainstallexec], javainstallexec) +targets.append(javainstall) diff --git a/src/bindings/swig/hammer.i b/src/bindings/swig/hammer.i index 323e43a..a34f3d0 100644 --- a/src/bindings/swig/hammer.i +++ b/src/bindings/swig/hammer.i @@ -119,7 +119,7 @@ static int h_tt_python; %} %init %{ - h_tt_python = h_allocate_token_type("com.upstandinghackers.hammer.python"); + h_tt_python = h_allocate_token_type("com.riversideresearch.hammer.python"); %} @@ -146,8 +146,71 @@ 
static struct HParsedToken_* call_action(const struct HParseResult_ *p, void* user_data); static bool call_predicate(struct HParseResult_ *p, void* user_data); %} -#else +#elif !defined(SWIGJAVA) #warning no uint8_t* typemaps defined +#endif + +#if defined(SWIGJAVA) + +%ignore HCountedArray_; + +// Map byte[] ↔ (const uint8_t* input, size_t length) and all equivalent argument pairs. +%typemap(jni) (const uint8_t* input, size_t length) "jbyteArray" +%typemap(jtype) (const uint8_t* input, size_t length) "byte[]" +%typemap(jstype) (const uint8_t* input, size_t length) "byte[]" +%typemap(javain) (const uint8_t* input, size_t length) "$javainput" +%typemap(in) (const uint8_t* input, size_t length) { + $1 = (uint8_t*)JCALL2(GetByteArrayElements, jenv, $input, 0); + $2 = (size_t)JCALL1(GetArrayLength, jenv, $input); +} +%typemap(argout) (const uint8_t* input, size_t length) { + JCALL3(ReleaseByteArrayElements, jenv, $input, (jbyte*)$1, JNI_ABORT); +} +%typemap(freearg) (const uint8_t* input, size_t length) "" +%apply (const uint8_t* input, size_t length) { + (uint8_t* str, size_t len), + (const uint8_t* str, const size_t len), + (const uint8_t* charset, size_t length) +} + +// uint8_t as short — Java's byte is signed; short avoids sign-extension confusion. +%typemap(jni) uint8_t "jshort" +%typemap(jtype) uint8_t "short" +%typemap(jstype) uint8_t "short" +%typemap(javain) uint8_t "(short)($javainput & 0xff)" +%typemap(in) uint8_t { $1 = (uint8_t)($input & 0xff); } +%typemap(out) uint8_t { $result = (jshort)$1; } +%typemap(javaout) uint8_t { return (short)($jnicall & 0xff); } + +// void*[] (NULL-terminated parser array) — Java side passes HParser_[], marshalled via long[]. 
+%typemap(jni) void*[] "jlongArray" +%typemap(jtype) void*[] "long[]" +%typemap(jstype) void*[] "HParser[]" +%typemap(javain) void*[] "parsersToHandles($javainput)" +%typemap(in) void*[] { + int _sz = (int)JCALL1(GetArrayLength, jenv, $input); + jlong *_elems = JCALL2(GetLongArrayElements, jenv, $input, 0); + $1 = (void**)malloc((size_t)(_sz + 1) * sizeof(void *)); + for (int _i = 0; _i < _sz; _i++) $1[_i] = (void *)(intptr_t)_elems[_i]; + $1[_sz] = NULL; + JCALL3(ReleaseLongArrayElements, jenv, $input, _elems, JNI_ABORT); +} +%typemap(freearg) void*[] { free($1); } + +// Inject a helper into the hammer module class so callers can pass HParser[] where void*[] is +// expected. It lives in hammer.java (same package as HParser), so protected getCPtr is accessible. +%pragma(java) modulecode=%{ + static long[] parsersToHandles(HParser[] parsers) { + long[] handles = new long[parsers.length]; + for (int i = 0; i < parsers.length; i++) + handles[i] = HParser.getCPtr(parsers[i]); + return handles; + } +%} + +// parse() returns a Java-owned HParseResult whose finalizer calls h_parse_result_free. +%newobject HParser_::parse; + #endif // All the include paths are relative to the build, i.e., ../../. If you need to build these manually (i.e., not with scons), keep that in mind. @@ -187,7 +250,7 @@ #include "glue.h" %} %include "allocator.h" -%warnfilter(451) hammer_h; +%warnfilter(451) HResultTiming; %include "hammer.h" // HArena_ is an opaque type (forward declaration only in allocator.h). @@ -386,3 +449,47 @@ def int64(): return _h_int64() %} #endif + +#ifdef SWIGJAVA + +%extend HParser_ { + struct HParseResult_* parse(const uint8_t* input, size_t length) { + return h_parse($self, input, length); + } + bool compile(HParserBackend backend) { + return h_compile($self, backend, NULL) == 0; + } +} + +%extend HParsedToken_ { + /* Token type as int — compare against TT_NONE, TT_BYTES, TT_SINT, TT_UINT, TT_SEQUENCE. 
*/ + int tokenType() { + return (int)$self->token_type; + } + long long sintValue() { + return (long long)$self->token_data.sint; + } + long long uintValue() { + return (long long)(unsigned long long)$self->token_data.uint; + } + size_t seqLength() { + return ($self->token_type == TT_SEQUENCE) ? $self->token_data.seq->used : 0; + } + /* Returns the i-th element of a TT_SEQUENCE token, or NULL if out of range. */ + struct HParsedToken_* seqElement(size_t i) { + if ($self->token_type == TT_SEQUENCE && i < $self->token_data.seq->used) + return $self->token_data.seq->elements[i]; + return NULL; + } + size_t bytesLength() { + return ($self->token_type == TT_BYTES) ? $self->token_data.bytes.len : 0; + } + /* Returns byte value at index i as a short (0-255), or -1 if out of range. */ + short byteAt(size_t i) { + if ($self->token_type == TT_BYTES && i < $self->token_data.bytes.len) + return (short)(unsigned short)$self->token_data.bytes.token[i]; + return -1; + } +} + +#endif diff --git a/src/t_misc.c b/src/t_misc.c index dccee7c..6102531 100644 --- a/src/t_misc.c +++ b/src/t_misc.c @@ -16,20 +16,20 @@ static void test_tt_user(void) { } static void test_tt_registry(void) { - int id = h_allocate_token_type("com.upstandinghackers.test.token_type"); + int id = h_allocate_token_type("com.riversideresearch.test.token_type"); g_check_cmp_int32(id, >=, TT_USER); - int id2 = h_allocate_token_type("com.upstandinghackers.test.token_type_2"); + int id2 = h_allocate_token_type("com.riversideresearch.test.token_type_2"); g_check_cmp_int32(id2, !=, id); g_check_cmp_int32(id2, >=, TT_USER); - g_check_cmp_int32(id, ==, h_get_token_type_number("com.upstandinghackers.test.token_type")); - g_check_cmp_int32(id2, ==, h_get_token_type_number("com.upstandinghackers.test.token_type_2")); - g_check_string("com.upstandinghackers.test.token_type", ==, h_get_token_type_name(id)); - g_check_string("com.upstandinghackers.test.token_type_2", ==, h_get_token_type_name(id2)); + g_check_cmp_int32(id, ==, 
h_get_token_type_number("com.riversideresearch.test.token_type")); + g_check_cmp_int32(id2, ==, h_get_token_type_number("com.riversideresearch.test.token_type_2")); + g_check_string("com.riversideresearch.test.token_type", ==, h_get_token_type_name(id)); + g_check_string("com.riversideresearch.test.token_type_2", ==, h_get_token_type_name(id2)); if (h_get_token_type_name(0) != NULL) { g_test_message("Unknown token type should not return a name"); g_test_fail(); } - g_check_cmp_int32(h_get_token_type_number("com.upstandinghackers.test.unkown_token_type"), ==, + g_check_cmp_int32(h_get_token_type_number("com.riversideresearch.test.unkown_token_type"), ==, 0); } diff --git a/tests/t_misc.c b/tests/t_misc.c index e854f6a..617ab66 100644 --- a/tests/t_misc.c +++ b/tests/t_misc.c @@ -18,20 +18,20 @@ static void test_tt_user(void) { } static void test_tt_registry(void) { - int id = h_allocate_token_type("com.upstandinghackers.test.token_type"); + int id = h_allocate_token_type("com.riversideresearch.test.token_type"); g_check_cmp_int32(id, >=, TT_USER); - int id2 = h_allocate_token_type("com.upstandinghackers.test.token_type_2"); + int id2 = h_allocate_token_type("com.riversideresearch.test.token_type_2"); g_check_cmp_int32(id2, !=, id); g_check_cmp_int32(id2, >=, TT_USER); - g_check_cmp_int32(id, ==, h_get_token_type_number("com.upstandinghackers.test.token_type")); - g_check_cmp_int32(id2, ==, h_get_token_type_number("com.upstandinghackers.test.token_type_2")); - g_check_string("com.upstandinghackers.test.token_type", ==, h_get_token_type_name(id)); - g_check_string("com.upstandinghackers.test.token_type_2", ==, h_get_token_type_name(id2)); + g_check_cmp_int32(id, ==, h_get_token_type_number("com.riversideresearch.test.token_type")); + g_check_cmp_int32(id2, ==, h_get_token_type_number("com.riversideresearch.test.token_type_2")); + g_check_string("com.riversideresearch.test.token_type", ==, h_get_token_type_name(id)); + 
g_check_string("com.riversideresearch.test.token_type_2", ==, h_get_token_type_name(id2)); if (h_get_token_type_name(0) != NULL) { g_test_message("Unknown token type should not return a name"); g_test_fail(); } - g_check_cmp_int32(h_get_token_type_number("com.upstandinghackers.test.unkown_token_type"), ==, + g_check_cmp_int32(h_get_token_type_number("com.riversideresearch.test.unkown_token_type"), ==, 0); } From 741e8e9dab2287973c09d96f87abc71366ce4b2d Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Thu, 26 Mar 2026 13:23:48 -0400 Subject: [PATCH 04/14] Readd Cpp language bindings and tests --- DEVELOPMENT.md | 7 +- README.md | 13 +- SConstruct | 2 +- src/bindings/cpp/README.md | 149 ++++++++++ src/bindings/cpp/SConscript | 117 ++++++++ src/bindings/cpp/cpp_tests.cpp | 361 ++++++++++++++++++++++++ src/bindings/cpp/hammer/hammer.hpp | 207 ++++++++++++++ src/bindings/cpp/hammer/hammer_test.hpp | 54 ++++ 8 files changed, 905 insertions(+), 5 deletions(-) create mode 100644 src/bindings/cpp/README.md create mode 100644 src/bindings/cpp/SConscript create mode 100644 src/bindings/cpp/cpp_tests.cpp create mode 100644 src/bindings/cpp/hammer/hammer.hpp create mode 100644 src/bindings/cpp/hammer/hammer_test.hpp diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 20d912f..53af720 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -64,21 +64,22 @@ Notes: Install the required tools: ```bash -sudo apt install swig default-jdk +sudo apt install swig default-jdk libgtest-dev pip install setuptools ``` Build and test all language bindings: ```bash -scons bindings=python,java test +scons bindings=python,java,cpp test ``` -To target a specific binding, pass it individually and use its alias (`testpython` or `testjava`): +To target a specific binding, pass it individually and use its alias (`testpython`, `testjava`, or `testcpp`): ```bash scons bindings=python testpython scons bindings=java testjava +scons bindings=cpp testcpp ``` If `JAVA_HOME` is not set, the build 
locates `javac` via `PATH`. To use a specific JDK: diff --git a/README.md b/README.md index c93560b..d98f940 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The main feature of MicroHammer is its significantly smaller codebase, allowing - More thorough and consistent documentation - Windows / macOS not supported - Packrat parsing backend only -- Language bindings for Python and Java (see [Python Bindings](src/bindings/python/README.md), [Java Bindings](src/bindings/java/README.md)) +- Language bindings for Python, Java, and C++ (see [Python Bindings](src/bindings/python/README.md), [Java Bindings](src/bindings/java/README.md), [C++ Bindings](src/bindings/cpp/README.md)) ## Features @@ -96,6 +96,17 @@ scons bindings=java See [src/bindings/java/README.md](src/bindings/java/README.md) for the full API reference and usage guide. +### C++ + +Requires `g++` and `libgtest-dev` (for tests). + +```bash +sudo apt install libgtest-dev +scons bindings=cpp +``` + +See [src/bindings/cpp/README.md](src/bindings/cpp/README.md) for the full API reference and usage guide. + ## Examples The `examples/` directory contains some simple examples, currently including: diff --git a/SConstruct b/SConstruct index 2fe7a36..86ffbed 100644 --- a/SConstruct +++ b/SConstruct @@ -22,7 +22,7 @@ vars.Add( PathVariable("prefix", "Where to install in the FHS", "/usr/local", PathVariable.PathAccept) ) vars.Add( - ListVariable("bindings", "Language bindings to build", "none", ["python", "java"]) + ListVariable("bindings", "Language bindings to build", "none", ["python", "java", "cpp"]) ) vars.Add("python", "Python interpreter", "python3") diff --git a/src/bindings/cpp/README.md b/src/bindings/cpp/README.md new file mode 100644 index 0000000..3b50a9a --- /dev/null +++ b/src/bindings/cpp/README.md @@ -0,0 +1,149 @@ +# Hammer C++ Bindings + +A header-only C++ wrapper around the Hammer parser combinator library. 
+ +## Prerequisites + +- `g++` (C++14 or later) +- Hammer shared library built (see top-level README) +- `libgtest-dev` for running tests + +```bash +sudo apt install libgtest-dev +``` + +## Building + +```bash +scons bindings=cpp +``` + +## Running the Tests + +```bash +scons bindings=cpp testcpp +``` + +## Usage + +Add `src/bindings/cpp` (or the install prefix) to your include path, then include the header and link against `libhammer`: + +```bash +g++ -std=c++14 -I/usr/local/include/hammer -o myparser myparser.cpp -lhammer +``` + +### Basic Example + +```cpp +#include <hammer/hammer.hpp> +#include <iostream> + +// The wrapper is header-only: including the header and linking with +// -lhammer is all that is needed. No library-loading or initialisation +// call is required before building parsers. + +int main() { + using namespace hammer; + + // Match the literal bytes "GET " + Parser method = Token("GET "); + + // Match one or more printable ASCII bytes + Parser printable = Many1(ChRange(0x21, 0x7e)); + + // Sequence: method then path + Parser request = Sequence(method, printable, NULL); + + ParseResult result = request.parse("GET /index.html"); + if (!result) { + std::cout << "Parse failed\n"; + } else { + std::cout << result.asUnambiguous() << "\n"; + } + return 0; +} +``` + +### Parser Combinators + +All combinators live in the `hammer` namespace.
+ +| C++ call | Description | +|-----------------------------------|--------------------------------------------------------| +| `Token(str)` | Match a literal string | +| `Ch(byte)` | Match a single byte value | +| `ChRange(lo, hi)` | Match any byte in `[lo, hi]` | +| `In(str)` | Match any byte in the given charset | +| `NotIn(str)` | Match any byte not in the given charset | +| `Sequence(p, ..., NULL)` | Match each parser in order; result is `TT_SEQUENCE` | +| `Choice(p, ..., NULL)` | Try each parser in order; return first success | +| `Many(p)` | Match `p` zero or more times; result is `TT_SEQUENCE` | +| `Many1(p)` | Match `p` one or more times; result is `TT_SEQUENCE` | +| `RepeatN(p, n)` | Match `p` exactly `n` times; result is `TT_SEQUENCE` | +| `Optional(p)` | Match `p` or produce a `TT_NONE` token on failure | +| `Ignore(p)` | Match `p` but suppress its result from sequences | +| `SepBy(p, sep)` | Match `p` separated by `sep`, zero or more times | +| `SepBy1(p, sep)` | Match `p` separated by `sep`, one or more times | +| `Left(p, q)` | Match both; return result of `p` | +| `Right(p, q)` | Match both; return result of `q` | +| `Middle(p, q, r)` | Match all three; return result of `q` | +| `ButNot(p, q)` | Match `p` only if `q` does not also match | +| `Difference(p, q)` | Match `p` only when `q` matches less input | +| `Xor(p, q)` | Match exactly one of `p` or `q`, not both | +| `And(p)` | Succeed if `p` would match, consuming no input | +| `Not(p)` | Succeed if `p` would not match, consuming no input | +| `Whitespace(p)` | Skip leading whitespace, then match `p` | +| `IntRange(p, lo, hi)` | Match `p` only if the integer result is in `[lo, hi]` | +| `Epsilon()` | Always succeed, consuming no input | +| `End()` | Succeed only at end of input | +| `Nothing()` | Always fail | +| `Action(p, fn)` | Apply action function `fn` to parse result of `p` | +| `AttrBool(p, pred)` | Accept result of `p` only if predicate `pred` is true | + +### Integer Parsers + +```cpp 
+hammer::Uint8() // unsigned 8-bit +hammer::Uint16() // unsigned 16-bit, big-endian +hammer::Uint32() // unsigned 32-bit, big-endian +hammer::Uint64() // unsigned 64-bit, big-endian +hammer::Int8() // signed 8-bit +hammer::Int16() // signed 16-bit, big-endian +hammer::Int32() // signed 32-bit, big-endian +hammer::Int64() // signed 64-bit, big-endian +``` + +### Inspecting Parse Results + +`ParseResult` is truthy on success and falsy on failure. Use `asUnambiguous()` for a human-readable representation, or `getAST()` to get the raw `ParsedToken` wrapper. + +`ParseResult` owns the parse tree and frees it in its destructor, so a `ParsedToken` obtained from `getAST()` must not be used after its `ParseResult` has been destroyed. + +### Recursive Grammars + +Use `Indirect` and `bind()` to define recursive parsers: + +```cpp +using namespace hammer; + +Indirect expr; +Parser atom = ChRange('a', 'z'); +expr.bind(Choice(Sequence(atom, expr, NULL), Epsilon(), NULL)); + +ParseResult result = expr.parse("abc"); +``` + +### Testing Helpers + +Include `<hammer/hammer_test.hpp>` to get gtest assertion helpers: + +```cpp +#include <hammer/hammer_test.hpp> + +TEST(MyTest, Example) { + hammer::Parser p = hammer::Ch('a'); + EXPECT_TRUE(ParsesTo(p, "a", "u0x61")); + EXPECT_TRUE(ParseFails(p, "b")); + EXPECT_TRUE(ParsesOK(p, "a")); +} +``` diff --git a/src/bindings/cpp/SConscript b/src/bindings/cpp/SConscript new file mode 100644 index 0000000..dff9979 --- /dev/null +++ b/src/bindings/cpp/SConscript @@ -0,0 +1,117 @@ +# Copyright (c) 2026 Riverside Research +# -*- python -*- + +from __future__ import absolute_import, division, print_function + +import glob as _glob +import os +import shutil +import subprocess + +Import("env libhammer_shared testruns targets") + +if libhammer_shared is None: + print("Warning: C++ bindings require the shared library (not available in coverage/gprof builds). Skipping.") + Return() + +cxx = str(env.get("CXX", "g++")) +if not shutil.which(cxx): + print("Warning: C++ compiler (%s) not found. Skipping C++ bindings."
% cxx) + Return() + +project_root = Dir("#").abspath +src_dir = os.path.join(project_root, "src") +cpp_src_dir = os.path.join(project_root, "src/bindings/cpp") +hammer_lib_dir = os.path.dirname(str(libhammer_shared[0])) + +cppenv = env.Clone() +# Strip C-only flags from the cloned env; -std=c99 is invalid for g++. +# Also drop -Werror so gtest headers don't break the build. +cppenv["CCFLAGS"] = [ + f for f in cppenv.get("CCFLAGS", []) + if str(f) not in ("-std=c99", "-Werror") +] +cppenv.Append(CXXFLAGS=["-std=c++14"]) +cppenv.Append(CPPPATH=[cpp_src_dir, src_dir]) +cppenv.Append(LIBS=["hammer"]) +cppenv.Append(LIBPATH=[hammer_lib_dir]) +cppenv.MergeFlags("-Wno-missing-field-initializers") + +# Locate gtest. Try three strategies in order: +# 1. pkg-config (works when libgtest-dev ships a .pc file) +# 2. Pre-compiled system libs + headers (Ubuntu 20.04+) +# 3. Build from system source (fallback for older distros) +gtest_extra_objs = [] +gtest_found = False + +try: + flags = subprocess.check_output( + ["pkg-config", "--cflags", "--libs", "gtest_main"], + stderr=subprocess.DEVNULL, + ).decode().strip() + cppenv.MergeFlags(flags) + gtest_found = True +except Exception: + pass + +if not gtest_found: + gtest_h = "/usr/include/gtest/gtest.h" + gtest_main_libs = ( + _glob.glob("/usr/lib/*/libgtest_main.a") + + _glob.glob("/usr/lib/libgtest_main.a") + ) + if os.path.isfile(gtest_h) and gtest_main_libs: + cppenv.Append( + LIBPATH=[os.path.dirname(gtest_main_libs[0])], + LIBS=["gtest_main", "gtest", "pthread"], + ) + gtest_found = True + +if not gtest_found: + for gtest_base in [ + "/usr/src/googletest/googletest", + "/usr/src/googletest", + "/usr/src/gtest", + ]: + gtest_h_candidate = os.path.join(gtest_base, "include/gtest/gtest.h") + gtest_all_cc = os.path.join(gtest_base, "src/gtest-all.cc") + gtest_main_cc = os.path.join(gtest_base, "src/gtest_main.cc") + if os.path.isfile(gtest_h_candidate) and os.path.isfile(gtest_all_cc): + gtest_inc = os.path.join(gtest_base, 
"include") + genv = cppenv.Clone() + genv.Append(CPPPATH=[gtest_inc, gtest_base]) + genv.MergeFlags("-DGTEST_HAS_PTHREAD=0") + gtest_extra_objs = [genv.Object("gtest-all", gtest_all_cc)] + if os.path.isfile(gtest_main_cc): + gtest_extra_objs.append(genv.Object("gtest-main", gtest_main_cc)) + cppenv.Append(CPPPATH=[gtest_inc]) + cppenv.Append(LIBS=["pthread"]) + gtest_found = True + break + +if not gtest_found: + print("Warning: gtest not found. Install libgtest-dev to build C++ bindings. Skipping.") + Return() + +cpp_test = cppenv.Program( + "hammer_test", + gtest_extra_objs + ["#src/bindings/cpp/cpp_tests.cpp"], +) +Default(cpp_test) + +cpptest = Alias( + "testcpp", + [cpp_test], + "env LD_LIBRARY_PATH=%s %s" % (hammer_lib_dir, cpp_test[0].path), +) +AlwaysBuild(cpptest) +testruns.append(cpptest) + +cppinstall = Alias( + "installcpp", + env.Install("$incpath", [ + "#src/bindings/cpp/hammer/hammer.hpp", + "#src/bindings/cpp/hammer/hammer_test.hpp", + ]), +) +targets.append(cppinstall) diff --git a/src/bindings/cpp/cpp_tests.cpp b/src/bindings/cpp/cpp_tests.cpp new file mode 100644 index 0000000..64a18cf --- /dev/null +++ b/src/bindings/cpp/cpp_tests.cpp @@ -0,0 +1,361 @@ +#include +#include +#include + +// internal.h is not C++-compatible; forward-declare only what the tests need. 
+extern "C" { + HCountedArray *h_carray_new_sized(HArena *arena, size_t capacity); + void h_carray_append(HCountedArray *arr, void *element); +} + +#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count))) + +namespace { + using namespace ::hammer; + TEST(ParserTypes, Token) { + Parser p = Token("95\xA2"); + EXPECT_TRUE(ParsesTo(p, "95\xA2", "<39.35.a2>")); + EXPECT_TRUE(ParseFails(p, "95")); + } + + TEST(ParserTypes, Ch) { + Parser p = Ch(0xA2); + EXPECT_TRUE(ParsesTo(p, "\xA2", "u0xa2")); + EXPECT_TRUE(ParseFails(p, "\xA3")); + } + + TEST(ParserTypes, ChRange) { + Parser p = ChRange('a', 'c'); + EXPECT_TRUE(ParsesTo(p, "b", "u0x62")); + EXPECT_TRUE(ParseFails(p, "d")); + } + + TEST(ParserTypes, Int64) { + Parser p = Int64(); + EXPECT_TRUE(ParsesTo(p, std::string("\xff\xff\xff\xfe\x00\x00\x00\x00", 8), "s-0x200000000")); + EXPECT_TRUE(ParseFails(p, std::string("\xff\xff\xff\xfe\x00\x00\x00", 7))); + } + + TEST(ParserTypes, Int32) { + Parser p = Int32(); + EXPECT_TRUE(ParsesTo(p, std::string("\xff\xfe\x00\x00", 4), "s-0x20000")); + EXPECT_TRUE(ParseFails(p, std::string("\xff\xfe\x00", 3))); + EXPECT_TRUE(ParsesTo(p, std::string("\x00\x02\x00\x00",4) ,"s0x20000")); + EXPECT_TRUE(ParseFails(p, std::string("\x00\x02\x00", 3))); + } + + TEST(ParserTypes, Int16) { + Parser p = Int16(); + EXPECT_TRUE(ParsesTo(p, std::string("\xfe\x00", 2), "s-0x200")); + EXPECT_TRUE(ParseFails(p, "\xfe")); + EXPECT_TRUE(ParsesTo(p, std::string("\x02\x00", 2), "s0x200")); + EXPECT_TRUE(ParseFails(p, "\x01")); + } + + TEST(ParserTypes, Int8) { + Parser p = Int8(); + EXPECT_TRUE(ParsesTo(p, "\x88", "s-0x78")); + EXPECT_TRUE(ParseFails(p, "")); + } + + TEST(ParserTypes, Uint64) { + Parser p = Uint64(); + EXPECT_TRUE(ParsesTo(p, std::string("\x00\x00\x00\x02\x00\x00\x00\x00", 8), "u0x200000000")); + EXPECT_TRUE(ParseFails(p, std::string("\x00\x00\x00\x02\x00\x00\x00", 7))); + } + + TEST(ParserTypes, Uint32) { + Parser p = Uint32(); + EXPECT_TRUE(ParsesTo(p, 
std::string("\x00\x02\x00\x00", 4), "u0x20000")); + EXPECT_TRUE(ParseFails(p, std::string("\x00\x02\x00", 3))); + } + + TEST(ParserTypes, Uint16) { + Parser p = Uint16(); + EXPECT_TRUE(ParsesTo(p, std::string("\x02\x00", 2), "u0x200")); + EXPECT_TRUE(ParseFails(p, "\x02")); + } + + TEST(ParserTypes, Uint8) { + Parser p = Uint8(); + EXPECT_TRUE(ParsesTo(p, "\x78", "u0x78")); + EXPECT_TRUE(ParseFails(p, "")); + } + + TEST(ParserTypes, IntRange) { + Parser p = IntRange(Uint8(), 3, 10); + EXPECT_TRUE(ParsesTo(p, "\x05", "u0x5")); + EXPECT_TRUE(ParseFails(p, "\xb")); + } + + TEST(ParserTypes, Whitespace) { + Parser p = Whitespace(Ch('a')); + Parser q = Whitespace(End()); + EXPECT_TRUE(ParsesTo(p, "a", "u0x61")); + EXPECT_TRUE(ParsesTo(p, " a", "u0x61")); + EXPECT_TRUE(ParsesTo(p, " a", "u0x61")); + EXPECT_TRUE(ParsesTo(p, "\ta", "u0x61")); + EXPECT_TRUE(ParseFails(p, "_a")); + + EXPECT_TRUE(ParsesTo(q, "", "NULL")); + EXPECT_TRUE(ParsesTo(q, " ", "NULL")); + EXPECT_TRUE(ParseFails(p, " x")); + } + + TEST(ParserTypes, Left) { + Parser p = Left(Ch('a'), Ch(' ')); + EXPECT_TRUE(ParsesTo(p, "a ", "u0x61")); + EXPECT_TRUE(ParseFails(p, "a")); + EXPECT_TRUE(ParseFails(p, " ")); + EXPECT_TRUE(ParseFails(p, "ab")); + } + + TEST(ParserTypes, Right) { + Parser p = Right(Ch(' '), Ch('a')); + EXPECT_TRUE(ParsesTo(p, " a", "u0x61")); + EXPECT_TRUE(ParseFails(p, "a")); + EXPECT_TRUE(ParseFails(p, " ")); + EXPECT_TRUE(ParseFails(p, "ba")); + } + + TEST(ParserTypes, Middle) { + Parser p = Middle(Ch(' '), Ch('a'), Ch(' ')); + EXPECT_TRUE(ParsesTo(p, " a ", "u0x61")); + EXPECT_TRUE(ParseFails(p, "a")); + EXPECT_TRUE(ParseFails(p, " ")); + EXPECT_TRUE(ParseFails(p, " a")); + EXPECT_TRUE(ParseFails(p, " b ")); + EXPECT_TRUE(ParseFails(p, "ba ")); + EXPECT_TRUE(ParseFails(p, " ab")); + } + +#include + + HParsedToken* upcase(const HParseResult *p, void* user_data) { + switch(p->ast->token_type) { + case TT_SEQUENCE: + { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + HCountedArray 
*seq = h_carray_new_sized(p->arena, p->ast->seq->used); + ret->token_type = TT_SEQUENCE; + for (size_t i=0; i<p->ast->seq->used; ++i) { + if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) { + HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1); + tmp->token_type = TT_UINT; + tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint); + h_carray_append(seq, tmp); + } else { + h_carray_append(seq, p->ast->seq->elements[i]); + } + } + ret->seq = seq; + return ret; + } + case TT_UINT: + { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + ret->token_type = TT_UINT; + ret->uint = toupper(p->ast->uint); + return ret; + } + default: + return (HParsedToken*)p->ast; + } + } + + TEST(ParserTypes, Action) { + Parser p = Action(Sequence(Choice(Ch('a'), Ch('A'), NULL), + Choice(Ch('b'), Ch('B'), NULL), + NULL), + upcase); + EXPECT_TRUE(ParsesTo(p, "ab", "(u0x41 u0x42)")); + EXPECT_TRUE(ParsesTo(p, "AB", "(u0x41 u0x42)")); + EXPECT_TRUE(ParseFails(p, "XX")); + } + + TEST(ParserTypes, In) { + Parser p = In("abc"); + EXPECT_TRUE(ParsesTo(p, "b", "u0x62")); + EXPECT_TRUE(ParseFails(p, "d")); + } + + TEST(ParserTypes, NotIn) { + Parser p = NotIn("abc"); + EXPECT_TRUE(ParsesTo(p, "d", "u0x64")); + EXPECT_TRUE(ParseFails(p, "a")); + } + + TEST(ParserTypes, End) { + Parser p = Sequence(Ch('a'), End(), NULL); + EXPECT_TRUE(ParsesTo(p, "a", "(u0x61)")); + EXPECT_TRUE(ParseFails(p, "aa")); + } + + TEST(ParserTypes, Nothing) { + Parser p = Nothing(); + EXPECT_TRUE(ParseFails(p, "a")); + } + + TEST(ParserTypes, Sequence) { + Parser p = Sequence(Ch('a'), Ch('b'), NULL); + Parser q = Sequence(Ch('a'), Whitespace(Ch('b')), NULL); + EXPECT_TRUE(ParsesTo(p, "ab", "(u0x61 u0x62)")); + EXPECT_TRUE(ParseFails(p, "a")); + EXPECT_TRUE(ParseFails(p, "b")); + EXPECT_TRUE(ParsesTo(q, "ab", "(u0x61 u0x62)")); + EXPECT_TRUE(ParsesTo(q, "a b", "(u0x61 u0x62)")); + EXPECT_TRUE(ParsesTo(q, "a b", "(u0x61 u0x62)")); + } + + TEST(ParserTypes, Choice) { + Parser p =
Choice(Ch('a'), Ch('b'), NULL); + EXPECT_TRUE(ParsesTo(p, "a", "u0x61")); + EXPECT_TRUE(ParsesTo(p, "b", "u0x62")); + EXPECT_TRUE(ParseFails(p, "c")); + } + + TEST(ParserTypes, ButNot) { + Parser p = ButNot(Ch('a'), Token("ab")); + Parser q = ButNot(ChRange('0', '9'), Ch('6')); + EXPECT_TRUE(ParsesTo(p, "a", "u0x61")); + EXPECT_TRUE(ParseFails(p, "ab")); + EXPECT_TRUE(ParsesTo(p, "aa", "u0x61")); + EXPECT_TRUE(ParseFails(q, "6")); + } + + TEST(ParserTypes, Difference) { + Parser p = Difference(Token("ab"), Ch('a')); + EXPECT_TRUE(ParsesTo(p, "ab", "<61.62>")); + EXPECT_TRUE(ParseFails(p, "a")); + } + + TEST(ParserTypes, Xor) { + Parser p = Xor(ChRange('0', '6'), ChRange('5', '9')); + EXPECT_TRUE(ParsesTo(p, "0", "u0x30")); + EXPECT_TRUE(ParsesTo(p, "9", "u0x39")); + EXPECT_TRUE(ParseFails(p, "5")); + EXPECT_TRUE(ParseFails(p, "a")); + } + + TEST(ParserTypes, Many) { + Parser p = Many(Choice(Ch('a'), Ch('b'), NULL)); + EXPECT_TRUE(ParsesTo(p, "", "()")); + EXPECT_TRUE(ParsesTo(p, "a", "(u0x61)")); + EXPECT_TRUE(ParsesTo(p, "b", "(u0x62)")); + EXPECT_TRUE(ParsesTo(p, "aabbaba", "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)")); + } + + TEST(ParserTypes, Many1) { + Parser p = Many1(Choice(Ch('a'), Ch('b'), NULL)); + EXPECT_TRUE(ParseFails(p, "")); + EXPECT_TRUE(ParsesTo(p, "a", "(u0x61)")); + EXPECT_TRUE(ParsesTo(p, "b", "(u0x62)")); + EXPECT_TRUE(ParsesTo(p, "aabbaba", "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)")); + EXPECT_TRUE(ParseFails(p, "daabbabadef")); + } + + TEST(ParserTypes, RepeatN) { + Parser p = RepeatN(Choice(Ch('a'), Ch('b'), NULL), 2); + EXPECT_TRUE(ParseFails(p, "adef")); + EXPECT_TRUE(ParsesTo(p, "abdef", "(u0x61 u0x62)")); + EXPECT_TRUE(ParseFails(p, "dabdef")); + } + + TEST(ParserTypes, Optional) { + Parser p = Sequence(Ch('a'), Optional(Choice(Ch('b'), Ch('c'), NULL)), Ch('d'), NULL); + EXPECT_TRUE(ParsesTo(p, "abd", "(u0x61 u0x62 u0x64)")); + EXPECT_TRUE(ParsesTo(p, "acd", "(u0x61 u0x63 u0x64)")); + EXPECT_TRUE(ParsesTo(p, "ad", "(u0x61 null 
u0x64)")); + EXPECT_TRUE(ParseFails(p, "aed")); + EXPECT_TRUE(ParseFails(p, "ab")); + EXPECT_TRUE(ParseFails(p, "ac")); + } + + TEST(ParserTypes, Ignore) { + Parser p = Sequence(Ch('a'), Ignore(Ch('b')), Ch('c'), NULL); + EXPECT_TRUE(ParsesTo(p, "abc", "(u0x61 u0x63)")); + EXPECT_TRUE(ParseFails(p, "ac")); + } + + TEST(ParserTypes, SepBy) { + Parser p = SepBy(Choice(Ch('1'), Ch('2'), Ch('3'), NULL), Ch(',')); + EXPECT_TRUE(ParsesTo(p, "1,2,3", "(u0x31 u0x32 u0x33)")); + EXPECT_TRUE(ParsesTo(p, "1,3,2", "(u0x31 u0x33 u0x32)")); + EXPECT_TRUE(ParsesTo(p, "1,3", "(u0x31 u0x33)")); + EXPECT_TRUE(ParsesTo(p, "3", "(u0x33)")); + EXPECT_TRUE(ParsesTo(p, "", "()")); + } + + TEST(ParserTypes, SepBy1) { + Parser p = SepBy1(Choice(Ch('1'), Ch('2'), Ch('3'), NULL), Ch(',')); + EXPECT_TRUE(ParsesTo(p, "1,2,3", "(u0x31 u0x32 u0x33)")); + EXPECT_TRUE(ParsesTo(p, "1,3,2", "(u0x31 u0x33 u0x32)")); + EXPECT_TRUE(ParsesTo(p, "1,3", "(u0x31 u0x33)")); + EXPECT_TRUE(ParsesTo(p, "3", "(u0x33)")); + EXPECT_TRUE(ParseFails(p, "")); + } + + TEST(ParserTypes, EpsilonP) { + Parser p = Sequence(Ch('a'), Epsilon(), Ch('b'), NULL); + Parser q = Sequence(Epsilon(), Ch('a'), NULL); + Parser r = Sequence(Ch('a'), Epsilon(), NULL); + EXPECT_TRUE(ParsesTo(p, "ab", "(u0x61 u0x62)")); + EXPECT_TRUE(ParsesTo(q, "a", "(u0x61)")); + EXPECT_TRUE(ParsesTo(r, "a", "(u0x61)")); + } + + bool validate_test_ab(HParseResult *p, void* user_data) { + if (TT_SEQUENCE != p->ast->token_type) + return false; + if (TT_UINT != p->ast->seq->elements[0]->token_type) + return false; + if (TT_UINT != p->ast->seq->elements[1]->token_type) + return false; + return (p->ast->seq->elements[0]->uint == p->ast->seq->elements[1]->uint); + } + + TEST(ParserTypes, AttrBool) { + Parser p = AttrBool(Many1(Choice(Ch('a'), Ch('b'), NULL)), + validate_test_ab, NULL); + EXPECT_TRUE(ParsesTo(p, "aa", "(u0x61 u0x61)")); + EXPECT_TRUE(ParsesTo(p, "bb", "(u0x62 u0x62)")); + EXPECT_TRUE(ParseFails(p, "ab")); + } + + TEST(ParserTypes, And) { + 
Parser p = Sequence(And(Ch('0')), Ch('0'), NULL);
+    Parser q = Sequence(And(Ch('0')), Ch('1'), NULL);
+    Parser r = Sequence(Ch('1'), And(Ch('2')), NULL);
+    EXPECT_TRUE(ParsesTo(p, "0", "(u0x30)"));
+    EXPECT_TRUE(ParseFails(q, "0"));
+    EXPECT_TRUE(ParsesTo(r, "12", "(u0x31)"));
+  }
+
+  TEST(ParserTypes, Not) {
+    Parser p = Sequence(Ch('a'),
+                        Choice(Ch('+'), Token("++"), NULL),
+                        Ch('b'), NULL);
+    Parser q = Sequence(Ch('a'),
+                        Choice(Sequence(Ch('+'), Not(Ch('+')), NULL),
+                               Token("++"), NULL),
+                        Ch('b'), NULL);
+    EXPECT_TRUE(ParsesTo(p, "a+b", "(u0x61 u0x2b u0x62)"));
+    EXPECT_TRUE(ParseFails(p, "a++b"));
+    EXPECT_TRUE(ParsesTo(q, "a+b", "(u0x61 (u0x2b) u0x62)"));
+    EXPECT_TRUE(ParsesTo(q, "a++b", "(u0x61 <2b.2b> u0x62)"));
+  }
+
+  TEST(ParserTypes, Rightrec) {
+    Indirect p = Indirect();
+    p.bind(Choice(Sequence(Ch('a'), p, NULL), Epsilon(), NULL));
+    EXPECT_TRUE(ParsesTo(p, "a", "(u0x61)"));
+    EXPECT_TRUE(ParsesTo(p, "aa", "(u0x61 (u0x61))"));
+    EXPECT_TRUE(ParsesTo(p, "aaa", "(u0x61 (u0x61 (u0x61)))"));
+  }
+
+};
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
diff --git a/src/bindings/cpp/hammer/hammer.hpp b/src/bindings/cpp/hammer/hammer.hpp
new file mode 100644
index 0000000..4a1096c
--- /dev/null
+++ b/src/bindings/cpp/hammer/hammer.hpp
@@ -0,0 +1,207 @@
+#ifndef HAMMER_HAMMER__HPP
+#define HAMMER_HAMMER__HPP
+
+#include "hammer.h"
+#include <string>    // std::string
+#include <stdint.h>  // uint8_t, uint64_t, int64_t
+#include <stdarg.h>  // va_list, va_start, va_end
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused"
+
+// system_allocator is declared in internal.h but that header is not
+// C++-compatible. Forward-declare only what this header needs.
+extern "C" {
+  extern HAllocator system_allocator;
+}
+
+namespace hammer {
+  class ParseResult;
+  class Parser {
+  public:
+    const HParser *parser;
+
+    Parser(const HParser* inner) : parser(inner) {}
+
+    //static Parser nil = Parser(NULL);
+    ParseResult parse(const std::string &string);
+  };
+
+  class ParsedToken {
+    // This object can suddenly become invalid if the underlying parse
+    // tree is destroyed.
+
+    // This object should serve as a very thin wrapper around an HParsedToken*.
+    // In particular sizeof(ParsedToken) should== sizeof(HParsedToken*)
+    // This means that we only get one member variable and no virtual functions.
+  protected:
+    const HParsedToken *token;
+
+  public:
+    // Copyable view; every accessor is const because it only reads through `token`.
+    ParsedToken(const HParsedToken *inner) : token(inner) {}
+    ParsedToken(const ParsedToken &other) : token(other.token) {}
+
+    inline HTokenType getType() const {
+      return token->token_type;
+    }
+
+    void* getUser() const {return token->user;}
+    uint64_t getUint() const {return token->uint;}
+    int64_t getSint() const {return token->sint;}
+    // TODO: Sequence getSeq() const {return Sequence(token->seq);}
+    std::string getBytes() const {return std::string((char*)token->bytes.token, token->bytes.len); }
+
+    // Copies h_write_result_unamb()'s buffer into a std::string, then frees it via system_allocator.
+    std::string asUnambiguous() const {
+      char* buf = h_write_result_unamb(token);
+      std::string s = std::string(buf);
+      (&system_allocator)->free(&system_allocator, buf);
+      return s;
+    }
+  };
+
+  class ParseResult {
+  protected:
+    HParseResult *_result;
+  public:
+
+    ParseResult(HParseResult *result) : _result(result) {}
+
+    ParsedToken getAST() {
+      return ParsedToken(_result->ast);
+    }
+    inline std::string asUnambiguous() {
+      return getAST().asUnambiguous();
+    }
+    // Truth tests (const, so usable on const results): a null _result means the parse failed.
+    operator bool() const {
+      return _result != NULL;
+    }
+    bool operator !() const {
+      return _result == NULL;
+    }
+
+    ~ParseResult() {
+      h_parse_result_free(_result);
+      _result = NULL;
+    }
+  };
+
+  inline ParseResult Parser::parse(const std::string &string) {
+    return ParseResult(h_parse(parser, (uint8_t*)string.data(), string.length()));
+  }
+
+ static inline Parser Token(const std::string &str) { + return Parser(h_token((const uint8_t*)str.data(), str.length())); + } + static inline Parser Token(const uint8_t *buf, size_t len) { + return Parser(h_token(buf, len)); + } + static inline Parser Ch(char ch) { + return Parser(h_ch(ch)); + } + static inline Parser ChRange(uint8_t lower, uint8_t upper) { + return Parser(h_ch_range(lower,upper)); + } + + static inline Parser Int64() { return Parser(h_int64()); } + static inline Parser Int32() { return Parser(h_int32()); } + static inline Parser Int16() { return Parser(h_int16()); } + static inline Parser Int8 () { return Parser(h_int8 ()); } + + static inline Parser Uint64() { return Parser(h_uint64()); } + static inline Parser Uint32() { return Parser(h_uint32()); } + static inline Parser Uint16() { return Parser(h_uint16()); } + static inline Parser Uint8 () { return Parser(h_uint8 ()); } + + static inline Parser IntRange(Parser p, int64_t lower, int64_t upper) { + return Parser(h_int_range(p.parser, lower, upper)); + } + + static inline Parser Bits(size_t len, bool sign) { return Parser(h_bits(len, sign)); } + static inline Parser Whitespace(Parser p) { return Parser(h_whitespace(p.parser)); } + static inline Parser Left(Parser p, Parser q) { return Parser(h_left(p.parser, q.parser)); } + static inline Parser Right(Parser p, Parser q) { return Parser(h_right(p.parser, q.parser)); } + static inline Parser Middle(Parser p, Parser q, Parser r) { + return Parser(h_middle(p.parser, q.parser, r.parser)); + } + + // User is responsible for ensuring that function remains allocated. 
+  static inline Parser Action(Parser p, HAction action, void* user_data) {
+    return Parser(h_action(p.parser, action, user_data));
+  }
+  // Overload without user_data; NULL is passed to h_action.
+  static inline Parser Action(Parser p, HAction action) {
+    return Parser(h_action(p.parser, action, NULL));
+  }
+  // Wraps h_attr_bool. static inline (like the other factories) avoids duplicate symbols across TUs.
+  static inline Parser AttrBool(Parser p, HPredicate pred, void* user_data) {
+    return Parser(h_attr_bool(p.parser, pred, user_data));
+  }
+  // Overload without user_data; NULL is passed to h_attr_bool.
+  static inline Parser AttrBool(Parser p, HPredicate pred) {
+    return Parser(h_attr_bool(p.parser, pred, NULL));
+  }
+
+  static inline Parser In(const std::string &charset) {
+    return Parser(h_in((const uint8_t*)charset.data(), charset.length()));
+  }
+  static inline Parser In(const uint8_t *charset, size_t length) {
+    return Parser(h_in(charset, length));
+  }
+
+  static inline Parser NotIn(const std::string &charset) {
+    return Parser(h_not_in((const uint8_t*)charset.data(), charset.length()));
+  }
+  static inline Parser NotIn(const uint8_t *charset, size_t length) {
+    return Parser(h_not_in(charset, length));
+  }
+
+  static inline Parser End() { return Parser(h_end_p()); }
+  static inline Parser Nothing() { return Parser(h_nothing_p()); }
+
+  static inline Parser Sequence(Parser p, ...) {
+    va_list ap;
+    va_start(ap, p);
+    // Reinterpret Parser (which starts with a single HParser* member) as HParser*.
+    HParser* ret = h_sequence__v(*(HParser**)(void*)&p, ap);
+    va_end(ap);
+    return Parser(ret);
+  }
+
+  static inline Parser Choice(Parser p, ...)
{ + va_list ap; + va_start(ap, p); + HParser* ret = h_choice__v(*(HParser**)(void*)&p, ap); + va_end(ap); + return Parser(ret); + } + + static inline Parser ButNot(Parser p1, Parser p2) { return Parser(h_butnot(p1.parser, p2.parser)); } + static inline Parser Difference(Parser p1, Parser p2) { return Parser(h_difference(p1.parser, p2.parser)); } + static inline Parser Xor(Parser p1, Parser p2) { return Parser(h_xor(p1.parser, p2.parser)); } + static inline Parser Many(Parser p) { return Parser(h_many(p.parser)); } + static inline Parser Many1(Parser p) { return Parser(h_many1(p.parser)); } + static inline Parser RepeatN(Parser p, size_t n) { return Parser(h_repeat_n(p.parser, n)); } + static inline Parser Optional(Parser p) { return Parser(h_optional(p.parser)); } + static inline Parser Ignore(Parser p) { return Parser(h_ignore(p.parser)); } + static inline Parser SepBy(Parser p, Parser sep) { return Parser(h_sepBy(p.parser, sep.parser)); } + static inline Parser SepBy1(Parser p, Parser sep) { return Parser(h_sepBy1(p.parser, sep.parser)); } + static inline Parser Epsilon() { return Parser(h_epsilon_p()); } + static inline Parser LengthValue(Parser length, Parser value) { return Parser(h_length_value(length.parser, value.parser)); } + + static inline Parser And(Parser p) { return Parser(h_and(p.parser)); } + static inline Parser Not(Parser p) { return Parser(h_not(p.parser)); } + + class Indirect : public Parser { + public: + Indirect() : Parser(h_indirect()) {} + void bind(Parser p) { + h_bind_indirect((HParser*)parser, p.parser); + } + }; +} + +#pragma GCC diagnostic pop +#endif diff --git a/src/bindings/cpp/hammer/hammer_test.hpp b/src/bindings/cpp/hammer/hammer_test.hpp new file mode 100644 index 0000000..e9f5f26 --- /dev/null +++ b/src/bindings/cpp/hammer/hammer_test.hpp @@ -0,0 +1,54 @@ +#ifndef HAMMER_HAMMER_TEST__HPP +#define HAMMER_HAMMER_TEST__HPP +#include + +#include +#include + +#define HAMMER_DECL_UNUSED H_GCC_ATTRIBUTE((unused)) + +static 
::testing::AssertionResult ParseFails(hammer::Parser parser, + const std::string &input) HAMMER_DECL_UNUSED; +static ::testing::AssertionResult ParseFails(hammer::Parser parser, + const std::string &input) { + hammer::ParseResult result = parser.parse(input); + if (result) { + return ::testing::AssertionFailure() << "Parse succeeded with " << result.asUnambiguous() << "; expected failure"; + } else { + return ::testing::AssertionSuccess(); + } +} + +static ::testing::AssertionResult ParsesOK(hammer::Parser parser, + const std::string &input) HAMMER_DECL_UNUSED; +static ::testing::AssertionResult ParsesOK(hammer::Parser parser, + const std::string &input) { + hammer::ParseResult result = parser.parse(input); + if (!result) { + return ::testing::AssertionFailure() << "Parse failed; expected success"; + } else { + return ::testing::AssertionSuccess(); + } +} + +static ::testing::AssertionResult ParsesTo(hammer::Parser parser, + const std::string &input, + const std::string &expected_result) HAMMER_DECL_UNUSED; +static ::testing::AssertionResult ParsesTo(hammer::Parser parser, + const std::string &input, + const std::string &expected_result) { + hammer::ParseResult result = parser.parse(input); + if (!result) { + return ::testing::AssertionFailure() << "Parse failed; expected success"; + } else if (result.asUnambiguous() != expected_result) { + return ::testing::AssertionFailure() + << "Parse succeeded with wrong result: got " + << result.asUnambiguous() + << "; expected " + << expected_result; + } else { + return ::testing::AssertionSuccess(); + } +} + +#endif // defined(HAMMER_HAMMER_TEST__HPP) From df89707d1a701b9a88589d3ea81f2c46f34ee0cc Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Fri, 27 Mar 2026 13:09:50 -0400 Subject: [PATCH 05/14] Add full language binding tests to pipeline --- .github/workflows/pipeline.yml | 5 ++++- DEVELOPMENT.md | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pipeline.yml 
b/.github/workflows/pipeline.yml index 9ae24fb..1b926ab 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -30,11 +30,14 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y scons gcc libglib2.0-dev pkg-config + sudo apt-get install -y scons gcc libglib2.0-dev pkg-config swig default-jdk libgtest-dev python3-setuptools - name: Run tests run: scons test + - name: Run binding tests + run: scons bindings=all test + build-deb: runs-on: ubuntu-latest needs: [clang-format, scons-test] diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 53af720..1f0d6c8 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -71,7 +71,7 @@ pip install setuptools Build and test all language bindings: ```bash -scons bindings=python,java,cpp test +scons bindings=all test ``` To target a specific binding, pass it individually and use its alias (`testpython`, `testjava`, or `testcpp`): From b176d2612beb8749ddd72def3d33a425117b2174 Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Fri, 27 Mar 2026 15:48:59 -0400 Subject: [PATCH 06/14] Refactor testing output with sections for core and language binding tests as well as summary stats --- SConstruct | 40 +++++++++++++++ src/SConscript | 25 ++++++--- src/bindings/cpp/SConscript | 22 ++++++-- src/bindings/java/SConscript | 11 +++- src/bindings/python/SConscript | 26 ++++++---- tools/test_reporter.py | 92 ++++++++++++++++++++++++++++++++++ 6 files changed, 191 insertions(+), 25 deletions(-) create mode 100644 tools/test_reporter.py diff --git a/SConstruct b/SConstruct index 86ffbed..6f08525 100644 --- a/SConstruct +++ b/SConstruct @@ -191,12 +191,16 @@ env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_")) # env.Append(CPPPATH=os.path.join('#', 'hammer')) testruns = [] +binding_results = [] # [(display_name, results_file_path), ...] +binding_test_stamps = [] # [SCons node of each binding test stamp, ...] 
targets = ["$libpath", "$incpath", "$parsersincpath", "$backendsincpath", "$pkgconfigpath"] Export("env") Export("testruns") Export("targets") +Export("binding_results") +Export("binding_test_stamps") if not GetOption("in_place"): env["BUILD_BASE"] = "build/$VARIANT" @@ -212,6 +216,42 @@ else: for testrun in testruns: env.Alias("test", testrun) +if binding_results: + _br = list(binding_results) + + def _print_summary(target, source, env, br=_br): + rows = [] + for name, rf in br: + try: + parts = open(env.subst(rf)).read().strip().split() + rows.append((name, int(parts[1]), int(parts[2]))) + except Exception: + pass + if not rows: + return 0 + w = max(len(r[0]) for r in rows) + tp = sum(r[1] for r in rows) + tf = sum(r[2] for r in rows) + print("\nTest Results:") + for name, p, f in rows: + t = p + f + status = "passed" if not f else "FAILED" + print(f" {name:<{w}} : {p}/{t} {status}") + print(" " + "\u2500" * (w + 18)) + total = tp + tf + status = "passed" if not tf else "FAILED" + print(f" {'Total':<{w}} : {tp}/{total} {status}\n") + return 0 + + build_base = env.subst("$BUILD_BASE") + summary = env.Command( + os.path.join(build_base, "src", "binding_tests_summary.stamp"), + binding_test_stamps, + Action(_print_summary, strfunction=lambda *a: ""), + ) + AlwaysBuild(summary) + env.Alias("test", summary) + # Add gcov target to generate coverage files in build directory if GetOption("coverage"): build_base = env.subst("$BUILD_BASE") diff --git a/src/SConscript b/src/SConscript index 29a2d94..bb0a9c2 100644 --- a/src/SConscript +++ b/src/SConscript @@ -5,8 +5,9 @@ from __future__ import absolute_import, division, print_function import glob import os import re +import sys -Import("env testruns") +Import("env testruns binding_results binding_test_stamps") dist_headers = [ "hammer.h", @@ -211,16 +212,24 @@ if GetOption("with_tests"): if os.path.basename(_libdir) == "build": _libdir = os.path.dirname(_libdir) _exe = ctestexec[0].path - _run_and_summarize = ( - 
"tmp=$$(mktemp); env LD_LIBRARY_PATH=" - + _libdir - + " " - + _exe - + ' > "$$tmp"; status=$$?; cat "$$tmp"; rm "$$tmp"; exit $$status' + reporter = os.path.join(Dir("#").abspath, "tools", "test_reporter.py") + core_results_dir = os.path.join(Dir("#").abspath, env.subst("$BUILD_BASE"), "src") + core_results_file = os.path.join(core_results_dir, "hammer_core.results") + ctestexec_stamp = testenv.Command( + "core_tests.stamp", + [ctestexec], + [ + "LD_LIBRARY_PATH=%s %s %s --binding Core --results-file %s -- %s" + % (_libdir, sys.executable, reporter, core_results_file, _exe), + "touch $TARGET", + ], ) - ctest = Alias("testc", [ctestexec], _run_and_summarize) + AlwaysBuild(ctestexec_stamp) + ctest = Alias("testc", [ctestexec_stamp], ctestexec_stamp) AlwaysBuild(ctest) testruns.append(ctest) + binding_results.append(("Core", core_results_file)) + binding_test_stamps.append(ctestexec_stamp[0]) Export("libhammer_static libhammer_shared") diff --git a/src/bindings/cpp/SConscript b/src/bindings/cpp/SConscript index dff9979..6bc4877 100644 --- a/src/bindings/cpp/SConscript +++ b/src/bindings/cpp/SConscript @@ -7,8 +7,9 @@ import glob as _glob import os import shutil import subprocess +import sys -Import("env libhammer_shared testruns targets") +Import("env libhammer_shared testruns targets binding_results binding_test_stamps") if libhammer_shared is None: print("Warning: C++ bindings require the shared library (not available in coverage/gprof builds). 
Skipping.") @@ -99,13 +100,24 @@ cpp_test = cppenv.Program( ) Default(cpp_test) -cpptest = Alias( - "testcpp", +reporter = os.path.join(Dir("#").abspath, "tools", "test_reporter.py") +cppdir = os.path.join(Dir("#").abspath, env["BUILD_BASE"], "src/bindings/cpp") +results_file = os.path.join(cppdir, "hammer_cpp.results") + +cpptestexec = cppenv.Command( + "hammer_tests.stamp", [cpp_test], - "env LD_LIBRARY_PATH=%s %s" % (hammer_lib_dir, cpp_test[0].path), + [ + "LD_LIBRARY_PATH=%s %s %s --binding C++ --results-file %s -- %s" + % (hammer_lib_dir, sys.executable, reporter, results_file, cpp_test[0].path), + "touch $TARGET", + ], ) -AlwaysBuild(cpptest) +cpptest = Alias("testcpp", [cpptestexec], cpptestexec) +AlwaysBuild(cpptestexec) testruns.append(cpptest) +binding_results.append(("C++", results_file)) +binding_test_stamps.append(cpptestexec[0]) cppinstall = Alias( "installcpp", diff --git a/src/bindings/java/SConscript b/src/bindings/java/SConscript index daec1b0..4e14d6e 100644 --- a/src/bindings/java/SConscript +++ b/src/bindings/java/SConscript @@ -7,7 +7,7 @@ import os import sys import shutil -Import("env libhammer_shared testruns targets") +Import("env libhammer_shared testruns targets binding_results binding_test_stamps") if libhammer_shared is None: print("Warning: Java bindings require the shared library (not available in coverage/gprof builds). Skipping.") @@ -96,6 +96,9 @@ javaenv.Depends(jar_file, jni_lib) Default(jar_file) # Run the Java test suite. 
+reporter = os.path.join(Dir("#").abspath, "tools", "test_reporter.py") +results_file = os.path.join(javadir, "hammer_java.results") + javatestenv = javaenv.Clone() javatestenv["ENV"]["LD_LIBRARY_PATH"] = hammer_lib_dir + ":" + javadir javatestexec = javatestenv.Command( @@ -105,7 +108,9 @@ javatestexec = javatestenv.Command( "javac -cp " + os.path.join(javadir, "hammer.jar") + " -d " + java_classes_dir + " $SOURCE", "LD_LIBRARY_PATH=" + hammer_lib_dir + ":" + javadir + - " java" + " " + sys.executable + " " + reporter + + " --binding Java --results-file " + results_file + + " -- java" " -Djava.library.path=" + javadir + " -cp " + java_classes_dir + ":" + os.path.join(javadir, "hammer.jar") + " HammerTests", @@ -115,6 +120,8 @@ javatestexec = javatestenv.Command( javatest = Alias("testjava", [javatestexec], javatestexec) AlwaysBuild(javatestexec) testruns.append(javatest) +binding_results.append(("Java", results_file)) +binding_test_stamps.append(javatestexec[0]) javainstallexec = javaenv.Command( None, jar_file, diff --git a/src/bindings/python/SConscript b/src/bindings/python/SConscript index 3e9b8c8..23320d7 100644 --- a/src/bindings/python/SConscript +++ b/src/bindings/python/SConscript @@ -5,8 +5,9 @@ from __future__ import absolute_import, division, print_function import os import shutil +import sys -Import("env libhammer_shared testruns targets") +Import("env libhammer_shared testruns targets binding_results binding_test_stamps") if libhammer_shared is None: print("Warning: Python bindings require the shared library (not available in coverage/gprof builds). 
Skipping.") @@ -29,24 +30,29 @@ libhammer_python = pythonenv.Command( ) Default(libhammer_python) +reporter = os.path.join(Dir("#").abspath, "tools", "test_reporter.py") +project_root = Dir("#").abspath +abs_pydir = os.path.join(project_root, env["BUILD_BASE"], "src/bindings/python") +results_file = os.path.join(abs_pydir, "hammer_python.results") +lib_dir = os.path.dirname(str(libhammer_shared[0])) + pytestenv = pythonenv.Clone() -pytestenv["ENV"]["LD_LIBRARY_PATH"] = os.path.dirname(str(libhammer_shared[0])) -pytestenv["ENV"]["PYTHONPATH"] = pydir pytests = ["hammer_tests.py"] pytestexec = pytestenv.Command( "hammer_tests.stamp", pytests + list(libhammer_python), - "LD_LIBRARY_PATH=%s PYTHONPATH=%s %s -m unittest discover -s %s -p 'hammer_tests.py' && touch $TARGET" - % ( - os.path.dirname(str(libhammer_shared[0])), - pydir, - env["python"], - pydir, - ), + [ + "LD_LIBRARY_PATH=%s PYTHONPATH=%s %s %s --binding Python --results-file %s" + " -- %s -m unittest discover -s %s -p 'hammer_tests.py'" + % (lib_dir, abs_pydir, sys.executable, reporter, results_file, env["python"], abs_pydir), + "touch $TARGET", + ], ) pytest = Alias("testpython", [pytestexec], pytestexec) AlwaysBuild(pytestexec) testruns.append(pytest) +binding_results.append(("Python", results_file)) +binding_test_stamps.append(pytestexec[0]) pyinstallexec = pythonenv.Command( None, libhammer_python, "%s %s install" % (env["python"], pysetup) diff --git a/tools/test_reporter.py b/tools/test_reporter.py new file mode 100644 index 0000000..3aa4737 --- /dev/null +++ b/tools/test_reporter.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +""" +Runs a language binding test, streams its output, parses pass/fail counts, +and writes the results to a file for the final summary. + +Usage: + python3 test_reporter.py --binding NAME --results-file PATH -- CMD [ARGS...] 
+"""
+import argparse
+import re
+import subprocess
+import sys
+
+
+def _parse(binding, text):
+    """Return (passed, failed) scraped from ``text``; (0, 0) when nothing is recognised."""
+    b = binding.lower()
+    if b == "core":
+        # TAP format (GLib 2.38+ default): "ok N /path" / "not ok N /path"
+        tap_ok = len(re.findall(r"^ok \d+", text, re.MULTILINE))
+        tap_fail = len(re.findall(r"^not ok \d+", text, re.MULTILINE))
+        if tap_ok + tap_fail > 0:
+            return tap_ok, tap_fail
+        # GLib non-TAP summary: "OK, N passed; N skipped" or "FAIL, N passed, N failed; ..."
+        m = re.search(r"(?:OK|FAIL),\s*(\d+) (?:tests )?passed(?:.*?(\d+) (?:tests )?failed)?", text, re.DOTALL)
+        if m:
+            return int(m.group(1)), int(m.group(2) or 0)
+        # Last resort: count individual result lines
+        ok = len(re.findall(r":\s+OK\b", text))
+        fail = len(re.findall(r":\s+FAIL\b", text))
+        return ok, fail
+    elif b == "python":
+        # unittest: "Ran N tests" plus, on failure, "FAILED (failures=X, errors=Y)".
+        m = re.search(r"Ran (\d+) test", text)
+        n = int(m.group(1)) if m else 0
+        hits = (re.search(pat, text) for pat in (r"failures=(\d+)", r"errors=(\d+)"))
+        failures = sum(int(hit.group(1)) for hit in hits if hit)
+        return n - failures, failures
+    elif b == "java":
+        m = re.search(r"Results: (\d+) passed, (\d+) failed", text)
+        return (int(m.group(1)), int(m.group(2))) if m else (0, 0)
+    elif b in ("c++", "cpp"):
+        # googletest pads its summary banners ("[  PASSED  ] N tests."), so match
+        # the brackets with flexible whitespace instead of a fixed number of spaces.
+        mp = re.search(r"\[\s*PASSED\s*\]\s*(\d+) test", text)
+        mf = re.search(r"\[\s*FAILED\s*\]\s*(\d+) test", text)
+        return (int(mp.group(1)) if mp else 0, int(mf.group(1)) if mf else 0)
+    return 0, 0
+
+
+def main():
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--binding", required=True, help="Display name for this binding")
+    ap.add_argument("--results-file", required=True, help="Path to write pass/fail counts")
+    ap.add_argument("cmd", nargs=argparse.REMAINDER)
+    args = ap.parse_args()
+
+    cmd = args.cmd[1:] if args.cmd and args.cmd[0] == "--" else args.cmd
+    if not cmd:
+        print("test_reporter: no command given", file=sys.stderr)
+        sys.exit(1)
+
+    if args.binding.lower() == "core":
+        header = "── Core Tests ──"
+    else:
+        header = f"── {args.binding} Bindings ──"
+
print(f"\n\033[1m{header}\033[0m\n", flush=True) + + lines = [] + proc = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + ) + for line in proc.stdout: + sys.stdout.write(line) + sys.stdout.flush() + lines.append(line) + proc.wait() + + output = "".join(lines) + passed, failed = _parse(args.binding, output) + + with open(args.results_file, "w") as f: + f.write(f"{args.binding} {passed} {failed}\n") + + sys.exit(proc.returncode) + + +if __name__ == "__main__": + main() From c68a0605b69a2f229175df33b8f93575f25ec007 Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Fri, 27 Mar 2026 16:11:25 -0400 Subject: [PATCH 07/14] Fix tests by replacing em dahses --- src/bindings/java/HammerTests.java | 4 ++-- src/bindings/swig/hammer.i | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/bindings/java/HammerTests.java b/src/bindings/java/HammerTests.java index fbc54a2..f5c3eda 100644 --- a/src/bindings/java/HammerTests.java +++ b/src/bindings/java/HammerTests.java @@ -12,7 +12,7 @@ public class HammerTests { System.loadLibrary("hammer_jni"); } - // Token type constants — mirror of HTokenType_ in hammer.h. + // Token type constants - mirror of HTokenType_ in hammer.h. 
static final int TT_NONE = 1; static final int TT_BYTES = 2; static final int TT_SINT = 4; @@ -42,7 +42,7 @@ static void assertNotNull(String name, Object obj) { static void assertEqual(String name, long expected, long actual) { if (expected != actual) { failed++; - System.err.println("FAIL: " + name + " — expected " + expected + ", got " + actual); + System.err.println("FAIL: " + name + " - expected " + expected + ", got " + actual); } else { passed++; } diff --git a/src/bindings/swig/hammer.i b/src/bindings/swig/hammer.i index a34f3d0..54c4b04 100644 --- a/src/bindings/swig/hammer.i +++ b/src/bindings/swig/hammer.i @@ -173,7 +173,7 @@ (const uint8_t* charset, size_t length) } -// uint8_t as short — Java's byte is signed; short avoids sign-extension confusion. +// uint8_t as short - Java's byte is signed; short avoids sign-extension confusion. %typemap(jni) uint8_t "jshort" %typemap(jtype) uint8_t "short" %typemap(jstype) uint8_t "short" @@ -182,7 +182,7 @@ %typemap(out) uint8_t { $result = (jshort)$1; } %typemap(javaout) uint8_t { return (short)($jnicall & 0xff); } -// void*[] (NULL-terminated parser array) — Java side passes HParser_[], marshalled via long[]. +// void*[] (NULL-terminated parser array) - Java side passes HParser_[], marshalled via long[]. %typemap(jni) void*[] "jlongArray" %typemap(jtype) void*[] "long[]" %typemap(jstype) void*[] "HParser[]" @@ -217,7 +217,7 @@ // Suppress GCC attributes that SWIG cannot parse. #define __attribute__(x) -// Ignore va_list variants — SWIG cannot generate correct wrappers for va_list parameters. +// Ignore va_list variants - SWIG cannot generate correct wrappers for va_list parameters. %ignore h_sequence__v; %ignore h_sequence__mv; %ignore h_drop_from___v; @@ -227,7 +227,7 @@ %ignore h_permutation__v; %ignore h_permutation__mv; -// Ignore varargs variants — Python uses the __a (array) variants instead. +// Ignore varargs variants - Python uses the __a (array) variants instead. 
// Without this SWIG generates wrappers that call these sentinel-terminated functions // without the required NULL terminator, causing -Wmissing-sentinel warnings. %ignore h_sequence; @@ -462,7 +462,7 @@ def int64(): return _h_int64() } %extend HParsedToken_ { - /* Token type as int — compare against TT_NONE, TT_BYTES, TT_SINT, TT_UINT, TT_SEQUENCE. */ + /* Token type as int - compare against TT_NONE, TT_BYTES, TT_SINT, TT_UINT, TT_SEQUENCE. */ int tokenType() { return (int)$self->token_type; } From 035e0ce877ee649c8dacfd4450cf58ddd31634c2 Mon Sep 17 00:00:00 2001 From: Justin Jones Date: Tue, 31 Mar 2026 14:53:56 -0400 Subject: [PATCH 08/14] Fix scons javainstall --- src/bindings/java/SConscript | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/bindings/java/SConscript b/src/bindings/java/SConscript index 4e14d6e..04b28fa 100644 --- a/src/bindings/java/SConscript +++ b/src/bindings/java/SConscript @@ -123,9 +123,8 @@ testruns.append(javatest) binding_results.append(("Java", results_file)) binding_test_stamps.append(javatestexec[0]) -javainstallexec = javaenv.Command( - None, jar_file, - "install -m 644 " + os.path.join(javadir, "hammer.jar") + " /usr/local/share/java/", -) -javainstall = Alias("installjava", [javainstallexec], javainstallexec) +jar_install = javaenv.Install('$prefix/share/java', jar_file) +jni_install = javaenv.Install('$prefix/lib', jni_lib) + +javainstall = Alias("installjava", [jar_install, jni_install]) targets.append(javainstall) From 820446df85de1ec3cf43f78dfeccaa34a04cebc2 Mon Sep 17 00:00:00 2001 From: Justin Jones Date: Tue, 31 Mar 2026 15:58:02 -0400 Subject: [PATCH 09/14] fix spelling mistake --- src/t_misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/t_misc.c b/src/t_misc.c index 6102531..d99278e 100644 --- a/src/t_misc.c +++ b/src/t_misc.c @@ -29,7 +29,7 @@ static void test_tt_registry(void) { g_test_message("Unknown token type should not return a name"); g_test_fail(); } - 
g_check_cmp_int32(h_get_token_type_number("com.riversideresearch.test.unkown_token_type"), ==, + g_check_cmp_int32(h_get_token_type_number("com.riversideresearch.test.unknown_token_type"), ==, 0); } From 2c4c6267335d0a1d578c58d5e1d4ad26add4367d Mon Sep 17 00:00:00 2001 From: Mahmoud Elbasiouny Date: Wed, 1 Apr 2026 15:15:15 -0400 Subject: [PATCH 10/14] Remove Java static initializer from docs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/bindings/cpp/README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/bindings/cpp/README.md b/src/bindings/cpp/README.md index 3b50a9a..f290843 100644 --- a/src/bindings/cpp/README.md +++ b/src/bindings/cpp/README.md @@ -38,10 +38,6 @@ g++ -std=c++14 -I/usr/local/include/hammer -o myparser myparser.cpp -lhammer #include #include -static { - System.loadLibrary("hammer_jni"); -} - int main() { using namespace hammer; From 8371e3067e8fd4f9ae7ba480aace0c0f94e4a51f Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Mon, 30 Mar 2026 23:43:27 -0400 Subject: [PATCH 11/14] Make ParseResult move-only to prevent double-free on copy --- src/bindings/cpp/hammer/hammer.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/bindings/cpp/hammer/hammer.hpp b/src/bindings/cpp/hammer/hammer.hpp index 4a1096c..608caea 100644 --- a/src/bindings/cpp/hammer/hammer.hpp +++ b/src/bindings/cpp/hammer/hammer.hpp @@ -67,6 +67,18 @@ namespace hammer { ParseResult(HParseResult *result) : _result(result) {} + ParseResult(const ParseResult&) = delete; + ParseResult& operator=(const ParseResult&) = delete; + ParseResult(ParseResult&& other) noexcept : _result(other._result) { other._result = nullptr; } + ParseResult& operator=(ParseResult&& other) noexcept { + if (this != &other) { + h_parse_result_free(_result); + _result = other._result; + other._result = nullptr; + } + return *this; + } + ParsedToken getAST() { return ParsedToken(_result->ast); } From 49c9d5a298aeeae5d6e6a03ee4982fcb6cb03534 Mon 
Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Tue, 31 Mar 2026 21:37:24 -0400 Subject: [PATCH 12/14] Added tests for each introduced binding combinator --- src/bindings/cpp/cpp_tests.cpp | 68 +++++ src/bindings/cpp/hammer/hammer.hpp | 44 +++ src/bindings/java/HammerTests.java | 473 +++++++++++++++++++++++++++-- 3 files changed, 558 insertions(+), 27 deletions(-) diff --git a/src/bindings/cpp/cpp_tests.cpp b/src/bindings/cpp/cpp_tests.cpp index 64a18cf..45a8130 100644 --- a/src/bindings/cpp/cpp_tests.cpp +++ b/src/bindings/cpp/cpp_tests.cpp @@ -353,6 +353,74 @@ namespace { EXPECT_TRUE(ParsesTo(p, "aaa", "(u0x61 (u0x61 (u0x61)))")); } + TEST(ParserTypes, Bytes) { + Parser p = Bytes(2); + EXPECT_TRUE(ParsesTo(p, "ab", "<61.62>")); + EXPECT_TRUE(ParseFails(p, "a")); + EXPECT_TRUE(ParseFails(p, "")); + } + + TEST(ParserTypes, Permutation) { + Parser p = Permutation(Ch('a'), Ch('b'), NULL); + EXPECT_TRUE(ParsesTo(p, "ab", "(u0x61 u0x62)")); + EXPECT_TRUE(ParsesTo(p, "ba", "(u0x61 u0x62)")); + EXPECT_TRUE(ParseFails(p, "aa")); + EXPECT_TRUE(ParseFails(p, "b")); + } + + TEST(ParserTypes, DropFrom) { + Parser seq = Sequence(Ch('a'), Ch('b'), Ch('c'), NULL); + Parser p = DropFrom(seq, 1, -1); + EXPECT_TRUE(ParsesTo(p, "abc", "(u0x61 u0x63)")); + EXPECT_TRUE(ParseFails(p, "ab")); + } + + TEST(ParserTypes, WithEndianness) { + Parser p = WithEndianness(BYTE_LITTLE_ENDIAN | BIT_BIG_ENDIAN, Uint16()); + EXPECT_TRUE(ParsesTo(p, std::string("\x01\x00", 2), "u0x1")); + EXPECT_TRUE(ParseFails(p, "\x01")); + } + + TEST(ParserTypes, PutGetValue) { + Parser p = Sequence(PutValue(Ch('a'), "c"), GetValue("c"), NULL); + EXPECT_TRUE(ParsesTo(p, "a", "(u0x61 u0x61)")); + EXPECT_TRUE(ParseFails(p, "b")); + } + + TEST(ParserTypes, FreeValue) { + Parser p = Sequence(PutValue(Ch('a'), "c"), FreeValue("c"), NULL); + EXPECT_TRUE(ParsesTo(p, "a", "(u0x61 u0x61)")); + EXPECT_TRUE(ParseFails(p, "b")); + } + + static HParser* bind_next(HAllocator *mm__, const HParsedToken *x, void *env) { + 
(void)mm__; (void)env; (void)x; + return h_ch('b'); + } + + TEST(ParserTypes, Bind) { + Parser p = Bind(Ch('a'), bind_next); + EXPECT_TRUE(ParsesTo(p, "ab", "u0x62")); + EXPECT_TRUE(ParseFails(p, "ac")); + EXPECT_TRUE(ParseFails(p, "b")); + } + + TEST(ParserTypes, Skip) { + Parser p = Sequence(Skip(8), Ch('b'), NULL); + EXPECT_TRUE(ParsesTo(p, "ab", "(u0x62)")); + EXPECT_TRUE(ParseFails(p, "b")); + } + + TEST(ParserTypes, Tell) { + Parser p = Sequence(Ch('a'), Tell(), NULL); + EXPECT_TRUE(ParsesTo(p, "a", "(u0x61 u0x8)")); + } + + TEST(ParserTypes, Seek) { + Parser p = Sequence(Ch('a'), Seek(0, SEEK_SET), Ch('a'), NULL); + EXPECT_TRUE(ParsesTo(p, "a", "(u0x61 u0 u0x61)")); + } + }; int main(int argc, char** argv) { diff --git a/src/bindings/cpp/hammer/hammer.hpp b/src/bindings/cpp/hammer/hammer.hpp index 608caea..106be41 100644 --- a/src/bindings/cpp/hammer/hammer.hpp +++ b/src/bindings/cpp/hammer/hammer.hpp @@ -213,6 +213,50 @@ namespace hammer { h_bind_indirect((HParser*)parser, p.parser); } }; + + static inline Parser Bytes(size_t len) { return Parser(h_bytes(len)); } + + static inline Parser Permutation(Parser p, ...) { + va_list ap; + va_start(ap, p); + HParser* ret = h_permutation__v(*(HParser**)(void*)&p, ap); + va_end(ap); + return Parser(ret); + } + + // Indices to drop are terminated by -1, e.g. DropFrom(seq, 0, 2, -1). + static inline Parser DropFrom(Parser p, ...) 
{ + va_list ap; + va_start(ap, p); + HParser* ret = h_drop_from___v(*(HParser**)(void*)&p, ap); + va_end(ap); + return Parser(ret); + } + + static inline Parser WithEndianness(char endianness, Parser p) { + return Parser(h_with_endianness(endianness, p.parser)); + } + + static inline Parser PutValue(Parser p, const char *name) { + return Parser(h_put_value(p.parser, name)); + } + static inline Parser GetValue(const char *name) { + return Parser(h_get_value(name)); + } + static inline Parser FreeValue(const char *name) { + return Parser(h_free_value(name)); + } + + static inline Parser Bind(Parser p, HContinuation k, void *env) { + return Parser(h_bind(p.parser, k, env)); + } + static inline Parser Bind(Parser p, HContinuation k) { + return Parser(h_bind(p.parser, k, NULL)); + } + + static inline Parser Skip(size_t n) { return Parser(h_skip(n)); } + static inline Parser Seek(ssize_t offset, int whence) { return Parser(h_seek(offset, whence)); } + static inline Parser Tell() { return Parser(h_tell()); } } #pragma GCC diagnostic pop diff --git a/src/bindings/java/HammerTests.java b/src/bindings/java/HammerTests.java index f5c3eda..235fc02 100644 --- a/src/bindings/java/HammerTests.java +++ b/src/bindings/java/HammerTests.java @@ -83,6 +83,28 @@ static void testChRange() { assertNull("ch_range:fail", p.parse(new byte[]{(byte)'d'})); } + static void testInt8() { + HParser p = hammer.h_int8(); + + HParseResult r = p.parse(new byte[]{(byte)0x88}); + assertNotNull("int8:success", r); + assertTrue("int8:type", r.getAst().tokenType() == TT_SINT); + assertEqual("int8:value", -0x78L, r.getAst().sintValue()); + + assertNull("int8:fail", p.parse(new byte[]{})); + } + + static void testInt16() { + HParser p = hammer.h_int16(); + + HParseResult r = p.parse(new byte[]{(byte)0xfe, (byte)0x00}); + assertNotNull("int16:success", r); + assertTrue("int16:type", r.getAst().tokenType() == TT_SINT); + assertEqual("int16:value", -0x200L, r.getAst().sintValue()); + + assertNull("int16:fail", 
p.parse(new byte[]{(byte)0xfe})); + } + static void testInt64() { HParser p = hammer.h_int64(); byte[] input = {(byte)0xff,(byte)0xff,(byte)0xff,(byte)0xfe, @@ -108,19 +130,25 @@ static void testInt32() { assertNull("int32:fail", p.parse(new byte[]{(byte)0xff,(byte)0xfe,(byte)0x00})); } - static void testUint64() { - HParser p = hammer.h_uint64(); - byte[] input = {(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x02, - (byte)0x00,(byte)0x00,(byte)0x00,(byte)0x00}; + static void testUint8() { + HParser p = hammer.h_uint8(); - HParseResult r = p.parse(input); - assertNotNull("uint64:success", r); - assertTrue("uint64:type", r.getAst().tokenType() == TT_UINT); - assertEqual("uint64:value", 0x200000000L, r.getAst().uintValue()); + HParseResult r = p.parse(new byte[]{(byte)0x78}); + assertNotNull("uint8:success", r); + assertEqual("uint8:value", 0x78L, r.getAst().uintValue()); - assertNull("uint64:fail", p.parse(new byte[]{ - (byte)0x00,(byte)0x00,(byte)0x00,(byte)0x02, - (byte)0x00,(byte)0x00,(byte)0x00})); + assertNull("uint8:fail", p.parse(new byte[]{})); + } + + static void testUint16() { + HParser p = hammer.h_uint16(); + + HParseResult r = p.parse(new byte[]{(byte)0x02, (byte)0x00}); + assertNotNull("uint16:success", r); + assertTrue("uint16:type", r.getAst().tokenType() == TT_UINT); + assertEqual("uint16:value", 0x200L, r.getAst().uintValue()); + + assertNull("uint16:fail", p.parse(new byte[]{(byte)0x02})); } static void testUint32() { @@ -133,14 +161,19 @@ static void testUint32() { assertNull("uint32:fail", p.parse(new byte[]{(byte)0x00,(byte)0x02,(byte)0x00})); } - static void testUint8() { - HParser p = hammer.h_uint8(); + static void testUint64() { + HParser p = hammer.h_uint64(); + byte[] input = {(byte)0x00,(byte)0x00,(byte)0x00,(byte)0x02, + (byte)0x00,(byte)0x00,(byte)0x00,(byte)0x00}; - HParseResult r = p.parse(new byte[]{(byte)0x78}); - assertNotNull("uint8:success", r); - assertEqual("uint8:value", 0x78L, r.getAst().uintValue()); + HParseResult r = 
p.parse(input); + assertNotNull("uint64:success", r); + assertTrue("uint64:type", r.getAst().tokenType() == TT_UINT); + assertEqual("uint64:value", 0x200000000L, r.getAst().uintValue()); - assertNull("uint8:fail", p.parse(new byte[]{})); + assertNull("uint64:fail", p.parse(new byte[]{ + (byte)0x00,(byte)0x00,(byte)0x00,(byte)0x02, + (byte)0x00,(byte)0x00,(byte)0x00})); } static void testIntRange() { @@ -153,6 +186,84 @@ static void testIntRange() { assertNull("int_range:fail", p.parse(new byte[]{11})); } + static void testIn() { + HParser p = hammer.h_in(new byte[]{(byte)'a', (byte)'b', (byte)'c'}); + + HParseResult r = p.parse(new byte[]{(byte)'b'}); + assertNotNull("in:success", r); + assertTrue("in:type", r.getAst().tokenType() == TT_UINT); + assertEqual("in:value", 'b', r.getAst().uintValue()); + + assertNull("in:fail", p.parse(new byte[]{(byte)'d'})); + } + + static void testNotIn() { + HParser p = hammer.h_not_in(new byte[]{(byte)'a', (byte)'b', (byte)'c'}); + + HParseResult r = p.parse(new byte[]{(byte)'d'}); + assertNotNull("not_in:success", r); + assertTrue("not_in:type", r.getAst().tokenType() == TT_UINT); + assertEqual("not_in:value", 'd', r.getAst().uintValue()); + + assertNull("not_in:fail", p.parse(new byte[]{(byte)'a'})); + } + + static void testBytes() { + HParser p = hammer.h_bytes(2); + + HParseResult r = p.parse(new byte[]{(byte)'a', (byte)'b'}); + assertNotNull("bytes:success", r); + assertTrue("bytes:type", r.getAst().tokenType() == TT_BYTES); + assertEqual("bytes:length", 2L, r.getAst().bytesLength()); + assertEqual("bytes:byte0", 'a', r.getAst().byteAt(0)); + assertEqual("bytes:byte1", 'b', r.getAst().byteAt(1)); + + assertNull("bytes:fail", p.parse(new byte[]{(byte)'a'})); + } + + static void testWhitespace() { + HParser p = hammer.h_whitespace(hammer.h_ch((short)'a')); + + assertNotNull("whitespace:success", p.parse(new byte[]{(byte)'a'})); + assertNotNull("whitespace:leading_space", p.parse(new byte[]{(byte)' ', (byte)'a'})); + 
assertNotNull("whitespace:leading_tab", p.parse(new byte[]{(byte)'\t', (byte)'a'})); + assertNull("whitespace:fail", p.parse(new byte[]{(byte)'_', (byte)'a'})); + } + + static void testLeft() { + HParser p = hammer.h_left(hammer.h_ch((short)'a'), hammer.h_ch((short)' ')); + + HParseResult r = p.parse(new byte[]{(byte)'a', (byte)' '}); + assertNotNull("left:success", r); + assertEqual("left:value", 'a', r.getAst().uintValue()); + + assertNull("left:fail_no_right", p.parse(new byte[]{(byte)'a'})); + assertNull("left:fail", p.parse(new byte[]{(byte)'b', (byte)' '})); + } + + static void testRight() { + HParser p = hammer.h_right(hammer.h_ch((short)' '), hammer.h_ch((short)'a')); + + HParseResult r = p.parse(new byte[]{(byte)' ', (byte)'a'}); + assertNotNull("right:success", r); + assertEqual("right:value", 'a', r.getAst().uintValue()); + + assertNull("right:fail_no_left", p.parse(new byte[]{(byte)'a'})); + assertNull("right:fail", p.parse(new byte[]{(byte)'b', (byte)'a'})); + } + + static void testMiddle() { + HParser p = hammer.h_middle( + hammer.h_ch((short)' '), hammer.h_ch((short)'a'), hammer.h_ch((short)' ')); + + HParseResult r = p.parse(new byte[]{(byte)' ', (byte)'a', (byte)' '}); + assertNotNull("middle:success", r); + assertEqual("middle:value", 'a', r.getAst().uintValue()); + + assertNull("middle:fail_no_right", p.parse(new byte[]{(byte)' ', (byte)'a'})); + assertNull("middle:fail", p.parse(new byte[]{(byte)'a'})); + } + static void testSequence() { HParser p = hammer.h_sequence__a(new HParser[]{ hammer.h_ch((short)'a'), @@ -187,6 +298,42 @@ static void testChoice() { assertNull("choice:fail", p.parse(new byte[]{(byte)'c'})); } + static void testButNot() { + HParser p = hammer.h_butnot( + hammer.h_ch((short)'a'), + hammer.h_token(new byte[]{(byte)'a', (byte)'b'})); + + assertNotNull("butnot:success", p.parse(new byte[]{(byte)'a'})); + assertNull("butnot:fail", p.parse(new byte[]{(byte)'a', (byte)'b'})); + + HParser q = hammer.h_butnot( + 
hammer.h_ch_range((short)'0', (short)'9'), + hammer.h_ch((short)'6')); + + assertNotNull("butnot_range:success", q.parse(new byte[]{(byte)'4'})); + assertNull("butnot_range:fail", q.parse(new byte[]{(byte)'6'})); + } + + static void testDifference() { + HParser p = hammer.h_difference( + hammer.h_token(new byte[]{(byte)'a', (byte)'b'}), + hammer.h_ch((short)'a')); + + assertNotNull("difference:success", p.parse(new byte[]{(byte)'a', (byte)'b'})); + assertNull("difference:fail", p.parse(new byte[]{(byte)'a'})); + } + + static void testXor() { + HParser p = hammer.h_xor( + hammer.h_ch_range((short)'0', (short)'6'), + hammer.h_ch_range((short)'5', (short)'9')); + + assertNotNull("xor:success_0", p.parse(new byte[]{(byte)'0'})); + assertNotNull("xor:success_9", p.parse(new byte[]{(byte)'9'})); + assertNull("xor:fail_overlap", p.parse(new byte[]{(byte)'5'})); + assertNull("xor:fail_neither", p.parse(new byte[]{(byte)'a'})); + } + static void testMany() { HParser p = hammer.h_many(hammer.h_ch((short)'a')); @@ -209,14 +356,20 @@ static void testMany1() { assertNull("many1:fail_empty", p.parse(new byte[]{})); } - static void testEndP() { - HParser p = hammer.h_sequence__a(new HParser[]{ + static void testRepeatN() { + HParser p = hammer.h_repeat_n(hammer.h_choice__a(new HParser[]{ hammer.h_ch((short)'a'), - hammer.h_end_p(), - }); + hammer.h_ch((short)'b'), + }), 2); - assertNotNull("end_p:success", p.parse(new byte[]{(byte)'a'})); - assertNull("end_p:fail_trailing", p.parse(new byte[]{(byte)'a', (byte)'a'})); + HParseResult r = p.parse(new byte[]{(byte)'a', (byte)'b', (byte)'c'}); + assertNotNull("repeat_n:success", r); + assertTrue("repeat_n:seq", r.getAst().tokenType() == TT_SEQUENCE); + assertEqual("repeat_n:count", 2L, r.getAst().seqLength()); + assertEqual("repeat_n:elem0", 'a', r.getAst().seqElement(0).uintValue()); + assertEqual("repeat_n:elem1", 'b', r.getAst().seqElement(1).uintValue()); + + assertNull("repeat_n:fail_wrong", p.parse(new byte[]{(byte)'a', 
(byte)'d'})); } static void testOptional() { @@ -239,6 +392,23 @@ static void testOptional() { assertNull("optional:fail", p.parse(new byte[]{(byte)'a',(byte)'e',(byte)'c'})); } + static void testIgnore() { + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_ignore(hammer.h_ch((short)'b')), + hammer.h_ch((short)'c'), + }); + + HParseResult r = p.parse(new byte[]{(byte)'a', (byte)'b', (byte)'c'}); + assertNotNull("ignore:success", r); + // h_ignore strips result from sequence + assertEqual("ignore:length", 2L, r.getAst().seqLength()); + assertEqual("ignore:elem0", 'a', r.getAst().seqElement(0).uintValue()); + assertEqual("ignore:elem1", 'c', r.getAst().seqElement(1).uintValue()); + + assertNull("ignore:fail", p.parse(new byte[]{(byte)'a', (byte)'c'})); + } + static void testSepBy() { HParser p = hammer.h_sepBy( hammer.h_choice__a(new HParser[]{ @@ -258,25 +428,274 @@ static void testSepBy() { assertEqual("sepBy:empty_count", 0L, r0.getAst().seqLength()); } + static void testSepBy1() { + HParser p = hammer.h_sepBy1( + hammer.h_choice__a(new HParser[]{ + hammer.h_ch((short)'1'), + hammer.h_ch((short)'2'), + hammer.h_ch((short)'3'), + }), + hammer.h_ch((short)',') + ); + + HParseResult r = p.parse(new byte[]{(byte)'1',(byte)',',(byte)'2',(byte)',',(byte)'3'}); + assertNotNull("sepBy1:success", r); + assertEqual("sepBy1:count", 3L, r.getAst().seqLength()); + + HParseResult r1 = p.parse(new byte[]{(byte)'3'}); + assertNotNull("sepBy1:single", r1); + assertEqual("sepBy1:single_count", 1L, r1.getAst().seqLength()); + + assertNull("sepBy1:fail_empty", p.parse(new byte[]{})); + } + + static void testEpsilonP() { + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_epsilon_p(), + hammer.h_ch((short)'b'), + }); + + HParseResult r = p.parse(new byte[]{(byte)'a', (byte)'b'}); + assertNotNull("epsilon_p:success", r); + // epsilon_p TT_NONE is stripped from sequence result + assertEqual("epsilon_p:length", 2L, 
r.getAst().seqLength()); + assertEqual("epsilon_p:elem0", 'a', r.getAst().seqElement(0).uintValue()); + assertEqual("epsilon_p:elem1", 'b', r.getAst().seqElement(1).uintValue()); + } + + static void testEndP() { + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_end_p(), + }); + + assertNotNull("end_p:success", p.parse(new byte[]{(byte)'a'})); + assertNull("end_p:fail_trailing", p.parse(new byte[]{(byte)'a', (byte)'a'})); + } + + static void testNothingP() { + HParser p = hammer.h_nothing_p(); + + assertNull("nothing_p:fail_char", p.parse(new byte[]{(byte)'a'})); + assertNull("nothing_p:fail_empty", p.parse(new byte[]{})); + } + + static void testPermutation() { + HParser p = hammer.h_permutation__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_ch((short)'b'), + }); + + HParseResult r1 = p.parse(new byte[]{(byte)'a', (byte)'b'}); + assertNotNull("permutation:ab", r1); + assertTrue("permutation:ab_seq", r1.getAst().tokenType() == TT_SEQUENCE); + assertEqual("permutation:ab_length", 2L, r1.getAst().seqLength()); + assertEqual("permutation:ab_elem0", 'a', r1.getAst().seqElement(0).uintValue()); + assertEqual("permutation:ab_elem1", 'b', r1.getAst().seqElement(1).uintValue()); + + // Result is always in argument order regardless of parse order + HParseResult r2 = p.parse(new byte[]{(byte)'b', (byte)'a'}); + assertNotNull("permutation:ba", r2); + assertEqual("permutation:ba_elem0", 'a', r2.getAst().seqElement(0).uintValue()); + assertEqual("permutation:ba_elem1", 'b', r2.getAst().seqElement(1).uintValue()); + + assertNull("permutation:fail_aa", p.parse(new byte[]{(byte)'a', (byte)'a'})); + } + + static void testLengthValue() { + // uint8 for length, then that many uint8 values + HParser p = hammer.h_length_value(hammer.h_uint8(), hammer.h_uint8()); + + HParseResult r = p.parse(new byte[]{3, (byte)'a', (byte)'b', (byte)'c'}); + assertNotNull("length_value:success", r); + assertTrue("length_value:seq", r.getAst().tokenType() == 
TT_SEQUENCE); + assertEqual("length_value:count", 3L, r.getAst().seqLength()); + + assertNull("length_value:fail", p.parse(new byte[]{3, (byte)'a', (byte)'b'})); + } + + static void testAnd() { + // h_and succeeds without consuming input + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_and(hammer.h_ch((short)'0')), + hammer.h_ch((short)'0'), + }); + HParseResult r = p.parse(new byte[]{(byte)'0'}); + assertNotNull("and:success", r); + // h_and TT_NONE is stripped; only Ch('0') remains + assertEqual("and:length", 1L, r.getAst().seqLength()); + assertEqual("and:value", '0', r.getAst().seqElement(0).uintValue()); + + // and fails if lookahead fails + HParser q = hammer.h_sequence__a(new HParser[]{ + hammer.h_and(hammer.h_ch((short)'0')), + hammer.h_ch((short)'1'), + }); + assertNull("and:lookahead_mismatch", q.parse(new byte[]{(byte)'0'})); + + // and does not consume; trailing input is parsed by next parser + HParser r2 = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'1'), + hammer.h_and(hammer.h_ch((short)'2')), + }); + HParseResult res = r2.parse(new byte[]{(byte)'1', (byte)'2'}); + assertNotNull("and:no_consume", res); + assertEqual("and:no_consume_length", 1L, res.getAst().seqLength()); + } + + static void testNot() { + // h_not succeeds if inner parser fails (negative lookahead) + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'+'), + hammer.h_not(hammer.h_ch((short)'+')), + }); + + assertNotNull("not:success", p.parse(new byte[]{(byte)'+', (byte)'x'})); + assertNull("not:fail", p.parse(new byte[]{(byte)'+', (byte)'+'})); + } + + static void testRightrec() { + // Recursive parser: a* via indirect + HParser p = hammer.h_indirect(); + HParser a = hammer.h_ch((short)'a'); + hammer.h_bind_indirect(p, hammer.h_choice__a(new HParser[]{ + hammer.h_sequence__a(new HParser[]{a, p}), + hammer.h_epsilon_p(), + })); + + assertNotNull("rightrec:a", p.parse(new byte[]{(byte)'a'})); + assertNotNull("rightrec:aa", p.parse(new 
byte[]{(byte)'a', (byte)'a'})); + assertNotNull("rightrec:aaa", p.parse(new byte[]{(byte)'a', (byte)'a', (byte)'a'})); + } + + static void testWithEndianness() { + // BYTE_LITTLE_ENDIAN | BIT_BIG_ENDIAN = 0 | 2 = 2 + // In little-endian byte order, {0x01, 0x00} = 0x0001 = 1 + HParser p = hammer.h_with_endianness( + (char)(hammerConstants.BYTE_LITTLE_ENDIAN | hammerConstants.BIT_BIG_ENDIAN), + hammer.h_uint16()); + + HParseResult r = p.parse(new byte[]{(byte)0x01, (byte)0x00}); + assertNotNull("with_endianness:success", r); + assertEqual("with_endianness:value", 1L, r.getAst().uintValue()); + + assertNull("with_endianness:fail", p.parse(new byte[]{(byte)0x01})); + } + + static void testPutGetValue() { + // put_value stashes the result; get_value retrieves it + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_put_value(hammer.h_ch((short)'a'), "c"), + hammer.h_get_value("c"), + }); + + HParseResult r = p.parse(new byte[]{(byte)'a'}); + assertNotNull("put_get_value:success", r); + assertTrue("put_get_value:seq", r.getAst().tokenType() == TT_SEQUENCE); + assertEqual("put_get_value:length", 2L, r.getAst().seqLength()); + assertEqual("put_get_value:elem0", 'a', r.getAst().seqElement(0).uintValue()); + assertEqual("put_get_value:elem1", 'a', r.getAst().seqElement(1).uintValue()); + + assertNull("put_get_value:fail", p.parse(new byte[]{(byte)'b'})); + } + + static void testFreeValue() { + // free_value retrieves and removes the stashed value + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_put_value(hammer.h_ch((short)'a'), "c"), + hammer.h_free_value("c"), + }); + + HParseResult r = p.parse(new byte[]{(byte)'a'}); + assertNotNull("free_value:success", r); + assertEqual("free_value:length", 2L, r.getAst().seqLength()); + assertEqual("free_value:elem0", 'a', r.getAst().seqElement(0).uintValue()); + assertEqual("free_value:elem1", 'a', r.getAst().seqElement(1).uintValue()); + + assertNull("free_value:fail", p.parse(new byte[]{(byte)'b'})); + } + + 
static void testSkip() { + // h_skip consumes n bits without adding to the parse result + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_skip(8), + hammer.h_ch((short)'b'), + }); + + HParseResult r = p.parse(new byte[]{(byte)'a', (byte)'b'}); + assertNotNull("skip:success", r); + // h_skip TT_NONE is stripped from sequence + assertEqual("skip:length", 1L, r.getAst().seqLength()); + assertEqual("skip:value", 'b', r.getAst().seqElement(0).uintValue()); + + // Only 8 bits available; skip consumes them all, leaving nothing for h_ch + assertNull("skip:fail", p.parse(new byte[]{(byte)'b'})); + } + + static void testTell() { + // h_tell reports the current bit position as TT_UINT + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_tell(), + }); + + HParseResult r = p.parse(new byte[]{(byte)'a'}); + assertNotNull("tell:success", r); + assertTrue("tell:seq", r.getAst().tokenType() == TT_SEQUENCE); + // Tell's TT_UINT is included in the sequence + assertEqual("tell:length", 2L, r.getAst().seqLength()); + // After reading 'a' (8 bits), position is 8 + assertEqual("tell:position", 8L, r.getAst().seqElement(1).uintValue()); + } + // ------------------------------------------------------------------------- public static void main(String[] args) { testToken(); testCh(); testChRange(); - testInt64(); + testInt8(); + testInt16(); testInt32(); - testUint64(); - testUint32(); + testInt64(); testUint8(); + testUint16(); + testUint32(); + testUint64(); testIntRange(); + testIn(); + testNotIn(); + testBytes(); + testWhitespace(); + testLeft(); + testRight(); + testMiddle(); testSequence(); testChoice(); + testButNot(); + testDifference(); + testXor(); testMany(); testMany1(); - testEndP(); + testRepeatN(); testOptional(); + testIgnore(); testSepBy(); + testSepBy1(); + testEpsilonP(); + testEndP(); + testNothingP(); + testPermutation(); + testLengthValue(); + testAnd(); + testNot(); + testRightrec(); + testWithEndianness(); + 
testPutGetValue(); + testFreeValue(); + testSkip(); + testTell(); System.out.printf("Results: %d passed, %d failed%n", passed, failed); if (failed > 0) System.exit(1); From 9d422162eab1547a1b550a17d2cd4a81814d90af Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Mon, 6 Apr 2026 15:16:42 -0400 Subject: [PATCH 13/14] Address PR review feedback: fix SWIG memory leaks, build correctness, expand docs and tests --- src/SConscript | 2 + src/bindings/cpp/README.md | 28 ++++++--- src/bindings/cpp/SConscript | 20 +++---- src/bindings/cpp/cpp_tests.cpp | 8 ++- src/bindings/cpp/hammer/hammer.hpp | 2 +- src/bindings/java/HammerTests.java | 19 +++++- src/bindings/java/README.md | 95 +++++++++++++++--------------- src/bindings/python/README.md | 18 ++++++ src/bindings/swig/hammer.i | 17 +++++- tests/t_misc.c | 2 +- 10 files changed, 139 insertions(+), 72 deletions(-) diff --git a/src/SConscript b/src/SConscript index bb0a9c2..dad7175 100644 --- a/src/SConscript +++ b/src/SConscript @@ -234,4 +234,6 @@ if GetOption("with_tests"): Export("libhammer_static libhammer_shared") for b in env.get("bindings", []): + if b == "none": + continue env.SConscript(["bindings/%s/SConscript" % b]) diff --git a/src/bindings/cpp/README.md b/src/bindings/cpp/README.md index f290843..a502fba 100644 --- a/src/bindings/cpp/README.md +++ b/src/bindings/cpp/README.md @@ -95,18 +95,30 @@ All combinators live in the `hammer` namespace. 
| `Nothing()` | Always fail | | `Action(p, fn)` | Apply action function `fn` to parse result of `p` | | `AttrBool(p, pred)` | Accept result of `p` only if predicate `pred` is true | +| `LengthValue(length, value)` | Parse a length field then that many repetitions of `value` | +| `Permutation(p, ..., NULL)` | Match all parsers in any order; result in argument order | +| `DropFrom(seq, idx, ..., -1)` | Drop indexed elements from a sequence result | +| `WithEndianness(flags, p)` | Override byte/bit endianness for `p` | +| `PutValue(p, name)` | Parse `p` and store the result under `name` | +| `GetValue(name)` | Retrieve a previously stored value by `name` | +| `FreeValue(name)` | Retrieve and free a previously stored value by `name` | +| `Bind(p, k)` | Monadic bind: pass result of `p` to continuation `k` | +| `Skip(n)` | Consume `n` bits without producing a result | +| `Seek(offset, whence)` | Reposition the input stream (like `fseek`) | +| `Tell()` | Produce the current bit position as a `TT_UINT` token | ### Integer Parsers ```cpp -hammer::Uint8() // unsigned 8-bit -hammer::Uint16() // unsigned 16-bit, big-endian -hammer::Uint32() // unsigned 32-bit, big-endian -hammer::Uint64() // unsigned 64-bit, big-endian -hammer::Int8() // signed 8-bit -hammer::Int16() // signed 16-bit, big-endian -hammer::Int32() // signed 32-bit, big-endian -hammer::Int64() // signed 64-bit, big-endian +hammer::Uint8() // unsigned 8-bit +hammer::Uint16() // unsigned 16-bit, big-endian +hammer::Uint32() // unsigned 32-bit, big-endian +hammer::Uint64() // unsigned 64-bit, big-endian +hammer::Int8() // signed 8-bit +hammer::Int16() // signed 16-bit, big-endian +hammer::Int32() // signed 32-bit, big-endian +hammer::Int64() // signed 64-bit, big-endian +hammer::Bits(len, sign) // arbitrary-width integer, `sign` selects signed/unsigned ``` ### Inspecting Parse Results diff --git a/src/bindings/cpp/SConscript b/src/bindings/cpp/SConscript index 6bc4877..27f6ddb 100644 --- 
a/src/bindings/cpp/SConscript +++ b/src/bindings/cpp/SConscript @@ -90,8 +90,17 @@ if not gtest_found: gtest_found = True break +cppinstall = Alias( + "installcpp", + env.Install("$incpath", [ + "#src/bindings/cpp/hammer/hammer.hpp", + "#src/bindings/cpp/hammer/hammer_test.hpp", + ]), +) +targets.append(cppinstall) + if not gtest_found: - print("Warning: gtest not found. Install libgtest-dev to build C++ bindings. Skipping.") + print("Warning: gtest not found. Install libgtest-dev to run C++ binding tests. Skipping tests.") Return() cpp_test = cppenv.Program( @@ -118,12 +127,3 @@ AlwaysBuild(cpptestexec) testruns.append(cpptest) binding_results.append(("C++", results_file)) binding_test_stamps.append(cpptestexec[0]) - -cppinstall = Alias( - "installcpp", - env.Install("$incpath", [ - "#src/bindings/cpp/hammer/hammer.hpp", - "#src/bindings/cpp/hammer/hammer_test.hpp", - ]), -) -targets.append(cppinstall) diff --git a/src/bindings/cpp/cpp_tests.cpp b/src/bindings/cpp/cpp_tests.cpp index 45a8130..8b443c8 100644 --- a/src/bindings/cpp/cpp_tests.cpp +++ b/src/bindings/cpp/cpp_tests.cpp @@ -1,6 +1,7 @@ #include #include #include +#include // internal.h is not C++-compatible; forward-declare only what the tests need. 
extern "C" { @@ -34,6 +35,7 @@ namespace { Parser p = Int64(); EXPECT_TRUE(ParsesTo(p, std::string("\xff\xff\xff\xfe\x00\x00\x00\x00", 8), "s-0x200000000")); EXPECT_TRUE(ParseFails(p, std::string("\xff\xff\xff\xfe\x00\x00\x00", 7))); + EXPECT_TRUE(ParseFails(p, "")); } TEST(ParserTypes, Int32) { @@ -62,23 +64,27 @@ namespace { Parser p = Uint64(); EXPECT_TRUE(ParsesTo(p, std::string("\x00\x00\x00\x02\x00\x00\x00\x00", 8), "u0x200000000")); EXPECT_TRUE(ParseFails(p, std::string("\x00\x00\x00\x02\x00\x00\x00", 7))); + EXPECT_TRUE(ParseFails(p, "")); } TEST(ParserTypes, Uint32) { Parser p = Uint32(); EXPECT_TRUE(ParsesTo(p, std::string("\x00\x02\x00\x00", 4), "u0x20000")); EXPECT_TRUE(ParseFails(p, std::string("\x00\x02\x00", 3))); + EXPECT_TRUE(ParseFails(p, "")); } TEST(ParserTypes, Uint16) { Parser p = Uint16(); EXPECT_TRUE(ParsesTo(p, std::string("\x02\x00", 2), "u0x200")); EXPECT_TRUE(ParseFails(p, "\x02")); + EXPECT_TRUE(ParseFails(p, "")); } TEST(ParserTypes, Uint8) { Parser p = Uint8(); EXPECT_TRUE(ParsesTo(p, "\x78", "u0x78")); + EXPECT_TRUE(ParsesTo(p, std::string("\x00", 1), "u0")); /* explicit length: a bare "\x00" literal converts to an empty std::string; zero prints as "u0" */ EXPECT_TRUE(ParseFails(p, "")); } @@ -129,8 +135,6 @@ namespace { EXPECT_TRUE(ParseFails(p, " ab")); } -#include - HParsedToken* upcase(const HParseResult *p, void* user_data) { switch(p->ast->token_type) { case TT_SEQUENCE: diff --git a/src/bindings/cpp/hammer/hammer.hpp b/src/bindings/cpp/hammer/hammer.hpp index 106be41..cb7c69f 100644 --- a/src/bindings/cpp/hammer/hammer.hpp +++ b/src/bindings/cpp/hammer/hammer.hpp @@ -48,7 +48,7 @@ namespace hammer { void* getUser() const {return token->user;} uint64_t getUint() const {return token->uint;} int64_t getSint() const {return token->sint;} - // TODO: Sequence getSeq() const {return Sequence(token->seq);} + // getSeq() is not provided; access token_data.seq directly via the HParsedToken* if needed. 
std::string getBytes() const {return std::string((char*)token->bytes.token, token->bytes.len); } diff --git a/src/bindings/java/HammerTests.java b/src/bindings/java/HammerTests.java index 235fc02..9a0abf3 100644 --- a/src/bindings/java/HammerTests.java +++ b/src/bindings/java/HammerTests.java @@ -1,7 +1,7 @@ import com.riversideresearch.hammer.*; /** - * Basic smoke tests for the Hammer Java bindings, mirroring hammer_tests.py. + * Functional tests for the Hammer Java (JNI) bindings covering all exposed combinators. * * The JNI library must be loadable via java.library.path. The SConscript sets * this up automatically when running through the build system. @@ -569,6 +569,22 @@ static void testRightrec() { assertNotNull("rightrec:aaa", p.parse(new byte[]{(byte)'a', (byte)'a', (byte)'a'})); } + static void testSeek() { + // h_seek repositions the input stream; seek to 0 (beginning) re-parses from start + HParser p = hammer.h_sequence__a(new HParser[]{ + hammer.h_ch((short)'a'), + hammer.h_seek(0, 0), // SEEK_SET = 0; rewind to bit 0 + hammer.h_ch((short)'a'), + }); + + HParseResult r = p.parse(new byte[]{(byte)'a'}); + assertNotNull("seek:success", r); + // seek TT_UINT offset is included, then the re-parsed 'a' + assertEqual("seek:length", 3L, r.getAst().seqLength()); + assertEqual("seek:elem0", 'a', r.getAst().seqElement(0).uintValue()); + assertEqual("seek:elem2", 'a', r.getAst().seqElement(2).uintValue()); + } + static void testWithEndianness() { // BYTE_LITTLE_ENDIAN | BIT_BIG_ENDIAN = 0 | 2 = 2 // In little-endian byte order, {0x01, 0x00} = 0x0001 = 1 @@ -691,6 +707,7 @@ public static void main(String[] args) { testAnd(); testNot(); testRightrec(); + testSeek(); testWithEndianness(); testPutGetValue(); testFreeValue(); diff --git a/src/bindings/java/README.md b/src/bindings/java/README.md index 8d95ee2..8f4a530 100644 --- a/src/bindings/java/README.md +++ b/src/bindings/java/README.md @@ -37,7 +37,7 @@ scons bindings=java testjava Or run the full test suite 
including all language bindings: ```bash -scons bindings=python,java test +scons bindings=all test ``` ## Usage @@ -101,35 +101,38 @@ public class Example { All Hammer functions are exposed as static methods of the `hammer` class in the `com.riversideresearch.hammer` package. -| Java call | Description | -|-------------------------------------|-----------------------------------------------------------| -| `hammer.h_token(byte[])` | Match a literal byte array | -| `hammer.h_ch((short)b)` | Match a single byte value (0–255) | -| `hammer.h_ch_range((short)lo, (short)hi)` | Match any byte in `[lo, hi]` | -| `hammer.h_in(byte[])` | Match any byte in the given charset | -| `hammer.h_not_in(byte[])` | Match any byte not in the given charset | -| `hammer.h_sequence__a(HParser[])` | Match each parser in order; result is `TT_SEQUENCE` | -| `hammer.h_choice__a(HParser[])` | Try each parser in order; return first success | -| `hammer.h_many(p)` | Match `p` zero or more times; result is `TT_SEQUENCE` | -| `hammer.h_many1(p)` | Match `p` one or more times; result is `TT_SEQUENCE` | -| `hammer.h_repeat_n(p, n)` | Match `p` exactly `n` times; result is `TT_SEQUENCE` | -| `hammer.h_optional(p)` | Match `p` or produce a `TT_NONE` token on failure | -| `hammer.h_ignore(p)` | Match `p` but suppress its result from sequences | -| `hammer.h_sepBy(p, sep)` | Match `p` separated by `sep`, zero or more times | -| `hammer.h_sepBy1(p, sep)` | Match `p` separated by `sep`, one or more times | -| `hammer.h_left(p1, p2)` | Match both; return result of `p1` | -| `hammer.h_right(p1, p2)` | Match both; return result of `p2` | -| `hammer.h_middle(p1, p2, p3)` | Match all three; return result of `p2` | -| `hammer.h_butnot(p1, p2)` | Match `p1` only if `p2` does not also match | -| `hammer.h_difference(p1, p2)` | Match `p1` only when `p2` matches less input | -| `hammer.h_xor(p1, p2)` | Match exactly one of `p1` or `p2`, not both | -| `hammer.h_and(p)` | Succeed if `p` would match, but consume no 
input | -| `hammer.h_not(p)` | Succeed if `p` would not match, consuming no input | -| `hammer.h_whitespace(p)` | Skip leading whitespace, then match `p` | -| `hammer.h_int_range(p, lo, hi)` | Match `p` only if the integer result is in `[lo, hi]` | -| `hammer.h_epsilon_p()` | Always succeed, consuming no input | -| `hammer.h_end_p()` | Succeed only at end of input | -| `hammer.h_nothing_p()` | Always fail | +| Java call | Description | +| ----------------------------------------- | ----------------------------------------------------- | +| `hammer.h_token(byte[])` | Match a literal byte array | +| `hammer.h_ch((short)b)` | Match a single byte value (0–255) | +| `hammer.h_ch_range((short)lo, (short)hi)` | Match any byte in `[lo, hi]` | +| `hammer.h_in(byte[])` | Match any byte in the given charset | +| `hammer.h_not_in(byte[])` | Match any byte not in the given charset | +| `hammer.h_sequence__a(HParser[])` | Match each parser in order; result is `TT_SEQUENCE` | +| `hammer.h_choice__a(HParser[])` | Try each parser in order; return first success | +| `hammer.h_many(p)` | Match `p` zero or more times; result is `TT_SEQUENCE` | +| `hammer.h_many1(p)` | Match `p` one or more times; result is `TT_SEQUENCE` | +| `hammer.h_repeat_n(p, n)` | Match `p` exactly `n` times; result is `TT_SEQUENCE` | +| `hammer.h_optional(p)` | Match `p` or produce a `TT_NONE` token on failure | +| `hammer.h_ignore(p)` | Match `p` but suppress its result from sequences | +| `hammer.h_sepBy(p, sep)` | Match `p` separated by `sep`, zero or more times | +| `hammer.h_sepBy1(p, sep)` | Match `p` separated by `sep`, one or more times | +| `hammer.h_left(p1, p2)` | Match both; return result of `p1` | +| `hammer.h_right(p1, p2)` | Match both; return result of `p2` | +| `hammer.h_middle(p1, p2, p3)` | Match all three; return result of `p2` | +| `hammer.h_butnot(p1, p2)` | Match `p1` only if `p2` does not also match | +| `hammer.h_difference(p1, p2)` | Match `p1` only when `p2` matches less input | +| 
`hammer.h_xor(p1, p2)` | Match exactly one of `p1` or `p2`, not both | +| `hammer.h_and(p)` | Succeed if `p` would match, but consume no input | +| `hammer.h_not(p)` | Succeed if `p` would not match, consuming no input | +| `hammer.h_whitespace(p)` | Skip leading whitespace, then match `p` | +| `hammer.h_int_range(p, lo, hi)` | Match `p` only if the integer result is in `[lo, hi]` | +| `hammer.h_epsilon_p()` | Always succeed, consuming no input | +| `hammer.h_end_p()` | Succeed only at end of input | +| `hammer.h_nothing_p()` | Always fail | +| `hammer.h_put_value(p, name)` | Parse `p` and store the result under `name` | +| `hammer.h_get_value(name)` | Retrieve a previously stored value by `name` | +| `hammer.h_free_value(name)` | Retrieve and free a previously stored value by `name` | ### Integer Parsers @@ -150,27 +153,27 @@ A successful parse returns an `HParseResult`; a failed parse returns `null`. Cal `HParseResult` owns the memory for the entire parse tree. Do not hold references to tokens returned by `getAst()` or `seqElement()` after the `HParseResult` has been garbage-collected or explicitly deleted. 
-| Method | Returns | Description | -|-------------------------------|----------|------------------------------------------------------| -| `token.tokenType()` | `int` | One of the `TT_*` constants below | -| `token.sintValue()` | `long` | Signed integer value (`TT_SINT` tokens) | -| `token.uintValue()` | `long` | Unsigned integer value (`TT_UINT` tokens); treat as unsigned with `Long.toUnsignedString()` | -| `token.seqLength()` | `long` | Number of elements (`TT_SEQUENCE` tokens) | -| `token.seqElement(i)` | `HParsedToken` | The `i`-th sequence element | -| `token.bytesLength()` | `long` | Byte count (`TT_BYTES` tokens) | -| `token.byteAt(i)` | `short` | Byte value at index `i` (0–255), or -1 if out of range | +| Method | Returns | Description | +| --------------------- | -------------- | ------------------------------------------------------------------------------------------- | +| `token.tokenType()` | `int` | One of the `TT_*` constants below | +| `token.sintValue()` | `long` | Signed integer value (`TT_SINT` tokens) | +| `token.uintValue()` | `long` | Unsigned integer value (`TT_UINT` tokens); treat as unsigned with `Long.toUnsignedString()` | +| `token.seqLength()` | `long` | Number of elements (`TT_SEQUENCE` tokens) | +| `token.seqElement(i)` | `HParsedToken` | The `i`-th sequence element | +| `token.bytesLength()` | `long` | Byte count (`TT_BYTES` tokens) | +| `token.byteAt(i)` | `short` | Byte value at index `i` (0–255), or -1 if out of range | ### Token Type Constants Compare `token.tokenType()` against these values: -| Constant | Value | Produced by | -|----------------|-------|----------------------------------------------------| -| `TT_NONE` | 1 | `h_optional()` on failure, `h_end_p()`, `h_and()` | -| `TT_BYTES` | 2 | `h_token()`, `h_in()`, `h_not_in()` | -| `TT_SINT` | 4 | `h_int8/16/32/64()` | -| `TT_UINT` | 8 | `h_uint8/16/32/64()`, `h_ch()`, `h_ch_range()` | -| `TT_SEQUENCE` | 16 | `h_sequence__a()`, `h_many()`, `h_sepBy()`, etc. 
| +| Constant | Value | Produced by | +| ------------- | ----- | ------------------------------------------------- | +| `TT_NONE` | 1 | `h_optional()` on failure, `h_end_p()`, `h_and()` | +| `TT_BYTES` | 2 | `h_token()`, `h_in()`, `h_not_in()` | +| `TT_SINT` | 4 | `h_int8/16/32/64()` | +| `TT_UINT` | 8 | `h_uint8/16/32/64()`, `h_ch()`, `h_ch_range()` | +| `TT_SEQUENCE` | 16 | `h_sequence__a()`, `h_many()`, `h_sepBy()`, etc. | ### Recursive Grammars diff --git a/src/bindings/python/README.md b/src/bindings/python/README.md index 2f2fe71..d08395a 100644 --- a/src/bindings/python/README.md +++ b/src/bindings/python/README.md @@ -55,6 +55,21 @@ import hammer as h If you ran `scons install` (or `scons bindings=python installpython`), the package is available system-wide without any path manipulation. +#### Virtual Environments + +If you are using a Python virtual environment, activate it before building and installing so that `setup.py` installs into the venv's site-packages rather than the system Python: + +```bash +source /path/to/venv/bin/activate +scons bindings=python installpython +``` + +Alternatively, pass the venv interpreter explicitly so SCons uses it throughout: + +```bash +scons bindings=python python=/path/to/venv/bin/python installpython +``` + ### Basic Example ```python @@ -107,6 +122,9 @@ print(result) # (b'GET ', (b'/', b'i', b'n', ...)) | `h.epsilon_p()` | Always succeed, consuming no input | | `h.end_p()` | Succeed only at end of input | | `h.nothing_p()` | Always fail | +| `h.put_value(p, name)` | Parse `p` and store the result under `name` | +| `h.get_value(name)` | Retrieve a previously stored value by `name` | +| `h.free_value(name)` | Retrieve and free a previously stored value by `name` | ### Integer Parsers diff --git a/src/bindings/swig/hammer.i b/src/bindings/swig/hammer.i index 54c4b04..eb15147 100644 --- a/src/bindings/swig/hammer.i +++ b/src/bindings/swig/hammer.i @@ -62,7 +62,6 @@ %typemap(in) void*[] { if (PyList_Check($input)) { - 
Py_INCREF($input); int size = PyList_Size($input); int i = 0; int res = 0; @@ -80,6 +79,7 @@ return NULL; } } +%typemap(freearg) void*[] { free($1); } %typemap(in) uint8_t { if (PyLong_Check($input)) { $1 = (uint8_t)PyLong_AsLong($input); @@ -88,7 +88,15 @@ PyErr_SetString(PyExc_ValueError, "Expecting an integer or bytes"); return NULL; } else { - $1 = *(uint8_t*)PyBytes_AsString($input); + if (PyBytes_Size($input) != 1) { + PyErr_SetString(PyExc_ValueError, "Expecting a single byte"); + return NULL; + } + const char *buf = PyBytes_AsString($input); + if (buf == NULL) { + return NULL; + } + $1 = (uint8_t)(unsigned char)buf[0]; } } %typemap(out) HBytes* { @@ -105,7 +113,7 @@ } %typemap(out) struct HParseResult_* { if ($1 == NULL) { - // TODO: raise parse failure + // Parse failure: return None (the documented Python binding behavior). Py_INCREF(Py_None); $result = Py_None; } else { @@ -445,6 +453,9 @@ def int16(): return _h_int16() def int32(): return _h_int32() def int64(): return _h_int64() +def put_value(p, name): return _h_put_value(p, name) +def get_value(name): return _h_get_value(name) +def free_value(name): return _h_free_value(name) %} diff --git a/tests/t_misc.c b/tests/t_misc.c index 617ab66..5a7a647 100644 --- a/tests/t_misc.c +++ b/tests/t_misc.c @@ -31,7 +31,7 @@ static void test_tt_registry(void) { g_test_message("Unknown token type should not return a name"); g_test_fail(); } - g_check_cmp_int32(h_get_token_type_number("com.riversideresearch.test.unkown_token_type"), ==, + g_check_cmp_int32(h_get_token_type_number("com.riversideresearch.test.unknown_token_type"), ==, 0); } From 9e91ff4bfc4edaa4d09577aa4b97244febc8dbd5 Mon Sep 17 00:00:00 2001 From: "Elbasiouny, Mahmoud" Date: Mon, 6 Apr 2026 15:16:42 -0400 Subject: [PATCH 14/14] Address PR review feedback: fix SWIG memory leaks, build correctness, expand docs and tests --- src/bindings/cpp/cpp_tests.cpp | 2 +- src/bindings/java/HammerTests.java | 17 ----------------- 2 files changed, 1 insertion(+), 
18 deletions(-) diff --git a/src/bindings/cpp/cpp_tests.cpp b/src/bindings/cpp/cpp_tests.cpp index 8b443c8..992e06b 100644 --- a/src/bindings/cpp/cpp_tests.cpp +++ b/src/bindings/cpp/cpp_tests.cpp @@ -84,7 +84,7 @@ namespace { TEST(ParserTypes, Uint8) { Parser p = Uint8(); EXPECT_TRUE(ParsesTo(p, "\x78", "u0x78")); - EXPECT_TRUE(ParsesTo(p, "\x00", "u0x0")); + EXPECT_TRUE(ParsesTo(p, std::string("\x00", 1), "u0")); EXPECT_TRUE(ParseFails(p, "")); } diff --git a/src/bindings/java/HammerTests.java b/src/bindings/java/HammerTests.java index 9a0abf3..a81cf00 100644 --- a/src/bindings/java/HammerTests.java +++ b/src/bindings/java/HammerTests.java @@ -569,22 +569,6 @@ static void testRightrec() { assertNotNull("rightrec:aaa", p.parse(new byte[]{(byte)'a', (byte)'a', (byte)'a'})); } - static void testSeek() { - // h_seek repositions the input stream; seek to 0 (beginning) re-parses from start - HParser p = hammer.h_sequence__a(new HParser[]{ - hammer.h_ch((short)'a'), - hammer.h_seek(0, 0), // SEEK_SET = 0; rewind to bit 0 - hammer.h_ch((short)'a'), - }); - - HParseResult r = p.parse(new byte[]{(byte)'a'}); - assertNotNull("seek:success", r); - // seek TT_UINT offset is included, then the re-parsed 'a' - assertEqual("seek:length", 3L, r.getAst().seqLength()); - assertEqual("seek:elem0", 'a', r.getAst().seqElement(0).uintValue()); - assertEqual("seek:elem2", 'a', r.getAst().seqElement(2).uintValue()); - } - static void testWithEndianness() { // BYTE_LITTLE_ENDIAN | BIT_BIG_ENDIAN = 0 | 2 = 2 // In little-endian byte order, {0x01, 0x00} = 0x0001 = 1 @@ -707,7 +691,6 @@ public static void main(String[] args) { testAnd(); testNot(); testRightrec(); - testSeek(); testWithEndianness(); testPutGetValue(); testFreeValue();