diff --git a/BENCHMARKS.md b/BENCHMARKS.md index c3be6de..fdbf1f0 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -4,9 +4,10 @@ Comprehensive performance comparison between all json2xml implementations. ## Test Environment -- **Machine**: Apple Silicon (M-series, aarch64) -- **OS**: macOS -- **Date**: March 12, 2026 +- **Machine**: Apple Silicon (arm64) +- **OS**: macOS 26.4.1 (Darwin 25.4.0) +- **Python**: 3.14.4 +- **Date**: April 24, 2026 ### Implementations Tested @@ -14,7 +15,7 @@ Comprehensive performance comparison between all json2xml implementations. |----------------|------|-------| | Python | Library | Pure Python (json2xml) | | Rust | Library | Native extension via PyO3 (json2xml-rs) | -| Go | CLI | Standalone binary (json2xml-go v1.0.0) | +| Go | CLI | Standalone binary (json2xml-go) | | Zig | CLI | Standalone binary (json2xml-zig) | ## Test Data @@ -22,10 +23,10 @@ Comprehensive performance comparison between all json2xml implementations. | Size | Description | Bytes | |------|-------------|-------| | Small | Simple object `{"name": "John", "age": 30, "city": "New York"}` | 47 | -| Medium | 10 generated records with nested structures | ~3,211 | +| Medium | 10 generated records with nested structures | 3,212 | | bigexample.json | Real-world patent data | 2,018 | -| Large | 100 generated records with nested structures | ~32,220 | -| Very Large | 1,000 generated records with nested structures | ~323,114 | +| Large | 100 generated records with nested structures | 32,207 | +| Very Large | 1,000 generated records with nested structures | 323,148 | ## Results @@ -33,55 +34,55 @@ Comprehensive performance comparison between all json2xml implementations. 
| Test Case | Python | Rust | Go | Zig | |-----------|--------|------|-----|-----| -| Small (47B) | 78.39µs | 1.05µs | 4.31ms | 1.96ms | -| Medium (3.2KB) | 2.15ms | 15.47µs | 5.03ms | 2.34ms | -| bigexample (2KB) | 862.12µs | 6.44µs | 4.47ms | 2.38ms | -| Large (32KB) | 22.08ms | 150.91µs | 4.80ms | 2.89ms | -| Very Large (323KB) | 218.63ms | 1.47ms | 4.75ms | 5.38ms | +| Small (47B) | 31.49µs | 0.55µs | 4.09ms | 2.02ms | +| Medium (3.2KB) | 1.69ms | 16.15µs | 4.07ms | 2.09ms | +| bigexample (2KB) | 819.86µs | 6.44µs | 4.37ms | 2.11ms | +| Large (32KB) | 17.97ms | 168.21µs | 4.10ms | 2.42ms | +| Very Large (323KB) | 183.33ms | 1.42ms | 4.20ms | 5.12ms | ### Speedup vs Pure Python | Test Case | Rust | Go | Zig | |-----------|------|-----|-----| -| Small (47B) | **74.9x** | 0.0x* | 0.0x* | -| Medium (3.2KB) | **139.1x** | 0.4x* | 0.9x* | -| bigexample (2KB) | **133.9x** | 0.2x* | 0.4x* | -| Large (32KB) | **146.3x** | 4.6x | **7.6x** | -| Very Large (323KB) | **149.2x** | **46.1x** | **40.6x** | +| Small (47B) | **56.8x** | 0.0x* | 0.0x* | +| Medium (3.2KB) | **105.0x** | 0.4x* | 0.8x* | +| bigexample (2KB) | **127.2x** | 0.2x* | 0.4x* | +| Large (32KB) | **106.8x** | 4.4x | **7.4x** | +| Very Large (323KB) | **129.0x** | **43.6x** | **35.8x** | -*CLI tools have process spawn overhead (~2-4ms) which dominates for small inputs +*CLI tools have process spawn overhead (~2-4ms) which dominates for small inputs. ## Key Observations ### 1. Rust Extension is the Best Choice for Python Users 🦀 The Rust extension (json2xml-rs) provides: -- **~75-149x faster** than pure Python consistently across all input sizes +- **~57-129x faster** conversion than pure Python in this run - **Zero process overhead** - called directly from Python -- **Automatic fallback** - pure Python used if Rust unavailable +- **Automatic fallback** - pure Python used if Rust is unavailable or a feature requires it - **Easy install**: `pip install json2xml[fast]` ### 2. 
Go Excels for Very Large CLI Workloads 🚀 For very large inputs (323KB+): -- **46.1x faster** than Python +- **43.6x faster** than Python - But ~4ms startup overhead hurts small file performance -- Best for batch processing or large file conversions +- Best for batch processing or large file conversions from shell scripts -### 3. Zig is Now Highly Competitive ⚡ +### 3. Zig is Highly Competitive for CLI Use ⚡ -After recent optimizations: -- **40.6x faster** than Python for very large files -- **7.6x faster** for large files (32KB) +In this run: +- **35.8x faster** than Python for very large files +- **7.4x faster** for large files (32KB) - Faster startup than Go (~2ms vs ~4ms) -- Best balance of startup time and throughput +- Best balance of startup time and throughput for mixed CLI workloads ### 4. Process Spawn Overhead Matters CLI tools (Go, Zig) have process spawn overhead: - Go: ~4ms startup overhead - Zig: ~2ms startup overhead -- Dominates for small inputs (makes them appear slower than Python!) 
+- Dominates for small inputs (makes them appear slower than Python) - Negligible for large inputs where actual work dominates - Rust extension avoids this entirely by being a native Python module @@ -89,10 +90,10 @@ CLI tools (Go, Zig) have process spawn overhead: | Use Case | Recommended | Why | |----------|-------------|-----| -| Python library calls | **Rust** (`pip install json2xml[fast]`) | 75-149x faster, no overhead | -| Small files via CLI | **Zig** (json2xml-zig) | Fastest startup (~2ms) | -| Large files via CLI | **Go** or **Zig** | Both excellent (Go slightly faster) | -| Batch processing | **Go** or **Rust** | Both excellent | +| Python library calls | **Rust** (`pip install json2xml[fast]`) | 57-129x faster, no process overhead | +| Small files via CLI | **Zig** (json2xml-zig) | Fastest startup among native CLIs (~2ms) | +| Large files via CLI | **Go** or **Zig** | Both excellent; Zig wins at 32KB, Go wins at 323KB in this run | +| Batch processing | **Go** or **Rust** | Both excellent depending on shell vs Python integration | | Pure Python required | **Python** (json2xml) | Always available | ## Installation diff --git a/README.rst b/README.rst index f9fcecf..09e5500 100644 --- a/README.rst +++ b/README.rst @@ -43,7 +43,7 @@ Installation pip install json2xml -**With Native Rust Acceleration (up to 149x faster)** +**With Native Rust Acceleration (up to 129x faster)** For maximum performance, install the optional Rust extension: @@ -55,7 +55,7 @@ For maximum performance, install the optional Rust extension: # Or install the Rust extension separately pip install json2xml-rs -The Rust extension provides **75-149x faster** conversion compared to pure Python. It's automatically used when available, with seamless fallback to pure Python. +The Rust extension provides **57-129x faster** conversion compared to pure Python in the latest benchmark. It's automatically used when available, with seamless fallback to pure Python. 
**As a CLI Tool** @@ -301,7 +301,7 @@ Using tools directly: **Rust Extension Development** -The optional Rust extension (``json2xml-rs``) provides up to 149x faster performance. To develop or build the Rust extension: +The optional Rust extension (``json2xml-rs``) provides up to 129x faster performance in the latest benchmark. To develop or build the Rust extension: Prerequisites: @@ -428,21 +428,21 @@ For users who need maximum performance within Python, json2xml includes an optio - Rust Extension - Speedup * - **Small JSON** (47 bytes) - - 78µs - - 1.05µs - - **75x** + - 31.49µs + - 0.55µs + - **56.8x** * - **Medium JSON** (3.2 KB) - - 2.15ms - - 15µs - - **139x** + - 1.69ms + - 16.15µs + - **105.0x** * - **Large JSON** (32 KB) - - 22ms - - 151µs - - **146x** + - 17.97ms + - 168.21µs + - **106.8x** * - **Very Large JSON** (323 KB) - - 219ms - - 1.47ms - - **149x** + - 183.33ms + - 1.42ms + - **129.0x** **Usage with Rust Extension:** @@ -472,7 +472,7 @@ For other platforms, the pure Python version is used automatically. Performance Benchmarks ^^^^^^^^^^^^^^^^^^^^^^ -Comprehensive benchmarks comparing all implementations (Apple Silicon, January 2026): +Comprehensive benchmarks comparing all implementations (Apple Silicon, macOS 26.4.1, Python 3.14.4, April 2026): .. 
list-table:: :header-rows: 1 @@ -485,40 +485,40 @@ Comprehensive benchmarks comparing all implementations (Apple Silicon, January 2 - Zig - Best * - **Small** (47B) - - 40µs - - 1.5µs - - 4.6ms - - 3.7ms - - Rust (28x) + - 31.49µs + - 0.55µs + - 4.09ms + - 2.02ms + - Rust (56.8x) * - **Medium** (3.2KB) - - 2.1ms - - 71µs - - 4.1ms - - 3.3ms - - Rust (30x) + - 1.69ms + - 16.15µs + - 4.07ms + - 2.09ms + - Rust (105.0x) * - **Large** (32KB) - - 21ms - - 740µs - - 4ms - - 6.1ms - - Rust (28x) + - 17.97ms + - 168.21µs + - 4.10ms + - 2.42ms + - Rust (106.8x) * - **Very Large** (323KB) - - 213ms - - 7.5ms - - 4.4ms - - 33ms - - Go (48x) + - 183.33ms + - 1.42ms + - 4.20ms + - 5.12ms + - Rust (129.0x) **Key Findings:** -- **Rust extension**: ~28x faster than Python, zero overhead (best for Python users) -- **Go CLI**: 48x faster for large files (300KB+), but has ~4ms startup overhead -- **Zig CLI**: 3-6x faster for medium-large files +- **Rust extension**: 57-129x faster than Python, zero process overhead (best for Python users) +- **Go CLI**: 43.6x faster for very large files (300KB+), but has ~4ms startup overhead +- **Zig CLI**: 7.4x faster for large files and 35.8x faster for very large files, with ~2ms startup overhead **Recommendation by Use Case:** -- **Python library calls**: Use ``pip install json2xml[fast]`` (Rust, 28x faster) -- **Large file CLI processing**: Use `json2xml-go `_ (Go, 48x for 300KB+) +- **Python library calls**: Use ``pip install json2xml[fast]`` (Rust, up to 129x faster) +- **Large file CLI processing**: Use `json2xml-go `_ or `json2xml-zig `_ depending on your workload - **Pure Python required**: Use ``pip install json2xml`` For detailed benchmarks, see `BENCHMARKS.md `_. 
@@ -529,9 +529,9 @@ Other Implementations This library is also available in other languages: -- **Rust**: `json2xml-rs `_ - 28x faster, Python extension via PyO3 -- **Go**: `json2xml-go `_ - 48x faster for large files, native CLI -- **Zig**: `json2xml-zig `_ - 6x faster, native CLI +- **Rust**: `json2xml-rs `_ - up to 129x faster, Python extension via PyO3 +- **Go**: `json2xml-go `_ - 43.6x faster for very large files, native CLI +- **Zig**: `json2xml-zig `_ - 35.8x faster for very large files, native CLI Help and Support to maintain this project diff --git a/json2xml/cli.py b/json2xml/cli.py index a0f93dd..d53eca3 100644 --- a/json2xml/cli.py +++ b/json2xml/cli.py @@ -43,10 +43,11 @@ import argparse import sys -from typing import Any +from typing import NoReturn from json2xml import __version__ from json2xml.json2xml import Json2xml +from json2xml.types import JSONValue from json2xml.utils import ( JSONReadError, StringReadError, @@ -60,6 +61,12 @@ EMAIL = "mail@vinitkumar.me" +def exit_with_error(message: str) -> NoReturn: + """Print an error message and terminate CLI processing.""" + print(message, file=sys.stderr) + raise SystemExit(1) + + # @lat: [[architecture#CLI entrypoint]] def create_parser() -> argparse.ArgumentParser: """Create and configure the argument parser.""" @@ -230,7 +237,7 @@ def create_parser() -> argparse.ArgumentParser: # @lat: [[behavior#Input readers]] -def read_input(args: argparse.Namespace) -> dict[str, Any] | list[Any]: +def read_input(args: argparse.Namespace) -> JSONValue: """ Read JSON input from the specified source. 
@@ -250,15 +257,13 @@ def read_input(args: argparse.Namespace) -> dict[str, Any] | list[Any]: try: return readfromurl(args.url) except URLReadError as e: - print(f"Error reading from URL: {e}", file=sys.stderr) - sys.exit(1) + exit_with_error(f"Error reading from URL: {e}") if args.string: try: return readfromstring(args.string) except StringReadError as e: - print(f"Error parsing JSON string: {e}", file=sys.stderr) - sys.exit(1) + exit_with_error(f"Error parsing JSON string: {e}") if args.input_file: if args.input_file == "-": @@ -267,18 +272,16 @@ def read_input(args: argparse.Namespace) -> dict[str, Any] | list[Any]: try: return readfromjson(args.input_file) except JSONReadError as e: - print(f"Error reading JSON file: {e}", file=sys.stderr) - sys.exit(1) + exit_with_error(f"Error reading JSON file: {e}") # Check if there's data on stdin if not sys.stdin.isatty(): return read_from_stdin() - print("Error: No input provided. Use -h for help.", file=sys.stderr) - sys.exit(1) + exit_with_error("Error: No input provided. Use -h for help.") -def read_from_stdin() -> dict[str, Any] | list[Any]: +def read_from_stdin() -> JSONValue: """ Read JSON from standard input. 
@@ -291,12 +294,10 @@ def read_from_stdin() -> dict[str, Any] | list[Any]: try: json_str = sys.stdin.read().strip() if not json_str: - print("Error: Empty input", file=sys.stderr) - sys.exit(1) + exit_with_error("Error: Empty input") return readfromstring(json_str) except StringReadError as e: - print(f"Error parsing JSON from stdin: {e}", file=sys.stderr) - sys.exit(1) + exit_with_error(f"Error parsing JSON from stdin: {e}") def write_output(output: str | bytes, output_file: str | None) -> None: diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index cc2a8f9..ab5d74c 100644 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -102,7 +102,7 @@ def get_xml_type(val: Any) -> str: return type(val).__name__ -def escape_xml(s: str | int | float | numbers.Number) -> str: +def escape_xml(s: str | int | float | numbers.Number | None) -> str: """ Escape a string for use in XML. @@ -155,32 +155,24 @@ def key_is_valid_xml(key: str) -> bool: def make_valid_xml_name(key: str, attr: dict[str, Any]) -> tuple[str, dict[str, Any]]: - """Tests an XML name and fixes it if invalid""" - key = escape_xml(key) - # nothing happens at escape_xml if attr is not a string, we don't - # need to pass it to the method at all. 
- # attr = escape_xml(attr) + """Return a valid XML element name and carry the original key as metadata when needed.""" + key = str(key) - # pass through if key is already valid if key_is_valid_xml(key): return key, attr - # prepend a lowercase n if the key is numeric - if isinstance(key, int) or key.isdigit(): + if key.isdigit(): return f"n{key}", attr - # replace spaces with underscores if that fixes the problem - if key_is_valid_xml(key.replace(" ", "_")): - return key.replace(" ", "_"), attr + key_with_underscores = key.replace(" ", "_") + if key_is_valid_xml(key_with_underscores): + return key_with_underscores, attr - # allow namespace prefixes + ignore @flat in key - if key_is_valid_xml(key.replace(":", "").replace("@flat", "")): + if ":" in key and key_is_valid_xml(key.replace(":", "")): return key, attr - # key is still invalid - move it into a name attribute attr["name"] = key - key = "key" - return key, attr + return "key", attr def wrap_cdata(s: str | int | float | numbers.Number) -> str: @@ -345,13 +337,20 @@ def dict2xml_str( if attr_type: attr["type"] = get_xml_type(item) - val_attr: dict[str, str] = item.pop("@attrs", attr) # update attr with custom @attr if exists - rawitem = item["@val"] if "@val" in item else item + val_attr = dict(item["@attrs"]) if "@attrs" in item else dict(attr) + if "@val" in item: + rawitem = item["@val"] + elif "@attrs" in item: + rawitem = {key: value for key, value in item.items() if key != "@attrs"} + else: + rawitem = item if is_primitive_type(rawitem): - if isinstance(rawitem, dict): + if rawitem is None: + subtree = "" + elif isinstance(rawitem, bool): + subtree = str(rawitem).lower() + else: subtree = escape_xml(str(rawitem)) - if isinstance(rawitem, str): - subtree = escape_xml(rawitem) else: # we can not use convert_dict, because rawitem could be non-dict subtree = convert( @@ -423,8 +422,10 @@ def convert_dict( for key, val in obj.items(): attr = {} if not ids else {"id": f"{get_unique_id(parent)}"} + key_is_flat 
= isinstance(key, str) and key.endswith("@flat") + xml_key = key[:-5] if key_is_flat else key - key, attr = make_valid_xml_name(key, attr) + key, attr = make_valid_xml_name(xml_key, attr) # since bool is also a subtype of number.Number and int, the check for bool # never comes and hence we get wrong value for the xml type bool @@ -468,7 +469,7 @@ def convert_dict( item=val, item_func=item_func, cdata=cdata, - item_name=key, + item_name=f"{key}@flat" if key_is_flat else key, item_wrap=item_wrap, list_headers=list_headers ) diff --git a/json2xml/json2xml.py b/json2xml/json2xml.py index 034cb2c..c622b34 100644 --- a/json2xml/json2xml.py +++ b/json2xml/json2xml.py @@ -5,6 +5,7 @@ from json2xml import dicttoxml +from .types import JSONValue from .utils import InvalidDataError @@ -15,7 +16,7 @@ class Json2xml: """ def __init__( self, - data: dict[str, Any] | list[Any] | None = None, + data: JSONValue = None, wrapper: str = "all", root: bool = True, pretty: bool = True, @@ -41,7 +42,7 @@ def to_xml(self) -> Any | None: """ Convert to xml using dicttoxml.dicttoxml and then pretty print it. 
""" - if self.data: + if self.data is not None: xml_data = dicttoxml.dicttoxml( self.data, root=self.root, diff --git a/json2xml/types.py b/json2xml/types.py new file mode 100644 index 0000000..c0c3049 --- /dev/null +++ b/json2xml/types.py @@ -0,0 +1,8 @@ +"""Shared JSON type aliases used by reader and converter APIs.""" +from __future__ import annotations + +from typing import TypeAlias + +JSONValue: TypeAlias = None | bool | int | float | str | list["JSONValue"] | dict[str, "JSONValue"] + +__all__ = ["JSONValue"] diff --git a/json2xml/utils.py b/json2xml/utils.py index ad5f427..b9b8553 100644 --- a/json2xml/utils.py +++ b/json2xml/utils.py @@ -1,8 +1,15 @@ -"""Utility methods for converting XML data to dictionary from various sources.""" +"""Utility methods for reading JSON data from various sources.""" +from __future__ import annotations + import json import urllib3 +from .types import JSONValue + +DEFAULT_URL_TIMEOUT = urllib3.Timeout(connect=5.0, read=30.0) +_HTTP = urllib3.PoolManager() + class JSONReadError(Exception): """Raised when there is an error reading JSON data.""" @@ -25,31 +32,42 @@ class StringReadError(Exception): # @lat: [[behavior#Input readers]] -def readfromjson(filename: str) -> dict[str, str]: - """Reads a JSON file and returns a dictionary.""" +def readfromjson(filename: str) -> JSONValue: + """Read JSON data from a file.""" try: with open(filename, encoding="utf-8") as jsondata: return json.load(jsondata) - except ValueError: - raise JSONReadError("Invalid JSON File") - except OSError: - raise JSONReadError("Invalid JSON File") + except (ValueError, OSError) as error: + raise JSONReadError("Invalid JSON File") from error + +def readfromurl(url: str, params: dict[str, str] | None = None) -> JSONValue: + """Load JSON data from a URL.""" + try: + response = _HTTP.request( + "GET", + url, + fields=params, + timeout=DEFAULT_URL_TIMEOUT, + retries=False, + ) + except urllib3.exceptions.HTTPError as error: + raise URLReadError("URL could not be 
read") from error + + if response.status != 200: + raise URLReadError("URL is not returning correct response") -def readfromurl(url: str, params: dict[str, str] | None = None) -> dict[str, str]: - """Loads JSON data from a URL and returns a dictionary.""" - http = urllib3.PoolManager() - response = http.request("GET", url, fields=params) - if response.status == 200: - return json.loads(response.data.decode('utf-8')) - raise URLReadError("URL is not returning correct response") + try: + return json.loads(response.data.decode("utf-8")) + except (UnicodeDecodeError, json.JSONDecodeError) as error: + raise URLReadError("URL did not return valid JSON") from error -def readfromstring(jsondata: object) -> dict[str, str]: - """Loads JSON data from a string and returns a dictionary.""" +def readfromstring(jsondata: object) -> JSONValue: + """Load JSON data from a string.""" if not isinstance(jsondata, str): raise StringReadError("Input is not a proper JSON string") try: return json.loads(jsondata) - except ValueError: - raise StringReadError("Input is not a proper JSON string") + except ValueError as error: + raise StringReadError("Input is not a proper JSON string") from error diff --git a/lat.md/architecture.md b/lat.md/architecture.md index 72ec904..6fc152e 100644 --- a/lat.md/architecture.md +++ b/lat.md/architecture.md @@ -12,7 +12,7 @@ Library callers usually construct [[json2xml/json2xml.py#Json2xml]] with a decod The pure Python serializer recursively maps Python values to XML elements, attributes, and text while preserving the project-specific options around wrappers, list handling, and type metadata. -[[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. 
[[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys. +[[json2xml/dicttoxml.py#dicttoxml]] is the public serializer. It handles the XML declaration, root wrapper, namespace emission, XPath mode, and then routes nested values through helper functions such as [[json2xml/dicttoxml.py#convert]], [[json2xml/dicttoxml.py#convert_dict]], and [[json2xml/dicttoxml.py#convert_list]]. [[json2xml/dicttoxml.py#get_xml_type]] and [[json2xml/dicttoxml.py#convert]] accept broad caller input and classify unsupported values at runtime, so tests can probe failure paths without lying to the type checker. Invalid XML names are normalized by [[json2xml/dicttoxml.py#make_valid_xml_name]] instead of crashing immediately on user keys, and special `@attrs`/`@val` handling avoids mutating caller data. ## Backend selection @@ -20,6 +20,12 @@ The fast-path module prefers the Rust extension when it can preserve Python sema [[json2xml/dicttoxml_fast.py#dicttoxml]] uses the Rust backend only when optional features such as `ids`, custom `item_func`, XML namespaces, XPath mode, or special `@` keys are not involved. A local stub for the optional `json2xml_rs` module keeps static analysis aligned with that fallback design, so type checking still passes when the extension is not installed. This keeps fast installs fast without letting the optimized path silently change behavior. +## Performance benchmarks + +The benchmark docs record measured implementation tradeoffs so users can choose between Python, Rust, Go, and Zig without guessing. + +The April 2026 benchmark on Apple Silicon shows the Rust extension as the best option for Python library calls, with 57-129x speedups over pure Python and no process overhead. 
Go and Zig remain useful for native CLI workflows where startup cost is acceptable. + ## CLI entrypoint The CLI is a thin adapter that parses options, resolves one input source, and forwards those options into the same converter used by the library API. diff --git a/lat.md/behavior.md b/lat.md/behavior.md index c82cfcc..a0ed7e8 100644 --- a/lat.md/behavior.md +++ b/lat.md/behavior.md @@ -6,13 +6,13 @@ This file captures the observable conversion and input rules that matter more th The input helpers convert files, strings, URLs, and stdin into Python data structures while surfacing source-specific errors to callers. -[[json2xml/utils.py#readfromjson]] wraps file and JSON decoding failures in `JSONReadError`. [[json2xml/utils.py#readfromstring]] accepts unknown caller input so invalid-type tests can call it honestly, then rejects non-string inputs and malformed JSON with `StringReadError`. [[json2xml/utils.py#readfromurl]] performs a GET request and raises `URLReadError` when the HTTP status is not `200`. +[[json2xml/utils.py#readfromjson]] wraps file and JSON decoding failures in `JSONReadError`. [[json2xml/utils.py#readfromstring]] accepts unknown caller input so invalid-type tests can call it honestly, then rejects non-string inputs and malformed JSON with `StringReadError`. [[json2xml/utils.py#readfromurl]] performs a bounded GET request and raises `URLReadError` for network, non-200, decoding, and JSON parse failures. ## Conversion output Default output includes an XML declaration, wraps content in `all`, pretty prints the document, and annotates elements with their source type unless callers disable those features. -[[json2xml/json2xml.py#Json2xml#to_xml]] calls [[json2xml/dicttoxml.py#dicttoxml]] with the configured wrapper, root, `attr_type`, `item_wrap`, `cdata`, and `list_headers` options. When `item_wrap=False`, list values repeat the parent tag instead of creating `<item>` children. When `pretty=False`, the library returns the serializer bytes directly.
+[[json2xml/json2xml.py#Json2xml#to_xml]] calls [[json2xml/dicttoxml.py#dicttoxml]] with the configured wrapper, root, `attr_type`, `item_wrap`, `cdata`, and `list_headers` options. It treats only `None` as absent input, so falsy JSON values still serialize. When `item_wrap=False`, list values repeat the parent tag instead of creating `<item>` children. When `pretty=False`, the library returns the serializer bytes directly. The Rust fast path in [[rust/src/lib.rs#write_dict_contents]] and [[rust/src/lib.rs#write_list_contents]] mirrors those Python list-wrapper rules. `list_headers=True` suppresses the outer list container and repeats the parent tag only for nested dict items, while primitive items still use the same scalar tags that Python emits. diff --git a/lat.md/tests.md b/lat.md/tests.md index 05bd2a4..95f8132 100644 --- a/lat.md/tests.md +++ b/lat.md/tests.md @@ -18,6 +18,14 @@ When both URL and string inputs are present, the CLI should read from the URL fi When the positional input is `-`, the CLI should read stdin instead of trying to open a file literally named `-`. +## Input readers + +These tests verify the concrete reader helpers against realistic source behavior so parsing and error wrapping stay aligned with production use. + +### URL reader uses real HTTP and wraps failures + +URL input should read valid JSON over HTTP and wrap status, network, and decoding failures in `URLReadError`. + ## Conversion behavior These tests pin the XML shapes that matter most for interoperability, especially the modes that intentionally diverge from the default serializer. @@ -41,3 +49,23 @@ Supplying namespace prefixes and an `xsi` mapping should emit the expected `xmln ### Xml namespace inputs are not mutated across calls Reusing one `xml_namespaces` mapping across multiple `dicttoxml` calls should return identical XML each time so namespace declarations never accumulate on the shared dict.
+ +### Falsy JSON values convert to XML + +Falsy JSON values such as empty objects, empty arrays, zero, false, and empty strings should convert through the public API instead of being treated as missing data. + +### Special attributes do not mutate input + +Converting dictionaries that use `@attrs` and `@val` should preserve the caller's original data so objects can be reused safely. + +### Invalid XML names normalize without double escaping + +Invalid element names should fall back to `<key name="...">` with the original name escaped exactly once in the emitted attribute. + +### Flat suffix never creates invalid XML tags + +Keys ending in `@flat` should keep their flattening behavior where supported and must never leak the suffix into scalar or dict element names. + +### Rust and Python XML name parity + +The Rust accelerator and Python serializer should agree on supported XML name normalization cases so fast-path output does not drift silently. diff --git a/pyproject.toml b/pyproject.toml index 755659e..ab6524b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,6 @@ fast = ["json2xml-rs>=0.1.0"] [tool.pytest.ini_options] testpaths = ["tests"] python_files = ["test_*.py"] -xvs = true addopts = "--cov=json2xml --cov-report=xml:coverage/reports/coverage.xml --cov-report=term" [tool.ruff] exclude = [ diff --git a/tests/test_dict2xml.py b/tests/test_dict2xml.py index 37dfaee..bc4884b 100644 --- a/tests/test_dict2xml.py +++ b/tests/test_dict2xml.py @@ -1,3 +1,4 @@ +import copy import datetime import numbers from typing import TYPE_CHECKING, Any @@ -674,7 +675,76 @@ def test_make_valid_xml_name_invalid_chars(self) -> None: attr: dict[str, Any] = {} new_key, new_attr = dicttoxml.make_valid_xml_name(key, attr) assert new_key == "key" - assert new_attr == {"name": "<invalid>key"} + assert new_attr == {"name": "key"} + + def test_dicttoxml_val_none_emits_empty_element(self) -> None: + """Test @val=None serializes as empty text without leaking Python's repr.""" + result =
dicttoxml.dicttoxml( + {"field": {"@attrs": {"source": "api"}, "@val": None}}, + root=False, + attr_type=False, + ) + + assert result == b'' + assert b"None" not in result + + def test_dicttoxml_val_bool_serializes_lowercase(self) -> None: + """Test @val booleans serialize as lowercase XML text with attributes intact.""" + result = dicttoxml.dicttoxml( + { + "active": {"@attrs": {"flag": "yes"}, "@val": True}, + "disabled": {"@attrs": {"flag": "no"}, "@val": False}, + }, + root=False, + attr_type=False, + ) + + assert result == ( + b'true' + b'false' + ) + assert b"True" not in result + assert b"False" not in result + + # @lat: [[tests#Conversion behavior#Special attributes do not mutate input]] + def test_dicttoxml_does_not_mutate_special_attribute_input(self) -> None: + """Test @attrs and @val conversion leaves caller data untouched.""" + data: dict[str, Any] = { + "product": { + "@attrs": {"sku": "bike-1"}, + "@val": "Road bike", + }, + "items": [ + { + "@attrs": {"position": "1"}, + "@val": 42, + } + ], + } + original = copy.deepcopy(data) + + result = dicttoxml.dicttoxml(data, root=False, attr_type=False) + + assert result == ( + b'Road bike' + b'42' + ) + assert data == original + + # @lat: [[tests#Conversion behavior#Invalid XML names normalize without double escaping]] + def test_invalid_xml_name_fallback_escapes_name_attribute_once(self) -> None: + """Test fallback name attributes are escaped once at emission time.""" + result = dicttoxml.dicttoxml({"a&b": "value"}, root=False, attr_type=False) + assert result == b'value' + + # @lat: [[tests#Conversion behavior#Flat suffix never creates invalid XML tags]] + def test_flat_suffix_on_scalar_and_dict_keys_stays_well_formed(self) -> None: + """Test @flat suffix keys do not leak into scalar or dict element names.""" + scalar = dicttoxml.dicttoxml({"name@flat": "Bike"}, root=False, attr_type=False) + nested = dicttoxml.dicttoxml({"item@flat": {"name": "Bike"}}, root=False, attr_type=False) + + assert scalar == 
b"Bike" + assert nested == b"Bike" def test_dict2xml_str_invalid_type(self) -> None: """Test dict2xml_str with invalid type.""" diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index 9231eed..5921c5e 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -4,6 +4,7 @@ import json from pyexpat import ExpatError +from typing import Any import pytest import xmltodict @@ -78,6 +79,22 @@ def test_json_to_xml_empty_data_conversion(self) -> None: xmldata = json2xml.Json2xml(data).to_xml() assert xmldata is None + # @lat: [[tests#Conversion behavior#Falsy JSON values convert to XML]] + @pytest.mark.parametrize( + ("data", "expected"), + [ + ({}, b""), + ([], b""), + (0, b"0"), + (False, b"false"), + ("", b""), + ], + ) + def test_json_to_xml_falsy_values(self, data: Any, expected: bytes) -> None: + xmldata = json2xml.Json2xml(data, pretty=False).to_xml() + assert isinstance(xmldata, bytes) + assert expected in xmldata + def test_custom_wrapper_and_indent(self) -> None: data = readfromstring( '{"login":"mojombo","id":1,"avatar_url":"https://avatars0.githubusercontent.com/u/1?v=4"}' diff --git a/tests/test_rust_dicttoxml.py b/tests/test_rust_dicttoxml.py index f7489ae..4c684a6 100644 --- a/tests/test_rust_dicttoxml.py +++ b/tests/test_rust_dicttoxml.py @@ -360,6 +360,27 @@ def test_very_large_integer_matches(self): rust, python = self.compare_outputs(data) assert rust == python + # @lat: [[tests#Conversion behavior#Rust and Python XML name parity]] + @pytest.mark.parametrize( + "data", + [ + {"123": "value"}, + {"my key": "value"}, + {"a&b": "value"}, + {"a None: + path = self.path.split("?", 1)[0] + status, body = self.responses.get(path, (404, b'{"error": "not found"}')) + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def log_message(self, format: str, *args: object) -> None: + pass + + +@pytest.fixture +# @lat: 
[[tests#Input readers#URL reader uses real HTTP and wraps failures]] +def json_server() -> "Iterator[str]": + server = ThreadingHTTPServer(("127.0.0.1", 0), JsonTestHandler) + thread = threading.Thread(target=server.serve_forever, daemon=True) + thread.start() + try: + host = server.server_address[0] + port = server.server_address[1] + yield f"http://{host}:{port}" + finally: + server.shutdown() + server.server_close() + thread.join(timeout=1) + + class TestExceptions: """Test custom exception classes.""" @@ -109,89 +154,42 @@ def test_readfromjson_os_error(self, mock_open: Mock) -> None: class TestReadFromUrl: """Test readfromurl function.""" - @patch('json2xml.utils.urllib3.PoolManager') - def test_readfromurl_success(self, mock_pool_manager: Mock) -> None: + def test_readfromurl_success(self, json_server: str) -> None: """Test successful URL reading.""" - # Mock response - mock_response = Mock() - mock_response.status = 200 - mock_response.data = b'{"key": "value", "number": 42}' - - # Mock PoolManager - mock_http = Mock() - mock_http.request.return_value = mock_response - mock_pool_manager.return_value = mock_http - - result = readfromurl("http://example.com/data.json") + result = readfromurl(f"{json_server}/data.json") assert result == {"key": "value", "number": 42} - mock_pool_manager.assert_called_once() - mock_http.request.assert_called_once_with("GET", "http://example.com/data.json", fields=None) - @patch('json2xml.utils.urllib3.PoolManager') - def test_readfromurl_success_with_params(self, mock_pool_manager: Mock) -> None: + def test_readfromurl_success_with_params(self, json_server: str) -> None: """Test successful URL reading with parameters.""" - # Mock response - mock_response = Mock() - mock_response.status = 200 - mock_response.data = b'{"result": "success"}' - - # Mock PoolManager - mock_http = Mock() - mock_http.request.return_value = mock_response - mock_pool_manager.return_value = mock_http - params = {"param1": "value1", "param2": "value2"} - 
result = readfromurl("http://example.com/api", params=params) + result = readfromurl(f"{json_server}/api", params=params) assert result == {"result": "success"} - mock_http.request.assert_called_once_with("GET", "http://example.com/api", fields=params) - @patch('json2xml.utils.urllib3.PoolManager') - def test_readfromurl_http_error(self, mock_pool_manager: Mock) -> None: + def test_readfromurl_http_error(self, json_server: str) -> None: """Test URL reading with HTTP error status.""" - # Mock response with error status - mock_response = Mock() - mock_response.status = 404 - - # Mock PoolManager - mock_http = Mock() - mock_http.request.return_value = mock_response - mock_pool_manager.return_value = mock_http - with pytest.raises(URLReadError, match="URL is not returning correct response"): - readfromurl("http://example.com/nonexistent.json") + readfromurl(f"{json_server}/nonexistent.json") - @patch('json2xml.utils.urllib3.PoolManager') - def test_readfromurl_server_error(self, mock_pool_manager: Mock) -> None: + def test_readfromurl_server_error(self, json_server: str) -> None: """Test URL reading with server error status.""" - # Mock response with server error status - mock_response = Mock() - mock_response.status = 500 - - # Mock PoolManager - mock_http = Mock() - mock_http.request.return_value = mock_response - mock_pool_manager.return_value = mock_http - with pytest.raises(URLReadError, match="URL is not returning correct response"): - readfromurl("http://example.com/error.json") + readfromurl(f"{json_server}/error.json") - @patch('json2xml.utils.urllib3.PoolManager') - def test_readfromurl_invalid_json_response(self, mock_pool_manager: Mock) -> None: + def test_readfromurl_invalid_json_response(self, json_server: str) -> None: """Test URL reading with invalid JSON response.""" - # Mock response with invalid JSON - mock_response = Mock() - mock_response.status = 200 - mock_response.data = b'invalid json content' + with pytest.raises(URLReadError, match="URL did 
not return valid JSON"): + readfromurl(f"{json_server}/invalid.json") - # Mock PoolManager - mock_http = Mock() - mock_http.request.return_value = mock_response - mock_pool_manager.return_value = mock_http + def test_readfromurl_network_error(self) -> None: + """Test network failures are wrapped as URLReadError.""" + with socket.socket() as unused_socket: + unused_socket.bind(("127.0.0.1", 0)) + port = unused_socket.getsockname()[1] - with pytest.raises(json.JSONDecodeError): - readfromurl("http://example.com/invalid.json") + with pytest.raises(URLReadError, match="URL could not be read"): + readfromurl(f"http://127.0.0.1:{port}/data.json") class TestReadFromString: @@ -287,22 +285,11 @@ def test_readfromstring_then_convert_to_xml(self) -> None: assert b"test" in xml_result assert b"123" in xml_result - @patch('json2xml.utils.urllib3.PoolManager') - def test_readfromurl_then_convert_to_xml(self, mock_pool_manager: Mock) -> None: + def test_readfromurl_then_convert_to_xml(self, json_server: str) -> None: """Test reading from URL and converting to XML.""" from json2xml import dicttoxml - # Mock response - mock_response = Mock() - mock_response.status = 200 - mock_response.data = b'{"api": "response", "status": "ok"}' - - # Mock PoolManager - mock_http = Mock() - mock_http.request.return_value = mock_response - mock_pool_manager.return_value = mock_http - - data = readfromurl("http://example.com/api.json") + data = readfromurl(f"{json_server}/api.json") xml_result = dicttoxml.dicttoxml(data, attr_type=False, root=False) assert b"response" in xml_result