Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion parse_errors/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
"""re-raise parse errors with filename and line number."""

from .context import ParseContext, ParseError

try:
from ._version import __version__
except ImportError: # pragma: no cover
__version__ = "dev"

# Public API of the package; both names are imported from .context above.
__all__ = [
    "ParseContext",
    "ParseError",
]
70 changes: 70 additions & 0 deletions parse_errors/_jsonpath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
"""Convert JSONPath expressions to JSON Pointer (RFC 6901)."""

from __future__ import annotations

import re


# Matches a single step in a JSONPath: .key or [index] or ['key'] or ["key"].
# Each alternative captures its payload in its own named group (name / idx /
# sq / dq), so exactly one of the four groups is set on a successful match.
# No escape sequences are recognised inside the quoted forms.
_STEP = re.compile(
    r"\.(?P<name>[^.\[]+)"  # .key     -- runs up to the next "." or "["
    r"|\[(?P<idx>\d+)\]"  # [0]      -- decimal array index
    r"|\[\'(?P<sq>[^\']*)\'\]"  # ['key']  -- single-quoted key, may be empty
    r'|\["(?P<dq>[^"]*)"\]'  # ["key"]  -- double-quoted key, may be empty
)

# Pattern to extract JSONPath from msgspec-style error messages: "... - at `$.foo.bar`"
# Group 1 is the path itself: a "$" followed by everything up to (but not
# including) the closing backtick.
_AT_PATH = re.compile(r" - at `(\$[^`]*)`")


def jsonpath_to_pointer(jsonpath: str) -> str:
    """Convert a JSONPath such as ``$.foo[0].bar`` to a JSON Pointer (``/foo/0/bar``).

    Only supports the simple step forms produced by msgspec: ``.key``,
    ``[0]``, ``['key']`` and ``["key"]``. Filter expressions and wildcards
    are not supported.

    Args:
        jsonpath: A JSONPath string starting with ``$``.

    Returns:
        A JSON Pointer string (RFC 6901), e.g. ``/foo/0/bar``. The root path
        ``$`` maps to the empty pointer ``""``.

    Raises:
        ValueError: If ``jsonpath`` does not start with ``$`` or contains a
            step that cannot be parsed.
    """
    if jsonpath == "$":
        return ""
    if not jsonpath.startswith("$"):
        raise ValueError(f"JSONPath must start with '$', got: {jsonpath!r}")

    tail = jsonpath[1:]  # strip leading $
    parts: list[str] = []

    pos = 0
    while pos < len(tail):
        m = _STEP.match(tail, pos)
        if m is None:
            raise ValueError(
                f"Cannot parse JSONPath step at position {pos}: {tail[pos:]!r}"
            )
        # Exactly one of the named groups participates in a match. Select it
        # by ``is not None`` rather than truthiness: an empty quoted key
        # (``['']``) captures "" and must still yield an (empty) segment
        # instead of falling through to ``None``.
        segment = next(
            value
            for value in m.group("name", "idx", "sq", "dq")
            if value is not None
        )
        parts.append(_escape(segment))
        pos = m.end()

    return "/" + "/".join(parts) if parts else ""


def extract_jsonpath(message: str) -> str | None:
    """Pull the JSONPath out of a msgspec-style error message.

    The message is searched for the first occurrence of the pattern
    ``- at `$.path```.

    Args:
        message: The exception message string.

    Returns:
        The JSONPath string (including the leading ``$``) if the pattern is
        present, otherwise ``None``.
    """
    found = _AT_PATH.search(message)
    if found is None:
        return None
    return found.group(1)


def _escape(segment: str) -> str:
    """Escape ``segment`` for use as a JSON Pointer reference token (RFC 6901).

    ``~`` becomes ``~0`` and ``/`` becomes ``~1``; every other character is
    kept unchanged.
    """
    substitutions = {"~": "~0", "/": "~1"}
    return "".join(substitutions.get(char, char) for char in segment)
83 changes: 83 additions & 0 deletions parse_errors/context.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Context manager for better parse error messages."""

from __future__ import annotations

import os
import contextlib
from pathlib import Path
from typing import Iterator

from .source_map import detect_format, build_source_map, closest_entry
from ._jsonpath import extract_jsonpath, jsonpath_to_pointer

__all__ = ["ParseError", "ParseContext"]


class ParseError(Exception):
    """A parse or validation error augmented with filename and line number.

    Args:
        message: Text of the error; becomes ``str(error)``.
        filename: File the error refers to. Stored as ``str(filename)``.
        line: Line number of the error, stored as given.
        column: Column number of the error, stored as given. Defaults to 0.
    """

    def __init__(
        self, message: str, filename: str | os.PathLike[str], line: int, column: int = 0
    ):
        super().__init__(message)
        # Location attributes are plain instance attributes so callers can
        # read them without parsing the message.
        self.filename = str(filename)
        self.line = line
        self.column = column


@contextlib.contextmanager
def ParseContext(
    filename: str | os.PathLike[str],
    *,
    data: str | bytes | None = None,
    format: str | None = None,
) -> Iterator[None]:
    """Context manager that re-raises parse/validation errors with location info.

    Catches exceptions whose message contains a JSONPath (e.g. as emitted by
    msgspec) and re-raises a :class:`ParseError` with the filename and
    1-based line and column derived from the file's source map.

    Whenever a location cannot be determined (no JSONPath in the message, an
    unparseable JSONPath, an undetectable file format, or no matching source
    map entry) the original exception propagates unchanged.

    Args:
        filename: Path to the file being parsed.
        data: The file contents as a string or bytes (UTF-8). If provided, the
            file is not read from disk. Regardless of type, reported
            locations (line, column) are always in characters, not bytes.
        format: One of ``"json"``, ``"yaml"``, or ``"toml"``. If omitted the
            format is inferred from the file extension.

    Raises:
        ParseError: In place of the caught exception (which is kept as
            ``__cause__``) when its location could be resolved.
    """
    try:
        yield
    except Exception as exc:
        message = str(exc)
        # This is focused on msgspec-style exceptions, which report the
        # failing location as a JSONPath. If other formats are known to be
        # raised, adjust this.
        jsonpath = extract_jsonpath(message)
        if jsonpath is None:
            raise

        try:
            pointer = jsonpath_to_pointer(jsonpath)
        except ValueError:  # pragma: no cover
            pointer = None
        if pointer is None:  # pragma: no cover
            # Re-raise outside the ``except ValueError`` handler so the
            # internal conversion error is not chained onto the user's
            # exception. ``""`` is a valid (root) pointer, hence ``is None``.
            raise

        path = Path(filename)
        fmt = format or detect_format(path)
        if fmt is None:
            # No explicit format and none detected: a location cannot be
            # computed, so surface the original error rather than an
            # AssertionError (or, under ``-O``, a failure further down).
            raise

        source = data if data is not None else path.read_bytes()
        source_map = build_source_map(source, fmt)

        entry = closest_entry(source_map, pointer)
        if entry is None:  # pragma: no cover
            raise

        loc = entry.value_start
        # Source maps are 0-based; convert to 1-based for humans.
        line = loc.line + 1
        column = loc.column + 1
        raise ParseError(
            f"{path}:{line}:{column}: {message}",
            filename=path,
            line=line,
            column=column,
        ) from exc
10 changes: 6 additions & 4 deletions parse_errors/toml_source_map/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import tree_sitter_toml
import tree_sitter as ts
from ..source_map import Entry, Location, TSourceMap
from .._jsonpath import _escape


def calculate(source: str | bytes) -> TSourceMap:
Expand Down Expand Up @@ -124,6 +125,9 @@ def _pair_key_value(node: ts.Node) -> tuple[ts.Node, ts.Node]:
key_node = child
elif child.type not in ("=", "comment"):
value_node = child

assert key_node is not None
assert value_node is not None
return key_node, value_node


Expand All @@ -138,8 +142,10 @@ def _table_key(node: ts.Node) -> ts.Node:
def _key_segments(node: ts.Node) -> list[str]:
"""Extract key path segments from a key node."""
if node.type == "bare_key":
assert node.text is not None
return [node.text.decode()]
elif node.type == "quoted_key":
assert node.text is not None
return [_unquote(node.text.decode())]
elif node.type == "dotted_key":
return sum((_key_segments(child) for child in node.children), [])
Expand Down Expand Up @@ -176,10 +182,6 @@ def _to_pointer(segments: list[str]) -> str:
return "/" + "/".join(_escape(s) for s in segments) if segments else ""


def _escape(key: str) -> str:
    """Escape ``key`` for use as a JSON Pointer reference token (RFC 6901)."""
    # NOTE(review): this module also imports ``_escape`` from ``.._jsonpath``;
    # presumably only one of the two definitions should remain -- confirm.
    # "~" is handled first so the "~" introduced by "~1" is not escaped again.
    tilde_escaped = key.replace("~", "~0")
    fully_escaped = tilde_escaped.replace("/", "~1")
    return fully_escaped


def _loc(point: ts.Point, src: bytes) -> Location:
# tree-sitter gives byte-based row/column; convert to char-based for consistency
# with json-source-map and yaml-source-map.
Expand Down
1 change: 1 addition & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Needed so pytest considers this a package -- PEP 420 isn't enough
10 changes: 10 additions & 0 deletions tests/_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import msgspec


class Config(msgspec.Struct):
    """Flat test schema with a string ``host`` and an integer ``port``."""

    host: str
    port: int


class Nested(msgspec.Struct):
    """Test schema that nests a :class:`Config` under the ``server`` key."""

    server: Config
103 changes: 59 additions & 44 deletions tests/test_parse_context_toml.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,82 @@
from parse_errors import toml_source_map
try:
import tomllib
except ImportError:
import tomli as tomllib # type: ignore[no-redef]

import pytest
import msgspec

TRICKY = """\
# comment
"quoted key" = 42
dotted.key = true # inline comment after value
from parse_errors import ParseContext, ParseError

[server] # inline comment on table header
host = "localhost"
path = "/etc/ssl/key.pem" # value contains key substring
"""
from ._types import Config, Nested


def test_toml_quoted_key_present():
sm = toml_source_map.calculate(TRICKY)
assert "/quoted key" in sm, f"missing '/quoted key', got: {sorted(sm)}"
TOML_SOURCE = """\
host = "localhost"
port = "not-an-int"
"""

TOML_NESTED_SOURCE = """\
[server]
host = "localhost"
port = "not-an-int"
"""

def test_toml_val_end_excludes_comment():
sm = toml_source_map.calculate(TRICKY)
entry = sm["/dotted/key"]
val = TRICKY[entry.value_start.position : entry.value_end.position]
assert val == "true", f"got {val!r}"

def test_passthrough_non_jsonspec():
with pytest.raises(ValueError, match="^foo$"):
with ParseContext("config.toml", data=TOML_SOURCE):
raise ValueError("foo")

# --- _escape: keys containing / and ~ ---

ESCAPE_SOURCE = """\
"path/to/thing" = 1
"tilde~zero" = 2
"""
def test_toml_raises_parse_error():
with pytest.raises(ParseError) as exc_info:
with ParseContext("config.toml", data=TOML_SOURCE):
data = tomllib.loads(TOML_SOURCE)
msgspec.convert(data, Config)

err = exc_info.value
assert err.filename == "config.toml"
assert err.line == 2
assert str(err) == "config.toml:2:8: Expected `int`, got `str` - at `$.port`"

def test_toml_escape_slash_in_key():
sm = toml_source_map.calculate(ESCAPE_SOURCE)
assert "/path~1to~1thing" in sm

def test_toml_bytes_data():
with pytest.raises(ParseError) as exc_info:
with ParseContext("config.toml", data=TOML_SOURCE.encode()):
data = tomllib.loads(TOML_SOURCE)
msgspec.convert(data, Config)

def test_toml_escape_tilde_in_key():
sm = toml_source_map.calculate(ESCAPE_SOURCE)
assert "/tilde~0zero" in sm
assert (
str(exc_info.value)
== "config.toml:2:8: Expected `int`, got `str` - at `$.port`"
)


# --- non-consecutive array-of-tables with intermediate sub-table ---
def test_toml_nested_raises_parse_error():
with pytest.raises(ParseError) as exc_info:
with ParseContext("config.toml", data=TOML_NESTED_SOURCE):
data = tomllib.loads(TOML_NESTED_SOURCE)
msgspec.convert(data, Nested)

AOT_SOURCE = """\
[[fruits]]
name = "apple"
err = exc_info.value
assert str(err) == "config.toml:3:8: Expected `int`, got `str` - at `$.server.port`"

[fruits.details]
color = "red"

[bar]
# --- fallback to nearest parent pointer ---

[[fruits]]
name = "banana"
FALLBACK_SOURCE = """\
[server]
port = 8080
"""


def test_toml_nonconsecutive_aot():
sm = toml_source_map.calculate(AOT_SOURCE)
# [fruits.details] appears after the first [[fruits]], so it belongs to fruits[0]
assert "/fruits/0/details" in sm, f"got: {sorted(sm)}"
assert "/fruits/0/details/color" in sm
# The erroneous flat entry must not exist
assert "/fruits/details" not in sm
def test_toml_fallback_to_parent():
# Inject a fake error at a path deeper than the source map tracks.
# /server/tls/cert doesn't exist; should fall back to /server (line 1).
with pytest.raises(ParseError) as exc_info:
with ParseContext("config.toml", data=FALLBACK_SOURCE):
raise msgspec.ValidationError(
"Expected `str`, got `int` - at `$.server.tls.cert`"
)
assert exc_info.value.line == 1
4 changes: 3 additions & 1 deletion tests/test_source_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@


def test_build_toml():
# This isn't an exhaustive test of the TOML source mapper, just a minimal
# example that lets us exercise str/bytes.
sm1 = build_source_map("x=1\nb='foo'\n", fmt="toml")
assert sm1 == {
"": Entry(
Expand All @@ -22,7 +24,7 @@ def test_build_toml():
),
}
sm2 = build_source_map(b"x=1\nb='foo'\n", fmt="toml")
# Only ascii, so str vs bytes should be the same
# Only ASCII, so str vs bytes should be the same
assert sm1 == sm2


Expand Down
Loading