From a0ffe217eb1d9e05d846aeb783931bb835e06b95 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 8 Apr 2026 15:18:08 +0000 Subject: [PATCH 01/10] feat(sema): add precise identifier spans for declarations and bindings This introduces an optional `name_span` field on symbols, capturing the exact identifier token span when available. The semantic model builder and AST nodes now extract and store precise name spans for top-level declarations (relations, functions, types) and for rule-local bindings (head bindings, assignments, for-loop patterns). The unused-variable lint and diagnostics have been updated to use these precise spans where possible for improved diagnostic highlighting. Also includes new semantic-model tests verifying that the precise identifier spans are captured and differ from the coarser span previously used. Co-authored-by: devboxerhub[bot] --- docs/ddlint-design.md | 55 ++++--- .../rules/correctness/unused_variable.rs | 11 +- src/parser/ast/function.rs | 7 + src/parser/ast/mod.rs | 27 +++- src/parser/ast/relation.rs | 7 + src/parser/ast/type_def.rs | 7 + src/sema/builder.rs | 84 ++++++++--- src/sema/model.rs | 7 + src/sema/tests.rs | 1 + src/sema/tests/name_span.rs | 134 ++++++++++++++++++ src/sema/traverse.rs | 44 ++++-- 11 files changed, 320 insertions(+), 64 deletions(-) create mode 100644 src/sema/tests/name_span.rs diff --git a/docs/ddlint-design.md b/docs/ddlint-design.md index 86a29518..044686bb 100644 --- a/docs/ddlint-design.md +++ b/docs/ddlint-design.md @@ -447,7 +447,9 @@ The core contracts are: semantic failures without leaking internal implementation types. - Provenance: semantic nodes must retain source mappings back to the lossless syntax layer for rule heads, body terms, aggregations, attributes, and - adornments, so diagnostics and later rewrites remain explainable. + adornments, so diagnostics and later rewrites remain explainable. For symbol + records specifically, coarse declaration or literal provenance must stay + distinct from any precise identifier-token span used for diagnostics. - Semantic model completeness: `ddlog-sema` must expose typed structures for constructs that downstream planners care about, including joins, filters, projections, aggregations, recursion boundaries, and semantic facts such as @@ -734,14 +736,24 @@ genuine reads. `for`-loop pattern, and it treats a binding as used only when semantic resolution maps a `UseKind::Variable` site back to that exact symbol. The wildcard `_` remains an explicit ignore and is never recorded as a -warning-eligible binding. As of roadmap item `4.1.2`, rule-local binding spans -still point at the enclosing rule or literal site rather than the exact -identifier token, so diagnostics for these rules use that existing coarse -provenance. The semantic model deliberately stays on the `Span` side of that -boundary; conversion into diagnostic `rowan::TextRange` values happens in the -linter layer through one shared boundary helper in `src/linter/span_utils.rs`. -This keeps a single conversion point, rather than per-rule glue, for -correctness diagnostics. +warning-eligible binding. + +Each `Symbol` now carries two related provenance fields: + +- `span` remains the existing coarse provenance used for debugging, maps, and + ownership; it points at the enclosing declaration, rule, or body term that + introduced the symbol. +- `name_span` is an additive optional field that points at the precise + identifier token when semantic collection had CST access to capture it. + +Rules such as `unused-variable` should prefer `name_span` for diagnostic +highlighting and fall back to `span` only when precise token provenance is not +available, such as parse-time `SemanticRule` values that retain only lowered +expressions plus a coarse source span. The semantic model deliberately stays on +the `Span` side of that boundary; conversion into diagnostic +`rowan::TextRange` values happens in the linter layer through one shared +boundary helper in `src/linter/span_utils.rs`. This keeps a single conversion +point, rather than per-rule glue, for correctness diagnostics. Resolution is deliberately tri-state: @@ -763,8 +775,9 @@ body scope and do not escape to later top-level literals. This pass consumes both AST-backed rules and parse-time `SemanticRule` values from top-level `for` desugaring, so the semantic view matches the parser's -existing rule surface. Provenance is retained through source spans on -declarations and on the enclosing literal or rule site for rule-local facts. +existing rule surface. Provenance is retained through coarse source spans on +declarations and on the enclosing literal or rule site for rule-local facts, +with precise identifier-token spans captured additively when CST access exists. ### 3.3. Initial lint rule catalog @@ -775,16 +788,16 @@ suggestions, establishing a solid foundation of essential lints. Table: DDLint rule catalogue and metadata. -| Rule Name | Group | Default Level | Autofixable | Description | -| ---------------------- | ----------- | ------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| unused-relation | correctness | warn | No | Detects declared relations with no resolved read-like uses in rule bodies, `for` iterables, or `for` guards; rule heads count only as writes. | -| unused-variable | correctness | warn | No | Detects rule-local bindings from rule heads, assignment patterns, or `for` patterns that never receive a resolved variable use in the same rule; `_` is an explicit ignore. | -| shadowed-variable | correctness | warn | No | Detects when a variable binding in a literal shadows one from a preceding literal in the same rule body. | -| recursive-negation | correctness | error | No | Detects rules with recursion through negation, which leads to unsafe, non-monotonic programs. | -| inefficient-join-order | performance | hint | No | Evaluates rule bodies and suggests reordering atoms for a more efficient join plan, e.g., placing more restrictive literals first. | -| superfluous-group-by | performance | warn | Yes | Detects group_by clauses where the aggregation is trivial (e.g., grouping by all variables) and can be removed. | -| consistent-casing | style | allow | Yes | Enforces a consistent casing style for relation and type identifiers (e.g., PascalCase) and variables (e.g., snake_case). | -| no-magic-numbers | style | allow | No | Flags the use of unnamed numeric literals in rule bodies where a named constant might be clearer. | +| Rule Name | Group | Default Level | Autofixable | Description | +| ---------------------- | ----------- | ------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| unused-relation | correctness | warn | No | Detects declared relations with no resolved read-like uses in rule bodies, `for` iterables, or `for` guards; rule heads count only as writes. | +| unused-variable | correctness | warn | No | Detects rule-local bindings from rule heads, assignment patterns, or `for` patterns that never receive a resolved variable use in the same rule; `_` is an explicit ignore, and diagnostics prefer precise identifier-token spans when available. | +| shadowed-variable | correctness | warn | No | Detects when a variable binding in a literal shadows one from a preceding literal in the same rule body. | +| recursive-negation | correctness | error | No | Detects rules with recursion through negation, which leads to unsafe, non-monotonic programs. | +| inefficient-join-order | performance | hint | No | Evaluates rule bodies and suggests reordering atoms for a more efficient join plan, e.g., placing more restrictive literals first. | +| superfluous-group-by | performance | warn | Yes | Detects group_by clauses where the aggregation is trivial (e.g., grouping by all variables) and can be removed. | +| consistent-casing | style | allow | Yes | Enforces a consistent casing style for relation and type identifiers (e.g., PascalCase) and variables (e.g., snake_case). | +| no-magic-numbers | style | allow | No | Flags the use of unnamed numeric literals in rule bodies where a named constant might be clearer. | This table serves as a concrete work breakdown for the engineering team and clearly communicates the linter's initial capabilities and priorities to early diff --git a/src/linter/rules/correctness/unused_variable.rs b/src/linter/rules/correctness/unused_variable.rs index b7b9633c..904d7ac0 100644 --- a/src/linter/rules/correctness/unused_variable.rs +++ b/src/linter/rules/correctness/unused_variable.rs @@ -3,8 +3,8 @@ //! The rule operates over semantic `RuleBinding` symbols rather than raw CST //! name matching so shadowing, unresolved names, and wildcard ignores follow //! the semantic model's existing contracts. Diagnostics currently use the -//! binding symbol's stored span, which points at the enclosing rule or literal -//! rather than the exact identifier token. +//! binding symbol's precise identifier span when semantic collection captured +//! one, falling back to the existing coarse provenance otherwise. use crate::linter::{LintDiagnostic, Rule}; use crate::{SyntaxKind, declare_lint}; @@ -30,7 +30,8 @@ declare_lint! { continue; } - let Some(range) = crate::linter::span_utils::span_to_text_range(symbol.span()) else { + let span = symbol.name_span().unwrap_or_else(|| symbol.span()); + let Some(range) = crate::linter::span_utils::span_to_text_range(span) else { continue; }; @@ -69,6 +70,10 @@ mod tests { .map(crate::linter::LintDiagnostic::message), Some("variable `head_x` is defined but never used in this rule") ); + assert_eq!( + diagnostics.first().map(crate::linter::LintDiagnostic::span), + Some(rowan::TextRange::new(7.into(), 13.into())) + ); } #[rstest] diff --git a/src/parser/ast/function.rs b/src/parser/ast/function.rs index 6b0bec97..2953cb7b 100644 --- a/src/parser/ast/function.rs +++ b/src/parser/ast/function.rs @@ -29,6 +29,13 @@ impl Function { }) } + /// Precise source span for the function name token, if present. + #[must_use] + pub fn name_span(&self) -> Option { + self.name() + .and_then(|name| super::find_identifier_span(&self.syntax, &name)) + } + /// Returns `true` if declared with the `extern` keyword. #[must_use] pub fn is_extern(&self) -> bool { diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 85737e8f..bf0de449 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -9,10 +9,11 @@ //! tests and fixtures to assemble match expressions without dipping into //! private modules. -use rowan::SyntaxElement; +use rowan::{NodeOrToken, SyntaxElement, SyntaxNode}; use self::parse_utils::is_trivia; -use crate::{DdlogLanguage, SyntaxKind}; +use crate::parser::ast::rule::text_range_to_span; +use crate::{DdlogLanguage, Span, SyntaxKind}; /// Common interface for AST wrappers. pub(crate) trait AstNode { @@ -20,6 +21,28 @@ pub(crate) trait AstNode { fn syntax(&self) -> &rowan::SyntaxNode; } +/// Find the first identifier token with matching text within a syntax subtree. +#[must_use] +pub(crate) fn find_identifier_span(syntax: &SyntaxNode, name: &str) -> Option { + for child in syntax.children_with_tokens() { + match child { + NodeOrToken::Token(token) + if token.kind() == SyntaxKind::T_IDENT && token.text() == name => + { + return Some(text_range_to_span(token.text_range())); + } + NodeOrToken::Node(node) => { + if let Some(span) = find_identifier_span(&node, name) { + return Some(span); + } + } + NodeOrToken::Token(_) => {} + } + } + + None +} + macro_rules! impl_ast_node { ($ty:ty) => { impl AstNode for $ty { diff --git a/src/parser/ast/relation.rs b/src/parser/ast/relation.rs index ccd19f04..bd78bf88 100644 --- a/src/parser/ast/relation.rs +++ b/src/parser/ast/relation.rs @@ -58,6 +58,13 @@ impl Relation { }) } + /// Precise source span for the relation name token, if present. + #[must_use] + pub fn name_span(&self) -> Option { + self.name() + .and_then(|name| super::find_identifier_span(&self.syntax, &name)) + } + /// Returns `true` if declared with the `input` keyword. #[must_use] pub fn is_input(&self) -> bool { diff --git a/src/parser/ast/type_def.rs b/src/parser/ast/type_def.rs index 2541850f..30023d35 100644 --- a/src/parser/ast/type_def.rs +++ b/src/parser/ast/type_def.rs @@ -25,6 +25,13 @@ impl TypeDef { super::take_first_ident(iter) } + /// Precise source span for the type name token, if present. + #[must_use] + pub fn name_span(&self) -> Option { + self.name() + .and_then(|name| super::find_identifier_span(&self.syntax, &name)) + } + /// Whether this declaration is `extern`. #[must_use] pub fn is_extern(&self) -> bool { diff --git a/src/sema/builder.rs b/src/sema/builder.rs index 15aaa6fa..c7261d8c 100644 --- a/src/sema/builder.rs +++ b/src/sema/builder.rs @@ -7,6 +7,7 @@ use rowan::SyntaxNode; use crate::Span; use crate::SyntaxKind; use crate::parser::ast; +use crate::parser::ast::AstNode; use crate::parser::ast::SemanticRule; use crate::parser::ast::rule::text_range_to_span; use crate::sema::model::{ @@ -14,7 +15,7 @@ use crate::sema::model::{ SymbolId, SymbolOrigin, UseKind, }; -use super::resolve::collect_pattern_binding_names; +use super::resolve::{collect_head_binding_names, collect_pattern_binding_names}; pub(crate) struct SymbolSpec { pub(crate) name: String, @@ -22,6 +23,7 @@ pub(crate) struct SymbolSpec { pub(crate) origin: SymbolOrigin, pub(crate) scope: ScopeId, pub(crate) span: Span, + pub(crate) name_span: Option, pub(crate) source_order: usize, pub(crate) visible_from_rule_order: usize, } @@ -126,6 +128,7 @@ impl SemanticModelBuilder { let mut order = 0usize; let mut declare_from_child = |name: Option, + name_span: Option, kind: DeclarationKind, origin: SymbolOrigin, child: &SyntaxNode| { @@ -136,6 +139,7 @@ impl SemanticModelBuilder { origin, scope: self.program_scope, span: text_range_to_span(child.text_range()), + name_span, source_order: order, visible_from_rule_order: 0, }); @@ -146,28 +150,52 @@ impl SemanticModelBuilder { for child in root.syntax().children() { match child.kind() { SyntaxKind::N_RELATION_DECL => declare_from_child( - ast::Relation { - syntax: child.clone(), - } - .name(), + { + let relation = ast::Relation { + syntax: child.clone(), + }; + relation.name() + }, + { + let relation = ast::Relation { + syntax: child.clone(), + }; + relation.name_span() + }, DeclarationKind::Relation, SymbolOrigin::RelationDeclaration, &child, ), SyntaxKind::N_FUNCTION => declare_from_child( - ast::Function { - syntax: child.clone(), - } - .name(), + { + let function = ast::Function { + syntax: child.clone(), + }; + function.name() + }, + { + let function = ast::Function { + syntax: child.clone(), + }; + function.name_span() + }, DeclarationKind::Function, SymbolOrigin::FunctionDeclaration, &child, ), SyntaxKind::N_TYPE_DEF => declare_from_child( - ast::TypeDef { - syntax: child.clone(), - } - .name(), + { + let type_def = ast::TypeDef { + syntax: child.clone(), + }; + type_def.name() + }, + { + let type_def = ast::TypeDef { + syntax: child.clone(), + }; + type_def.name_span() + }, DeclarationKind::Type, SymbolOrigin::TypeDeclaration, &child, @@ -197,17 +225,10 @@ impl SemanticModelBuilder { ); if let Ok(terms) = rule.body_terms() { - let spans: Vec = rule - .body_expression_nodes() - .into_iter() - .map(|node| node.span()) - .collect(); + let body_nodes = rule.body_expression_nodes(); - for (literal_index, term) in terms.into_iter().enumerate() { - let span = spans - .get(literal_index) - .cloned() - .unwrap_or_else(|| rule_span.clone()); + for (literal_index, (node, term)) in body_nodes.into_iter().zip(terms).enumerate() { + let span = node.span(); self.collect_rule_term( super::variables::VariableUseContext::new( rule_scope, @@ -215,6 +236,7 @@ impl SemanticModelBuilder { &span, literal_index, ), + Some(node.syntax()), &term, ); } @@ -243,8 +265,22 @@ impl SemanticModelBuilder { origin: SymbolOrigin::SemanticRuleHead, }, ); + for binding_name in collect_head_binding_names(rule.head()) { + self.declare_symbol(SymbolSpec { + name: binding_name, + kind: DeclarationKind::RuleBinding, + origin: SymbolOrigin::SemanticRuleHead, + scope: rule_scope, + span: rule_span.clone(), + name_span: None, + source_order: self.symbols.len(), + visible_from_rule_order: 0, + }); + } for pattern in rule.patterns() { + // Parse-time semantic rules do not currently preserve CST token + // handles, so precise identifier spans are unavailable here. for binding_name in collect_pattern_binding_names(pattern) { self.declare_symbol(SymbolSpec { name: binding_name, @@ -252,6 +288,7 @@ impl SemanticModelBuilder { origin: SymbolOrigin::ForPattern, scope: rule_scope, span: rule_span.clone(), + name_span: None, source_order: self.symbols.len(), visible_from_rule_order: 0, }); @@ -297,6 +334,7 @@ impl SemanticModelBuilder { origin: spec.origin, scope: spec.scope, span: spec.span, + name_span: spec.name_span, source_order: spec.source_order, visible_from_rule_order: spec.visible_from_rule_order, }); diff --git a/src/sema/model.rs b/src/sema/model.rs index 015a56a6..735cea96 100644 --- a/src/sema/model.rs +++ b/src/sema/model.rs @@ -175,6 +175,7 @@ pub struct Symbol { pub(crate) origin: SymbolOrigin, pub(crate) scope: ScopeId, pub(crate) span: Span, + pub(crate) name_span: Option, pub(crate) source_order: usize, pub(crate) visible_from_rule_order: usize, } @@ -210,6 +211,12 @@ impl Symbol { &self.span } + /// Precise source span for the identifier token, when captured. + #[must_use] + pub fn name_span(&self) -> Option<&Span> { + self.name_span.as_ref() + } + /// Stable source-order index within the owning scope. #[must_use] pub fn source_order(&self) -> usize { diff --git a/src/sema/tests.rs b/src/sema/tests.rs index 42409629..f8c307eb 100644 --- a/src/sema/tests.rs +++ b/src/sema/tests.rs @@ -388,4 +388,5 @@ fn relation_read_helpers_ignore_head_only_and_unresolved_uses( ); } +mod name_span; mod unused_variable; diff --git a/src/sema/tests/name_span.rs b/src/sema/tests/name_span.rs new file mode 100644 index 00000000..dfed6154 --- /dev/null +++ b/src/sema/tests/name_span.rs @@ -0,0 +1,134 @@ +//! Semantic-model tests for precise identifier spans. + +use rstest::rstest; + +use super::{DeclarationKind, parse_ok, symbols_named}; +use crate::sema::SymbolOrigin; + +fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { + source.get(span.start..span.end).unwrap_or_else(|| { + panic!( + "invalid UTF-8 boundary for span {}..{} in `{source}`", + span.start, span.end + ) + }) +} + +fn find_symbol<'a>( + model: &'a super::super::SemanticModel, + name: &str, + kind: DeclarationKind, + origin: SymbolOrigin, +) -> &'a super::super::Symbol { + symbols_named(model, name, kind) + .into_iter() + .find(|symbol| symbol.origin() == origin) + .unwrap_or_else(|| panic!("missing symbol `{name}` with origin {origin:?}")) +} + +#[rstest] +#[case( + "input relation Source(x: u32)", + "Source", + DeclarationKind::Relation, + SymbolOrigin::RelationDeclaration, + "Source" +)] +#[case( + "function project(id: u32): u32 {}", + "project", + DeclarationKind::Function, + SymbolOrigin::FunctionDeclaration, + "project" +)] +#[case( + "typedef UserId = u64", + "UserId", + DeclarationKind::Type, + SymbolOrigin::TypeDeclaration, + "UserId" +)] +fn top_level_declarations_capture_identifier_name_spans( + #[case] source: &str, + #[case] symbol_name: &str, + #[case] kind: DeclarationKind, + #[case] origin: SymbolOrigin, + #[case] expected_text: &str, +) { + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol(&semantic_model, symbol_name, kind, origin); + + let name_span = symbol + .name_span() + .unwrap_or_else(|| panic!("missing name_span for `{symbol_name}`")); + + assert_eq!(span_text(source, name_span), expected_text); + assert_ne!(name_span, symbol.span()); +} + +#[rstest] +fn rule_head_bindings_capture_identifier_name_spans() { + let source = "Output(head_x) :- Source(_)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "head_x", + DeclarationKind::RuleBinding, + SymbolOrigin::RuleHead, + ); + + let name_span = symbol + .name_span() + .unwrap_or_else(|| panic!("missing head binding name_span")); + + assert_eq!(span_text(source, name_span), "head_x"); + assert_eq!(span_text(source, symbol.span()), source); +} + +#[rstest] +fn assignment_bindings_capture_identifier_name_spans() { + let source = "Output(result) :- Source(result), var assigned_x = Seed(result)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "assigned_x", + DeclarationKind::RuleBinding, + SymbolOrigin::AssignmentPattern, + ); + + let name_span = symbol + .name_span() + .unwrap_or_else(|| panic!("missing assignment binding name_span")); + + assert_eq!(span_text(source, name_span), "assigned_x"); + assert_eq!( + span_text(source, symbol.span()), + "var assigned_x = Seed(result)" + ); +} + +#[rstest] +fn for_loop_bindings_capture_identifier_name_spans() { + let source = "Output(result) :- Source(result), for (item_y in Items(result)) Inner(result)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "item_y", + DeclarationKind::RuleBinding, + SymbolOrigin::ForPattern, + ); + + let name_span = symbol + .name_span() + .unwrap_or_else(|| panic!("missing for-loop binding name_span")); + + assert_eq!(span_text(source, name_span), "item_y"); + assert_eq!( + span_text(source, symbol.span()), + "for (item_y in Items(result)) Inner(result)" + ); +} diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index dc93dbce..969330dc 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -1,7 +1,10 @@ //! Expression and rule-body traversal for semantic-model collection. +use rowan::SyntaxNode; + use crate::Span; use crate::parser::ast; +use crate::parser::ast::{AstNode, find_identifier_span}; use crate::parser::ast::{Expr, RuleBodyTerm}; use crate::sema::model::{ DeclarationKind, ScopeId, ScopeKind, ScopeOrigin, SymbolOrigin, UseKind, UseOrigin, UseSite, @@ -23,6 +26,18 @@ impl SemanticModelBuilder { if let Ok(heads) = rule.heads() { for head in heads { self.collect_head_expr(&head.atom, ctx); + for binding_name in collect_head_binding_names(&head.atom) { + self.declare_symbol(SymbolSpec { + name: binding_name.clone(), + kind: DeclarationKind::RuleBinding, + origin: ctx.origin, + scope: ctx.scope, + span: ctx.span.clone(), + name_span: find_identifier_span(rule.syntax(), &binding_name), + source_order: self.symbols.len(), + visible_from_rule_order: 0, + }); + } if let Some(location) = head.location.as_ref() { self.walk_variable_uses( location, @@ -39,31 +54,25 @@ impl SemanticModelBuilder { UseOrigin::RelationHead, expr, ); - for binding_name in collect_head_binding_names(expr) { - self.declare_symbol(SymbolSpec { - name: binding_name, - kind: DeclarationKind::RuleBinding, - origin: ctx.origin, - scope: ctx.scope, - span: ctx.span.clone(), - source_order: self.symbols.len(), - visible_from_rule_order: 0, - }); - } } pub(crate) fn collect_rule_term( &mut self, context: VariableUseContext<'_>, + syntax: Option<&SyntaxNode>, term: &RuleBodyTerm, ) { match term { RuleBodyTerm::Expression(expr) => self.collect_expression_term(expr, context), - RuleBodyTerm::Assignment(assign) => self.collect_assignment_term(assign, context), + RuleBodyTerm::Assignment(assign) => { + self.collect_assignment_term(assign, syntax, context); + } RuleBodyTerm::Aggregation(aggregation) => { self.collect_aggregation_term(aggregation, context); } - RuleBodyTerm::ForLoop(for_loop) => self.collect_for_loop_term(for_loop, context), + RuleBodyTerm::ForLoop(for_loop) => { + self.collect_for_loop_term(for_loop, syntax, context); + } } } @@ -75,16 +84,18 @@ impl SemanticModelBuilder { fn collect_assignment_term( &mut self, assign: &ast::RuleAssignment, + syntax: Option<&SyntaxNode>, context: VariableUseContext<'_>, ) { self.walk_variable_uses(&assign.value, context); for binding_name in collect_pattern_binding_names(&assign.pattern) { self.declare_symbol(SymbolSpec { - name: binding_name, + name: binding_name.clone(), kind: DeclarationKind::RuleBinding, origin: SymbolOrigin::AssignmentPattern, scope: context.current_scope(), span: context.span().clone(), + name_span: syntax.and_then(|syntax| find_identifier_span(syntax, &binding_name)), source_order: self.symbols.len(), visible_from_rule_order: context.literal_index() + 1, }); @@ -103,6 +114,7 @@ impl SemanticModelBuilder { fn collect_for_loop_term( &mut self, for_loop: &ast::RuleForLoop, + syntax: Option<&SyntaxNode>, context: VariableUseContext<'_>, ) { self.record_top_level_relation_use(context, UseOrigin::ForIterable, &for_loop.iterable); @@ -122,11 +134,12 @@ impl SemanticModelBuilder { }); for binding_name in collect_pattern_binding_names(&for_loop.pattern) { self.declare_symbol(SymbolSpec { - name: binding_name, + name: binding_name.clone(), kind: DeclarationKind::RuleBinding, origin: SymbolOrigin::ForPattern, scope: child_scope, span: context.span().clone(), + name_span: syntax.and_then(|syntax| find_identifier_span(syntax, &binding_name)), source_order: self.symbols.len(), visible_from_rule_order: 0, }); @@ -140,6 +153,7 @@ impl SemanticModelBuilder { context.span(), context.rule_order_limit(), ), + None, nested_term, ); } From 9ff9ef450db9144d3985b687ccd06e789c808588 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 8 Apr 2026 20:03:08 +0000 Subject: [PATCH 02/10] refactor(sema): extract program declaration spec generation to helper function Refactor collect_program_declarations by removing inlined closure and pattern matching, replacing it with a new helper method program_declaration_spec. This improves code readability and modularity without changing behavior. Co-authored-by: devboxerhub[bot] --- src/sema/builder.rs | 131 +++++++++++++++++++------------------------- 1 file changed, 57 insertions(+), 74 deletions(-) diff --git a/src/sema/builder.rs b/src/sema/builder.rs index c7261d8c..e51eb5b5 100644 --- a/src/sema/builder.rs +++ b/src/sema/builder.rs @@ -126,81 +126,10 @@ impl SemanticModelBuilder { pub(crate) fn collect_program_declarations(&mut self, root: &ast::Root) { let mut order = 0usize; - let mut declare_from_child = - |name: Option, - name_span: Option, - kind: DeclarationKind, - origin: SymbolOrigin, - child: &SyntaxNode| { - if let Some(name) = name { - self.declare_symbol(SymbolSpec { - name, - kind, - origin, - scope: self.program_scope, - span: text_range_to_span(child.text_range()), - name_span, - source_order: order, - visible_from_rule_order: 0, - }); - order += 1; - } - }; - for child in root.syntax().children() { - match child.kind() { - SyntaxKind::N_RELATION_DECL => declare_from_child( - { - let relation = ast::Relation { - syntax: child.clone(), - }; - relation.name() - }, - { - let relation = ast::Relation { - syntax: child.clone(), - }; - relation.name_span() - }, - DeclarationKind::Relation, - SymbolOrigin::RelationDeclaration, - &child, - ), - SyntaxKind::N_FUNCTION => declare_from_child( - { - let function = ast::Function { - syntax: child.clone(), - }; - function.name() - }, - { - let function = ast::Function { - syntax: child.clone(), - }; - function.name_span() - }, - DeclarationKind::Function, - SymbolOrigin::FunctionDeclaration, - &child, - ), - SyntaxKind::N_TYPE_DEF => declare_from_child( - { - let type_def = ast::TypeDef { - syntax: child.clone(), - }; - type_def.name() - }, - { - let type_def = ast::TypeDef { - syntax: child.clone(), - }; - type_def.name_span() - }, - DeclarationKind::Type, - SymbolOrigin::TypeDeclaration, - &child, - ), - _ => {} + if let Some(spec) = self.program_declaration_spec(&child, order) { + self.declare_symbol(spec); + order += 1; } } } @@ -320,6 +249,60 @@ impl SemanticModelBuilder { scope_id } + fn program_declaration_spec( + &self, + child: &SyntaxNode, + source_order: usize, + ) -> Option { + let (name, name_span, kind, origin) = match child.kind() { + SyntaxKind::N_RELATION_DECL => { + let relation = ast::Relation { + syntax: child.clone(), + }; + ( + relation.name(), + relation.name_span(), + DeclarationKind::Relation, + SymbolOrigin::RelationDeclaration, + ) + } + SyntaxKind::N_FUNCTION => { + let function = ast::Function { + syntax: child.clone(), + }; + ( + function.name(), + function.name_span(), + DeclarationKind::Function, + SymbolOrigin::FunctionDeclaration, + ) + } + SyntaxKind::N_TYPE_DEF => { + let type_def = ast::TypeDef { + syntax: child.clone(), + }; + ( + type_def.name(), + type_def.name_span(), + DeclarationKind::Type, + SymbolOrigin::TypeDeclaration, + ) + } + _ => return None, + }; + + name.map(|name| SymbolSpec { + name, + kind, + origin, + scope: self.program_scope, + span: text_range_to_span(child.text_range()), + name_span, + source_order, + visible_from_rule_order: 0, + }) + } + pub(crate) fn declare_symbol(&mut self, spec: SymbolSpec) -> SymbolId { let symbol_id = SymbolId(self.symbols.len()); self.symbols_by_scope_and_name From c74c8da77752e0fe2d5e50fa32e987aa0b17b000 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 8 Apr 2026 23:05:00 +0000 Subject: [PATCH 03/10] feat(parser, sema): add precise binding spans for rule heads to semantic model This change augments the RuleHead AST with span information for the head atom and its bindings. It introduces functionality in the parser to collect and store identifier-token spans for bindings introduced by the head atom. The semantic model builder now uses these spans to accurately track declaration name spans for rule head bindings. Enhancements include: - New functions to find identifier spans within given spans for precise range matching. - Collection of binding spans in parsed rule heads excluding non-bindings like those following dots or certain punctuations. - Usage of binding spans in semantic symbol declarations for improved source mapping. - Additional tests to verify correct capturing and handling of name spans for rule head bindings. This feature improves diagnostic accuracy and tooling support by providing detailed source location information for rule head bindings. Co-authored-by: devboxerhub[bot] --- src/parser/ast/mod.rs | 28 ++++++++++++++++-- src/parser/ast/rule_head.rs | 57 ++++++++++++++++++++++++++++++++++++- src/sema/builder.rs | 14 ++++++--- src/sema/tests/name_span.rs | 30 +++++++++++++++++-- src/sema/traverse.rs | 9 +++--- 5 files changed, 123 insertions(+), 15 deletions(-) diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index bf0de449..d10ef002 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -24,15 +24,31 @@ pub(crate) trait AstNode { /// Find the first identifier token with matching text within a syntax subtree. #[must_use] pub(crate) fn find_identifier_span(syntax: &SyntaxNode, name: &str) -> Option { + find_identifier_span_in_range(syntax, &text_range_to_span(syntax.text_range()), name) +} + +/// Find the first identifier token with matching text within a span-bounded subtree search. +#[must_use] +pub(crate) fn find_identifier_span_in_range( + syntax: &SyntaxNode, + search_span: &Span, + name: &str, +) -> Option { for child in syntax.children_with_tokens() { match child { NodeOrToken::Token(token) if token.kind() == SyntaxKind::T_IDENT && token.text() == name => { - return Some(text_range_to_span(token.text_range())); + let token_span = text_range_to_span(token.text_range()); + if span_contains(search_span, &token_span) { + return Some(token_span); + } } NodeOrToken::Node(node) => { - if let Some(span) = find_identifier_span(&node, name) { + if !ranges_overlap(search_span, &text_range_to_span(node.text_range())) { + continue; + } + if let Some(span) = find_identifier_span_in_range(&node, search_span, name) { return Some(span); } } @@ -43,6 +59,14 @@ pub(crate) fn find_identifier_span(syntax: &SyntaxNode, name: &st None } +fn ranges_overlap(left: &Span, right: &Span) -> bool { + left.start < right.end && right.start < left.end +} + +fn span_contains(container: &Span, candidate: &Span) -> bool { + container.start <= candidate.start && candidate.end <= container.end +} + macro_rules! impl_ast_node { ($ty:ty) => { impl AstNode for $ty { diff --git a/src/parser/ast/rule_head.rs b/src/parser/ast/rule_head.rs index 019b93e0..b9aa1d15 100644 --- a/src/parser/ast/rule_head.rs +++ b/src/parser/ast/rule_head.rs @@ -88,6 +88,10 @@ pub struct RuleHead { pub atom: Expr, /// Optional `@` location expression. pub location: Option, + /// Source span covering only the head atom. + pub span: Span, + /// Precise identifier-token spans for bindings introduced by the head atom. + pub binding_spans: Vec<(String, Span)>, } pub(crate) fn parse_rule_heads( @@ -162,16 +166,67 @@ fn parse_rule_head_span( segment_offset.saturating_add(location_start), )?); - return Ok(Some(RuleHead { atom, location })); + let binding_spans = collect_head_binding_spans(atom_src, segment_offset); + + return Ok(Some(RuleHead { + atom, + location, + span: segment_offset..segment_offset.saturating_add(at_span.start), + binding_spans, + })); } + let binding_spans = collect_head_binding_spans(trimmed, segment_offset); let atom = lower_by_ref_head(parse_expr_with_offset(trimmed, segment_offset)?); Ok(Some(RuleHead { atom, location: None, + span: segment_offset..segment_offset.saturating_add(trimmed.len()), + binding_spans, })) } +fn collect_head_binding_spans(src: &str, base_offset: usize) -> Vec<(String, Span)> { + let tokens = tokenize_without_trivia(src); + let mut bindings = Vec::new(); + + for (index, (kind, span)) in tokens.iter().enumerate() { + if *kind != SyntaxKind::T_IDENT { + continue; + } + + let text = src.get(span.clone()).unwrap_or(""); + if should_skip_binding_ident(&tokens, index) || text == "_" { + continue; + } + + if bindings.iter().any(|(existing, _)| existing == text) { + continue; + } + + bindings.push(( + text.to_string(), + base_offset.saturating_add(span.start)..base_offset.saturating_add(span.end), + )); + } + + bindings +} + +fn should_skip_binding_ident(tokens: &[(SyntaxKind, Span)], index: usize) -> bool { + let prev_kind = index + .checked_sub(1) + .and_then(|prev| tokens.get(prev)) + .map(|(kind, _)| *kind); + let next_kind = tokens.get(index + 1).map(|(kind, _)| *kind); + + prev_kind == Some(SyntaxKind::T_DOT) + || matches!( + next_kind, + Some(SyntaxKind::T_LPAREN | SyntaxKind::T_LBRACE | SyntaxKind::T_COLON) + ) +} + fn parse_expr_with_offset(src: &str, base_offset: usize) -> Result>> { let (start, end) = trim_byte_range(src); let trimmed = src.get(start..end).unwrap_or(""); diff --git a/src/sema/builder.rs b/src/sema/builder.rs index e51eb5b5..555dd2e8 100644 --- a/src/sema/builder.rs +++ b/src/sema/builder.rs @@ -7,7 +7,6 @@ use rowan::SyntaxNode; use crate::Span; use crate::SyntaxKind; use crate::parser::ast; -use crate::parser::ast::AstNode; use crate::parser::ast::SemanticRule; use crate::parser::ast::rule::text_range_to_span; use crate::sema::model::{ @@ -155,9 +154,16 @@ impl SemanticModelBuilder { if let Ok(terms) = rule.body_terms() { let body_nodes = rule.body_expression_nodes(); + debug_assert_eq!( + body_nodes.len(), + terms.len(), + "rule body node/term count mismatch should stay visible during semantic collection" + ); - for (literal_index, (node, term)) in body_nodes.into_iter().zip(terms).enumerate() { - let span = node.span(); + for (literal_index, term) in terms.into_iter().enumerate() { + let maybe_node = body_nodes.get(literal_index); + let span = + maybe_node.map_or_else(|| rule_span.clone(), ast::RuleBodyExpression::span); self.collect_rule_term( super::variables::VariableUseContext::new( rule_scope, @@ -165,7 +171,7 @@ impl SemanticModelBuilder { &span, literal_index, ), - Some(node.syntax()), + maybe_node.map(ast::AstNode::syntax), &term, ); } diff --git a/src/sema/tests/name_span.rs b/src/sema/tests/name_span.rs index dfed6154..1a743bd2 100644 --- a/src/sema/tests/name_span.rs +++ b/src/sema/tests/name_span.rs @@ -41,6 +41,13 @@ fn find_symbol<'a>( SymbolOrigin::FunctionDeclaration, "project" )] +#[case( + "function project(project: u32): u32 { project }", + "project", + DeclarationKind::Function, + SymbolOrigin::FunctionDeclaration, + "project" +)] #[case( "typedef UserId = u64", "UserId", @@ -67,14 +74,30 @@ fn top_level_declarations_capture_identifier_name_spans( assert_ne!(name_span, symbol.span()); } +#[rstest] +fn semantic_rule_head_bindings_leave_name_span_unavailable() { + let source = "for (item_y in Source(item_y)) Output(item_y)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "item_y", + DeclarationKind::RuleBinding, + SymbolOrigin::SemanticRuleHead, + ); + + assert!(symbol.name_span().is_none()); + assert_eq!(span_text(source, symbol.span()), source); +} + #[rstest] fn rule_head_bindings_capture_identifier_name_spans() { - let source = "Output(head_x) :- Source(_)."; + let source = "Output(Output) :- Output(_)."; let parsed = parse_ok(source); let semantic_model = super::super::build(&parsed); let symbol = find_symbol( &semantic_model, - "head_x", + "Output", DeclarationKind::RuleBinding, SymbolOrigin::RuleHead, ); @@ -83,7 +106,8 @@ fn rule_head_bindings_capture_identifier_name_spans() { .name_span() .unwrap_or_else(|| panic!("missing head binding name_span")); - assert_eq!(span_text(source, name_span), "head_x"); + assert_eq!(span_text(source, name_span), "Output"); + assert_eq!(name_span.start, 7); assert_eq!(span_text(source, symbol.span()), source); } diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index 969330dc..16c064c4 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -4,14 +4,13 @@ use rowan::SyntaxNode; use crate::Span; use crate::parser::ast; -use crate::parser::ast::{AstNode, find_identifier_span}; -use crate::parser::ast::{Expr, RuleBodyTerm}; +use crate::parser::ast::{Expr, RuleBodyTerm, find_identifier_span}; use crate::sema::model::{ DeclarationKind, ScopeId, ScopeKind, ScopeOrigin, SymbolOrigin, UseKind, UseOrigin, UseSite, }; use super::builder::{ScopeSpec, SemanticModelBuilder, SymbolSpec}; -use super::resolve::{collect_head_binding_names, collect_pattern_binding_names, relation_name}; +use super::resolve::{collect_pattern_binding_names, relation_name}; use super::variables::VariableUseContext; #[derive(Clone, Copy)] @@ -26,14 +25,14 @@ impl SemanticModelBuilder { if let Ok(heads) = rule.heads() { for head in heads { self.collect_head_expr(&head.atom, ctx); - for binding_name in collect_head_binding_names(&head.atom) { + for (binding_name, name_span) in &head.binding_spans { self.declare_symbol(SymbolSpec { name: binding_name.clone(), kind: DeclarationKind::RuleBinding, origin: ctx.origin, scope: ctx.scope, span: ctx.span.clone(), - name_span: find_identifier_span(rule.syntax(), &binding_name), + name_span: Some(name_span.clone()), source_order: self.symbols.len(), visible_from_rule_order: 0, }); From 03a0ad53458773b1ee331c92366aad48a304f61b Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 9 Apr 2026 11:54:52 +0000 Subject: [PATCH 04/10] fix(sema): pass None for CST handles in nested for loop terms Nested bodies recurse through `collect_rule_term`, but the `VariableUseContext::new(...)` path no longer retains CST handles for those inner terms. Passing `None` deliberately prevents nested bindings from claiming precise `name_span`s we cannot source faithfully. Co-authored-by: devboxerhub[bot] --- src/sema/traverse.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index 16c064c4..0bcab243 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -145,6 +145,11 @@ impl SemanticModelBuilder { } for (term_offset, nested_term) in for_loop.body_terms.iter().enumerate() { + // Nested bodies recurse through `collect_rule_term`, but the + // `VariableUseContext::new(...)` path no longer retains CST handles + // for those inner terms. We therefore pass `None` here + // deliberately so nested bindings do not claim precise + // `name_span`s we cannot source faithfully. self.collect_rule_term( VariableUseContext::new( child_scope, From 1ade13736a2b16997cefb3312df8d045da66b174 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 9 Apr 2026 16:02:32 +0000 Subject: [PATCH 05/10] refactor(sema): extract pattern binding declaration into helper method Refactored `SemanticModelBuilder` to use a new `declare_pattern_binding` helper method when declaring pattern bindings for assignment and for-patterns. This removes duplication and centralizes symbol declaration logic. Additionally, improved identifier span search in AST by rewriting `find_identifier_span_in_range` to use iterative traversal with helper functions. Added related tests for identifier search correctness. Minor doc formatting fixes also included. Co-authored-by: devboxerhub[bot] --- docs/ddlint-design.md | 8 ++-- src/parser/ast/mod.rs | 108 +++++++++++++++++++++++++++++++++++++----- src/sema/traverse.rs | 57 ++++++++++++++-------- 3 files changed, 138 insertions(+), 35 deletions(-) diff --git a/docs/ddlint-design.md b/docs/ddlint-design.md index 044686bb..80075318 100644 --- a/docs/ddlint-design.md +++ b/docs/ddlint-design.md @@ -750,10 +750,10 @@ Rules such as `unused-variable` should prefer `name_span` for diagnostic highlighting and fall back to `span` only when precise token provenance is not available, such as parse-time `SemanticRule` values that retain only lowered expressions plus a coarse source span. The semantic model deliberately stays on -the `Span` side of that boundary; conversion into diagnostic -`rowan::TextRange` values happens in the linter layer through one shared -boundary helper in `src/linter/span_utils.rs`. This keeps a single conversion -point, rather than per-rule glue, for correctness diagnostics. +the `Span` side of that boundary; conversion into diagnostic `rowan::TextRange` +values happens in the linter layer through one shared boundary helper in +`src/linter/span_utils.rs`. This keeps a single conversion point, rather than +per-rule glue, for correctness diagnostics. Resolution is deliberately tri-state: diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index d10ef002..a3e80915 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -13,7 +13,7 @@ use rowan::{NodeOrToken, SyntaxElement, SyntaxNode}; use self::parse_utils::is_trivia; use crate::parser::ast::rule::text_range_to_span; -use crate::{DdlogLanguage, Span, SyntaxKind}; +use crate::{DdlogLanguage, Span, SyntaxKind, SyntaxToken}; /// Common interface for AST wrappers. pub(crate) trait AstNode { @@ -34,31 +34,51 @@ pub(crate) fn find_identifier_span_in_range( search_span: &Span, name: &str, ) -> Option { - for child in syntax.children_with_tokens() { - match child { - NodeOrToken::Token(token) - if token.kind() == SyntaxKind::T_IDENT && token.text() == name => - { + let mut stack = reversed_children(syntax); + + while let Some(element) = stack.pop() { + match element { + NodeOrToken::Token(token) => { + if !token_overlaps_range(&token, search_span) || !is_matching_ident(&token, name) { + continue; + } + let token_span = text_range_to_span(token.text_range()); if span_contains(search_span, &token_span) { return Some(token_span); } } NodeOrToken::Node(node) => { - if !ranges_overlap(search_span, &text_range_to_span(node.text_range())) { + if !node_overlaps_range(&node, search_span) { continue; } - if let Some(span) = find_identifier_span_in_range(&node, search_span, name) { - return Some(span); - } + + stack.extend(reversed_children(&node)); } - NodeOrToken::Token(_) => {} } } None } +fn reversed_children(syntax: &SyntaxNode) -> Vec> { + let mut children: Vec<_> = syntax.children_with_tokens().collect(); + children.reverse(); + children +} + +fn token_overlaps_range(token: &SyntaxToken, range: &Span) -> bool { + ranges_overlap(range, &text_range_to_span(token.text_range())) +} + +fn node_overlaps_range(node: &SyntaxNode, range: &Span) -> bool { + ranges_overlap(range, &text_range_to_span(node.text_range())) +} + +fn is_matching_ident(token: &SyntaxToken, name: &str) -> bool { + token.kind() == SyntaxKind::T_IDENT && token.text() == name +} + fn ranges_overlap(left: &Span, right: &Span) -> bool { left.start < right.end && right.start < left.end } @@ -230,12 +250,78 @@ mod tests { mod precedence; + use super::{AstNode, find_identifier_span_in_range}; use crate::parse; + fn first_rule(source: &str) -> super::Rule { + parse(source) + .root() + .rules() + .into_iter() + .next() + .unwrap_or_else(|| panic!("expected a parsed rule in `{source}`")) + } + + fn required_offset(source: &str, needle: &str) -> usize { + source + .find(needle) + .unwrap_or_else(|| panic!("expected `{needle}` in `{source}`")) + } + + fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { + source.get(span.start..span.end).unwrap_or_else(|| { + panic!( + "invalid UTF-8 boundary for span {}..{} in `{source}`", + span.start, span.end + ) + }) + } + #[test] fn root_collects_imports() { let src = "import foo"; let parsed = parse(src); assert_eq!(parsed.root().imports().len(), 1); } + + #[test] + fn identifier_search_picks_match_within_repeated_name_subtree() { + let source = "Output(foo) :- Source(foo), Other(foo)."; + let rule = first_rule(source); + let other_start = required_offset(source, "Other(foo)"); + let other_end = other_start + "Other(foo)".len(); + + let span = find_identifier_span_in_range(rule.syntax(), &(other_start..other_end), "foo") + .unwrap_or_else(|| panic!("expected foo within Other(foo) in `{source}`")); + + assert_eq!(span_text(source, &span), "foo"); + assert_eq!(span.start, other_start + "Other(".len()); + } + + #[test] + fn identifier_search_returns_none_for_non_overlapping_range() { + let source = "Output(foo) :- Source(foo), Other(foo)."; + let rule = first_rule(source); + let output_start = required_offset(source, "Output"); + let output_end = output_start + "Output".len(); + + assert_eq!( + find_identifier_span_in_range(rule.syntax(), &(output_start..output_end), "foo"), + None + ); + } + + #[test] + fn identifier_search_matches_exact_nested_token_range() { + let source = "Output(result) :- var assigned_x = Seed(result)."; + let rule = first_rule(source); + let assign_start = required_offset(source, "assigned_x"); + let assign_end = assign_start + "assigned_x".len(); + let token_span = assign_start..assign_end; + + let span = find_identifier_span_in_range(rule.syntax(), &token_span, "assigned_x") + .unwrap_or_else(|| panic!("expected exact nested token match in `{source}`")); + + assert_eq!(span, token_span); + } } diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index 0bcab243..11b31d73 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -88,16 +88,14 @@ impl SemanticModelBuilder { ) { self.walk_variable_uses(&assign.value, context); for binding_name in collect_pattern_binding_names(&assign.pattern) { - self.declare_symbol(SymbolSpec { - name: binding_name.clone(), - kind: DeclarationKind::RuleBinding, - origin: SymbolOrigin::AssignmentPattern, - scope: context.current_scope(), - span: context.span().clone(), - name_span: syntax.and_then(|syntax| find_identifier_span(syntax, &binding_name)), - source_order: self.symbols.len(), - visible_from_rule_order: context.literal_index() + 1, - }); + self.declare_pattern_binding( + &binding_name, + SymbolOrigin::AssignmentPattern, + context.current_scope(), + context.span(), + syntax, + context.literal_index() + 1, + ); } } @@ -132,16 +130,14 @@ impl SemanticModelBuilder { span: context.span().clone(), }); for binding_name in collect_pattern_binding_names(&for_loop.pattern) { - self.declare_symbol(SymbolSpec { - name: binding_name.clone(), - kind: DeclarationKind::RuleBinding, - origin: SymbolOrigin::ForPattern, - scope: child_scope, - span: context.span().clone(), - name_span: syntax.and_then(|syntax| find_identifier_span(syntax, &binding_name)), - source_order: self.symbols.len(), - visible_from_rule_order: 0, - }); + self.declare_pattern_binding( + &binding_name, + SymbolOrigin::ForPattern, + child_scope, + context.span(), + syntax, + 0, + ); } for (term_offset, nested_term) in for_loop.body_terms.iter().enumerate() { @@ -184,4 +180,25 @@ impl SemanticModelBuilder { resolution, }); } + + fn declare_pattern_binding( + &mut self, + binding_name: &str, + origin: SymbolOrigin, + scope: ScopeId, + span: &Span, + syntax: Option<&SyntaxNode>, + visible_from_rule_order: usize, + ) { + self.declare_symbol(SymbolSpec { + name: binding_name.to_string(), + kind: DeclarationKind::RuleBinding, + origin, + scope, + span: span.clone(), + name_span: syntax.and_then(|syntax| find_identifier_span(syntax, binding_name)), + source_order: self.symbols.len(), + visible_from_rule_order, + }); + } } From 63e8c597fa4d8ea42d187773d8c77baa49cf8bcb Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 10 Apr 2026 01:07:05 +0000 Subject: [PATCH 06/10] test(sema): add test for missing name_span in nested for-loop bindings Add a new test verifying that nested for-loop bindings lack precise name_span information due to missing CST handles, ensuring semantic model correctly represents symbol spans in this case. Co-authored-by: devboxerhub[bot] --- src/sema/builder.rs | 4 ++++ src/sema/tests/name_span.rs | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/sema/builder.rs b/src/sema/builder.rs index 555dd2e8..1f27f3ef 100644 --- a/src/sema/builder.rs +++ b/src/sema/builder.rs @@ -200,6 +200,10 @@ impl SemanticModelBuilder { origin: SymbolOrigin::SemanticRuleHead, }, ); + // This is intentionally asymmetric with `traverse.rs`: AST rule-head + // bindings capture precise `SymbolSpec.name_span` values from + // `binding_spans`, but parse-time `SemanticRule` values do not + // retain the CST handles needed to recover those token spans here. for binding_name in collect_head_binding_names(rule.head()) { self.declare_symbol(SymbolSpec { name: binding_name, diff --git a/src/sema/tests/name_span.rs b/src/sema/tests/name_span.rs index 1a743bd2..18859877 100644 --- a/src/sema/tests/name_span.rs +++ b/src/sema/tests/name_span.rs @@ -156,3 +156,22 @@ fn for_loop_bindings_capture_identifier_name_spans() { "for (item_y in Items(result)) Inner(result)" ); } + +#[rstest] +fn nested_for_loop_bindings_lack_name_span() { + let source = "Output(x) :- Source(x), for (outer in Items(x)) for (inner in Nested(outer)) Inner(inner)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "inner", + DeclarationKind::RuleBinding, + SymbolOrigin::ForPattern, + ); + + assert!(symbol.name_span().is_none()); + assert_eq!( + span_text(source, symbol.span()), + "for (outer in Items(x)) for (inner in Nested(outer)) Inner(inner)" + ); +} From ea413e1381dbe52083de696e79e9622c35768912 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 10 Apr 2026 01:54:45 +0000 Subject: [PATCH 07/10] refactor(sema): use PatternBindingSpec struct to group pattern binding parameters Refactored declare_pattern_binding method to accept a PatternBindingSpec struct instead of multiple separate parameters. This consolidates related data into a single struct for improved readability and maintainability in traverse.rs. Co-authored-by: devboxerhub[bot] --- src/sema/traverse.rs | 62 +++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index 11b31d73..e4a23014 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -20,6 +20,16 @@ pub(super) struct RuleHeadContext<'a> { pub(super) origin: SymbolOrigin, } +#[derive(Clone, Copy)] +struct PatternBindingSpec<'a> { + binding_name: &'a str, + origin: SymbolOrigin, + scope: ScopeId, + span: &'a Span, + syntax: Option<&'a SyntaxNode>, + visible_from_rule_order: usize, +} + impl SemanticModelBuilder { pub(crate) fn collect_rule_heads(&mut self, ctx: RuleHeadContext<'_>, rule: &ast::Rule) { if let Ok(heads) = rule.heads() { @@ -88,14 +98,14 @@ impl SemanticModelBuilder { ) { self.walk_variable_uses(&assign.value, context); for binding_name in collect_pattern_binding_names(&assign.pattern) { - self.declare_pattern_binding( - &binding_name, - SymbolOrigin::AssignmentPattern, - context.current_scope(), - context.span(), + self.declare_pattern_binding(PatternBindingSpec { + binding_name: &binding_name, + origin: SymbolOrigin::AssignmentPattern, + scope: context.current_scope(), + span: context.span(), syntax, - context.literal_index() + 1, - ); + visible_from_rule_order: context.literal_index() + 1, + }); } } @@ -130,14 +140,14 @@ impl SemanticModelBuilder { span: context.span().clone(), }); for binding_name in collect_pattern_binding_names(&for_loop.pattern) { - self.declare_pattern_binding( - &binding_name, - SymbolOrigin::ForPattern, - child_scope, - context.span(), + self.declare_pattern_binding(PatternBindingSpec { + binding_name: &binding_name, + origin: SymbolOrigin::ForPattern, + scope: child_scope, + span: context.span(), syntax, - 0, - ); + visible_from_rule_order: 0, + }); } for (term_offset, nested_term) in for_loop.body_terms.iter().enumerate() { @@ -181,24 +191,18 @@ impl SemanticModelBuilder { }); } - fn declare_pattern_binding( - &mut self, - binding_name: &str, - origin: SymbolOrigin, - scope: ScopeId, - span: &Span, - syntax: Option<&SyntaxNode>, - visible_from_rule_order: usize, - ) { + fn declare_pattern_binding(&mut self, spec: PatternBindingSpec<'_>) { self.declare_symbol(SymbolSpec { - name: binding_name.to_string(), + name: spec.binding_name.to_string(), kind: DeclarationKind::RuleBinding, - origin, - scope, - span: span.clone(), - name_span: syntax.and_then(|syntax| find_identifier_span(syntax, binding_name)), + origin: spec.origin, + scope: spec.scope, + span: spec.span.clone(), + name_span: spec + .syntax + .and_then(|s| find_identifier_span(s, spec.binding_name)), source_order: self.symbols.len(), - visible_from_rule_order, + visible_from_rule_order: spec.visible_from_rule_order, }); } } From ba10b824af0b34d41fd74f3b730f9c01b63481f6 Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 10 Apr 2026 13:37:01 +0000 Subject: [PATCH 08/10] test(sema): parameterize binding declarations tests using rstest cases Refactor binding declaration tests into a single parameterized test function with multiple cases to improve readability and maintainability. The new test covers rule head bindings, assignment pattern bindings, and for loop bindings uniformly. Co-authored-by: devboxerhub[bot] --- src/sema/tests/name_span.rs | 94 ++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/src/sema/tests/name_span.rs b/src/sema/tests/name_span.rs index 18859877..5c770558 100644 --- a/src/sema/tests/name_span.rs +++ b/src/sema/tests/name_span.rs @@ -91,70 +91,56 @@ fn semantic_rule_head_bindings_leave_name_span_unavailable() { } #[rstest] -fn rule_head_bindings_capture_identifier_name_spans() { - let source = "Output(Output) :- Output(_)."; - let parsed = parse_ok(source); - let semantic_model = super::super::build(&parsed); - let symbol = find_symbol( - &semantic_model, - "Output", - DeclarationKind::RuleBinding, - SymbolOrigin::RuleHead, - ); - - let name_span = symbol - .name_span() - .unwrap_or_else(|| panic!("missing head binding name_span")); - - assert_eq!(span_text(source, name_span), "Output"); - assert_eq!(name_span.start, 7); - assert_eq!(span_text(source, symbol.span()), source); -} - -#[rstest] -fn assignment_bindings_capture_identifier_name_spans() { - let source = "Output(result) :- Source(result), var assigned_x = Seed(result)."; - let parsed = parse_ok(source); - let semantic_model = super::super::build(&parsed); - let symbol = find_symbol( - &semantic_model, - "assigned_x", - DeclarationKind::RuleBinding, - SymbolOrigin::AssignmentPattern, - ); - - let name_span = symbol - .name_span() - .unwrap_or_else(|| panic!("missing assignment binding name_span")); - - assert_eq!(span_text(source, name_span), "assigned_x"); - assert_eq!( - span_text(source, symbol.span()), - "var assigned_x = Seed(result)" - ); -} - -#[rstest] -fn for_loop_bindings_capture_identifier_name_spans() { - let source = "Output(result) :- Source(result), for (item_y in Items(result)) Inner(result)."; +#[case( + "Output(Output) :- Output(_).", + "Output", + SymbolOrigin::RuleHead, + "Output", + Some(7), + "Output(Output) :- Output(_)." +)] +#[case( + "Output(result) :- Source(result), var assigned_x = Seed(result).", + "assigned_x", + SymbolOrigin::AssignmentPattern, + "assigned_x", + None, + "var assigned_x = Seed(result)" +)] +#[case( + "Output(result) :- Source(result), for (item_y in Items(result)) Inner(result).", + "item_y", + SymbolOrigin::ForPattern, + "item_y", + None, + "for (item_y in Items(result)) Inner(result)" +)] +fn binding_declarations_capture_identifier_name_spans( + #[case] source: &str, + #[case] symbol_name: &str, + #[case] origin: SymbolOrigin, + #[case] expected_name_text: &str, + #[case] expected_name_span_start: Option, + #[case] expected_span_text: &str, +) { let parsed = parse_ok(source); let semantic_model = super::super::build(&parsed); let symbol = find_symbol( &semantic_model, - "item_y", + symbol_name, DeclarationKind::RuleBinding, - SymbolOrigin::ForPattern, + origin, ); let name_span = symbol .name_span() - .unwrap_or_else(|| panic!("missing for-loop binding name_span")); + .unwrap_or_else(|| panic!("missing binding name_span for `{symbol_name}`")); - assert_eq!(span_text(source, name_span), "item_y"); - assert_eq!( - span_text(source, symbol.span()), - "for (item_y in Items(result)) Inner(result)" - ); + assert_eq!(span_text(source, name_span), expected_name_text); + if let Some(start) = expected_name_span_start { + assert_eq!(name_span.start, start); + } + assert_eq!(span_text(source, symbol.span()), expected_span_text); } #[rstest] From ffa7d4434662a46a3649e413da5276842364592d Mon Sep 17 00:00:00 2001 From: Leynos Date: Fri, 10 Apr 2026 19:00:45 +0000 Subject: [PATCH 09/10] feat(parser, sema): add precise identifier span sourcing for declarations and bindings This change enhances diagnostic precision by capturing exact spans of declaration and rule-local binding identifiers during parsing and semantic collection rather than re-walking the CST during linting. - Added `find_identifier_span` helpers in `src/parser/ast/mod.rs` for locating identifier tokens within syntax nodes or ranges. - Exposed `name_span()` accessors in AST nodes for functions, relations, and typedefs. - Implemented `RuleHead::binding_spans` in `src/parser/ast/rule_head.rs` to record declaration-like binding identifiers from rule heads. - Modified semantic model builder and traverser to thread and record precise `name_span` information for top-level declarations and rule-local bindings. - Added extensive tests to validate correct span extraction and binding recognition. - Documented implementation details in `docs/parser-implementation-notes.md`. This improves diagnostics by providing accurate source spans that tools and linters can utilize consistently. Co-authored-by: devboxerhub[bot] --- docs/parser-implementation-notes.md | 35 ++++++++++ src/parser/ast/function.rs | 31 +++++++++ src/parser/ast/mod.rs | 73 ++------------------ src/parser/ast/relation.rs | 30 +++++++++ src/parser/ast/rule_head.rs | 16 +++++ src/parser/ast/rule_head/tests.rs | 59 ++++++++++++++++ src/parser/ast/tests/identifier_span.rs | 89 +++++++++++++++++++++++++ src/parser/ast/type_def.rs | 30 +++++++++ src/sema/tests/name_span.rs | 2 +- src/sema/traverse.rs | 2 + 10 files changed, 300 insertions(+), 67 deletions(-) create mode 100644 src/parser/ast/rule_head/tests.rs create mode 100644 src/parser/ast/tests/identifier_span.rs diff --git a/docs/parser-implementation-notes.md b/docs/parser-implementation-notes.md index b0448229..1aa492da 100644 --- a/docs/parser-implementation-notes.md +++ b/docs/parser-implementation-notes.md @@ -213,6 +213,41 @@ Current guarantees are intentionally narrow: - Name resolution records `Resolved`, `Unresolved`, and `Ignored` outcomes rather than emitting diagnostics directly. +### Precise identifier span sourcing + +Precise diagnostic spans for declarations and rule-local bindings are captured +once during syntax or semantic collection rather than by re-walking the CST in +lint rules. + +Implementation points: + +- `src/parser/ast/mod.rs` exposes `find_identifier_span(...)` and + `find_identifier_span_in_range(...)`, which locate the first matching + `T_IDENT` token within a syntax subtree or span-bounded search window. +- `src/parser/ast/function.rs`, `src/parser/ast/relation.rs`, and + `src/parser/ast/type_def.rs` use that helper to expose `name_span()` for + top-level declaration identifiers. +- `src/parser/ast/rule_head.rs` computes `RuleHead::binding_spans` while + parsing head text. `collect_head_binding_spans(...)` tokenizes the head atom + slice, filters out structural identifiers such as relation callees and field + labels, and records only declaration-like binding identifiers with absolute + source spans. + +Semantic collection then threads those precise spans into the owned +`SemanticModel`: + +- `src/sema/builder.rs` records top-level declaration `name_span` values while + AST wrappers are already in hand. +- `src/sema/traverse.rs` consumes `RuleHead::binding_spans` for AST-backed rule + heads and threads optional CST `syntax` handles through + `collect_rule_term(...)`, `collect_assignment_term(...)`, and + `collect_for_loop_term(...)` so assignment and `for` pattern bindings can + resolve `SymbolSpec.name_span` during collection. +- Where CST handles are not preserved faithfully, semantic collection leaves + `SymbolSpec.name_span` unset by design. This includes synthetic + `SemanticRuleHead` bindings produced by top-level `for` lowering and nested + `for` body traversals that recurse with `syntax: None`. + Similarly, collection literal lowering is not part of the base `parse()` pipeline: diff --git a/src/parser/ast/function.rs b/src/parser/ast/function.rs index 2953cb7b..029e609d 100644 --- a/src/parser/ast/function.rs +++ b/src/parser/ast/function.rs @@ -83,6 +83,16 @@ impl_ast_node!(Function); mod tests { use crate::parse; + + fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { + source.get(span.start..span.end).unwrap_or_else(|| { + panic!( + "invalid UTF-8 boundary for span {}..{} in `{source}`", + span.start, span.end + ) + }) + } + #[expect(clippy::expect_used, reason = "Using expect for clearer test failures")] #[test] fn function_name() { @@ -96,4 +106,25 @@ mod tests { .expect("function missing"); assert_eq!(func.name().as_deref(), Some("foo")); } + + #[test] + fn function_name_span_points_to_declaration_identifier() { + let source = "function project(project: u32): u32 { project }"; + let parsed = parse(source); + crate::test_util::assert_no_parse_errors(parsed.errors()); + #[expect(clippy::expect_used, reason = "Using expect for clearer test failures")] + let func = parsed + .root() + .functions() + .first() + .cloned() + .expect("function missing"); + + let span = func + .name_span() + .unwrap_or_else(|| panic!("missing function name_span in `{source}`")); + + assert_eq!(span_text(source, &span), "project"); + assert_eq!(span.start, "function ".len()); + } } diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index a3e80915..f2001caa 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -61,28 +61,34 @@ pub(crate) fn find_identifier_span_in_range( None } +/// Collect child elements in reverse order so a stack preserves source order. fn reversed_children(syntax: &SyntaxNode) -> Vec> { let mut children: Vec<_> = syntax.children_with_tokens().collect(); children.reverse(); children } +/// Whether a token's byte span overlaps the search range at all. fn token_overlaps_range(token: &SyntaxToken, range: &Span) -> bool { ranges_overlap(range, &text_range_to_span(token.text_range())) } +/// Whether a node's byte span overlaps the search range at all. fn node_overlaps_range(node: &SyntaxNode, range: &Span) -> bool { ranges_overlap(range, &text_range_to_span(node.text_range())) } +/// Whether the token is the identifier text the caller is searching for. fn is_matching_ident(token: &SyntaxToken, name: &str) -> bool { token.kind() == SyntaxKind::T_IDENT && token.text() == name } +/// Whether two byte ranges intersect. fn ranges_overlap(left: &Span, right: &Span) -> bool { left.start < right.end && right.start < left.end } +/// Whether the candidate span sits entirely within the container span. fn span_contains(container: &Span, candidate: &Span) -> bool { container.start <= candidate.start && candidate.end <= container.end } @@ -248,80 +254,15 @@ pub use type_def::TypeDef; #[cfg(test)] mod tests { + mod identifier_span; mod precedence; - use super::{AstNode, find_identifier_span_in_range}; use crate::parse; - fn first_rule(source: &str) -> super::Rule { - parse(source) - .root() - .rules() - .into_iter() - .next() - .unwrap_or_else(|| panic!("expected a parsed rule in `{source}`")) - } - - fn required_offset(source: &str, needle: &str) -> usize { - source - .find(needle) - .unwrap_or_else(|| panic!("expected `{needle}` in `{source}`")) - } - - fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { - source.get(span.start..span.end).unwrap_or_else(|| { - panic!( - "invalid UTF-8 boundary for span {}..{} in `{source}`", - span.start, span.end - ) - }) - } - #[test] fn root_collects_imports() { let src = "import foo"; let parsed = parse(src); assert_eq!(parsed.root().imports().len(), 1); } - - #[test] - fn identifier_search_picks_match_within_repeated_name_subtree() { - let source = "Output(foo) :- Source(foo), Other(foo)."; - let rule = first_rule(source); - let other_start = required_offset(source, "Other(foo)"); - let other_end = other_start + "Other(foo)".len(); - - let span = find_identifier_span_in_range(rule.syntax(), &(other_start..other_end), "foo") - .unwrap_or_else(|| panic!("expected foo within Other(foo) in `{source}`")); - - assert_eq!(span_text(source, &span), "foo"); - assert_eq!(span.start, other_start + "Other(".len()); - } - - #[test] - fn identifier_search_returns_none_for_non_overlapping_range() { - let source = "Output(foo) :- Source(foo), Other(foo)."; - let rule = first_rule(source); - let output_start = required_offset(source, "Output"); - let output_end = output_start + "Output".len(); - - assert_eq!( - find_identifier_span_in_range(rule.syntax(), &(output_start..output_end), "foo"), - None - ); - } - - #[test] - fn identifier_search_matches_exact_nested_token_range() { - let source = "Output(result) :- var assigned_x = Seed(result)."; - let rule = first_rule(source); - let assign_start = required_offset(source, "assigned_x"); - let assign_end = assign_start + "assigned_x".len(); - let token_span = assign_start..assign_end; - - let span = find_identifier_span_in_range(rule.syntax(), &token_span, "assigned_x") - .unwrap_or_else(|| panic!("expected exact nested token match in `{source}`")); - - assert_eq!(span, token_span); - } } diff --git a/src/parser/ast/relation.rs b/src/parser/ast/relation.rs index bd78bf88..cb5652ca 100644 --- a/src/parser/ast/relation.rs +++ b/src/parser/ast/relation.rs @@ -145,6 +145,15 @@ mod tests { use crate::parse; + fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { + source.get(span.start..span.end).unwrap_or_else(|| { + panic!( + "invalid UTF-8 boundary for span {}..{} in `{source}`", + span.start, span.end + ) + }) + } + #[test] fn relation_name() { let parsed = parse("input relation R(x: u32)"); @@ -158,4 +167,25 @@ mod tests { assert_eq!(rel.name().as_deref(), Some("R")); assert!(rel.is_input()); } + + #[test] + fn relation_name_span_points_to_declaration_identifier() { + let source = "input relation Source(Source: u32)"; + let parsed = parse(source); + crate::test_util::assert_no_parse_errors(parsed.errors()); + #[expect(clippy::expect_used, reason = "Using expect for clearer test failures")] + let rel = parsed + .root() + .relations() + .first() + .cloned() + .expect("relation missing"); + + let span = rel + .name_span() + .unwrap_or_else(|| panic!("missing relation name_span in `{source}`")); + + assert_eq!(span_text(source, &span), "Source"); + assert_eq!(span.start, "input relation ".len()); + } } diff --git a/src/parser/ast/rule_head.rs b/src/parser/ast/rule_head.rs index b9aa1d15..f74142e0 100644 --- a/src/parser/ast/rule_head.rs +++ b/src/parser/ast/rule_head.rs @@ -55,6 +55,7 @@ fn process_token_for_head_text( } } +/// Extract the first head segment text from a rule syntax node. pub(crate) fn first_head_text(syntax: &rowan::SyntaxNode) -> Option { use rowan::NodeOrToken; @@ -94,6 +95,7 @@ pub struct RuleHead { pub binding_spans: Vec<(String, Span)>, } +/// Parse the comma-separated heads at the start of a rule. pub(crate) fn parse_rule_heads( rule_src: &str, base_offset: usize, @@ -186,6 +188,11 @@ fn parse_rule_head_span( })) } +/// Collect unique binding identifier spans from a parsed head atom slice. +/// +/// The scanner keeps only declaration-like identifiers. Relation callees, +/// dotted access segments, constructor-like identifiers, and wildcards are +/// skipped so the returned spans can feed `RuleHead::binding_spans`. fn collect_head_binding_spans(src: &str, base_offset: usize) -> Vec<(String, Span)> { let tokens = tokenize_without_trivia(src); let mut bindings = Vec::new(); @@ -213,6 +220,11 @@ fn collect_head_binding_spans(src: &str, base_offset: usize) -> Vec<(String, Spa bindings } +/// Whether the identifier token at `index` is structural rather than a binding. +/// +/// Identifiers used as dotted members or immediately followed by `(`, `{`, or +/// `:` are treated as relation names, constructors, or field labels and are +/// therefore excluded from head-binding collection. fn should_skip_binding_ident(tokens: &[(SyntaxKind, Span)], index: usize) -> bool { let prev_kind = index .checked_sub(1) @@ -358,3 +370,7 @@ fn parse_delay_suffix( let full = minus_span.start..tokens.last().map_or(minus_span.end, |(_, sp)| sp.end); Ok(Some(DelaySuffix { full, value })) } + +#[cfg(test)] +#[path = "rule_head/tests.rs"] +mod tests; diff --git a/src/parser/ast/rule_head/tests.rs b/src/parser/ast/rule_head/tests.rs new file mode 100644 index 00000000..a7853c92 --- /dev/null +++ b/src/parser/ast/rule_head/tests.rs @@ -0,0 +1,59 @@ +//! Tests for rule-head binding span helpers. + +use super::{collect_head_binding_spans, should_skip_binding_ident}; +use crate::{Span, SyntaxKind, tokenize_without_trivia}; + +fn ident_indices(tokens: &[(SyntaxKind, Span)]) -> Vec { + tokens + .iter() + .enumerate() + .filter_map(|(index, (kind, _))| (*kind == SyntaxKind::T_IDENT).then_some(index)) + .collect() +} + +#[test] +fn collect_head_binding_spans_skips_relation_callee_collision() { + let source = "Output(Output)"; + + let spans = collect_head_binding_spans(source, 0); + + assert_eq!(spans.as_slice(), &[("Output".to_string(), 7..13)]); +} + +#[test] +fn should_skip_binding_ident_rejects_callee_but_keeps_argument_binding() { + let tokens = tokenize_without_trivia("Output(Output)"); + let indices = ident_indices(&tokens); + let [callee_index, binding_index] = indices.as_slice() else { + panic!("expected two identifier tokens in `Output(Output)`"); + }; + + assert_eq!(indices.len(), 2); + assert!(should_skip_binding_ident(&tokens, *callee_index)); + assert!(!should_skip_binding_ident(&tokens, *binding_index)); +} + +#[test] +fn should_skip_binding_ident_rejects_dotted_and_constructor_positions() { + let tokens = tokenize_without_trivia("Output(record.field, Item { value: value })"); + let indices = ident_indices(&tokens); + let [ + callee_index, + record_index, + field_index, + constructor_index, + field_label_index, + binding_index, + ] = indices.as_slice() + else { + panic!("expected six identifier tokens in dotted/constructor sample"); + }; + + assert_eq!(indices.len(), 6); + assert!(should_skip_binding_ident(&tokens, *callee_index)); + assert!(!should_skip_binding_ident(&tokens, *record_index)); + assert!(should_skip_binding_ident(&tokens, *field_index)); + assert!(should_skip_binding_ident(&tokens, *constructor_index)); + assert!(should_skip_binding_ident(&tokens, *field_label_index)); + assert!(!should_skip_binding_ident(&tokens, *binding_index)); +} diff --git a/src/parser/ast/tests/identifier_span.rs b/src/parser/ast/tests/identifier_span.rs new file mode 100644 index 00000000..87272180 --- /dev/null +++ b/src/parser/ast/tests/identifier_span.rs @@ -0,0 +1,89 @@ +//! Tests for identifier-span discovery helpers. + +use super::super::{AstNode, find_identifier_span, find_identifier_span_in_range}; +use crate::parse; + +fn first_rule(source: &str) -> super::super::Rule { + parse(source) + .root() + .rules() + .into_iter() + .next() + .unwrap_or_else(|| panic!("expected a parsed rule in `{source}`")) +} + +fn required_offset(source: &str, needle: &str) -> usize { + source + .find(needle) + .unwrap_or_else(|| panic!("expected `{needle}` in `{source}`")) +} + +fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { + source.get(span.start..span.end).unwrap_or_else(|| { + panic!( + "invalid UTF-8 boundary for span {}..{} in `{source}`", + span.start, span.end + ) + }) +} + +#[test] +fn identifier_search_picks_match_within_repeated_name_subtree() { + let source = "Output(foo) :- Source(foo), Other(foo)."; + let rule = first_rule(source); + let other_start = required_offset(source, "Other(foo)"); + let other_end = other_start + "Other(foo)".len(); + + let span = find_identifier_span_in_range(rule.syntax(), &(other_start..other_end), "foo") + .unwrap_or_else(|| panic!("expected foo within Other(foo) in `{source}`")); + + assert_eq!(span_text(source, &span), "foo"); + assert_eq!(span.start, other_start + "Other(".len()); +} + +#[test] +fn identifier_search_returns_none_for_non_overlapping_range() { + let source = "Output(foo) :- Source(foo), Other(foo)."; + let rule = first_rule(source); + let output_start = required_offset(source, "Output"); + let output_end = output_start + "Output".len(); + + assert_eq!( + find_identifier_span_in_range(rule.syntax(), &(output_start..output_end), "foo"), + None + ); +} + +#[test] +fn identifier_search_matches_exact_nested_token_range() { + let source = "Output(result) :- var assigned_x = Seed(result)."; + let rule = first_rule(source); + let assign_start = required_offset(source, "assigned_x"); + let assign_end = assign_start + "assigned_x".len(); + let token_span = assign_start..assign_end; + + let span = find_identifier_span_in_range(rule.syntax(), &token_span, "assigned_x") + .unwrap_or_else(|| panic!("expected exact nested token match in `{source}`")); + + assert_eq!(span, token_span); +} + +#[test] +fn identifier_search_returns_none_when_name_is_absent() { + let source = "Output(result) :- Source(result)."; + let rule = first_rule(source); + + assert_eq!(find_identifier_span(rule.syntax(), "missing_name"), None); +} + +#[test] +fn identifier_search_prefers_first_match_in_source_order() { + let source = "Output(foo) :- Nested(foo, foo)."; + let rule = first_rule(source); + + let span = find_identifier_span(rule.syntax(), "foo") + .unwrap_or_else(|| panic!("expected to find foo in `{source}`")); + + assert_eq!(span_text(source, &span), "foo"); + assert_eq!(span.start, required_offset(source, "foo")); +} diff --git a/src/parser/ast/type_def.rs b/src/parser/ast/type_def.rs index 30023d35..3bb4fec8 100644 --- a/src/parser/ast/type_def.rs +++ b/src/parser/ast/type_def.rs @@ -48,6 +48,15 @@ mod tests { use crate::parse; + fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { + source.get(span.start..span.end).unwrap_or_else(|| { + panic!( + "invalid UTF-8 boundary for span {}..{} in `{source}`", + span.start, span.end + ) + }) + } + #[test] fn extern_type_parsed() { let parsed = parse("extern type Handle"); @@ -77,4 +86,25 @@ mod tests { assert_eq!(td.name().as_deref(), Some("UserId")); assert!(!td.is_extern()); } + + #[test] + fn typedef_name_span_points_to_declaration_identifier() { + let source = "typedef UserId = UserId"; + let parsed = parse(source); + crate::test_util::assert_no_parse_errors(parsed.errors()); + #[expect(clippy::expect_used, reason = "Using expect for clearer test failures")] + let td = parsed + .root() + .type_defs() + .first() + .cloned() + .expect("typedef missing"); + + let span = td + .name_span() + .unwrap_or_else(|| panic!("missing typedef name_span in `{source}`")); + + assert_eq!(span_text(source, &span), "UserId"); + assert_eq!(span.start, "typedef ".len()); + } } diff --git a/src/sema/tests/name_span.rs b/src/sema/tests/name_span.rs index 5c770558..5df8d01b 100644 --- a/src/sema/tests/name_span.rs +++ b/src/sema/tests/name_span.rs @@ -96,7 +96,7 @@ fn semantic_rule_head_bindings_leave_name_span_unavailable() { "Output", SymbolOrigin::RuleHead, "Output", - Some(7), + Some(7_usize), "Output(Output) :- Output(_)." )] #[case( diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index e4a23014..514d088e 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -20,6 +20,7 @@ pub(super) struct RuleHeadContext<'a> { pub(super) origin: SymbolOrigin, } +/// Collected inputs needed to declare a pattern binding symbol. #[derive(Clone, Copy)] struct PatternBindingSpec<'a> { binding_name: &'a str, @@ -191,6 +192,7 @@ impl SemanticModelBuilder { }); } + /// Declare a rule-body pattern binding with optional precise CST-backed span data. fn declare_pattern_binding(&mut self, spec: PatternBindingSpec<'_>) { self.declare_symbol(SymbolSpec { name: spec.binding_name.to_string(), From 34d1ade5ea32e50a3fa58ad84790f8587c136ee6 Mon Sep 17 00:00:00 2001 From: Leynos Date: Sat, 11 Apr 2026 14:38:19 +0000 Subject: [PATCH 10/10] refactor(tests): centralize span_text utility for cleaner test code Replaced duplicated local span_text functions in multiple AST test modules with a single shared implementation from src/test_util/mod.rs. This reduces code duplication and improves test utility usage. Also adjusted semantic builder logic to safely handle rule body nodes, ensuring debug assertions remain valid during semantic collection. Minor docs update for identifier span sourcing explanatory text. Co-authored-by: devboxerhub[bot] --- docs/parser-implementation-notes.md | 4 ++-- src/parser/ast/function.rs | 11 +---------- src/parser/ast/relation.rs | 11 +---------- src/parser/ast/tests/identifier_span.rs | 11 +---------- src/parser/ast/type_def.rs | 11 +---------- src/sema/builder.rs | 10 ++++++++-- src/test_util/mod.rs | 22 ++++++++++++++++++++++ 7 files changed, 36 insertions(+), 44 deletions(-) diff --git a/docs/parser-implementation-notes.md b/docs/parser-implementation-notes.md index 1aa492da..96e39d1d 100644 --- a/docs/parser-implementation-notes.md +++ b/docs/parser-implementation-notes.md @@ -216,8 +216,8 @@ Current guarantees are intentionally narrow: ### Precise identifier span sourcing Precise diagnostic spans for declarations and rule-local bindings are captured -once during syntax or semantic collection rather than by re-walking the CST in -lint rules. +once, either during syntax collection or during semantic collection, rather +than by re-walking the CST in lint rules. Implementation points: diff --git a/src/parser/ast/function.rs b/src/parser/ast/function.rs index 029e609d..6c51dc2a 100644 --- a/src/parser/ast/function.rs +++ b/src/parser/ast/function.rs @@ -82,16 +82,7 @@ impl_ast_node!(Function); #[cfg(test)] mod tests { - use crate::parse; - - fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { - source.get(span.start..span.end).unwrap_or_else(|| { - panic!( - "invalid UTF-8 boundary for span {}..{} in `{source}`", - span.start, span.end - ) - }) - } + use crate::{parse, test_util::span_text}; #[expect(clippy::expect_used, reason = "Using expect for clearer test failures")] #[test] diff --git a/src/parser/ast/relation.rs b/src/parser/ast/relation.rs index cb5652ca..c18a75e9 100644 --- a/src/parser/ast/relation.rs +++ b/src/parser/ast/relation.rs @@ -143,16 +143,7 @@ impl_ast_node!(Relation); #[cfg(test)] mod tests { - use crate::parse; - - fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { - source.get(span.start..span.end).unwrap_or_else(|| { - panic!( - "invalid UTF-8 boundary for span {}..{} in `{source}`", - span.start, span.end - ) - }) - } + use crate::{parse, test_util::span_text}; #[test] fn relation_name() { diff --git a/src/parser/ast/tests/identifier_span.rs b/src/parser/ast/tests/identifier_span.rs index 87272180..a23aa111 100644 --- a/src/parser/ast/tests/identifier_span.rs +++ b/src/parser/ast/tests/identifier_span.rs @@ -1,7 +1,7 @@ //! Tests for identifier-span discovery helpers. use super::super::{AstNode, find_identifier_span, find_identifier_span_in_range}; -use crate::parse; +use crate::{parse, test_util::span_text}; fn first_rule(source: &str) -> super::super::Rule { parse(source) @@ -18,15 +18,6 @@ fn required_offset(source: &str, needle: &str) -> usize { .unwrap_or_else(|| panic!("expected `{needle}` in `{source}`")) } -fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { - source.get(span.start..span.end).unwrap_or_else(|| { - panic!( - "invalid UTF-8 boundary for span {}..{} in `{source}`", - span.start, span.end - ) - }) -} - #[test] fn identifier_search_picks_match_within_repeated_name_subtree() { let source = "Output(foo) :- Source(foo), Other(foo)."; diff --git a/src/parser/ast/type_def.rs b/src/parser/ast/type_def.rs index 3bb4fec8..609b46fe 100644 --- a/src/parser/ast/type_def.rs +++ b/src/parser/ast/type_def.rs @@ -46,16 +46,7 @@ impl_ast_node!(TypeDef); #[cfg(test)] mod tests { - use crate::parse; - - fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { - source.get(span.start..span.end).unwrap_or_else(|| { - panic!( - "invalid UTF-8 boundary for span {}..{} in `{source}`", - span.start, span.end - ) - }) - } + use crate::{parse, test_util::span_text}; #[test] fn extern_type_parsed() { diff --git a/src/sema/builder.rs b/src/sema/builder.rs index 1f27f3ef..9246eb07 100644 --- a/src/sema/builder.rs +++ b/src/sema/builder.rs @@ -154,14 +154,20 @@ impl SemanticModelBuilder { if let Ok(terms) = rule.body_terms() { let body_nodes = rule.body_expression_nodes(); + let body_nodes_len = body_nodes.len(); + let counts_match = body_nodes_len == terms.len(); debug_assert_eq!( - body_nodes.len(), + body_nodes_len, terms.len(), "rule body node/term count mismatch should stay visible during semantic collection" ); for (literal_index, term) in terms.into_iter().enumerate() { - let maybe_node = body_nodes.get(literal_index); + let maybe_node = if counts_match && literal_index < body_nodes_len { + body_nodes.get(literal_index) + } else { + None + }; let span = maybe_node.map_or_else(|| rule_span.clone(), ast::RuleBodyExpression::span); self.collect_rule_term( diff --git a/src/test_util/mod.rs b/src/test_util/mod.rs index bada16ba..d86a0b8d 100644 --- a/src/test_util/mod.rs +++ b/src/test_util/mod.rs @@ -44,6 +44,28 @@ pub fn tokenize(src: &str) -> Vec<(SyntaxKind, Span)> { tokenize_with_trivia(src) } +/// Slice `source` by a validated byte span for test assertions. +/// +/// # Examples +/// +/// ```rust,no_run +/// # #[cfg(feature = "test-support")] +/// # { +/// use ddlint::test_util::span_text; +/// +/// assert_eq!(span_text("abcdef", &(1..4)), "bcd"); +/// # } +/// ``` +#[must_use] +pub fn span_text<'a>(source: &'a str, span: &Span) -> &'a str { + source.get(span.start..span.end).unwrap_or_else(|| { + panic!( + "invalid UTF-8 boundary for span {}..{} in `{source}`", + span.start, span.end + ) + }) +} + /// Typed wrapper for variable and function names. #[derive(Debug, Clone)] pub struct Name(pub(crate) String);