From 7783aabdc5ec221043a9e602da9245f7c6345356 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 8 Apr 2026 15:18:08 +0000 Subject: [PATCH 1/5] feat(sema): add precise identifier spans for declarations and bindings This introduces an optional `name_span` field on symbols, capturing the exact identifier token span when available. The semantic model builder and AST nodes now extract and store precise name spans for top-level declarations (relations, functions, types) and for rule-local bindings (head bindings, assignments, for-loop patterns). The unused-variable lint and diagnostics have been updated to use these precise spans where possible for improved diagnostic highlighting. Also includes new semantic-model tests verifying that the precise identifier spans are captured and differ from the coarser span previously used. Co-authored-by: devboxerhub[bot] --- docs/ddlint-design.md | 47 +++--- .../rules/correctness/unused_variable.rs | 11 +- src/parser/ast/function.rs | 7 + src/parser/ast/mod.rs | 27 +++- src/parser/ast/relation.rs | 7 + src/parser/ast/type_def.rs | 7 + src/sema/builder.rs | 84 ++++++++--- src/sema/model.rs | 7 + src/sema/tests.rs | 1 + src/sema/tests/name_span.rs | 134 ++++++++++++++++++ src/sema/traverse.rs | 44 ++++-- 11 files changed, 316 insertions(+), 60 deletions(-) create mode 100644 src/sema/tests/name_span.rs diff --git a/docs/ddlint-design.md b/docs/ddlint-design.md index a8c8fd06..7279da7b 100644 --- a/docs/ddlint-design.md +++ b/docs/ddlint-design.md @@ -447,7 +447,9 @@ The core contracts are: semantic failures without leaking internal implementation types. - Provenance: semantic nodes must retain source mappings back to the lossless syntax layer for rule heads, body terms, aggregations, attributes, and - adornments, so diagnostics and later rewrites remain explainable. + adornments, so diagnostics and later rewrites remain explainable. For symbol + records specifically, coarse declaration or literal provenance must stay + distinct from any precise identifier-token span used for diagnostics. - Semantic model completeness: `ddlog-sema` must expose typed structures for constructs that downstream planners care about, including joins, filters, projections, aggregations, recursion boundaries, and semantic facts such as @@ -734,10 +736,20 @@ genuine reads. `for`-loop pattern, and it treats a binding as used only when semantic resolution maps a `UseKind::Variable` site back to that exact symbol. The wildcard `_` remains an explicit ignore and is never recorded as a -warning-eligible binding. As of roadmap item `4.1.2`, rule-local binding spans -still point at the enclosing rule or literal site rather than the exact -identifier token, so diagnostics for these rules use that existing coarse -provenance. +warning-eligible binding. + +Each `Symbol` now carries two related provenance fields: + +- `span` remains the existing coarse provenance used for debugging, maps, and + ownership; it points at the enclosing declaration, rule, or body term that + introduced the symbol. +- `name_span` is an additive optional field that points at the precise + identifier token when semantic collection had CST access to capture it. + +Rules such as `unused-variable` should prefer `name_span` for diagnostic +highlighting and fall back to `span` only when precise token provenance is not +available, such as parse-time `SemanticRule` values that retain only lowered +expressions plus a coarse source span. Resolution is deliberately tri-state: @@ -759,8 +771,9 @@ body scope and do not escape to later top-level literals. This pass consumes both AST-backed rules and parse-time `SemanticRule` values from top-level `for` desugaring, so the semantic view matches the parser's -existing rule surface. Provenance is retained through source spans on -declarations and on the enclosing literal or rule site for rule-local facts. +existing rule surface. Provenance is retained through coarse source spans on +declarations and on the enclosing literal or rule site for rule-local facts, +with precise identifier-token spans captured additively when CST access exists. ### 3.3. Initial lint rule catalog @@ -771,16 +784,16 @@ suggestions, establishing a solid foundation of essential lints. Table: DDLint rule catalogue and metadata. -| Rule Name | Group | Default Level | Autofixable | Description | -| ---------------------- | ----------- | ------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| unused-relation | correctness | warn | No | Detects declared relations with no resolved read-like uses in rule bodies, `for` iterables, or `for` guards; rule heads count only as writes. | -| unused-variable | correctness | warn | No | Detects rule-local bindings from rule heads, assignment patterns, or `for` patterns that never receive a resolved variable use in the same rule; `_` is an explicit ignore. | -| shadowed-variable | correctness | warn | No | Detects when a variable binding in a literal shadows one from a preceding literal in the same rule body. | -| recursive-negation | correctness | error | No | Detects rules with recursion through negation, which leads to unsafe, non-monotonic programs. | -| inefficient-join-order | performance | hint | No | Evaluates rule bodies and suggests reordering atoms for a more efficient join plan, e.g., placing more restrictive literals first. | -| superfluous-group-by | performance | warn | Yes | Detects group_by clauses where the aggregation is trivial (e.g., grouping by all variables) and can be removed. | -| consistent-casing | style | allow | Yes | Enforces a consistent casing style for relation and type identifiers (e.g., PascalCase) and variables (e.g., snake_case). | -| no-magic-numbers | style | allow | No | Flags the use of unnamed numeric literals in rule bodies where a named constant might be clearer. | +| Rule Name | Group | Default Level | Autofixable | Description | +| ---------------------- | ----------- | ------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| unused-relation | correctness | warn | No | Detects declared relations with no resolved read-like uses in rule bodies, `for` iterables, or `for` guards; rule heads count only as writes. | +| unused-variable | correctness | warn | No | Detects rule-local bindings from rule heads, assignment patterns, or `for` patterns that never receive a resolved variable use in the same rule; `_` is an explicit ignore, and diagnostics prefer precise identifier-token spans when available. | +| shadowed-variable | correctness | warn | No | Detects when a variable binding in a literal shadows one from a preceding literal in the same rule body. | +| recursive-negation | correctness | error | No | Detects rules with recursion through negation, which leads to unsafe, non-monotonic programs. | +| inefficient-join-order | performance | hint | No | Evaluates rule bodies and suggests reordering atoms for a more efficient join plan, e.g., placing more restrictive literals first. | +| superfluous-group-by | performance | warn | Yes | Detects group_by clauses where the aggregation is trivial (e.g., grouping by all variables) and can be removed. | +| consistent-casing | style | allow | Yes | Enforces a consistent casing style for relation and type identifiers (e.g., PascalCase) and variables (e.g., snake_case). | +| no-magic-numbers | style | allow | No | Flags the use of unnamed numeric literals in rule bodies where a named constant might be clearer. | This table serves as a concrete work breakdown for the engineering team and clearly communicates the linter's initial capabilities and priorities to early diff --git a/src/linter/rules/correctness/unused_variable.rs b/src/linter/rules/correctness/unused_variable.rs index 1b51dd05..8cc138ee 100644 --- a/src/linter/rules/correctness/unused_variable.rs +++ b/src/linter/rules/correctness/unused_variable.rs @@ -3,8 +3,8 @@ //! The rule operates over semantic `RuleBinding` symbols rather than raw CST //! name matching so shadowing, unresolved names, and wildcard ignores follow //! the semantic model's existing contracts. Diagnostics currently use the -//! binding symbol's stored span, which points at the enclosing rule or literal -//! rather than the exact identifier token. +//! binding symbol's precise identifier span when semantic collection captured +//! one, falling back to the existing coarse provenance otherwise. use crate::linter::{LintDiagnostic, Rule}; use crate::{SyntaxKind, declare_lint}; @@ -30,7 +30,8 @@ declare_lint! { continue; } - let Some(range) = crate::linter::span_utils::span_to_text_range(symbol.span()) else { + let span = symbol.name_span().unwrap_or_else(|| symbol.span()); + let Some(range) = crate::linter::span_utils::span_to_text_range(span) else { continue; }; @@ -80,6 +81,10 @@ mod tests { .map(crate::linter::LintDiagnostic::message), Some("variable `head_x` is defined but never used in this rule") ); + assert_eq!( + diagnostics.first().map(crate::linter::LintDiagnostic::span), + Some(rowan::TextRange::new(7.into(), 13.into())) + ); } #[rstest] diff --git a/src/parser/ast/function.rs b/src/parser/ast/function.rs index 6b0bec97..2953cb7b 100644 --- a/src/parser/ast/function.rs +++ b/src/parser/ast/function.rs @@ -29,6 +29,13 @@ impl Function { }) } + /// Precise source span for the function name token, if present. + #[must_use] + pub fn name_span(&self) -> Option { + self.name() + .and_then(|name| super::find_identifier_span(&self.syntax, &name)) + } + /// Returns `true` if declared with the `extern` keyword. #[must_use] pub fn is_extern(&self) -> bool { diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index 85737e8f..bf0de449 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -9,10 +9,11 @@ //! tests and fixtures to assemble match expressions without dipping into //! private modules. -use rowan::SyntaxElement; +use rowan::{NodeOrToken, SyntaxElement, SyntaxNode}; use self::parse_utils::is_trivia; -use crate::{DdlogLanguage, SyntaxKind}; +use crate::parser::ast::rule::text_range_to_span; +use crate::{DdlogLanguage, Span, SyntaxKind}; /// Common interface for AST wrappers. pub(crate) trait AstNode { @@ -20,6 +21,28 @@ pub(crate) trait AstNode { fn syntax(&self) -> &rowan::SyntaxNode; } +/// Find the first identifier token with matching text within a syntax subtree. +#[must_use] +pub(crate) fn find_identifier_span(syntax: &SyntaxNode, name: &str) -> Option { + for child in syntax.children_with_tokens() { + match child { + NodeOrToken::Token(token) + if token.kind() == SyntaxKind::T_IDENT && token.text() == name => + { + return Some(text_range_to_span(token.text_range())); + } + NodeOrToken::Node(node) => { + if let Some(span) = find_identifier_span(&node, name) { + return Some(span); + } + } + NodeOrToken::Token(_) => {} + } + } + + None +} + macro_rules! impl_ast_node { ($ty:ty) => { impl AstNode for $ty { diff --git a/src/parser/ast/relation.rs b/src/parser/ast/relation.rs index ccd19f04..bd78bf88 100644 --- a/src/parser/ast/relation.rs +++ b/src/parser/ast/relation.rs @@ -58,6 +58,13 @@ impl Relation { }) } + /// Precise source span for the relation name token, if present. + #[must_use] + pub fn name_span(&self) -> Option { + self.name() + .and_then(|name| super::find_identifier_span(&self.syntax, &name)) + } + /// Returns `true` if declared with the `input` keyword. #[must_use] pub fn is_input(&self) -> bool { diff --git a/src/parser/ast/type_def.rs b/src/parser/ast/type_def.rs index 2541850f..30023d35 100644 --- a/src/parser/ast/type_def.rs +++ b/src/parser/ast/type_def.rs @@ -25,6 +25,13 @@ impl TypeDef { super::take_first_ident(iter) } + /// Precise source span for the type name token, if present. + #[must_use] + pub fn name_span(&self) -> Option { + self.name() + .and_then(|name| super::find_identifier_span(&self.syntax, &name)) + } + /// Whether this declaration is `extern`. #[must_use] pub fn is_extern(&self) -> bool { diff --git a/src/sema/builder.rs b/src/sema/builder.rs index 15aaa6fa..c7261d8c 100644 --- a/src/sema/builder.rs +++ b/src/sema/builder.rs @@ -7,6 +7,7 @@ use rowan::SyntaxNode; use crate::Span; use crate::SyntaxKind; use crate::parser::ast; +use crate::parser::ast::AstNode; use crate::parser::ast::SemanticRule; use crate::parser::ast::rule::text_range_to_span; use crate::sema::model::{ @@ -14,7 +15,7 @@ use crate::sema::model::{ SymbolId, SymbolOrigin, UseKind, }; -use super::resolve::collect_pattern_binding_names; +use super::resolve::{collect_head_binding_names, collect_pattern_binding_names}; pub(crate) struct SymbolSpec { pub(crate) name: String, @@ -22,6 +23,7 @@ pub(crate) struct SymbolSpec { pub(crate) origin: SymbolOrigin, pub(crate) scope: ScopeId, pub(crate) span: Span, + pub(crate) name_span: Option, pub(crate) source_order: usize, pub(crate) visible_from_rule_order: usize, } @@ -126,6 +128,7 @@ impl SemanticModelBuilder { let mut order = 0usize; let mut declare_from_child = |name: Option, + name_span: Option, kind: DeclarationKind, origin: SymbolOrigin, child: &SyntaxNode| { @@ -136,6 +139,7 @@ impl SemanticModelBuilder { origin, scope: self.program_scope, span: text_range_to_span(child.text_range()), + name_span, source_order: order, visible_from_rule_order: 0, }); @@ -146,28 +150,52 @@ impl SemanticModelBuilder { for child in root.syntax().children() { match child.kind() { SyntaxKind::N_RELATION_DECL => declare_from_child( - ast::Relation { - syntax: child.clone(), - } - .name(), + { + let relation = ast::Relation { + syntax: child.clone(), + }; + relation.name() + }, + { + let relation = ast::Relation { + syntax: child.clone(), + }; + relation.name_span() + }, DeclarationKind::Relation, SymbolOrigin::RelationDeclaration, &child, ), SyntaxKind::N_FUNCTION => declare_from_child( - ast::Function { - syntax: child.clone(), - } - .name(), + { + let function = ast::Function { + syntax: child.clone(), + }; + function.name() + }, + { + let function = ast::Function { + syntax: child.clone(), + }; + function.name_span() + }, DeclarationKind::Function, SymbolOrigin::FunctionDeclaration, &child, ), SyntaxKind::N_TYPE_DEF => declare_from_child( - ast::TypeDef { - syntax: child.clone(), - } - .name(), + { + let type_def = ast::TypeDef { + syntax: child.clone(), + }; + type_def.name() + }, + { + let type_def = ast::TypeDef { + syntax: child.clone(), + }; + type_def.name_span() + }, DeclarationKind::Type, SymbolOrigin::TypeDeclaration, &child, @@ -197,17 +225,10 @@ impl SemanticModelBuilder { ); if let Ok(terms) = rule.body_terms() { - let spans: Vec = rule - .body_expression_nodes() - .into_iter() - .map(|node| node.span()) - .collect(); + let body_nodes = rule.body_expression_nodes(); - for (literal_index, term) in terms.into_iter().enumerate() { - let span = spans - .get(literal_index) - .cloned() - .unwrap_or_else(|| rule_span.clone()); + for (literal_index, (node, term)) in body_nodes.into_iter().zip(terms).enumerate() { + let span = node.span(); self.collect_rule_term( super::variables::VariableUseContext::new( rule_scope, @@ -215,6 +236,7 @@ impl SemanticModelBuilder { &span, literal_index, ), + Some(node.syntax()), &term, ); } @@ -243,8 +265,22 @@ impl SemanticModelBuilder { origin: SymbolOrigin::SemanticRuleHead, }, ); + for binding_name in collect_head_binding_names(rule.head()) { + self.declare_symbol(SymbolSpec { + name: binding_name, + kind: DeclarationKind::RuleBinding, + origin: SymbolOrigin::SemanticRuleHead, + scope: rule_scope, + span: rule_span.clone(), + name_span: None, + source_order: self.symbols.len(), + visible_from_rule_order: 0, + }); + } for pattern in rule.patterns() { + // Parse-time semantic rules do not currently preserve CST token + // handles, so precise identifier spans are unavailable here. for binding_name in collect_pattern_binding_names(pattern) { self.declare_symbol(SymbolSpec { name: binding_name, @@ -252,6 +288,7 @@ impl SemanticModelBuilder { origin: SymbolOrigin::ForPattern, scope: rule_scope, span: rule_span.clone(), + name_span: None, source_order: self.symbols.len(), visible_from_rule_order: 0, }); @@ -297,6 +334,7 @@ impl SemanticModelBuilder { origin: spec.origin, scope: spec.scope, span: spec.span, + name_span: spec.name_span, source_order: spec.source_order, visible_from_rule_order: spec.visible_from_rule_order, }); diff --git a/src/sema/model.rs b/src/sema/model.rs index 015a56a6..735cea96 100644 --- a/src/sema/model.rs +++ b/src/sema/model.rs @@ -175,6 +175,7 @@ pub struct Symbol { pub(crate) origin: SymbolOrigin, pub(crate) scope: ScopeId, pub(crate) span: Span, + pub(crate) name_span: Option, pub(crate) source_order: usize, pub(crate) visible_from_rule_order: usize, } @@ -210,6 +211,12 @@ impl Symbol { &self.span } + /// Precise source span for the identifier token, when captured. + #[must_use] + pub fn name_span(&self) -> Option<&Span> { + self.name_span.as_ref() + } + /// Stable source-order index within the owning scope. #[must_use] pub fn source_order(&self) -> usize { diff --git a/src/sema/tests.rs b/src/sema/tests.rs index 42409629..f8c307eb 100644 --- a/src/sema/tests.rs +++ b/src/sema/tests.rs @@ -388,4 +388,5 @@ fn relation_read_helpers_ignore_head_only_and_unresolved_uses( ); } +mod name_span; mod unused_variable; diff --git a/src/sema/tests/name_span.rs b/src/sema/tests/name_span.rs new file mode 100644 index 00000000..dfed6154 --- /dev/null +++ b/src/sema/tests/name_span.rs @@ -0,0 +1,134 @@ +//! Semantic-model tests for precise identifier spans. + +use rstest::rstest; + +use super::{DeclarationKind, parse_ok, symbols_named}; +use crate::sema::SymbolOrigin; + +fn span_text<'a>(source: &'a str, span: &crate::Span) -> &'a str { + source.get(span.start..span.end).unwrap_or_else(|| { + panic!( + "invalid UTF-8 boundary for span {}..{} in `{source}`", + span.start, span.end + ) + }) +} + +fn find_symbol<'a>( + model: &'a super::super::SemanticModel, + name: &str, + kind: DeclarationKind, + origin: SymbolOrigin, +) -> &'a super::super::Symbol { + symbols_named(model, name, kind) + .into_iter() + .find(|symbol| symbol.origin() == origin) + .unwrap_or_else(|| panic!("missing symbol `{name}` with origin {origin:?}")) +} + +#[rstest] +#[case( + "input relation Source(x: u32)", + "Source", + DeclarationKind::Relation, + SymbolOrigin::RelationDeclaration, + "Source" +)] +#[case( + "function project(id: u32): u32 {}", + "project", + DeclarationKind::Function, + SymbolOrigin::FunctionDeclaration, + "project" +)] +#[case( + "typedef UserId = u64", + "UserId", + DeclarationKind::Type, + SymbolOrigin::TypeDeclaration, + "UserId" +)] +fn top_level_declarations_capture_identifier_name_spans( + #[case] source: &str, + #[case] symbol_name: &str, + #[case] kind: DeclarationKind, + #[case] origin: SymbolOrigin, + #[case] expected_text: &str, +) { + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol(&semantic_model, symbol_name, kind, origin); + + let name_span = symbol + .name_span() + .unwrap_or_else(|| panic!("missing name_span for `{symbol_name}`")); + + assert_eq!(span_text(source, name_span), expected_text); + assert_ne!(name_span, symbol.span()); +} + +#[rstest] +fn rule_head_bindings_capture_identifier_name_spans() { + let source = "Output(head_x) :- Source(_)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "head_x", + DeclarationKind::RuleBinding, + SymbolOrigin::RuleHead, + ); + + let name_span = symbol + .name_span() + .unwrap_or_else(|| panic!("missing head binding name_span")); + + assert_eq!(span_text(source, name_span), "head_x"); + assert_eq!(span_text(source, symbol.span()), source); +} + +#[rstest] +fn assignment_bindings_capture_identifier_name_spans() { + let source = "Output(result) :- Source(result), var assigned_x = Seed(result)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "assigned_x", + DeclarationKind::RuleBinding, + SymbolOrigin::AssignmentPattern, + ); + + let name_span = symbol + .name_span() + .unwrap_or_else(|| panic!("missing assignment binding name_span")); + + assert_eq!(span_text(source, name_span), "assigned_x"); + assert_eq!( + span_text(source, symbol.span()), + "var assigned_x = Seed(result)" + ); +} + +#[rstest] +fn for_loop_bindings_capture_identifier_name_spans() { + let source = "Output(result) :- Source(result), for (item_y in Items(result)) Inner(result)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "item_y", + DeclarationKind::RuleBinding, + SymbolOrigin::ForPattern, + ); + + let name_span = symbol + .name_span() + .unwrap_or_else(|| panic!("missing for-loop binding name_span")); + + assert_eq!(span_text(source, name_span), "item_y"); + assert_eq!( + span_text(source, symbol.span()), + "for (item_y in Items(result)) Inner(result)" + ); +} diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index dc93dbce..969330dc 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -1,7 +1,10 @@ //! Expression and rule-body traversal for semantic-model collection. +use rowan::SyntaxNode; + use crate::Span; use crate::parser::ast; +use crate::parser::ast::{AstNode, find_identifier_span}; use crate::parser::ast::{Expr, RuleBodyTerm}; use crate::sema::model::{ DeclarationKind, ScopeId, ScopeKind, ScopeOrigin, SymbolOrigin, UseKind, UseOrigin, UseSite, @@ -23,6 +26,18 @@ impl SemanticModelBuilder { if let Ok(heads) = rule.heads() { for head in heads { self.collect_head_expr(&head.atom, ctx); + for binding_name in collect_head_binding_names(&head.atom) { + self.declare_symbol(SymbolSpec { + name: binding_name.clone(), + kind: DeclarationKind::RuleBinding, + origin: ctx.origin, + scope: ctx.scope, + span: ctx.span.clone(), + name_span: find_identifier_span(rule.syntax(), &binding_name), + source_order: self.symbols.len(), + visible_from_rule_order: 0, + }); + } if let Some(location) = head.location.as_ref() { self.walk_variable_uses( location, @@ -39,31 +54,25 @@ impl SemanticModelBuilder { UseOrigin::RelationHead, expr, ); - for binding_name in collect_head_binding_names(expr) { - self.declare_symbol(SymbolSpec { - name: binding_name, - kind: DeclarationKind::RuleBinding, - origin: ctx.origin, - scope: ctx.scope, - span: ctx.span.clone(), - source_order: self.symbols.len(), - visible_from_rule_order: 0, - }); - } } pub(crate) fn collect_rule_term( &mut self, context: VariableUseContext<'_>, + syntax: Option<&SyntaxNode>, term: &RuleBodyTerm, ) { match term { RuleBodyTerm::Expression(expr) => self.collect_expression_term(expr, context), - RuleBodyTerm::Assignment(assign) => self.collect_assignment_term(assign, context), + RuleBodyTerm::Assignment(assign) => { + self.collect_assignment_term(assign, syntax, context); + } RuleBodyTerm::Aggregation(aggregation) => { self.collect_aggregation_term(aggregation, context); } - RuleBodyTerm::ForLoop(for_loop) => self.collect_for_loop_term(for_loop, context), + RuleBodyTerm::ForLoop(for_loop) => { + self.collect_for_loop_term(for_loop, syntax, context); + } } } @@ -75,16 +84,18 @@ impl SemanticModelBuilder { fn collect_assignment_term( &mut self, assign: &ast::RuleAssignment, + syntax: Option<&SyntaxNode>, context: VariableUseContext<'_>, ) { self.walk_variable_uses(&assign.value, context); for binding_name in collect_pattern_binding_names(&assign.pattern) { self.declare_symbol(SymbolSpec { - name: binding_name, + name: binding_name.clone(), kind: DeclarationKind::RuleBinding, origin: SymbolOrigin::AssignmentPattern, scope: context.current_scope(), span: context.span().clone(), + name_span: syntax.and_then(|syntax| find_identifier_span(syntax, &binding_name)), source_order: self.symbols.len(), visible_from_rule_order: context.literal_index() + 1, }); @@ -103,6 +114,7 @@ impl SemanticModelBuilder { fn collect_for_loop_term( &mut self, for_loop: &ast::RuleForLoop, + syntax: Option<&SyntaxNode>, context: VariableUseContext<'_>, ) { self.record_top_level_relation_use(context, UseOrigin::ForIterable, &for_loop.iterable); @@ -122,11 +134,12 @@ impl SemanticModelBuilder { }); for binding_name in collect_pattern_binding_names(&for_loop.pattern) { self.declare_symbol(SymbolSpec { - name: binding_name, + name: binding_name.clone(), kind: DeclarationKind::RuleBinding, origin: SymbolOrigin::ForPattern, scope: child_scope, span: context.span().clone(), + name_span: syntax.and_then(|syntax| find_identifier_span(syntax, &binding_name)), source_order: self.symbols.len(), visible_from_rule_order: 0, }); @@ -140,6 +153,7 @@ impl SemanticModelBuilder { context.span(), context.rule_order_limit(), ), + None, nested_term, ); } From 7377cdc9519ceda032b5a470bf73374e11e488c7 Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 8 Apr 2026 20:03:08 +0000 Subject: [PATCH 2/5] refactor(sema): extract program declaration spec generation to helper function Refactor collect_program_declarations by removing inlined closure and pattern matching, replacing it with a new helper method program_declaration_spec. This improves code readability and modularity without changing behavior. Co-authored-by: devboxerhub[bot] --- src/sema/builder.rs | 131 +++++++++++++++++++------------------------- 1 file changed, 57 insertions(+), 74 deletions(-) diff --git a/src/sema/builder.rs b/src/sema/builder.rs index c7261d8c..e51eb5b5 100644 --- a/src/sema/builder.rs +++ b/src/sema/builder.rs @@ -126,81 +126,10 @@ impl SemanticModelBuilder { pub(crate) fn collect_program_declarations(&mut self, root: &ast::Root) { let mut order = 0usize; - let mut declare_from_child = - |name: Option, - name_span: Option, - kind: DeclarationKind, - origin: SymbolOrigin, - child: &SyntaxNode| { - if let Some(name) = name { - self.declare_symbol(SymbolSpec { - name, - kind, - origin, - scope: self.program_scope, - span: text_range_to_span(child.text_range()), - name_span, - source_order: order, - visible_from_rule_order: 0, - }); - order += 1; - } - }; - for child in root.syntax().children() { - match child.kind() { - SyntaxKind::N_RELATION_DECL => declare_from_child( - { - let relation = ast::Relation { - syntax: child.clone(), - }; - relation.name() - }, - { - let relation = ast::Relation { - syntax: child.clone(), - }; - relation.name_span() - }, - DeclarationKind::Relation, - SymbolOrigin::RelationDeclaration, - &child, - ), - SyntaxKind::N_FUNCTION => declare_from_child( - { - let function = ast::Function { - syntax: child.clone(), - }; - function.name() - }, - { - let function = ast::Function { - syntax: child.clone(), - }; - function.name_span() - }, - DeclarationKind::Function, - SymbolOrigin::FunctionDeclaration, - &child, - ), - SyntaxKind::N_TYPE_DEF => declare_from_child( - { - let type_def = ast::TypeDef { - syntax: child.clone(), - }; - type_def.name() - }, - { - let type_def = ast::TypeDef { - syntax: child.clone(), - }; - type_def.name_span() - }, - DeclarationKind::Type, - SymbolOrigin::TypeDeclaration, - &child, - ), - _ => {} + if let Some(spec) = self.program_declaration_spec(&child, order) { + self.declare_symbol(spec); + order += 1; } } } @@ -320,6 +249,60 @@ impl SemanticModelBuilder { scope_id } + fn program_declaration_spec( + &self, + child: &SyntaxNode, + source_order: usize, + ) -> Option { + let (name, name_span, kind, origin) = match child.kind() { + SyntaxKind::N_RELATION_DECL => { + let relation = ast::Relation { + syntax: child.clone(), + }; + ( + relation.name(), + relation.name_span(), + DeclarationKind::Relation, + SymbolOrigin::RelationDeclaration, + ) + } + SyntaxKind::N_FUNCTION => { + let function = ast::Function { + syntax: child.clone(), + }; + ( + function.name(), + function.name_span(), + DeclarationKind::Function, + SymbolOrigin::FunctionDeclaration, + ) + } + SyntaxKind::N_TYPE_DEF => { + let type_def = ast::TypeDef { + syntax: child.clone(), + }; + ( + type_def.name(), + type_def.name_span(), + DeclarationKind::Type, + SymbolOrigin::TypeDeclaration, + ) + } + _ => return None, + }; + + name.map(|name| SymbolSpec { + name, + kind, + origin, + scope: self.program_scope, + span: text_range_to_span(child.text_range()), + name_span, + source_order, + visible_from_rule_order: 0, + }) + } + pub(crate) fn declare_symbol(&mut self, spec: SymbolSpec) -> SymbolId { let symbol_id = SymbolId(self.symbols.len()); self.symbols_by_scope_and_name From fe9aff310ac4721689f5ac771f29c32b2e20faec Mon Sep 17 00:00:00 2001 From: Leynos Date: Wed, 8 Apr 2026 23:05:00 +0000 Subject: [PATCH 3/5] feat(parser, sema): add precise binding spans for rule heads to semantic model This change augments the RuleHead AST with span information for the head atom and its bindings. It introduces functionality in the parser to collect and store identifier-token spans for bindings introduced by the head atom. The semantic model builder now uses these spans to accurately track declaration name spans for rule head bindings. Enhancements include: - New functions to find identifier spans within given spans for precise range matching. - Collection of binding spans in parsed rule heads excluding non-bindings like those following dots or certain punctuations. - Usage of binding spans in semantic symbol declarations for improved source mapping. - Additional tests to verify correct capturing and handling of name spans for rule head bindings. This feature improves diagnostic accuracy and tooling support by providing detailed source location information for rule head bindings. Co-authored-by: devboxerhub[bot] --- src/parser/ast/mod.rs | 28 ++++++++++++++++-- src/parser/ast/rule_head.rs | 57 ++++++++++++++++++++++++++++++++++++- src/sema/builder.rs | 14 ++++++--- src/sema/tests/name_span.rs | 30 +++++++++++++++++-- src/sema/traverse.rs | 9 +++--- 5 files changed, 123 insertions(+), 15 deletions(-) diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index bf0de449..d10ef002 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -24,15 +24,31 @@ pub(crate) trait AstNode { /// Find the first identifier token with matching text within a syntax subtree. #[must_use] pub(crate) fn find_identifier_span(syntax: &SyntaxNode, name: &str) -> Option { + find_identifier_span_in_range(syntax, &text_range_to_span(syntax.text_range()), name) +} + +/// Find the first identifier token with matching text within a span-bounded subtree search. +#[must_use] +pub(crate) fn find_identifier_span_in_range( + syntax: &SyntaxNode, + search_span: &Span, + name: &str, +) -> Option { for child in syntax.children_with_tokens() { match child { NodeOrToken::Token(token) if token.kind() == SyntaxKind::T_IDENT && token.text() == name => { - return Some(text_range_to_span(token.text_range())); + let token_span = text_range_to_span(token.text_range()); + if span_contains(search_span, &token_span) { + return Some(token_span); + } } NodeOrToken::Node(node) => { - if let Some(span) = find_identifier_span(&node, name) { + if !ranges_overlap(search_span, &text_range_to_span(node.text_range())) { + continue; + } + if let Some(span) = find_identifier_span_in_range(&node, search_span, name) { return Some(span); } } @@ -43,6 +59,14 @@ pub(crate) fn find_identifier_span(syntax: &SyntaxNode, name: &st None } +fn ranges_overlap(left: &Span, right: &Span) -> bool { + left.start < right.end && right.start < left.end +} + +fn span_contains(container: &Span, candidate: &Span) -> bool { + container.start <= candidate.start && candidate.end <= container.end +} + macro_rules! impl_ast_node { ($ty:ty) => { impl AstNode for $ty { diff --git a/src/parser/ast/rule_head.rs b/src/parser/ast/rule_head.rs index 019b93e0..b9aa1d15 100644 --- a/src/parser/ast/rule_head.rs +++ b/src/parser/ast/rule_head.rs @@ -88,6 +88,10 @@ pub struct RuleHead { pub atom: Expr, /// Optional `@` location expression. pub location: Option, + /// Source span covering only the head atom. + pub span: Span, + /// Precise identifier-token spans for bindings introduced by the head atom. + pub binding_spans: Vec<(String, Span)>, } pub(crate) fn parse_rule_heads( @@ -162,16 +166,67 @@ fn parse_rule_head_span( segment_offset.saturating_add(location_start), )?); - return Ok(Some(RuleHead { atom, location })); + let binding_spans = collect_head_binding_spans(atom_src, segment_offset); + + return Ok(Some(RuleHead { + atom, + location, + span: segment_offset..segment_offset.saturating_add(at_span.start), + binding_spans, + })); } + let binding_spans = collect_head_binding_spans(trimmed, segment_offset); let atom = lower_by_ref_head(parse_expr_with_offset(trimmed, segment_offset)?); Ok(Some(RuleHead { atom, location: None, + span: segment_offset..segment_offset.saturating_add(trimmed.len()), + binding_spans, })) } +fn collect_head_binding_spans(src: &str, base_offset: usize) -> Vec<(String, Span)> { + let tokens = tokenize_without_trivia(src); + let mut bindings = Vec::new(); + + for (index, (kind, span)) in tokens.iter().enumerate() { + if *kind != SyntaxKind::T_IDENT { + continue; + } + + let text = src.get(span.clone()).unwrap_or(""); + if should_skip_binding_ident(&tokens, index) || text == "_" { + continue; + } + + if bindings.iter().any(|(existing, _)| existing == text) { + continue; + } + + bindings.push(( + text.to_string(), + base_offset.saturating_add(span.start)..base_offset.saturating_add(span.end), + )); + } + + bindings +} + +fn should_skip_binding_ident(tokens: &[(SyntaxKind, Span)], index: usize) -> bool { + let prev_kind = index + .checked_sub(1) + .and_then(|prev| tokens.get(prev)) + .map(|(kind, _)| *kind); + let next_kind = tokens.get(index + 1).map(|(kind, _)| *kind); + + prev_kind == Some(SyntaxKind::T_DOT) + || matches!( + next_kind, + Some(SyntaxKind::T_LPAREN | SyntaxKind::T_LBRACE | SyntaxKind::T_COLON) + ) +} + fn parse_expr_with_offset(src: &str, base_offset: usize) -> Result>> { let (start, end) = trim_byte_range(src); let trimmed = src.get(start..end).unwrap_or(""); diff --git a/src/sema/builder.rs b/src/sema/builder.rs index e51eb5b5..555dd2e8 100644 --- a/src/sema/builder.rs +++ b/src/sema/builder.rs @@ -7,7 +7,6 @@ use rowan::SyntaxNode; use crate::Span; use crate::SyntaxKind; use crate::parser::ast; -use crate::parser::ast::AstNode; use crate::parser::ast::SemanticRule; use crate::parser::ast::rule::text_range_to_span; use crate::sema::model::{ @@ -155,9 +154,16 @@ impl SemanticModelBuilder { if let Ok(terms) = rule.body_terms() { let body_nodes = rule.body_expression_nodes(); + debug_assert_eq!( + body_nodes.len(), + terms.len(), + "rule body node/term count mismatch should stay visible during semantic collection" + ); - for (literal_index, (node, term)) in body_nodes.into_iter().zip(terms).enumerate() { - let span = node.span(); + for (literal_index, term) in terms.into_iter().enumerate() { + let maybe_node = body_nodes.get(literal_index); + let span = + maybe_node.map_or_else(|| rule_span.clone(), ast::RuleBodyExpression::span); self.collect_rule_term( super::variables::VariableUseContext::new( rule_scope, @@ -165,7 +171,7 @@ impl SemanticModelBuilder { &span, literal_index, ), - Some(node.syntax()), + maybe_node.map(ast::AstNode::syntax), &term, ); } diff --git a/src/sema/tests/name_span.rs b/src/sema/tests/name_span.rs index dfed6154..1a743bd2 100644 --- a/src/sema/tests/name_span.rs +++ b/src/sema/tests/name_span.rs @@ -41,6 +41,13 @@ fn find_symbol<'a>( SymbolOrigin::FunctionDeclaration, "project" )] +#[case( + "function project(project: u32): u32 { project }", + "project", + DeclarationKind::Function, + SymbolOrigin::FunctionDeclaration, + "project" +)] #[case( "typedef UserId = u64", "UserId", @@ -67,14 +74,30 @@ fn top_level_declarations_capture_identifier_name_spans( assert_ne!(name_span, symbol.span()); } +#[rstest] +fn semantic_rule_head_bindings_leave_name_span_unavailable() { + let source = "for (item_y in Source(item_y)) Output(item_y)."; + let parsed = parse_ok(source); + let semantic_model = super::super::build(&parsed); + let symbol = find_symbol( + &semantic_model, + "item_y", + DeclarationKind::RuleBinding, + SymbolOrigin::SemanticRuleHead, + ); + + assert!(symbol.name_span().is_none()); + assert_eq!(span_text(source, symbol.span()), source); +} + #[rstest] fn rule_head_bindings_capture_identifier_name_spans() { - let source = "Output(head_x) :- Source(_)."; + let source = "Output(Output) :- Output(_)."; let parsed = parse_ok(source); let semantic_model = super::super::build(&parsed); let symbol = find_symbol( &semantic_model, - "head_x", + "Output", DeclarationKind::RuleBinding, SymbolOrigin::RuleHead, ); @@ -83,7 +106,8 @@ fn rule_head_bindings_capture_identifier_name_spans() { .name_span() .unwrap_or_else(|| panic!("missing head binding name_span")); - assert_eq!(span_text(source, name_span), "head_x"); + assert_eq!(span_text(source, name_span), "Output"); + assert_eq!(name_span.start, 7); assert_eq!(span_text(source, symbol.span()), source); } diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index 969330dc..16c064c4 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -4,14 +4,13 @@ use rowan::SyntaxNode; use crate::Span; use crate::parser::ast; -use crate::parser::ast::{AstNode, find_identifier_span}; -use crate::parser::ast::{Expr, RuleBodyTerm}; +use crate::parser::ast::{Expr, RuleBodyTerm, find_identifier_span}; use crate::sema::model::{ DeclarationKind, ScopeId, ScopeKind, ScopeOrigin, SymbolOrigin, UseKind, UseOrigin, UseSite, }; use super::builder::{ScopeSpec, SemanticModelBuilder, SymbolSpec}; -use super::resolve::{collect_head_binding_names, collect_pattern_binding_names, relation_name}; +use super::resolve::{collect_pattern_binding_names, relation_name}; use super::variables::VariableUseContext; #[derive(Clone, Copy)] @@ -26,14 +25,14 @@ impl SemanticModelBuilder { if let Ok(heads) = rule.heads() { for head in heads { self.collect_head_expr(&head.atom, ctx); - for binding_name in collect_head_binding_names(&head.atom) { + for (binding_name, name_span) in &head.binding_spans { self.declare_symbol(SymbolSpec { name: binding_name.clone(), kind: DeclarationKind::RuleBinding, origin: ctx.origin, scope: ctx.scope, span: ctx.span.clone(), - name_span: find_identifier_span(rule.syntax(), &binding_name), + name_span: Some(name_span.clone()), source_order: self.symbols.len(), visible_from_rule_order: 0, }); From 590b15d1df1300d2c59051ab09022ad43a057372 Mon Sep 17 00:00:00 2001 From: Leynos Date: Thu, 9 Apr 2026 11:54:52 +0000 Subject: [PATCH 4/5] fix(sema): pass None for CST handles in nested for loop terms Nested bodies recurse through `collect_rule_term`, but the `VariableUseContext::new(...)` path no longer retains CST handles for those inner terms. Passing `None` deliberately prevents nested bindings from claiming precise `name_span`s we cannot source faithfully. Co-authored-by: devboxerhub[bot] --- src/sema/traverse.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/sema/traverse.rs b/src/sema/traverse.rs index 16c064c4..0bcab243 100644 --- a/src/sema/traverse.rs +++ b/src/sema/traverse.rs @@ -145,6 +145,11 @@ impl SemanticModelBuilder { } for (term_offset, nested_term) in for_loop.body_terms.iter().enumerate() { + // Nested bodies recurse through `collect_rule_term`, but the + // `VariableUseContext::new(...)` path no longer retains CST handles + // for those inner terms. We therefore pass `None` here + // deliberately so nested bindings do not claim precise + // `name_span`s we cannot source faithfully. self.collect_rule_term( VariableUseContext::new( child_scope, From 533a73045d39029233bf7cb30f4bc62d8e409563 Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Thu, 9 Apr 2026 11:57:41 +0000 Subject: [PATCH 5/5] =?UTF-8?q?=F0=9F=93=9D=20CodeRabbit=20Chat:=20Impleme?= =?UTF-8?q?nt=20requested=20code=20changes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/ast/mod.rs | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs index d10ef002..cd24817c 100644 --- a/src/parser/ast/mod.rs +++ b/src/parser/ast/mod.rs @@ -34,29 +34,20 @@ pub(crate) fn find_identifier_span_in_range( search_span: &Span, name: &str, ) -> Option { - for child in syntax.children_with_tokens() { - match child { - NodeOrToken::Token(token) - if token.kind() == SyntaxKind::T_IDENT && token.text() == name => - { - let token_span = text_range_to_span(token.text_range()); - if span_contains(search_span, &token_span) { - return Some(token_span); - } - } - NodeOrToken::Node(node) => { - if !ranges_overlap(search_span, &text_range_to_span(node.text_range())) { - continue; - } - if let Some(span) = find_identifier_span_in_range(&node, search_span, name) { - return Some(span); - } - } - NodeOrToken::Token(_) => {} + syntax.children_with_tokens().find_map(|child| match child { + NodeOrToken::Token(token) + if token.kind() == SyntaxKind::T_IDENT && token.text() == name => + { + let token_span = text_range_to_span(token.text_range()); + span_contains(search_span, &token_span).then_some(token_span) } - } - - None + NodeOrToken::Node(node) => { + ranges_overlap(search_span, &text_range_to_span(node.text_range())) + .then(|| find_identifier_span_in_range(&node, search_span, name)) + .flatten() + } + NodeOrToken::Token(_) => None, + }) } fn ranges_overlap(left: &Span, right: &Span) -> bool { @@ -238,4 +229,4 @@ mod tests { let parsed = parse(src); assert_eq!(parsed.root().imports().len(), 1); } -} +} \ No newline at end of file