leynos · leynos · Apr 11, 2026 · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026
diff --git a/docs/ddlint-design.md b/docs/ddlint-design.md
@@ -447,7 +447,9 @@ The core contracts are:
   semantic failures without leaking internal implementation types.
 - Provenance: semantic nodes must retain source mappings back to the lossless
   syntax layer for rule heads, body terms, aggregations, attributes, and
-  adornments, so diagnostics and later rewrites remain explainable.
+  adornments, so diagnostics and later rewrites remain explainable. For symbol
+  records specifically, coarse declaration or literal provenance must stay
+  distinct from any precise identifier-token span used for diagnostics.
 - Semantic model completeness: `ddlog-sema` must expose typed structures for
   constructs that downstream planners care about, including joins, filters,
   projections, aggregations, recursion boundaries, and semantic facts such as
@@ -734,14 +736,24 @@ genuine reads.
 `for`-loop pattern, and it treats a binding as used only when semantic
 resolution maps a `UseKind::Variable` site back to that exact symbol. The
 wildcard `_` remains an explicit ignore and is never recorded as a
-warning-eligible binding. As of roadmap item `4.1.2`, rule-local binding spans
-still point at the enclosing rule or literal site rather than the exact
-identifier token, so diagnostics for these rules use that existing coarse
-provenance. The semantic model deliberately stays on the `Span` side of that
-boundary; conversion into diagnostic `rowan::TextRange` values happens in the
-linter layer through one shared boundary helper in `src/linter/span_utils.rs`.
-This keeps a single conversion point, rather than per-rule glue, for
-correctness diagnostics.
+warning-eligible binding.
+
+Each `Symbol` now carries two related provenance fields:
+
+- `span` remains the existing coarse provenance used for debugging, maps, and
+  ownership; it points at the enclosing declaration, rule, or body term that
+  introduced the symbol.
+- `name_span` is an additive optional field that points at the precise
+  identifier token when semantic collection had CST access to capture it.
+
+Rules such as `unused-variable` should prefer `name_span` for diagnostic
+highlighting and fall back to `span` only when precise token provenance is not
+available, such as parse-time `SemanticRule` values that retain only lowered
+expressions plus a coarse source span. The semantic model deliberately stays on
+the `Span` side of that boundary; conversion into diagnostic `rowan::TextRange`
+values happens in the linter layer through one shared boundary helper in
+`src/linter/span_utils.rs`. This keeps a single conversion point, rather than
+per-rule glue, for correctness diagnostics.
 
 Resolution is deliberately tri-state:
 
@@ -763,8 +775,9 @@ body scope and do not escape to later top-level literals.
 
 This pass consumes both AST-backed rules and parse-time `SemanticRule` values
 from top-level `for` desugaring, so the semantic view matches the parser's
-existing rule surface. Provenance is retained through source spans on
-declarations and on the enclosing literal or rule site for rule-local facts.
+existing rule surface. Provenance is retained through coarse source spans on
+declarations and on the enclosing literal or rule site for rule-local facts,
+with precise identifier-token spans captured additively when CST access exists.
 
 ### 3.3. Initial lint rule catalog
 
@@ -775,16 +788,16 @@ suggestions, establishing a solid foundation of essential lints.
 
 Table: DDLint rule catalogue and metadata.
 
-| Rule Name              | Group       | Default Level | Autofixable | Description                                                                                                                                                                 |
-| ---------------------- | ----------- | ------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| unused-relation        | correctness | warn          | No          | Detects declared relations with no resolved read-like uses in rule bodies, `for` iterables, or `for` guards; rule heads count only as writes.                               |
-| unused-variable        | correctness | warn          | No          | Detects rule-local bindings from rule heads, assignment patterns, or `for` patterns that never receive a resolved variable use in the same rule; `_` is an explicit ignore. |
-| shadowed-variable      | correctness | warn          | No          | Detects when a variable binding in a literal shadows one from a preceding literal in the same rule body.                                                                    |
-| recursive-negation     | correctness | error         | No          | Detects rules with recursion through negation, which leads to unsafe, non-monotonic programs.                                                                               |
-| inefficient-join-order | performance | hint          | No          | Evaluates rule bodies and suggests reordering atoms for a more efficient join plan, e.g., placing more restrictive literals first.                                          |
-| superfluous-group-by   | performance | warn          | Yes         | Detects group_by clauses where the aggregation is trivial (e.g., grouping by all variables) and can be removed.                                                             |
-| consistent-casing      | style       | allow         | Yes         | Enforces a consistent casing style for relation and type identifiers (e.g., PascalCase) and variables (e.g., snake_case).                                                   |
-| no-magic-numbers       | style       | allow         | No          | Flags the use of unnamed numeric literals in rule bodies where a named constant might be clearer.                                                                           |
+| Rule Name              | Group       | Default Level | Autofixable | Description                                                                                                                                                                                                                                       |
+| ---------------------- | ----------- | ------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| unused-relation        | correctness | warn          | No          | Detects declared relations with no resolved read-like uses in rule bodies, `for` iterables, or `for` guards; rule heads count only as writes.                                                                                                     |
+| unused-variable        | correctness | warn          | No          | Detects rule-local bindings from rule heads, assignment patterns, or `for` patterns that never receive a resolved variable use in the same rule; `_` is an explicit ignore, and diagnostics prefer precise identifier-token spans when available. |
+| shadowed-variable      | correctness | warn          | No          | Detects when a variable binding in a literal shadows one from a preceding literal in the same rule body.                                                                                                                                          |
+| recursive-negation     | correctness | error         | No          | Detects rules with recursion through negation, which leads to unsafe, non-monotonic programs.                                                                                                                                                     |
+| inefficient-join-order | performance | hint          | No          | Evaluates rule bodies and suggests reordering atoms for a more efficient join plan, e.g., placing more restrictive literals first.                                                                                                                |
+| superfluous-group-by   | performance | warn          | Yes         | Detects group_by clauses where the aggregation is trivial (e.g., grouping by all variables) and can be removed.                                                                                                                                   |
+| consistent-casing      | style       | allow         | Yes         | Enforces a consistent casing style for relation and type identifiers (e.g., PascalCase) and variables (e.g., snake_case).                                                                                                                         |
+| no-magic-numbers       | style       | allow         | No          | Flags the use of unnamed numeric literals in rule bodies where a named constant might be clearer.                                                                                                                                                 |
 
 This table serves as a concrete work breakdown for the engineering team and
 clearly communicates the linter's initial capabilities and priorities to early

diff --git a/docs/parser-implementation-notes.md b/docs/parser-implementation-notes.md
@@ -213,6 +213,41 @@ Current guarantees are intentionally narrow:
 - Name resolution records `Resolved`, `Unresolved`, and `Ignored` outcomes
   rather than emitting diagnostics directly.
 
+### Precise identifier span sourcing
+
+Precise diagnostic spans for declarations and rule-local bindings are captured
+once, either during syntax collection or during semantic collection, rather
+than by re-walking the CST in lint rules.
+
+Implementation points:
+
+- `src/parser/ast/mod.rs` exposes `find_identifier_span(...)` and
+  `find_identifier_span_in_range(...)`, which locate the first matching
+  `T_IDENT` token within a syntax subtree or span-bounded search window.
+- `src/parser/ast/function.rs`, `src/parser/ast/relation.rs`, and
+  `src/parser/ast/type_def.rs` use that helper to expose `name_span()` for
+  top-level declaration identifiers.
+- `src/parser/ast/rule_head.rs` computes `RuleHead::binding_spans` while
+  parsing head text. `collect_head_binding_spans(...)` tokenizes the head atom
+  slice, filters out structural identifiers such as relation callees and field
+  labels, and records only declaration-like binding identifiers with absolute
+  source spans.
+
+Semantic collection then threads those precise spans into the owned
+`SemanticModel`:
+
+- `src/sema/builder.rs` records top-level declaration `name_span` values while
+  AST wrappers are already in hand.
+- `src/sema/traverse.rs` consumes `RuleHead::binding_spans` for AST-backed rule
+  heads and threads optional CST `syntax` handles through
+  `collect_rule_term(...)`, `collect_assignment_term(...)`, and
+  `collect_for_loop_term(...)` so assignment and `for` pattern bindings can
+  resolve `SymbolSpec.name_span` during collection.
+- Where CST handles are not preserved faithfully, semantic collection leaves
+  `SymbolSpec.name_span` unset by design. This includes synthetic
+  `SemanticRuleHead` bindings produced by top-level `for` lowering and nested
+  `for` body traversals that recurse with `syntax: None`.
+
 Similarly, collection literal lowering is not part of the base `parse()`
 pipeline:
 

diff --git a/src/linter/rules/correctness/unused_variable.rs b/src/linter/rules/correctness/unused_variable.rs
@@ -3,8 +3,8 @@
 //! The rule operates over semantic `RuleBinding` symbols rather than raw CST
 //! name matching so shadowing, unresolved names, and wildcard ignores follow
 //! the semantic model's existing contracts. Diagnostics currently use the
-//! binding symbol's stored span, which points at the enclosing rule or literal
-//! rather than the exact identifier token.
+//! binding symbol's precise identifier span when semantic collection captured
+//! one, falling back to the existing coarse provenance otherwise.
 
 use crate::linter::{LintDiagnostic, Rule};
 use crate::{SyntaxKind, declare_lint};
@@ -30,7 +30,8 @@ declare_lint! {
                     continue;
                 }
 
-                let Some(range) = crate::linter::span_utils::span_to_text_range(symbol.span()) else {
+                let span = symbol.name_span().unwrap_or_else(|| symbol.span());
+                let Some(range) = crate::linter::span_utils::span_to_text_range(span) else {
                     continue;
                 };
 
@@ -69,6 +70,10 @@ mod tests {
                 .map(crate::linter::LintDiagnostic::message),
             Some("variable `head_x` is defined but never used in this rule")
         );
+        assert_eq!(
+            diagnostics.first().map(crate::linter::LintDiagnostic::span),
+            Some(rowan::TextRange::new(7.into(), 13.into()))
+        );
     }
 
     #[rstest]

diff --git a/src/parser/ast/function.rs b/src/parser/ast/function.rs
@@ -29,6 +29,13 @@ impl Function {
             })
     }
 
+    /// Precise source span for the function name token, if present.
+    #[must_use]
+    pub fn name_span(&self) -> Option<crate::Span> {
+        self.name()
+            .and_then(|name| super::find_identifier_span(&self.syntax, &name))
+    }
+
     /// Returns `true` if declared with the `extern` keyword.
     #[must_use]
     pub fn is_extern(&self) -> bool {
@@ -75,7 +82,8 @@ impl_ast_node!(Function);
 #[cfg(test)]
 mod tests {
 
-    use crate::parse;
+    use crate::{parse, test_util::span_text};
+
     #[expect(clippy::expect_used, reason = "Using expect for clearer test failures")]
     #[test]
     fn function_name() {
@@ -89,4 +97,25 @@ mod tests {
             .expect("function missing");
         assert_eq!(func.name().as_deref(), Some("foo"));
     }
+
+    #[test]
+    fn function_name_span_points_to_declaration_identifier() {
+        let source = "function project(project: u32): u32 { project }";
+        let parsed = parse(source);
+        crate::test_util::assert_no_parse_errors(parsed.errors());
+        #[expect(clippy::expect_used, reason = "Using expect for clearer test failures")]
+        let func = parsed
+            .root()
+            .functions()
+            .first()
+            .cloned()
+            .expect("function missing");
+
+        let span = func
+            .name_span()
+            .unwrap_or_else(|| panic!("missing function name_span in `{source}`"));
+
+        assert_eq!(span_text(source, &span), "project");
+        assert_eq!(span.start, "function ".len());
+    }
 }
diff --git a/src/parser/ast/mod.rs b/src/parser/ast/mod.rs
@@ -9,17 +9,90 @@
 //! tests and fixtures to assemble match expressions without dipping into
 //! private modules.
 
-use rowan::SyntaxElement;
+use rowan::{NodeOrToken, SyntaxElement, SyntaxNode};
 
 use self::parse_utils::is_trivia;
-use crate::{DdlogLanguage, SyntaxKind};
+use crate::parser::ast::rule::text_range_to_span;
+use crate::{DdlogLanguage, Span, SyntaxKind, SyntaxToken};
 
 /// Common interface for AST wrappers.
 pub(crate) trait AstNode {
     /// Access the underlying syntax node.
     fn syntax(&self) -> &rowan::SyntaxNode<DdlogLanguage>;
 }
 
+/// Find the first identifier token with matching text within a syntax subtree.
+#[must_use]
+pub(crate) fn find_identifier_span(syntax: &SyntaxNode<DdlogLanguage>, name: &str) -> Option<Span> {
+    find_identifier_span_in_range(syntax, &text_range_to_span(syntax.text_range()), name)
+}
+
+/// Find the first identifier token with matching text within a span-bounded subtree search.
+#[must_use]
+pub(crate) fn find_identifier_span_in_range(
+    syntax: &SyntaxNode<DdlogLanguage>,
+    search_span: &Span,
+    name: &str,
+) -> Option<Span> {
+    let mut stack = reversed_children(syntax);
+
+    while let Some(element) = stack.pop() {
+        match element {
+            NodeOrToken::Token(token) => {
+                if !token_overlaps_range(&token, search_span) || !is_matching_ident(&token, name) {
+                    continue;
+                }
+
+                let token_span = text_range_to_span(token.text_range());
+                if span_contains(search_span, &token_span) {
+                    return Some(token_span);
+                }
+            }
+            NodeOrToken::Node(node) => {
+                if !node_overlaps_range(&node, search_span) {
+                    continue;
+                }
+
+                stack.extend(reversed_children(&node));
+            }
+        }
+    }
+
+    None
+}
+
+/// Collect child elements in reverse order so a stack preserves source order.
+fn reversed_children(syntax: &SyntaxNode<DdlogLanguage>) -> Vec<SyntaxElement<DdlogLanguage>> {
+    let mut children: Vec<_> = syntax.children_with_tokens().collect();
+    children.reverse();
+    children
+}
+
+/// Whether a token's byte span overlaps the search range at all.
+fn token_overlaps_range(token: &SyntaxToken<DdlogLanguage>, range: &Span) -> bool {
+    ranges_overlap(range, &text_range_to_span(token.text_range()))
+}
+
+/// Whether a node's byte span overlaps the search range at all.
+fn node_overlaps_range(node: &SyntaxNode<DdlogLanguage>, range: &Span) -> bool {
+    ranges_overlap(range, &text_range_to_span(node.text_range()))
+}
+
+/// Whether the token is the identifier text the caller is searching for.
+fn is_matching_ident(token: &SyntaxToken<DdlogLanguage>, name: &str) -> bool {
+    token.kind() == SyntaxKind::T_IDENT && token.text() == name
+}
+
+/// Whether two byte ranges intersect.
+fn ranges_overlap(left: &Span, right: &Span) -> bool {
+    left.start < right.end && right.start < left.end
+}
+
+/// Whether the candidate span sits entirely within the container span.
+fn span_contains(container: &Span, candidate: &Span) -> bool {
+    container.start <= candidate.start && candidate.end <= container.end
+}
+
 macro_rules! impl_ast_node {
     ($ty:ty) => {
         impl AstNode for $ty {
@@ -181,6 +254,7 @@ pub use type_def::TypeDef;
 #[cfg(test)]
 mod tests {
 
+    mod identifier_span;
     mod precedence;
 
     use crate::parse;

diff --git a/src/parser/ast/relation.rs b/src/parser/ast/relation.rs
@@ -58,6 +58,13 @@ impl Relation {
             })
     }
 
+    /// Precise source span for the relation name token, if present.
+    #[must_use]
+    pub fn name_span(&self) -> Option<crate::Span> {
+        self.name()
+            .and_then(|name| super::find_identifier_span(&self.syntax, &name))
+    }
+
     /// Returns `true` if declared with the `input` keyword.
     #[must_use]
     pub fn is_input(&self) -> bool {
@@ -136,7 +143,7 @@ impl_ast_node!(Relation);
 #[cfg(test)]
 mod tests {
 
-    use crate::parse;
+    use crate::{parse, test_util::span_text};
 
     #[test]
     fn relation_name() {
@@ -151,4 +158,25 @@ mod tests {
         assert_eq!(rel.name().as_deref(), Some("R"));
         assert!(rel.is_input());
     }
+
+    #[test]
+    fn relation_name_span_points_to_declaration_identifier() {
+        let source = "input relation Source(Source: u32)";
+        let parsed = parse(source);
+        crate::test_util::assert_no_parse_errors(parsed.errors());
+        #[expect(clippy::expect_used, reason = "Using expect for clearer test failures")]
+        let rel = parsed
+            .root()
+            .relations()
+            .first()
+            .cloned()
+            .expect("relation missing");
+
+        let span = rel
+            .name_span()
+            .unwrap_or_else(|| panic!("missing relation name_span in `{source}`"));
+
+        assert_eq!(span_text(source, &span), "Source");
+        assert_eq!(span.start, "input relation ".len());
+    }
 }