diff --git a/docs/design/xfa-scope.md b/docs/design/xfa-scope.md new file mode 100644 index 0000000..f02e391 --- /dev/null +++ b/docs/design/xfa-scope.md @@ -0,0 +1,346 @@ +# pdfer XFA support: scope recommendations + +**Status:** Design plan for pdfer's XFA surface. P1 is the committed roadmap; +P2 is a draft proposal still under consideration. Open for discussion — file +an issue or open a PR to suggest changes. + +**Implementation status:** P1 #1 closed by commit `88b6989` (orphan-script +extraction). P1 #2 and #3 remain open and tracked here. + +--- + +A review of pdfer's XFA support from the perspective of a downstream consumer +(`xfa-web`) building an interactive renderer on top of the library. + +The headline: **the overall design is coherent and defensible. The scope +boundary is in the right place. Most of what follows is about finishing the +contract that the current code is already 80% of the way to, not redrawing +it.** + +--- + +## TL;DR + +| Change | Priority | Status | Why | +|---|---|---|---| +| Export all scripts, regardless of whether their owner node is emitted as a Question | **P1** | **Done** (`88b6989`) | The current emission filter drops the script-bearing nodes (`bind="none"` buttons, event-bearing draws, `<pageArea>` events) that renderers most need. This is the single largest fidelity gap. | +| Surface event-bearing and `bind="none"` nodes via a parallel `Elements` collection | **P1** | Planned | Renderers need these nodes addressable in the schema; keeping them out of `Questions` preserves the "Question = thing a user answers" invariant. | +| Parse and expose `<occur>` and `<bind>` metadata on Questions and Sections | **P1** | Planned | Without this, renderers can't tell which subforms are repeatable or what they bind to — i.e. can't implement dynamic XFA at all. | +| Ship a SOM path parser + schema resolver as `forms/xfa/som` | **P2** | Draft | Single correct implementation everyone needs; co-located with the schema it operates on. 
| +| Add data-DOM cursor API (`GetDataValue` / `SetDataValue` / `ListDataChildren` by SOM path) | **P2** | Draft | Current `UpdateXFAValues` is name-keyed and regex-based; renderers doing real binding need path-keyed access. | +| Capture ` + + + + + + +` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + s := findScript(form.Scripts, "form1.Master", "ready") + if s == nil { + t.Fatalf("no script with OwnerPath=form1.Master event=ready; got %+v", form.Scripts) + } + if s.Body != body { + t.Errorf("body = %q, want %q", s.Body, body) + } + if s.OwnerID != "" { + t.Errorf("OwnerID = %q, want empty (pageArea is not a Question/Section)", s.OwnerID) + } +} + +// TestBindNoneButtonScriptExtracted verifies that a bind="none" button that is +// not an AddAttachment (e.g. "Help Text" / "Show Intro" UI triggers) still +// contributes its click-handler script to FormSchema.Scripts, even though the +// button itself is not emitted as a Question. +func TestBindNoneButtonScriptExtracted(t *testing.T) { + body := `xfa.host.messageBox("?-hint");` + xfaXML := `` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + for _, q := range form.Questions { + if q.Name == "helpBtn" { + t.Fatalf("bind=none non-AddAttachment button should not be emitted as Question; got %+v", q) + } + } + s := findScript(form.Scripts, "Page1.helpBtn", "click") + if s == nil { + t.Fatalf("no orphan script for Page1.helpBtn click; got %+v", form.Scripts) + } + if s.Body != body { + t.Errorf("body = %q, want %q", s.Body, body) + } + if s.OwnerID != "" { + t.Errorf("OwnerID = %q, want empty (button is not emitted as Question)", s.OwnerID) + } +} + +// TestDrawEventScriptExtracted verifies that event-bearing <draw> elements +// (status indicators with dynamic show/hide handlers) still surface their +// scripts as orphan FormScripts even though the draw itself is suppressed. 
+func TestDrawEventScriptExtracted(t *testing.T) { + body := `this.presence = "hidden";` + xfaXML := `` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + for _, q := range form.Questions { + if q.Name == "statusOk" { + t.Fatalf("event-bearing draw should not be emitted as Question; got %+v", q) + } + } + s := findScript(form.Scripts, "Page1.statusOk", "initialize") + if s == nil { + t.Fatalf("no orphan script for Page1.statusOk initialize; got %+v", form.Scripts) + } + if s.Body != body { + t.Errorf("body = %q, want %q", s.Body, body) + } + if s.OwnerID != "" { + t.Errorf("OwnerID = %q, want empty (draw is not emitted as Question)", s.OwnerID) + } +} + +// TestExclGroupOptionScriptsExtracted verifies that per-option <event> +// blocks (defined on the individual radio-option <field>s inside an +// <exclGroup>) are preserved as distinct FormScripts. Each option's script +// must have its own OwnerPath (exclGroup SOM path + "." + option field name) +// and remain an orphan, while the exclGroup itself (which IS a Question) gets +// its own script back-ref via the standard Question.Scripts mechanism. +// +// The field values ("a", "b") deliberately differ from the field +// names ("optA", "optB") to assert that the SOM OwnerPath is keyed by the +// field's name (real SOM) rather than the option's data value (which can +// contain arbitrary text). 
+func TestExclGroupOptionScriptsExtracted(t *testing.T) { + bodyA := `xfa.host.messageBox("A selected");` + bodyB := `xfa.host.messageBox("B selected");` + groupBody := `xfa.host.messageBox("group changed");` + xfaXML := `` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + + var choiceQ *types.Question + for i := range form.Questions { + if form.Questions[i].Name == "choice" { + choiceQ = &form.Questions[i] + } + } + if choiceQ == nil { + t.Fatalf("exclGroup 'choice' question missing") + } + + groupScript := findScript(form.Scripts, "Page1.choice", "change") + if groupScript == nil { + t.Fatalf("no script with OwnerPath=Page1.choice event=change; got %+v", form.Scripts) + } + if groupScript.Body != groupBody { + t.Errorf("group body = %q, want %q", groupScript.Body, groupBody) + } + if groupScript.OwnerID != choiceQ.ID { + t.Errorf("group OwnerID = %q, want question ID %q", groupScript.OwnerID, choiceQ.ID) + } + + scriptA := findScript(form.Scripts, "Page1.choice.optA", "click") + if scriptA == nil { + t.Fatalf("no per-option script with OwnerPath=Page1.choice.optA; got %+v", form.Scripts) + } + if scriptA.Body != bodyA { + t.Errorf("option A body = %q, want %q", scriptA.Body, bodyA) + } + if scriptA.OwnerID != "" { + t.Errorf("option A OwnerID = %q, want empty (orphan)", scriptA.OwnerID) + } + + scriptB := findScript(form.Scripts, "Page1.choice.optB", "click") + if scriptB == nil { + t.Fatalf("no per-option script with OwnerPath=Page1.choice.optB; got %+v", form.Scripts) + } + if scriptB.Body != bodyB { + t.Errorf("option B body = %q, want %q", scriptB.Body, bodyB) + } + if scriptB.OwnerID != "" { + t.Errorf("option B OwnerID = %q, want empty (orphan)", scriptB.OwnerID) + } + + // Negative assertion: the OLD path (keyed by option value) must + // NOT appear, to lock in the SOM-correct field-name keying. 
+ if s := findScript(form.Scripts, "Page1.choice.a", "click"); s != nil { + t.Errorf("per-option script should NOT use option value as path key; got %+v", s) + } +} + +// TestNestedSubformFieldBackRef verifies that a field inside a deeply nested +// subform gets its Question.Scripts back-reference populated, and the script's +// OwnerID resolves to the Question.ID. Specifically locks in that the SOM +// path computed by extractAllScripts (parent walk of named nodes) matches the +// path computed by populateScriptBackRefs (sec.Path + "." + q.Name) for +// arbitrary nesting depth. +func TestNestedSubformFieldBackRef(t *testing.T) { + body := `xfa.host.messageBox("deep");` + xfaXML := `` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + + var deepQ *types.Question + for i := range form.Questions { + if form.Questions[i].Name == "deepField" { + deepQ = &form.Questions[i] + } + } + if deepQ == nil { + t.Fatalf("deepField question missing; got %+v", form.Questions) + } + + wantPath := "form1.outer.inner.deepField" + s := findScript(form.Scripts, wantPath, "change") + if s == nil { + t.Fatalf("no script with OwnerPath=%s; got %+v", wantPath, form.Scripts) + } + if s.OwnerID != deepQ.ID { + t.Errorf("OwnerID = %q, want question ID %q (back-ref must resolve through nested sections)", s.OwnerID, deepQ.ID) + } + if len(deepQ.Scripts) != 1 || deepQ.Scripts[0] != s.ID { + t.Errorf("Question.Scripts = %v, want [%q]", deepQ.Scripts, s.ID) + } +} diff --git a/types/form_types.go b/types/form_types.go index a021b04..062083d 100644 --- a/types/form_types.go +++ b/types/form_types.go @@ -100,12 +100,16 @@ type ValidationRules struct { // FormScript represents a raw script block extracted from an XFA form. // Bodies are exposed verbatim — pdfer does not interpret script semantics. // -// Limitations: scripts attached to XFA nodes that pdfer does not surface in -// the schema are not extracted. 
This includes decorative elements with -// events (e.g. status indicators), buttons with bind="none" other than -// AddAttachment, <pageArea>-level events, and individual radio options -// that are collapsed into an <exclGroup>'s Options. Callers that need full -// event fidelity should walk the raw XFA XML directly. +// OwnerID is non-empty iff the owning node is surfaced as a typed schema +// entity (today: Question or FormSection) that callers can dereference by ID. +// Scripts whose owner is not currently typed in the schema appear with +// OwnerPath set and OwnerID empty — at time of writing, these include +// event-bearing <draw> elements, bind="none" non-AddAttachment buttons, +// <pageArea> events, and individual radio options collapsed into an +// <exclGroup>. The set of orphan cases will shrink as more node types become +// typed (see docs/design/xfa-scope.md §2). Callers should treat OwnerID empty +// as "not currently dereferenceable" rather than a permanent classification, +// and rely on OwnerPath when they need owner-keyed addressing in either case. type FormScript struct { ID string `json:"id"` // stable: SOM owner path + "#" + event + "[" + index + "]" OwnerPath string `json:"owner_path,omitempty"` // SOM path of containing node (e.g. "form1.section.field"); empty for template-level