From b17918020dc48ea5213340369b6dc306e0487731 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 24 May 2026 19:55:17 +0000 Subject: [PATCH 1/4] feat(xfa): expose raw script blocks via FormSchema.Scripts, drop heuristics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XFA script interpretation was regex-based and lossy: bodies got trimmed and sliced when a heuristic "succeeded", contentType wasn't surfaced, and owning field/subform context was dropped. Real ES5/FormCalc semantics need an AST, which is out of scope here. Callers can do better with the raw source. This is a breaking change to the form API: - New types.FormScript carries the verbatim body, language, event activity, SOM owner path, and matching Question.ID / FormSection.Path as OwnerID. - types.FormSchema gains Scripts []FormScript; loses Rules. - types.Question and types.FormSection gain Scripts []string — ID indexes into FormSchema.Scripts in declaration order. - Rule, RuleType, Condition, Operator, LogicOp, Action, ActionType and their constants are deleted. Annotation.Actions (unused) drops too. - pdfer.Rule alias is replaced with pdfer.FormScript. XFA translator: heuristic helpers (parseXFAScript, detectScriptLanguage, tryParseVisibilityScript, tryParseSetValueScript, tryParseValidationScript, tryParseCalculateScript, extractSimpleCondition, invertCondition, extractAllPresenceTargets, splitIfElse, splitJSIfElseChain, parseVariablesFunctionRules, extractJSFunctionBodies, and supporting regex/match utilities) are removed. Language defaults to "formcalc" per the XFA spec when contentType is absent; contentTypeToLang() handles the rest. + + + +` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + + if len(form.Scripts) != 1 { + t.Fatalf("expected 1 script, got %d", len(form.Scripts)) + } + s := form.Scripts[0] + if s.Body != body { + t.Errorf("body not preserved verbatim:\n got: %q\nwant: %q", s.Body, body) + } + if s.Event != "change" { + t.Errorf("event = %q, want change", s.Event) + } + if s.Language != "formcalc" { + t.Errorf("language = %q, want formcalc", s.Language) + } +} + +// TestLanguageDefaultsToFormCalc verifies that a + + + +` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + if len(form.Scripts) != 1 { + t.Fatalf("expected 1 script, got %d", len(form.Scripts)) + } + if got := form.Scripts[0].Language; got != "formcalc" { + t.Errorf("language = %q, want formcalc (XFA default)", got) + } +} + +// TestLanguageJavaScriptContentType verifies that contentType maps to javascript. +func TestLanguageJavaScriptContentType(t *testing.T) { + xfaXML := `` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + if len(form.Scripts) != 1 { + t.Fatalf("expected 1 script, got %d", len(form.Scripts)) + } + if got := form.Scripts[0].Language; got != "javascript" { + t.Errorf("language = %q, want javascript", got) + } +} + +// TestQuestionScriptsIndex verifies that Question.Scripts holds IDs that resolve +// to entries in FormSchema.Scripts. +func TestQuestionScriptsIndex(t *testing.T) { + xfaXML := `` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + + if len(form.Questions) != 1 { + t.Fatalf("expected 1 question, got %d", len(form.Questions)) + } + q := form.Questions[0] + if len(q.Scripts) != 2 { + t.Fatalf("question.Scripts = %v, want 2 IDs", q.Scripts) + } + + scriptsByID := make(map[string]types.FormScript, len(form.Scripts)) + for _, s := range form.Scripts { + scriptsByID[s.ID] = s + } + for _, id := range q.Scripts { + s, ok := scriptsByID[id] + if !ok { + t.Fatalf("question.Scripts ID %q not found in FormSchema.Scripts", id) + } + if s.OwnerID != q.ID { + t.Errorf("script %s OwnerID = %q, want %q", id, s.OwnerID, q.ID) + } + if !strings.HasPrefix(s.OwnerPath, "Page1.trigger") { + t.Errorf("script %s OwnerPath = %q, want prefix Page1.trigger", id, s.OwnerPath) + } + } +} + +// TestSectionScriptsIndex verifies that subform-attached events surface as +// FormSection.Scripts IDs that resolve to FormSchema.Scripts. +func TestSectionScriptsIndex(t *testing.T) { + xfaXML := `` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + + if len(form.Sections) == 0 || len(form.Sections[0].Children) == 0 { + t.Fatalf("expected nested section tree, got %+v", form.Sections) + } + page1 := form.Sections[0].Children[0] + if page1.Name != "Page1" { + t.Fatalf("expected Page1 section, got %q", page1.Name) + } + if len(page1.Scripts) != 1 { + t.Fatalf("section.Scripts = %v, want 1 ID", page1.Scripts) + } + + scriptsByID := make(map[string]types.FormScript, len(form.Scripts)) + for _, s := range form.Scripts { + scriptsByID[s.ID] = s + } + s, ok := scriptsByID[page1.Scripts[0]] + if !ok { + t.Fatalf("section.Scripts ID %q not found in FormSchema.Scripts", page1.Scripts[0]) + } + if s.OwnerPath != "form1.Page1" { + t.Errorf("script OwnerPath = %q, want form1.Page1", s.OwnerPath) + } + if s.OwnerID != "form1.Page1" { + t.Errorf("script OwnerID = %q, want form1.Page1", s.OwnerID) + } + if s.Event != "initialize" { + t.Errorf("script Event = %q, want initialize", s.Event) + } +} + +// TestScriptEventNameAndRunAt verifies that and + + + +` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + if len(form.Scripts) != 1 { + t.Fatalf("expected 1 script, got %d", len(form.Scripts)) + } + s := form.Scripts[0] + if s.Name != "click" { + t.Errorf("Name = %q, want click", s.Name) + } + if s.RunAt != "client" { + t.Errorf("RunAt = %q, want client", s.RunAt) + } +} + +// TestEmptyEventSkipped verifies that an declaration with no + + + + + + +` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + if len(form.Scripts) != 2 { + t.Fatalf("expected 2 scripts, got %d", len(form.Scripts)) + } + wantIDs := []string{"Page1.trigger#initialize[0]", "Page1.trigger#change[1]"} + for i, want := range wantIDs { + if form.Scripts[i].ID != want { + t.Errorf("script[%d].ID = %q, want %q", i, form.Scripts[i].ID, want) + } + } +} diff --git a/forms/xfa/xfa_script_test.go b/forms/xfa/xfa_script_test.go deleted file mode 100644 index ce35791..0000000 --- a/forms/xfa/xfa_script_test.go +++ /dev/null @@ -1,696 +0,0 @@ -package xfa - -import ( - "strings" - "testing" - - "github.com/benedoc-inc/pdfer/types" -) - -// --- detectScriptLanguage --------------------------------------------------- - -func TestDetectScriptLanguage_FormCalc_ThenEndif(t *testing.T) { - s := `if ($.rawValue == "yes") then $.presence = "hidden" endif` - if got := detectScriptLanguage(s, ""); got != "formcalc" { - t.Errorf("expected formcalc, got %q", got) - } -} - -func TestDetectScriptLanguage_FormCalc_DollarRef(t *testing.T) { - if got := detectScriptLanguage(`$.rawValue = "hello"`, ""); got != "formcalc" { - t.Errorf("expected formcalc, got %q", got) - } -} - -func TestDetectScriptLanguage_JavaScript_Braces(t *testing.T) { - if got := detectScriptLanguage(`if (this.rawValue == "yes") { this.presence = "hidden"; }`, ""); got != "javascript" { - t.Errorf("expected javascript, got %q", got) - } -} - -func TestDetectScriptLanguage_JavaScript_Return(t *testing.T) { - if got := detectScriptLanguage(`return this.rawValue.length > 0;`, ""); got != "javascript" { - t.Errorf("expected javascript, got %q", got) - } -} - -func TestDetectScriptLanguage_HintWins(t *testing.T) { - // Hint should override heuristic. - if got := detectScriptLanguage(`$.rawValue = "x"`, "javascript"); got != "javascript" { - t.Errorf("expected javascript (from hint), got %q", got) - } -} - -// --- visibility ------------------------------------------------------------- - -func TestParseXFAScript_Visibility_Hide_FormCalc(t *testing.T) { - rs := parseXFAScript(`$.presence = "hidden"`, "myField", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeVisibility { - t.Fatalf("expected RuleTypeVisibility, got %q", r.ruleType) - } - if len(r.actions) != 1 { - t.Fatalf("expected 1 action, got %d", len(r.actions)) - } - if r.actions[0].Type != types.ActionTypeHide { - t.Errorf("expected ActionTypeHide, got %q", r.actions[0].Type) - } -} - -func TestParseXFAScript_Visibility_Show_FormCalc(t *testing.T) { - rs := parseXFAScript(`$.presence = "visible"`, "f", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeVisibility { - t.Fatalf("expected RuleTypeVisibility, got %q", r.ruleType) - } - if r.actions[0].Type != types.ActionTypeShow { - t.Errorf("expected ActionTypeShow, got %q", r.actions[0].Type) - } -} - -func TestParseXFAScript_Visibility_Invisible(t *testing.T) { - rs := parseXFAScript(`$.presence = "invisible"`, "f", "", "") - r := rs[0] - if r.actions[0].Type != types.ActionTypeHide { - t.Errorf("expected ActionTypeHide for 'invisible', got %q", r.actions[0].Type) - } -} - -func TestParseXFAScript_Visibility_WithCondition_FormCalc(t *testing.T) { - s := `if ($.rawValue == "yes") then $.presence = "hidden" endif` - rs := parseXFAScript(s, "trigger", "target", "") - r := rs[0] - if r.ruleType != types.RuleTypeVisibility { - t.Fatalf("expected RuleTypeVisibility, got %q", r.ruleType) - } - if r.condition == nil { - t.Fatal("expected non-nil condition") - } - if r.condition.Operator != types.OperatorEquals { - t.Errorf("expected OperatorEquals, got %q", r.condition.Operator) - } -} - -func TestParseXFAScript_Visibility_WithCondition_JavaScript(t *testing.T) { - s := `if (this.rawValue == "yes") { xfa.resolveNode("otherField").presence = "hidden"; }` - rs := parseXFAScript(s, "trigger", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeVisibility { - t.Fatalf("expected RuleTypeVisibility, got %q", r.ruleType) - } - if r.condition == nil { - t.Fatal("expected non-nil condition") - } - // Target should be extracted from resolveNode. - if r.actions[0].Target != "otherField" { - t.Errorf("expected target 'otherField', got %q", r.actions[0].Target) - } -} - -func TestParseXFAScript_Visibility_ResolveNode_Target(t *testing.T) { - s := `xfa.resolveNode("sectionB").presence = "hidden"` - rs := parseXFAScript(s, "checkboxField", "", "") - r := rs[0] - if r.actions[0].Target != "sectionB" { - t.Errorf("expected target 'sectionB', got %q", r.actions[0].Target) - } -} - -// --- if/else rule splitting -------------------------------------------------- - -func TestParseXFAScript_IfElse_TwoRules(t *testing.T) { - s := `if ($.rawValue == "1") then - IMDRF.presence = "visible" - USA.presence = "hidden" -else - IMDRF.presence = "hidden" - USA.presence = "visible" -endif` - rs := parseXFAScript(s, "AppType", "", "formcalc") - if len(rs) != 2 { - t.Fatalf("expected 2 rules (if+else), got %d", len(rs)) - } - // Both rules should be visibility type. - for i, r := range rs { - if r.ruleType != types.RuleTypeVisibility { - t.Errorf("rule[%d]: expected RuleTypeVisibility, got %q", i, r.ruleType) - } - } - // First rule condition: equals "1"; second rule: inverted (not equals "1"). - if rs[0].condition == nil || rs[0].condition.Operator != types.OperatorEquals { - t.Errorf("if-rule condition should be OperatorEquals") - } - if rs[1].condition == nil { - t.Errorf("else-rule should have a condition") - } -} - -// --- set value -------------------------------------------------------------- - -func TestParseXFAScript_SetValue_LiteralString_FormCalc(t *testing.T) { - rs := parseXFAScript(`$.rawValue = "hello"`, "f", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeSetValue { - t.Fatalf("expected RuleTypeSetValue, got %q", r.ruleType) - } - if len(r.actions) != 1 { - t.Fatalf("expected 1 action, got %d", len(r.actions)) - } - a := r.actions[0] - if a.Type != types.ActionTypeSetValue { - t.Errorf("expected ActionTypeSetValue, got %q", a.Type) - } - if a.Value != "hello" { - t.Errorf("expected unquoted literal 'hello', got %v", a.Value) - } -} - -func TestParseXFAScript_SetValue_FieldRef_FormCalc(t *testing.T) { - rs := parseXFAScript(`$.rawValue = otherField.rawValue`, "f", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeSetValue { - t.Fatalf("expected RuleTypeSetValue, got %q", r.ruleType) - } - if r.actions[0].Expression != "otherField.rawValue" { - t.Errorf("expected expression 'otherField.rawValue', got %q", r.actions[0].Expression) - } -} - -func TestParseXFAScript_SetValue_JavaScript_ThisRawValue(t *testing.T) { - rs := parseXFAScript(`this.rawValue = "default";`, "f", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeSetValue { - t.Fatalf("expected RuleTypeSetValue, got %q", r.ruleType) - } -} - -func TestParseXFAScript_SetValue_WithCondition(t *testing.T) { - s := `if (trigger.rawValue != "") then $.rawValue = trigger.rawValue endif` - rs := parseXFAScript(s, "f", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeSetValue { - t.Fatalf("expected RuleTypeSetValue, got %q", r.ruleType) - } - if r.condition == nil { - t.Fatal("expected condition") - } - if r.condition.Operator != types.OperatorNotEquals { - t.Errorf("expected OperatorNotEquals, got %q", r.condition.Operator) - } -} - -// --- validation ------------------------------------------------------------- - -func TestParseXFAScript_Validate_ReturnFalse(t *testing.T) { - s := `if ($.rawValue == "") then return false endif` - rs := parseXFAScript(s, "f", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeValidate { - t.Fatalf("expected RuleTypeValidate, got %q", r.ruleType) - } - if r.actions[0].Type != types.ActionTypeValidate { - t.Errorf("expected ActionTypeValidate, got %q", r.actions[0].Type) - } - if r.actions[0].Script == "" { - t.Error("raw script should be preserved") - } -} - -func TestParseXFAScript_Validate_JavaScript_ReturnBool(t *testing.T) { - s := ` -var v = this.rawValue; -if (v.length < 5) { return false; } -return true;` - rs := parseXFAScript(s, "zipCode", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeValidate { - t.Fatalf("expected RuleTypeValidate, got %q", r.ruleType) - } -} - -func TestParseXFAScript_Validate_MessageBox(t *testing.T) { - s := `if ($.rawValue == "") then xfa.host.messageBox("Required field") return false endif` - rs := parseXFAScript(s, "f", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeValidate { - t.Fatalf("expected RuleTypeValidate, got %q", r.ruleType) - } -} - -// --- calculate -------------------------------------------------------------- - -func TestParseXFAScript_Calculate_Sum_FormCalc(t *testing.T) { - s := `$.rawValue = Sum(a.rawValue, b.rawValue, c.rawValue)` - rs := parseXFAScript(s, "total", "", "") - r := rs[0] - // Contains Sum() — triggers calculate path before set-value since it - // matches calculate earlier if we check set-value first. Either - // RuleTypeCalculate or RuleTypeSetValue is acceptable here; the key check - // is that Sum is present in the expression. - if r.ruleType != types.RuleTypeCalculate && r.ruleType != types.RuleTypeSetValue { - t.Errorf("unexpected rule type %q", r.ruleType) - } -} - -func TestParseXFAScript_Calculate_FormCalcBuiltin(t *testing.T) { - s := `Concat(firstName.rawValue, " ", lastName.rawValue)` - rs := parseXFAScript(s, "fullName", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeCalculate { - t.Fatalf("expected RuleTypeCalculate, got %q", r.ruleType) - } -} - -func TestParseXFAScript_Calculate_JavaScript_Return(t *testing.T) { - s := `return parseFloat(qty.rawValue) * parseFloat(price.rawValue);` - rs := parseXFAScript(s, "total", "", "") - r := rs[0] - if r.ruleType != types.RuleTypeCalculate { - t.Fatalf("expected RuleTypeCalculate, got %q", r.ruleType) - } - if !strings.Contains(r.actions[0].Expression, "parseFloat") { - t.Errorf("expression should contain the return value, got %q", r.actions[0].Expression) - } -} - -// --- fallback --------------------------------------------------------------- - -func TestParseXFAScript_Fallback_Unknown(t *testing.T) { - s := `someComplexCustomFunction(arg1, arg2)` - rs := parseXFAScript(s, "f", "", "") - r := rs[0] - if len(r.actions) != 1 { - t.Fatalf("expected 1 action, got %d", len(r.actions)) - } - if r.actions[0].Type != types.ActionTypeExecute { - t.Errorf("expected ActionTypeExecute fallback, got %q", r.actions[0].Type) - } - if r.actions[0].Script != s { - t.Error("raw script not preserved in fallback") - } -} - -func TestParseXFAScript_Empty(t *testing.T) { - rs := parseXFAScript("", "f", "", "") - // Empty script: single execute action with empty script or no actions. - // Either is fine; the important thing is no panic. - _ = rs -} - -// --- condition parsing ------------------------------------------------------ - -func TestExtractSimpleCondition_Equals(t *testing.T) { - s := `if (status.rawValue == "active") then $.presence = "visible" endif` - cond := extractSimpleCondition(s, "formcalc", "f") - if cond == nil { - t.Fatal("expected condition") - } - if cond.Operator != types.OperatorEquals { - t.Errorf("expected OperatorEquals, got %q", cond.Operator) - } - if cond.Value != "active" { - t.Errorf("expected value 'active', got %v", cond.Value) - } -} - -func TestExtractSimpleCondition_NotEquals(t *testing.T) { - s := `if ($.rawValue != "") then return true endif` - cond := extractSimpleCondition(s, "formcalc", "myField") - if cond == nil { - t.Fatal("expected condition") - } - if cond.Operator != types.OperatorNotEquals { - t.Errorf("expected OperatorNotEquals, got %q", cond.Operator) - } -} - -func TestExtractSimpleCondition_JavaScript_GreaterThan(t *testing.T) { - s := `if (this.rawValue > 100) { return false; }` - cond := extractSimpleCondition(s, "javascript", "amount") - if cond == nil { - t.Fatal("expected condition") - } - if cond.Operator != types.OperatorGreaterThan { - t.Errorf("expected OperatorGreaterThan, got %q", cond.Operator) - } -} - -func TestExtractSimpleCondition_NoIf(t *testing.T) { - cond := extractSimpleCondition(`$.rawValue = "hello"`, "formcalc", "f") - if cond != nil { - t.Errorf("expected nil condition for script without if, got %+v", cond) - } -} - -// --- helper functions ------------------------------------------------------- - -func TestExtractFieldRef_Dollar(t *testing.T) { - if got := extractFieldRef("$.rawValue", "myField"); got != "myField" { - t.Errorf("expected 'myField', got %q", got) - } -} - -func TestExtractFieldRef_This(t *testing.T) { - if got := extractFieldRef("this.rawValue", "myField"); got != "myField" { - t.Errorf("expected 'myField', got %q", got) - } -} - -func TestExtractFieldRef_Named(t *testing.T) { - if got := extractFieldRef("otherField.rawValue", "myField"); got != "otherField" { - t.Errorf("expected 'otherField', got %q", got) - } -} - -func TestExtractResolveNodeTarget(t *testing.T) { - cases := []struct { - s string - want string - }{ - {`xfa.resolveNode("firstName").rawValue = ""`, "firstName"}, - {`xfa.resolveNode('section2').presence = "hidden"`, "section2"}, - {`$.rawValue = "x"`, ""}, - } - for _, tc := range cases { - got := extractResolveNodeTarget(tc.s) - if got != tc.want { - t.Errorf("extractResolveNodeTarget(%q) = %q, want %q", tc.s, got, tc.want) - } - } -} - -func TestUnquoteLiteral(t *testing.T) { - cases := []struct { - in string - want interface{} - }{ - {`"hello"`, "hello"}, - {`'world'`, "world"}, - {`42`, nil}, - {`field.rawValue`, nil}, - } - for _, tc := range cases { - got := unquoteLiteral(tc.in) - if got != tc.want { - t.Errorf("unquoteLiteral(%q) = %v, want %v", tc.in, got, tc.want) - } - } -} - -func TestFindAssignmentOp(t *testing.T) { - cases := []struct { - s string - want int // -1 means "not found" - }{ - {`$.rawValue = "hello"`, 11}, - {`a == b`, -1}, - {`a != b`, -1}, - {`a >= b`, -1}, - {`a <= b`, -1}, - } - for _, tc := range cases { - got := findAssignmentOp(tc.s) - if tc.want == -1 { - if got != -1 { - t.Errorf("findAssignmentOp(%q) = %d, expected -1", tc.s, got) - } - } else { - if got == -1 { - t.Errorf("findAssignmentOp(%q) = -1, expected %d", tc.s, tc.want) - } - } - } -} - -// --- splitIfElse ------------------------------------------------------------ - -func TestSplitIfElse_FormCalc(t *testing.T) { - s := `if ($.rawValue == "1") then - A.presence = "visible" -else - A.presence = "hidden" -endif` - ifBody, elseBody, ok := splitIfElse(s, "formcalc") - if !ok { - t.Fatal("expected ok=true") - } - if !strings.Contains(ifBody, `"visible"`) { - t.Errorf("ifBody should contain visible: %q", ifBody) - } - if !strings.Contains(elseBody, `"hidden"`) { - t.Errorf("elseBody should contain hidden: %q", elseBody) - } -} - -func TestSplitIfElse_NoElse(t *testing.T) { - s := `if ($.rawValue == "1") then A.presence = "visible" endif` - _, _, ok := splitIfElse(s, "formcalc") - if ok { - t.Error("expected ok=false for script with no else branch") - } -} - -// --- invertCondition -------------------------------------------------------- - -func TestInvertCondition_Equals(t *testing.T) { - c := &types.Condition{Operator: types.OperatorEquals, Value: "yes"} - inv := invertCondition(c) - if inv.Operator != types.OperatorNotEquals { - t.Errorf("expected OperatorNotEquals, got %q", inv.Operator) - } -} - -func TestInvertCondition_NotEquals(t *testing.T) { - c := &types.Condition{Operator: types.OperatorNotEquals, Value: ""} - inv := invertCondition(c) - if inv.Operator != types.OperatorEquals { - t.Errorf("expected OperatorEquals, got %q", inv.Operator) - } -} - -func TestInvertCondition_GreaterThan(t *testing.T) { - c := &types.Condition{Operator: types.OperatorGreaterThan, Value: "5"} - inv := invertCondition(c) - if inv.Operator != types.OperatorLessOrEqual { - t.Errorf("expected OperatorLessOrEqual, got op=%q", inv.Operator) - } -} - -func TestInvertCondition_Fallback_Not(t *testing.T) { - // An operator with no explicit inverse mapping falls back to LogicOpNot wrapper. - c := &types.Condition{Operator: types.OperatorContains, Value: "x"} - inv := invertCondition(c) - if inv.Logic != types.LogicOpNot { - t.Errorf("expected LogicOpNot fallback, got logic=%q op=%q", inv.Logic, inv.Operator) - } -} - -// --- convertXFAEventToRules integration ------------------------------------- - -func TestConvertXFAEventToRules_EventTypeMapping(t *testing.T) { - cases := []struct { - eventType string - want types.RuleType - }{ - {"validate", types.RuleTypeValidate}, - {"calculate", types.RuleTypeCalculate}, - {"initialize", types.RuleTypeSetValue}, - } - for _, tc := range cases { - event := XFAEvent{Type: tc.eventType} - rules, err := convertXFAEventToRules(event, "f", 1) - if err != nil { - t.Errorf("%q: unexpected error: %v", tc.eventType, err) - } - if len(rules) == 0 { - t.Errorf("%q: expected at least one rule", tc.eventType) - continue - } - if rules[0].Type != tc.want { - t.Errorf("event type %q: expected rule type %q, got %q", tc.eventType, tc.want, rules[0].Type) - } - } -} - -func TestConvertXFAEventToRules_ScriptOverridesEventType(t *testing.T) { - // A "change" event whose script is actually a visibility toggle. - event := XFAEvent{ - Type: "change", - Script: `$.presence = "hidden"`, - } - rules, err := convertXFAEventToRules(event, "f", 1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(rules) == 0 { - t.Fatal("expected at least one rule") - } - if rules[0].Type != types.RuleTypeVisibility { - t.Errorf("expected RuleTypeVisibility (from script), got %q", rules[0].Type) - } -} - -func TestConvertXFAEventToRules_PreservesSourceAndID(t *testing.T) { - event := XFAEvent{Type: "validate", Script: `return false`} - rules, _ := convertXFAEventToRules(event, "emailField", 7) - if len(rules) == 0 { - t.Fatal("expected at least one rule") - } - rule := rules[0] - if rule.Source != "emailField" { - t.Errorf("expected source 'emailField', got %q", rule.Source) - } - if rule.ID != "rule_7" { - t.Errorf("expected ID 'rule_7', got %q", rule.ID) - } -} - -func TestConvertXFAEventToRules_IfElse_TwoRules(t *testing.T) { - event := XFAEvent{ - Type: "change", - Script: `if ($.rawValue == "yes") then - sectionA.presence = "visible" - sectionB.presence = "hidden" -else - sectionA.presence = "hidden" - sectionB.presence = "visible" -endif`, - Lang: "formcalc", - } - rules, err := convertXFAEventToRules(event, "toggle", 1) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if len(rules) != 2 { - t.Fatalf("expected 2 rules for if/else script, got %d", len(rules)) - } -} - -// --- splitJSIfElseChain -------------------------------------------------- - -func TestSplitJSIfElseChain_ThreeBranches(t *testing.T) { - s := `if (field.rawValue == "0") { - A.presence = "visible"; - B.presence = "hidden"; -} else if (field.rawValue == "1") { - A.presence = "hidden"; - B.presence = "visible"; -} else if (field.rawValue == "2") { - A.presence = "hidden"; - B.presence = "hidden"; -}` - branches := splitJSIfElseChain(s) - if len(branches) != 3 { - t.Fatalf("expected 3 branches, got %d", len(branches)) - } - if branches[0].cond != `field.rawValue == "0"` { - t.Errorf("branch 0 cond = %q", branches[0].cond) - } - if branches[2].cond != `field.rawValue == "2"` { - t.Errorf("branch 2 cond = %q", branches[2].cond) - } -} - -func TestSplitJSIfElseChain_WithElse(t *testing.T) { - s := `if (x.rawValue == "a") { - A.presence = "visible"; -} else { - A.presence = "hidden"; -}` - branches := splitJSIfElseChain(s) - if len(branches) != 2 { - t.Fatalf("expected 2 branches, got %d: %+v", len(branches), branches) - } - if branches[0].cond != `x.rawValue == "a"` { - t.Errorf("branch 0 cond = %q", branches[0].cond) - } - if branches[1].cond != "" { - t.Errorf("else branch should have empty cond, got %q", branches[1].cond) - } -} - -// --- parseVariablesFunctionRules ----------------------------------------- - -func TestParseVariablesFunctionRules_ThreeBranches(t *testing.T) { - body := ` - if (ATRadioButton100.rawValue == "0") { - IMDRF.presence = "visible"; - USA.presence = "hidden"; - CDN.presence = "hidden"; - } else if (ATRadioButton100.rawValue == "1") { - IMDRF.presence = "hidden"; - USA.presence = "visible"; - CDN.presence = "hidden"; - } else if (ATRadioButton100.rawValue == "2") { - IMDRF.presence = "hidden"; - USA.presence = "hidden"; - CDN.presence = "visible"; - }` - rules := parseVariablesFunctionRules(body, "javascript", 0) - if len(rules) != 3 { - t.Fatalf("expected 3 rules, got %d", len(rules)) - } - // Branch 0: IMDRF visible, USA hidden, CDN hidden - r0 := rules[0] - if r0.Condition == nil || r0.Condition.Expression != `ATRadioButton100.rawValue == "0"` { - t.Errorf("rule 0 condition = %+v", r0.Condition) - } - // Branch 0 should show IMDRF and hide USA and CDN - var showTargets, hideTargets []string - for _, a := range r0.Actions { - if a.Type == types.ActionTypeShow { - showTargets = append(showTargets, a.Target) - } else if a.Type == types.ActionTypeHide { - hideTargets = append(hideTargets, a.Target) - } - } - if len(showTargets) != 1 || showTargets[0] != "IMDRF" { - t.Errorf("branch 0 show targets = %v, want [IMDRF]", showTargets) - } - if len(hideTargets) != 2 { - t.Errorf("branch 0 hide targets = %v, want [USA CDN]", hideTargets) - } - - // Branch 1: USA visible - r1 := rules[1] - if r1.Condition == nil || r1.Condition.Expression != `ATRadioButton100.rawValue == "1"` { - t.Errorf("rule 1 condition = %+v", r1.Condition) - } - var show1 []string - for _, a := range r1.Actions { - if a.Type == types.ActionTypeShow { - show1 = append(show1, a.Target) - } - } - if len(show1) != 1 || show1[0] != "USA" { - t.Errorf("branch 1 show targets = %v, want [USA]", show1) - } -} - -// --- extractJSFunctionBodies -------------------------------------------- - -func TestExtractJSFunctionBodies_SingleFunction(t *testing.T) { - script := `function AutoPopulate() { - if (x.rawValue == "0") { A.presence = "visible"; } -}` - fns := extractJSFunctionBodies(script, "javascript") - if len(fns) != 1 { - t.Fatalf("expected 1 function, got %d", len(fns)) - } - if !strings.Contains(fns[0].body, "A.presence") { - t.Errorf("function body missing expected content: %q", fns[0].body) - } -} - -func TestExtractJSFunctionBodies_TwoFunctions(t *testing.T) { - script := `function AutoPopulate() { - A.presence = "visible"; -} -function LBPresence() { - B.presence = "hidden"; -}` - fns := extractJSFunctionBodies(script, "javascript") - if len(fns) != 2 { - t.Fatalf("expected 2 functions, got %d", len(fns)) - } -} diff --git a/forms/xfa/xfa_translator_test.go b/forms/xfa/xfa_translator_test.go index c9f05d8..daf30fe 100644 --- a/forms/xfa/xfa_translator_test.go +++ b/forms/xfa/xfa_translator_test.go @@ -1243,72 +1243,6 @@ func TestPresenceVisibleField(t *testing.T) { } } -func TestMultiTargetVisibilityScript(t *testing.T) { - script := `if (this.rawValue == "1") { - IMDRF.presence = "visible"; - USA.presence = "hidden"; - CDN.presence = "hidden"; -}` - results, ok := tryParseVisibilityScript(script, "javascript", "AppType", "") - if !ok { - t.Fatal("tryParseVisibilityScript should have matched") - } - result := results[0] - if result.ruleType != types.RuleTypeVisibility { - t.Errorf("ruleType = %q, want visibility", result.ruleType) - } - if len(result.actions) != 3 { - t.Fatalf("actions count = %d, want 3; got %+v", len(result.actions), result.actions) - } - - byTarget := map[string]types.ActionType{} - for _, a := range result.actions { - byTarget[a.Target] = a.Type - } - if byTarget["IMDRF"] != types.ActionTypeShow { - t.Errorf("IMDRF action = %q, want show", byTarget["IMDRF"]) - } - if byTarget["USA"] != types.ActionTypeHide { - t.Errorf("USA action = %q, want hide", byTarget["USA"]) - } - if byTarget["CDN"] != types.ActionTypeHide { - t.Errorf("CDN action = %q, want hide", byTarget["CDN"]) - } -} - -func TestThisPresenceFallback(t *testing.T) { - // Script only uses "this.presence" — no named external targets. - // Should fall back to sourceField. - script := `this.presence = "hidden";` - results, ok := tryParseVisibilityScript(script, "javascript", "myField", "") - if !ok { - t.Fatal("tryParseVisibilityScript should have matched") - } - result := results[0] - if len(result.actions) != 1 { - t.Fatalf("actions count = %d, want 1", len(result.actions)) - } - if result.actions[0].Target != "myField" { - t.Errorf("action target = %q, want myField", result.actions[0].Target) - } - if result.actions[0].Type != types.ActionTypeHide { - t.Errorf("action type = %q, want hide", result.actions[0].Type) - } -} - -func TestPerTargetActionType(t *testing.T) { - script := `IMDRF.presence = "visible"; USA.presence = "hidden";` - if got := perTargetActionType(script, "IMDRF", types.ActionTypeHide); got != types.ActionTypeShow { - t.Errorf("IMDRF: got %q, want show", got) - } - if got := perTargetActionType(script, "USA", types.ActionTypeShow); got != types.ActionTypeHide { - t.Errorf("USA: got %q, want hide", got) - } - // Unknown target falls back to the provided default. - if got := perTargetActionType(script, "OTHER", types.ActionTypeShow); got != types.ActionTypeShow { - t.Errorf("OTHER: got %q, want show (fallback)", got) - } -} // ── Rendering improvement tests ─────────────────────────────────────────────── @@ -1954,38 +1888,25 @@ func TestPresenceAttrImageEmitted(t *testing.T) { } } -// TestVariablesBlockRulesExtracted verifies that + ` + xfaXML := `` @@ -1993,31 +1914,25 @@ func TestVariablesBlockRulesExtracted(t *testing.T) { if err != nil { t.Fatalf("ParseXFAForm() error = %v", err) } - if len(form.Rules) != 3 { - t.Fatalf("expected 3 rules from variables block, got %d: %+v", len(form.Rules), form.Rules) - } - // Rule 0: AppType == "0" → SectionA show, SectionB hide, SectionC hide - r0 := form.Rules[0] - if r0.Type != types.RuleTypeVisibility { - t.Errorf("rule 0 type = %v", r0.Type) + + var variablesScripts []types.FormScript + for _, s := range form.Scripts { + if s.Event == "variables" { + variablesScripts = append(variablesScripts, s) + } } - if r0.Condition == nil || r0.Condition.Expression != `AppType.rawValue == "0"` { - t.Errorf("rule 0 condition = %+v", r0.Condition) + if len(variablesScripts) != 1 { + t.Fatalf("expected 1 variables script, got %d: %+v", len(variablesScripts), variablesScripts) } - // Rule 1: AppType == "1" → SectionB show - r1 := form.Rules[1] - if r1.Condition == nil || r1.Condition.Expression != `AppType.rawValue == "1"` { - t.Errorf("rule 1 condition = %+v", r1.Condition) + s := variablesScripts[0] + if s.Name != "Functions" { + t.Errorf("script name = %q, want %q", s.Name, "Functions") } - // Verify rule 1 shows SectionB - var found bool - for _, a := range r1.Actions { - if a.Type == types.ActionTypeShow && a.Target == "SectionB" { - found = true - } + if s.Language != "javascript" { + t.Errorf("language = %q, want javascript", s.Language) } - if !found { - t.Errorf("rule 1 actions don't include show SectionB: %+v", r1.Actions) + if s.Body != body { + t.Errorf("body not preserved verbatim:\n got: %q\nwant: %q", s.Body, body) } } diff --git a/pdfer.go b/pdfer.go index a154a0c..609f878 100644 --- a/pdfer.go +++ b/pdfer.go @@ -94,8 +94,8 @@ type FormSchema = types.FormSchema // Question represents a single form field. type Question = types.Question -// Rule represents a validation or calculation rule. -type Rule = types.Rule +// FormScript represents a raw script block extracted from an XFA form. +type FormScript = types.FormScript // FormData is a map of field names to values for form filling. type FormData = types.FormData diff --git a/tests/estar_test.go b/tests/estar_test.go index 9eb011e..db18468 100644 --- a/tests/estar_test.go +++ b/tests/estar_test.go @@ -34,7 +34,7 @@ func TestESTAR_NonIVD_Schema(t *testing.T) { t.Errorf("expected at least %d questions, got %d", minNonIVDQuestions, len(schema.Questions)) } t.Logf("NonIVD eSTAR: %d questions, %d sections, %d rules", - len(schema.Questions), len(schema.Sections), len(schema.Rules)) + len(schema.Questions), len(schema.Sections), len(schema.Scripts)) assertSchemaQuality(t, schema, "NonIVD eSTAR") } @@ -59,7 +59,7 @@ func TestESTAR_IVD_Schema(t *testing.T) { t.Errorf("expected at least %d questions, got %d", minIVDQuestions, len(schema.Questions)) } t.Logf("IVD eSTAR: %d questions, %d sections, %d rules", - len(schema.Questions), len(schema.Sections), len(schema.Rules)) + len(schema.Questions), len(schema.Sections), len(schema.Scripts)) assertSchemaQuality(t, schema, "IVD eSTAR") } @@ -81,7 +81,7 @@ func TestESTAR_PreSTAR_Schema(t *testing.T) { t.Errorf("expected at least %d questions, got %d", minPreSTARQuestions, len(schema.Questions)) } t.Logf("PreSTAR: %d questions, %d sections, %d rules", - len(schema.Questions), len(schema.Sections), len(schema.Rules)) + len(schema.Questions), len(schema.Sections), len(schema.Scripts)) assertSchemaQuality(t, schema, "PreSTAR") } diff --git a/tests/xfa_roundtrip_test.go b/tests/xfa_roundtrip_test.go index 628cf76..fbbeba9 100644 --- a/tests/xfa_roundtrip_test.go +++ b/tests/xfa_roundtrip_test.go @@ -99,7 +99,7 @@ func TestXFARoundTrip(t *testing.T) { t.Logf("Warning: Failed to parse template as form: %v", err) } else { xfaData.Form = form - t.Logf("Parsed form: %d questions, %d rules", len(form.Questions), len(form.Rules)) + t.Logf("Parsed form: %d questions, %d scripts", len(form.Questions), len(form.Scripts)) } } diff --git a/types/content_types.go b/types/content_types.go index 95ccd91..83055c3 100644 --- a/types/content_types.go +++ b/types/content_types.go @@ -205,7 +205,6 @@ type Annotation struct { Subject string `json:"subject,omitempty"` Color *Color `json:"color,omitempty"` Border *Border `json:"border,omitempty"` - Actions []Action `json:"actions,omitempty"` Properties map[string]interface{} `json:"properties,omitempty"` // Link-specific diff --git a/types/form_types.go b/types/form_types.go index 3f8c974..46a6f38 100644 --- a/types/form_types.go +++ b/types/form_types.go @@ -1,12 +1,12 @@ package types // FormSchema represents the complete form structure -// that can be used to rebuild the form with questions, responses, and control flow +// that can be used to rebuild the form with questions, responses, and scripts. type FormSchema struct { Metadata FormMetadata `json:"metadata"` Questions []Question `json:"questions"` Sections []FormSection `json:"sections,omitempty"` // hierarchical section tree (XFA only) - Rules []Rule `json:"rules"` // Control flow rules (dependencies, conditions) + Scripts []FormScript `json:"scripts,omitempty"` // raw + + + +` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + if len(form.Scripts) != 1 { + t.Fatalf("expected 1 script, got %d", len(form.Scripts)) + } + s := form.Scripts[0] + + wantPresent := map[string]string{ + "listen": "refOnly", + "ref": "someField", + "binding": "this", + "stateless": "0", + } + for k, want := range wantPresent { + got, ok := s.Properties[k] + if !ok { + t.Errorf("Properties[%q] missing", k) + continue + } + if got != want { + t.Errorf("Properties[%q] = %v, want %q", k, got, want) + } + } + + // Both and + + + +` + + form, err := ParseXFAForm(xfaXML, false) + if err != nil { + t.Fatalf("ParseXFAForm() error = %v", err) + } + + var vs *struct{ id, url, binding string } + for _, s := range form.Scripts { + if s.Event == "variables" { + id, _ := s.Properties["id"].(string) + url, _ := s.Properties["url"].(string) + binding, _ := s.Properties["binding"].(string) + vs = &struct{ id, url, binding string }{id, url, binding} + if _, ok := s.Properties["contentType"]; ok { + t.Error("Properties[\"contentType\"] should not be duplicated; it's surfaced as Language") + } + if _, ok := s.Properties["name"]; ok { + t.Error("Properties[\"name\"] should not be duplicated; it's surfaced as Name") + } + break + } + } + if vs == nil { + t.Fatal("no variables script found") + } + if vs.id != "vars1" { + t.Errorf("Properties[\"id\"] = %q, want vars1", vs.id) + } + if vs.url != "http://example/lib.js" { + t.Errorf("Properties[\"url\"] = %q, want http://example/lib.js", vs.url) + } + if vs.binding != "this" { + t.Errorf("Properties[\"binding\"] = %q, want this", vs.binding) + } +} diff --git a/tests/estar_test.go b/tests/estar_test.go index db18468..4f69785 100644 --- a/tests/estar_test.go +++ b/tests/estar_test.go @@ -33,7 +33,7 @@ func TestESTAR_NonIVD_Schema(t *testing.T) { if len(schema.Questions) < minNonIVDQuestions { t.Errorf("expected at least %d questions, got %d", minNonIVDQuestions, len(schema.Questions)) } - t.Logf("NonIVD eSTAR: %d questions, %d sections, %d rules", + t.Logf("NonIVD eSTAR: %d questions, %d sections, %d scripts", len(schema.Questions), len(schema.Sections), len(schema.Scripts)) assertSchemaQuality(t, schema, "NonIVD eSTAR") @@ -58,7 +58,7 @@ func TestESTAR_IVD_Schema(t *testing.T) { if len(schema.Questions) < minIVDQuestions { t.Errorf("expected at least %d questions, got %d", minIVDQuestions, len(schema.Questions)) } - t.Logf("IVD eSTAR: %d questions, %d sections, %d rules", + t.Logf("IVD eSTAR: %d questions, %d sections, %d scripts", len(schema.Questions), len(schema.Sections), len(schema.Scripts)) assertSchemaQuality(t, schema, "IVD eSTAR") @@ -80,7 +80,7 @@ func TestESTAR_PreSTAR_Schema(t *testing.T) { if len(schema.Questions) < minPreSTARQuestions { t.Errorf("expected at least %d questions, got %d", minPreSTARQuestions, len(schema.Questions)) } - t.Logf("PreSTAR: %d questions, %d sections, %d rules", + t.Logf("PreSTAR: %d questions, %d sections, %d scripts", len(schema.Questions), len(schema.Sections), len(schema.Scripts)) assertSchemaQuality(t, schema, "PreSTAR") diff --git a/types/form_types.go b/types/form_types.go index 46a6f38..a021b04 100644 --- a/types/form_types.go +++ b/types/form_types.go @@ -99,6 +99,13 @@ type ValidationRules struct { // FormScript represents a raw script block extracted from an XFA form. // Bodies are exposed verbatim — pdfer does not interpret script semantics. +// +// Limitations: scripts attached to XFA nodes that pdfer does not surface in +// the schema are not extracted. This includes decorative elements with +// events (e.g. status indicators), buttons with bind="none" other than +// AddAttachment, -level events, and individual radio options +// that are collapsed into an 's Options. Callers that need full +// event fidelity should walk the raw XFA XML directly. type FormScript struct { ID string `json:"id"` // stable: SOM owner path + "#" + event + "[" + index + "]" OwnerPath string `json:"owner_path,omitempty"` // SOM path of containing node (e.g. "form1.section.field"); empty for template-level @@ -108,5 +115,5 @@ type FormScript struct { Language string `json:"language"` // "javascript" | "formcalc"; defaults to "formcalc" per XFA spec when contentType is absent RunAt string `json:"run_at,omitempty"` // client | server | both Body string `json:"body"` // verbatim script source - Properties map[string]interface{} `json:"properties,omitempty"` // additional /