From d323962ebb425147e07dc38974a8a4854d6fd261 Mon Sep 17 00:00:00 2001 From: Alan Richardson Date: Thu, 11 Jun 2026 16:27:33 +0100 Subject: [PATCH] Support PICT-compatible inline schema definitions --- README.md | 5 +- .../040-test-data/018-Schema-Definition.md | 10 ++- .../core/js/data_generation/rulesParser.js | 75 ++++++++++++++++++- .../js/data_generation/schema-conversion.js | 8 +- packages/core/src/index.js | 63 +++++++++++++++- .../core-api/amendFromTextSpecAndData.test.js | 18 +++++ .../core-api/generateFromTextSpec.test.js | 23 ++++++ .../schema-rules-adapter.test.js | 24 ++++++ .../data_generation/unit/rulesParser.test.js | 42 +++++++++++ 9 files changed, 260 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 2e9d1181..74d9e63b 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ The spec is a paragraph of text where each line is either a 'name' or a 'rule': - **Comments**: lines starting with `#` (optionally prefixed by whitespace) are treated as comments. - **Blank lines**: blank lines are allowed and ignored, so you can separate column groups for readability. -- **Column definitions**: each column is defined as `name` followed by `rule` on the next logical content line. +- **Column definitions**: each column can be defined either as `name` followed by `rule` on the next logical content line, or inline as `name: rule`. - **Constraints**: optional `IF ... THEN ...` statements may appear in text mode after the field definitions, terminated by either `;` or `ENDIF`. 
``` @@ -101,6 +101,9 @@ rule name rule +# compact PICT-style alternative +status: enum(active,inactive) + IF [name] = "Bob" THEN [status] = "active" ENDIF ``` diff --git a/docs-src/docs/040-test-data/018-Schema-Definition.md b/docs-src/docs/040-test-data/018-Schema-Definition.md index b6361309..7bc0afbc 100644 --- a/docs-src/docs/040-test-data/018-Schema-Definition.md +++ b/docs-src/docs/040-test-data/018-Schema-Definition.md @@ -16,7 +16,7 @@ This page explains: ## Basic schema format -A schema is written as repeating two-line field definitions: +A schema is usually written as repeating two-line field definitions: ```text Column Name @@ -32,6 +32,14 @@ enum("Open","In Progress","Closed") This creates one output column called `Status`. +You can also use a compact inline form when you prefer a PICT-style layout: + +```text +Status: enum("Open","In Progress","Closed") +``` + +Both formats are supported, and you can mix them in the same schema. + ## Field rule examples ### Literal values diff --git a/packages/core/js/data_generation/rulesParser.js b/packages/core/js/data_generation/rulesParser.js index 32f56840..5768dd88 100644 --- a/packages/core/js/data_generation/rulesParser.js +++ b/packages/core/js/data_generation/rulesParser.js @@ -6,6 +6,57 @@ function startsConstraint(trimmedLine) { return /^IF\s+(?:\[|\(|NOT\b)/i.test(trimmedLine); } +function looksLikeInlineRuleSpec(ruleText) { + const trimmed = String(ruleText ?? 
'').trim(); + if (trimmed.length === 0 || startsConstraint(trimmed)) { + return false; + } + + if ( + /^(?:enum|literal|regex|datatype\.(?:enum|literal|regex)|awd\.datatype\.(?:enum|literal|regex))\s*\(/i.test(trimmed) + ) { + return true; + } + + if (/^(?:faker\.)?[A-Za-z][A-Za-z0-9_]*(?:\.[A-Za-z][A-Za-z0-9_]*)+(?:\s*\(.*\)\s*|\s*)$/i.test(trimmed)) { + return true; + } + + if (!trimmed.includes(',')) { + return false; + } + + const values = trimmed.split(',').map((value) => value.trim()); + if (values.length < 2 || values.some((value) => value.length === 0 || value.length > 50)) { + return false; + } + + return !values.some((value) => /[[\]{}()^$*+?|\\]/.test(value) || (value.includes('.') && /[A-Z]/.test(value))); +} + +function parseInlineRuleDefinition(line) { + const source = String(line ?? ''); + for (let index = 0; index < source.length; index += 1) { + if (source[index] !== ':') { + continue; + } + + const name = source.slice(0, index).trim(); + const rule = source.slice(index + 1).trim(); + if (name.length === 0 || !looksLikeInlineRuleSpec(rule)) { + continue; + } + + return { + name, + rule, + separator: ': ', + }; + } + + return null; +} + function isEscapedQuote(text, index) { let backslashCount = 0; let back = index - 1; @@ -144,6 +195,22 @@ export class RulesParser { pendingLeadingTextLines = []; continue; } + const inlineRule = parseInlineRuleDefinition(line); + if (inlineRule) { + this.testDataRules.addRule(inlineRule.name, inlineRule.rule, { + comments: pendingLeadingTextLines.join('\n'), + }); + this.schemaTokens.push({ + kind: 'rule', + name: inlineRule.name, + rule: inlineRule.rule, + line: index + 1, + inline: true, + separator: inlineRule.separator, + }); + pendingLeadingTextLines = []; + continue; + } pendingName = trimmed; pendingNameLine = index + 1; continue; @@ -216,8 +283,12 @@ export class RulesParser { } if (token.kind === 'rule') { if (rowIndex < rows.length) { - outputLines.push(rows[rowIndex].name); - 
outputLines.push(rows[rowIndex].rule); + if (token.inline) { + outputLines.push(`${rows[rowIndex].name}${token.separator || ': '}${rows[rowIndex].rule}`); + } else { + outputLines.push(rows[rowIndex].name); + outputLines.push(rows[rowIndex].rule); + } rowIndex += 1; } } diff --git a/packages/core/js/data_generation/schema-conversion.js b/packages/core/js/data_generation/schema-conversion.js index 41d875df..e96ad2f0 100644 --- a/packages/core/js/data_generation/schema-conversion.js +++ b/packages/core/js/data_generation/schema-conversion.js @@ -100,8 +100,12 @@ function renderSpecFromRulesWithTokens(rules, constraints, schemaTokens) { return; } if (token?.kind === 'rule' && rowIndex < rows.length) { - outputLines.push(rows[rowIndex].name); - outputLines.push(rows[rowIndex].rule); + if (token.inline) { + outputLines.push(`${rows[rowIndex].name}${token.separator || ': '}${rows[rowIndex].rule}`); + } else { + outputLines.push(rows[rowIndex].name); + outputLines.push(rows[rowIndex].rule); + } rowIndex += 1; } }); diff --git a/packages/core/src/index.js b/packages/core/src/index.js index 6678185b..fc2a9693 100644 --- a/packages/core/src/index.js +++ b/packages/core/src/index.js @@ -78,14 +78,73 @@ const SUPPORTED_FORMATS = [ 'asciitable', ]; +function looksLikeInlineSchemaRule(ruleText) { + const trimmed = String(ruleText ?? 
'').trim(); + if (trimmed.length === 0 || /^IF\s+(?:\[|\(|NOT\b)/i.test(trimmed)) { + return false; + } + + if ( + /^(?:enum|literal|regex|datatype\.(?:enum|literal|regex)|awd\.datatype\.(?:enum|literal|regex))\s*\(/i.test(trimmed) + ) { + return true; + } + + if (/^(?:faker\.)?[A-Za-z][A-Za-z0-9_]*(?:\.[A-Za-z][A-Za-z0-9_]*)+(?:\s*\(.*\)\s*|\s*)$/i.test(trimmed)) { + return true; + } + + if (!trimmed.includes(',')) { + return false; + } + + const values = trimmed.split(',').map((value) => value.trim()); + if (values.length < 2 || values.some((value) => value.length === 0 || value.length > 50)) { + return false; + } + + return !values.some((value) => /[[\]{}()^$*+?|\\]/.test(value) || (value.includes('.') && /[A-Z]/.test(value))); +} + function extractRuleLines(textSpec) { if (typeof textSpec !== 'string') { return []; } const lines = textSpec.split(/\r?\n/); const ruleLines = []; - for (let i = 1; i < lines.length; i += 2) { - ruleLines.push(lines[i].trim()); + let pendingName = null; + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed.length === 0 || /^\s*#/.test(line) || /^IF\s+(?:\[|\(|NOT\b)/i.test(trimmed)) { + pendingName = null; + continue; + } + + let matchedInlineRule = false; + for (let separatorIndex = 0; separatorIndex < line.length; separatorIndex += 1) { + if (line[separatorIndex] !== ':') { + continue; + } + const rule = line.slice(separatorIndex + 1).trim(); + if (line.slice(0, separatorIndex).trim().length > 0 && looksLikeInlineSchemaRule(rule)) { + ruleLines.push(rule); + pendingName = null; + matchedInlineRule = true; + break; + } + } + + if (matchedInlineRule) { + continue; + } + + if (pendingName === null) { + pendingName = trimmed; + continue; + } + + ruleLines.push(trimmed); + pendingName = null; } return ruleLines; } diff --git a/packages/core/src/tests/core-api/amendFromTextSpecAndData.test.js b/packages/core/src/tests/core-api/amendFromTextSpecAndData.test.js index 51c05476..bb7de075 100644 --- a/packages/core/src/tests/core-api/amendFromTextSpecAndData.test.js +++ 
b/packages/core/src/tests/core-api/amendFromTextSpecAndData.test.js @@ -57,6 +57,24 @@ test('defaults rowCount to imported row count', () => { expect(result.diagnostics.importedRowCount).toBe(2); }); +test('supports pict-style inline schema definitions for amend flows', () => { + const result = amendFromTextSpecAndData({ + textSpec: 'Status: literal(Active)\nRole: enum(Admin,User)', + inputData: '"Name"\n"Alice"\n"Eve"', + inputFormat: 'csv', + outputFormat: 'json', + }); + + expect(result.ok).toBe(true); + expect(result.headers).toEqual(['Name', 'Status', 'Role']); + expect(result.rows).toHaveLength(2); + result.rows.forEach((row) => { + expect(['Alice', 'Eve']).toContain(row[0]); + expect(row[1]).toBe('Active'); + expect(['Admin', 'User']).toContain(row[2]); + }); +}); + test('amends only first N rows when rowCount is smaller', () => { const result = amendFromTextSpecAndData({ textSpec: 'Name\nBob', diff --git a/packages/core/src/tests/core-api/generateFromTextSpec.test.js b/packages/core/src/tests/core-api/generateFromTextSpec.test.js index 4d258c37..6135f5f6 100644 --- a/packages/core/src/tests/core-api/generateFromTextSpec.test.js +++ b/packages/core/src/tests/core-api/generateFromTextSpec.test.js @@ -40,6 +40,24 @@ test('generateFromTextSpec generates rows for valid spec', () => { assertNoCommonErrorPatternsInRows(result.rows); }); +test('generateFromTextSpec supports pict-style inline schema definitions', () => { + const result = generateFromTextSpec({ + textSpec: 'Browser: Chrome,Firefox,Safari\nStatus: enum("Open","Closed")\nName: person.fullName', + rowCount: 3, + outputFormat: 'json', + }); + + expect(result.ok).toBe(true); + expect(result.headers).toEqual(['Browser', 'Status', 'Name']); + expect(result.rows).toHaveLength(3); + result.rows.forEach((row) => { + expect(['Chrome', 'Firefox', 'Safari']).toContain(row[0]); + expect(['Open', 'Closed']).toContain(row[1]); + expect(String(row[2]).length).toBeGreaterThan(0); + }); + 
assertNoCommonErrorPatternsInRows(result.rows); +}); + test('generateFromTextSpec serializes object return values as JSON strings', () => { const result = generateFromTextSpec({ textSpec: 'Currency\nfinance.currency', @@ -255,6 +273,11 @@ test('validateSafeFakerRules accepts known faker commands with literal args', () expect(result.ok).toBe(true); }); +test('validateSafeFakerRules accepts pict-style inline faker commands', () => { + const result = validateSafeFakerRules('Name: person.firstName("female")\nStatus: enum(active,inactive)'); + expect(result.ok).toBe(true); +}); + test('validateSafeFakerRules accepts js-style object literal faker args', () => { const result = validateSafeFakerRules('Template\nhelpers.mustache("{{name}}", { name: "Ada" })'); expect(result.ok).toBe(true); diff --git a/packages/core/src/tests/data_generation/schema-rules-adapter.test.js b/packages/core/src/tests/data_generation/schema-rules-adapter.test.js index d4b1b7b1..262caf0b 100644 --- a/packages/core/src/tests/data_generation/schema-rules-adapter.test.js +++ b/packages/core/src/tests/data_generation/schema-rules-adapter.test.js @@ -84,6 +84,30 @@ describe('schema rules adapter', () => { expect(rendered.text).toBe('t1\nliteral("")\nt2\nliteral( 123)'); }); + test('round-trips pict-style inline schema tokens', () => { + const schemaText = `Priority: enum(high,medium,low) +Status: person.jobTitle`; + + const parsed = schemaTextToDataRules({ + schemaText, + faker, + RandExp, + }); + + expect(parsed.errors).toEqual([]); + expect(parsed.schemaTokens).toEqual([ + expect.objectContaining({ kind: 'rule', inline: true }), + expect.objectContaining({ kind: 'rule', inline: true }), + ]); + + const rendered = dataRulesToSchemaText({ + dataRules: parsed.dataRules, + schemaTokens: parsed.schemaTokens, + }); + + expect(rendered.text).toBe(schemaText); + }); + test('prefers schema tokens when rendering so blank lines are preserved', () => { const rendered = dataRulesToSchemaText({ dataRules: [ diff 
--git a/packages/core/src/tests/data_generation/unit/rulesParser.test.js b/packages/core/src/tests/data_generation/unit/rulesParser.test.js index 207acb05..a6039819 100644 --- a/packages/core/src/tests/data_generation/unit/rulesParser.test.js +++ b/packages/core/src/tests/data_generation/unit/rulesParser.test.js @@ -18,6 +18,22 @@ person.fullName`; expect(parser.testDataRules.rules[0].ruleSpec).toBe('person.fullName'); }); + test('can parse inline pict-style column definitions into rules', () => { + const inputText = `Browser: Chrome,Firefox,Safari +Status: enum("Open","Closed") +Name: person.fullName`; + + const parser = new RulesParser(faker, RandExp); + parser.parseText(inputText); + + expect(parser.isValid()).toBe(true); + expect(parser.testDataRules.rules).toHaveLength(3); + expect(parser.testDataRules.rules[0]).toMatchObject({ name: 'Browser', ruleSpec: 'Chrome,Firefox,Safari' }); + expect(parser.testDataRules.rules[1]).toMatchObject({ name: 'Status', ruleSpec: 'enum("Open","Closed")' }); + expect(parser.testDataRules.rules[2]).toMatchObject({ name: 'Name', ruleSpec: 'person.fullName' }); + expect(parser.getSchemaTokens().every((token) => token.kind !== 'rule' || token.inline === true)).toBe(true); + }); + test('flags an empty rule definition line', () => { const inputText = `Name `; @@ -113,6 +129,17 @@ enum(active,inactive,pending)`; expect(output).toBe(inputText); }); + test('preserves inline pict-style rules when rebuilding from parsed tokens', () => { + const inputText = `# compact +Priority: enum(high,medium,low) +Status: person.jobTitle`; + + const parser = new RulesParser(faker, RandExp); + parser.parseText(inputText); + + expect(parser.renderSpecFromRulesWithTokens(parser.testDataRules.rules)).toBe(inputText); + }); + test('preserves comments and blank lines when rebuilding from rule comments', () => { const inputText = `# one @@ -167,6 +194,21 @@ enum(open,closed)`; expect(parser.testDataRules.constraints).toHaveLength(0); }); + test('does not treat 
non-rule colon lines as inline pict definitions', () => { + const inputText = `Environment: Browser +enum(chrome,firefox)`; + + const parser = new RulesParser(faker, RandExp); + parser.parseText(inputText); + + expect(parser.isValid()).toBe(true); + expect(parser.testDataRules.rules).toHaveLength(1); + expect(parser.testDataRules.rules[0]).toMatchObject({ + name: 'Environment: Browser', + ruleSpec: 'enum(chrome,firefox)', + }); + }); + test('does not treat ENDIF inside a parameter reference as the constraint terminator', () => { const inputText = `ENDIF enum(yes,no)