summaryrefslogtreecommitdiff
path: root/examples/server/public/json-schema-to-grammar.mjs
diff options
context:
space:
mode:
Diffstat (limited to 'examples/server/public/json-schema-to-grammar.mjs')
-rw-r--r--examples/server/public/json-schema-to-grammar.mjs528
1 files changed, 480 insertions, 48 deletions
diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs
index 3f1b255c..baa6761c 100644
--- a/examples/server/public/json-schema-to-grammar.mjs
+++ b/examples/server/public/json-schema-to-grammar.mjs
@@ -1,25 +1,50 @@
+// WARNING: This file was ported from json-schema-to-grammar.py, please fix bugs / add features there first.
const SPACE_RULE = '" "?';
const PRIMITIVE_RULES = {
boolean: '("true" | "false") space',
number: '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
integer: '("-"? ([0-9] | [1-9] [0-9]*)) space',
+ value: 'object | array | string | number | boolean',
+ object: '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
+ array: '"[" space ( value ("," space value)* )? "]" space',
+ uuid: '"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space',
string: ` "\\"" (
[^"\\\\] |
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* "\\"" space`,
null: '"null" space',
};
+const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value'];
+
+// TODO: support "uri", "email" string formats
+const DATE_RULES = {
+ 'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )',
+ 'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
+ 'date-time': 'date "T" time',
+ 'date-string': '"\\"" date "\\"" space',
+ 'time-string': '"\\"" time "\\"" space',
+ 'date-time-string': '"\\"" date-time "\\"" space',
+};
+
+const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...DATE_RULES};
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
-const GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"'};
+const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g;
+const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]' };
+
+const NON_LITERAL_SET = new Set('|.()[]{}*+?');
+const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('[]()|{}*+?');
export class SchemaConverter {
- constructor(propOrder) {
- this._propOrder = propOrder || {};
- this._rules = new Map();
- this._rules.set('space', SPACE_RULE);
+ constructor(options) {
+ this._propOrder = options.prop_order || {};
+ this._allowFetch = options.allow_fetch || false;
+ this._dotall = options.dotall || false;
+ this._rules = {'space': SPACE_RULE};
+ this._refs = {};
+ this._refsBeingResolved = new Set();
}
_formatLiteral(literal) {
@@ -30,83 +55,490 @@ export class SchemaConverter {
return `"${escaped}"`;
}
+ _formatRangeChar(literal) {
+ return JSON.stringify(literal).slice(1, -1).replace(
+ GRAMMAR_RANGE_LITERAL_ESCAPE_RE,
+ m => GRAMMAR_LITERAL_ESCAPES[m]
+ );
+ }
+
_addRule(name, rule) {
let escName = name.replace(INVALID_RULE_CHARS_RE, '-');
let key = escName;
- if (this._rules.has(escName)) {
- if (this._rules.get(escName) === rule) {
+ if (escName in this._rules) {
+ if (this._rules[escName] === rule) {
return key;
}
let i = 0;
- while (this._rules.has(`${escName}${i}`)) {
+ while ((`${escName}${i}` in this._rules) && (this._rules[`${escName}${i}`] !== rule)) {
i += 1;
}
key = `${escName}${i}`;
}
- this._rules.set(key, rule);
+ this._rules[key] = rule;
return key;
}
+ async resolveRefs(schema, url) {
+ const visit = async (n) => {
+ if (Array.isArray(n)) {
+ return Promise.all(n.map(visit));
+ } else if (typeof n === 'object' && n !== null) {
+ let ref = n.$ref;
+ let target;
+ if (ref !== undefined && !this._refs[ref]) {
+ if (ref.startsWith('https://')) {
+ if (!this._allowFetch) {
+ throw new Error('Fetching remote schemas is not allowed (use --allow-fetch for force)');
+ }
+ const fetch = (await import('node-fetch')).default;
+
+ const fragSplit = ref.split('#');
+ const baseUrl = fragSplit[0];
+
+ target = this._refs[baseUrl];
+ if (!target) {
+ target = await this.resolveRefs(await fetch(ref).then(res => res.json()), baseUrl);
+ this._refs[baseUrl] = target;
+ }
+
+ if (fragSplit.length === 1 || fragSplit[fragSplit.length - 1] === '') {
+ return target;
+ }
+ } else if (ref.startsWith('#/')) {
+ target = schema;
+ ref = `${url}${ref}`;
+ n.$ref = ref;
+ } else {
+ throw new Error(`Unsupported ref ${ref}`);
+ }
+
+ const selectors = ref.split('#')[1].split('/').slice(1);
+ for (const sel of selectors) {
+ if (!target || !(sel in target)) {
+ throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
+ }
+ target = target[sel];
+ }
+
+ this._refs[ref] = target;
+ } else {
+ await Promise.all(Object.values(n).map(visit));
+ }
+ }
+
+ return n;
+ };
+
+ return visit(schema);
+ }
+
+ _generateUnionRule(name, altSchemas) {
+ return altSchemas
+ .map((altSchema, i) => this.visit(altSchema, `${name ?? ''}${name ? '-' : 'alternative-'}${i}`))
+ .join(' | ');
+ }
+
+ _visitPattern(pattern, name) {
+ if (!pattern.startsWith('^') || !pattern.endsWith('$')) {
+ throw new Error('Pattern must start with "^" and end with "$"');
+ }
+ pattern = pattern.slice(1, -1);
+ const subRuleIds = {};
+
+ let i = 0;
+ const length = pattern.length;
+
+ const getDot = () => {
+ let rule;
+ if (this._dotall) {
+ rule = '[\\U00000000-\\U0010FFFF]';
+ } else {
+ // Accept any character... except \n and \r line break chars (\x0A and \xOD)
+ rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]';
+ }
+ return this._addRule('dot', rule);
+ };
+
+
+ const toRule = ([s, isLiteral]) => isLiteral ? "\"" + s + "\"" : s;
+
+ const transform = () => {
+ const start = i;
+ // For each component of this sequence, store its string representation and whether it's a literal.
+ // We only need a flat structure here to apply repetition operators to the last item, and
+ // to merge literals at the and (we're parsing grouped ( sequences ) recursively and don't treat '|' specially
+ // (GBNF's syntax is luckily very close to regular expressions!)
+ const seq = [];
+
+ const joinSeq = () => {
+ const ret = [];
+ for (const [isLiteral, g] of groupBy(seq, x => x[1])) {
+ if (isLiteral) {
+ ret.push([[...g].map(x => x[0]).join(''), true]);
+ } else {
+ ret.push(...g);
+ }
+ }
+ if (ret.length === 1) {
+ return ret[0];
+ }
+ return [ret.map(x => toRule(x)).join(' '), false];
+ };
+
+ while (i < length) {
+ const c = pattern[i];
+ if (c === '.') {
+ seq.push([getDot(), false]);
+ i += 1;
+ } else if (c === '(') {
+ i += 1;
+ if (i < length) {
+ if (pattern[i] === '?') {
+ throw new Error(`Unsupported pattern syntax "${pattern[i]}" at index ${i} of /${pattern}/`);
+ }
+ }
+ seq.push([`(${toRule(transform())})`, false]);
+ } else if (c === ')') {
+ i += 1;
+ if (start <= 0 || pattern[start - 1] !== '(') {
+ throw new Error(`Unbalanced parentheses; start = ${start}, i = ${i}, pattern = ${pattern}`);
+ }
+ return joinSeq();
+ } else if (c === '[') {
+ let squareBrackets = c;
+ i += 1;
+ while (i < length && pattern[i] !== ']') {
+ if (pattern[i] === '\\') {
+ squareBrackets += pattern.slice(i, i + 2);
+ i += 2;
+ } else {
+ squareBrackets += pattern[i];
+ i += 1;
+ }
+ }
+ if (i >= length) {
+ throw new Error(`Unbalanced square brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
+ }
+ squareBrackets += ']';
+ i += 1;
+ seq.push([squareBrackets, false]);
+ } else if (c === '|') {
+ seq.push(['|', false]);
+ i += 1;
+ } else if (c === '*' || c === '+' || c === '?') {
+ seq[seq.length - 1] = [toRule(seq[seq.length - 1]) + c, false];
+ i += 1;
+ } else if (c === '{') {
+ let curlyBrackets = c;
+ i += 1;
+ while (i < length && pattern[i] !== '}') {
+ curlyBrackets += pattern[i];
+ i += 1;
+ }
+ if (i >= length) {
+ throw new Error(`Unbalanced curly brackets; start = ${start}, i = ${i}, pattern = ${pattern}`);
+ }
+ curlyBrackets += '}';
+ i += 1;
+ const nums = curlyBrackets.slice(1, -1).split(',').map(s => s.trim());
+ let minTimes, maxTimes;
+ if (nums.length === 1) {
+ minTimes = parseInt(nums[0], 10);
+ maxTimes = minTimes;
+ } else {
+ if (nums.length !== 2) {
+ throw new Error(`Invalid quantifier ${curlyBrackets}`);
+ }
+ minTimes = nums[0] ? parseInt(nums[0], 10) : 0;
+ maxTimes = nums[1] ? parseInt(nums[1], 10) : Infinity;
+ }
+
+ let [sub, subIsLiteral] = seq[seq.length - 1];
+
+ if (minTimes === 0 && maxTimes === Infinity) {
+ seq[seq.length - 1] = [`${sub}*`, false];
+ } else if (minTimes === 0 && maxTimes === 1) {
+ seq[seq.length - 1] = [`${sub}?`, false];
+ } else if (minTimes === 1 && maxTimes === Infinity) {
+ seq[seq.length - 1] = [`${sub}+`, false];
+ } else {
+ if (!subIsLiteral) {
+ let id = subRuleIds[sub];
+ if (id === undefined) {
+ id = this._addRule(`${name}-${Object.keys(subRuleIds).length + 1}`, sub);
+ subRuleIds[sub] = id;
+ }
+ sub = id;
+ }
+
+ const repeatedSub = Array.from({ length: minTimes }, () => subIsLiteral ? `"${sub.slice(1, -1).repeat(minTimes)}"` : sub);
+ const optionalSub = maxTimes !== undefined ? Array.from({ length: maxTimes - minTimes }, () => `${sub}?`) : [`${sub}*`];
+ seq[seq.length - 1] = [repeatedSub.concat(optionalSub).join(' '), false];
+ }
+ } else {
+ let literal = '';
+ while (i < length) {
+ if (pattern[i] === '\\' && i < length - 1) {
+ const next = pattern[i + 1];
+ if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.has(next)) {
+ i += 1;
+ literal += pattern[i];
+ i += 1;
+ } else {
+ literal += pattern.slice(i, i + 2);
+ i += 2;
+ }
+ } else if (pattern[i] === '"') {
+ literal += '\\"';
+ i += 1;
+ } else if (!NON_LITERAL_SET.has(pattern[i]) &&
+ (i === length - 1 || literal === '' || pattern[i + 1] === '.' || !NON_LITERAL_SET.has(pattern[i+1]))) {
+ literal += pattern[i];
+ i += 1;
+ } else {
+ break;
+ }
+ }
+ if (literal !== '') {
+ seq.push([literal, true]);
+ }
+ }
+ }
+
+ return joinSeq();
+ };
+
+ return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
+ }
+
+ _resolveRef(ref) {
+ let refName = ref.split('/').pop();
+ if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
+ this._refsBeingResolved.add(ref);
+ const resolved = this._refs[ref];
+ refName = this.visit(resolved, refName);
+ this._refsBeingResolved.delete(ref);
+ }
+ return refName;
+ }
+
+ _generateConstantRule(value) {
+ if (typeof value !== 'string') {
+ throw new Error('Only string constants are supported, got ' + JSON.stringify(value));
+ }
+ return this._formatLiteral(value);
+ }
+
visit(schema, name) {
const schemaType = schema.type;
- const ruleName = name || 'root';
-
- if (schema.oneOf || schema.anyOf) {
- const rule = (schema.oneOf || schema.anyOf).map((altSchema, i) =>
- this.visit(altSchema, `${name}${name ? "-" : ""}${i}`)
- ).join(' | ');
+ const schemaFormat = schema.format;
+ const ruleName = name in RESERVED_NAMES ? name + '-' : name == '' ? 'root' : name;
- return this._addRule(ruleName, rule);
+ const ref = schema.$ref;
+ if (ref !== undefined) {
+ return this._addRule(ruleName, this._resolveRef(ref));
+ } else if (schema.oneOf || schema.anyOf) {
+ return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
+ } else if (Array.isArray(schemaType)) {
+ return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
} else if ('const' in schema) {
- return this._addRule(ruleName, this._formatLiteral(schema.const));
+ if (typeof schema.const !== 'string') {
+ throw new Error('Only string constants are supported, got ' + JSON.stringify(schema.const));
+ }
+ return this._addRule(ruleName, this._generateConstantRule(schema.const));
} else if ('enum' in schema) {
- const rule = schema.enum.map(v => this._formatLiteral(v)).join(' | ');
+ const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');
return this._addRule(ruleName, rule);
- } else if (schemaType === 'object' && 'properties' in schema) {
- // TODO: `required` keyword (from python implementation)
- const propOrder = this._propOrder;
- const propPairs = Object.entries(schema.properties).sort((a, b) => {
- // sort by position in prop_order (if specified) then by key
- const orderA = typeof propOrder[a[0]] === 'number' ? propOrder[a[0]] : Infinity;
- const orderB = typeof propOrder[b[0]] === 'number' ? propOrder[b[0]] : Infinity;
- return orderA - orderB || a[0].localeCompare(b[0]);
- });
-
- let rule = '"{" space';
- propPairs.forEach(([propName, propSchema], i) => {
- const propRuleName = this.visit(propSchema, `${name}${name ? "-" : ""}${propName}`);
- if (i > 0) {
- rule += ' "," space';
+ } else if ((schemaType === undefined || schemaType === 'object') &&
+ ('properties' in schema ||
+ ('additionalProperties' in schema && schema.additionalProperties !== true))) {
+ const required = new Set(schema.required || []);
+ const properties = Object.entries(schema.properties ?? {});
+ return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties));
+ } else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) {
+ const required = new Set();
+ const properties = [];
+ const addComponent = (compSchema, isRequired) => {
+ const ref = compSchema.$ref;
+ if (ref !== undefined) {
+ compSchema = this._refs[ref];
}
- rule += ` ${this._formatLiteral(propName)} space ":" space ${propRuleName}`;
- });
- rule += ' "}" space';
- return this._addRule(ruleName, rule);
- } else if (schemaType === 'array' && 'items' in schema) {
- // TODO `prefixItems` keyword (from python implementation)
- const itemRuleName = this.visit(schema.items, `${name}${name ? "-" : ""}item`);
- const rule = `"[" space (${itemRuleName} ("," space ${itemRuleName})*)? "]" space`;
- return this._addRule(ruleName, rule);
+ if ('properties' in compSchema) {
+ for (const [propName, propSchema] of Object.entries(compSchema.properties)) {
+ properties.push([propName, propSchema]);
+ if (isRequired) {
+ required.add(propName);
+ }
+ }
+ }
+ };
+
+ for (const t of schema.allOf) {
+ if ('anyOf' in t) {
+ for (const tt of t.anyOf) {
+ addComponent(tt, false);
+ }
+ } else {
+ addComponent(t, true);
+ }
+ }
+
+ return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
+ } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
+ const items = schema.items ?? schema.prefixItems;
+ if (Array.isArray(items)) {
+ return this._addRule(
+ ruleName,
+ '"[" space ' +
+ items.map((item, i) => this.visit(item, `${name ?? ''}${name ? '-' : ''}tuple-${i}`)).join(' "," space ') +
+ ' "]" space'
+ );
+ } else {
+ const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`);
+ const listItemOperator = `( "," space ${itemRuleName} )`;
+ let successiveItems = '';
+ let minItems = schema.minItems || 0;
+ const maxItems = schema.maxItems;
+ if (minItems > 0) {
+ successiveItems = listItemOperator.repeat(minItems - 1);
+ minItems--;
+ }
+ if (maxItems !== undefined && maxItems > minItems) {
+ successiveItems += `${listItemOperator}?`.repeat(maxItems - minItems - 1);
+ } else {
+ successiveItems += `${listItemOperator}*`;
+ }
+ const rule = minItems === 0
+ ? `"[" space ( ${itemRuleName} ${successiveItems} )? "]" space`
+ : `"[" space ${itemRuleName} ${successiveItems} "]" space`;
+ return this._addRule(ruleName, rule);
+ }
+ } else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
+ return this._visitPattern(schema.pattern, ruleName);
+ } else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
+ return this._addRule(
+ ruleName === 'root' ? 'root' : schemaFormat,
+ PRIMITIVE_RULES['uuid'])
+ } else if ((schemaType === undefined || schemaType === 'string') && schema.format in DATE_RULES) {
+ for (const [t, r] of Object.entries(DATE_RULES)) {
+ this._addRule(t, r);
+ }
+ return schemaFormat + '-string';
+ } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
+ for (const n of OBJECT_RULE_NAMES) {
+ this._addRule(n, PRIMITIVE_RULES[n]);
+ }
+ return this._addRule(ruleName, 'object');
} else {
- if (!PRIMITIVE_RULES[schemaType]) {
+ if (!(schemaType in PRIMITIVE_RULES)) {
throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`);
}
- return this._addRule(
- ruleName === 'root' ? 'root' : schemaType,
- PRIMITIVE_RULES[schemaType]
+ // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
+ return this._addRule(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]);
+ }
+ }
+
+ _buildObjectRule(properties, required, name, additionalProperties) {
+ const propOrder = this._propOrder;
+ // sort by position in prop_order (if specified) then by original order
+ const sortedProps = properties.map(([k]) => k).sort((a, b) => {
+ const orderA = propOrder[a] || Infinity;
+ const orderB = propOrder[b] || Infinity;
+ return orderA - orderB || properties.findIndex(([k]) => k === a) - properties.findIndex(([k]) => k === b);
+ });
+
+ const propKvRuleNames = {};
+ for (const [propName, propSchema] of properties) {
+ const propRuleName = this.visit(propSchema, `${name ?? ''}${name ? '-' : ''}${propName}`);
+ propKvRuleNames[propName] = this._addRule(
+ `${name ?? ''}${name ? '-' : ''}${propName}-kv`,
+ `${this._formatLiteral(propName)} space ":" space ${propRuleName}`
);
}
+ const requiredProps = sortedProps.filter(k => required.has(k));
+ const optionalProps = sortedProps.filter(k => !required.has(k));
+
+ if (typeof additionalProperties === 'object' || additionalProperties === true) {
+ const subName = `${name ?? ''}${name ? '-' : ''}additional`;
+ const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
+ propKvRuleNames['*'] = this._addRule(
+ `${subName}-kv`,
+ `${this._addRule('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
+ optionalProps.push('*');
+ }
+
+ let rule = '"{" space ';
+ rule += requiredProps.map(k => propKvRuleNames[k]).join(' "," space ');
+
+ if (optionalProps.length > 0) {
+ rule += ' (';
+ if (requiredProps.length > 0) {
+ rule += ' "," space ( ';
+ }
+
+ const getRecursiveRefs = (ks, firstIsOptional) => {
+ const [k, ...rest] = ks;
+ const kvRuleName = propKvRuleNames[k];
+ let res;
+ if (k === '*') {
+ res = this._addRule(
+ `${name ?? ''}${name ? '-' : ''}additional-kvs`,
+ `${kvRuleName} ( "," space ` + kvRuleName + ` )*`
+ )
+ } else if (firstIsOptional) {
+ res = `( "," space ${kvRuleName} )?`;
+ } else {
+ res = kvRuleName;
+ }
+ if (rest.length > 0) {
+ res += ' ' + this._addRule(
+ `${name ?? ''}${name ? '-' : ''}${k}-rest`,
+ getRecursiveRefs(rest, true)
+ );
+ }
+ return res;
+ };
+
+ rule += optionalProps.map((_, i) => getRecursiveRefs(optionalProps.slice(i), false)).join(' | ');
+ if (requiredProps.length > 0) {
+ rule += ' )';
+ }
+ rule += ' )?';
+ }
+
+ rule += ' "}" space';
+
+ return rule;
}
formatGrammar() {
let grammar = '';
- this._rules.forEach((rule, name) => {
+ for (const [name, rule] of Object.entries(this._rules).sort(([a], [b]) => a.localeCompare(b))) {
grammar += `${name} ::= ${rule}\n`;
- });
+ }
return grammar;
}
}
+
+// Helper function to group elements by a key function
+function* groupBy(iterable, keyFn) {
+ let lastKey = null;
+ let group = [];
+ for (const element of iterable) {
+ const key = keyFn(element);
+ if (lastKey !== null && key !== lastKey) {
+ yield [lastKey, group];
+ group = [];
+ }
+ group.push(element);
+ lastKey = key;
+ }
+ if (group.length > 0) {
+ yield [lastKey, group];
+ }
+}