diff options
-rw-r--r-- | common/json-schema-to-grammar.cpp | 2 | ||||
-rwxr-xr-x | examples/json_schema_to_grammar.py | 5 | ||||
-rw-r--r-- | examples/server/public/json-schema-to-grammar.mjs | 2 | ||||
-rw-r--r-- | grammars/json.gbnf | 2 | ||||
-rw-r--r-- | grammars/json_arr.gbnf | 2 | ||||
-rwxr-xr-x | tests/test-json-schema-to-grammar.cpp | 76 |
6 files changed, 44 insertions, 45 deletions
diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 11221a32..10b9b3d1 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -40,7 +40,7 @@ static std::string build_repetition(const std::string & item_rule, int min_items return result; } -const std::string SPACE_RULE = "\" \"?"; +const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}"; struct BuiltinRule { std::string content; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index cd444d01..ab19e20d 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -29,9 +29,8 @@ class BuiltinRule: self.content = content self.deps = deps or [] -# whitespace is constrained to a single space char to prevent model "running away" in -# whitespace. Also maybe improves generation quality? -SPACE_RULE = '" "?' +# Constraining spaces to prevent model "running away". +SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}' PRIMITIVE_RULES = { 'boolean' : BuiltinRule('("true" | "false") space', []), diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index dc246839..faed6a32 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -1,5 +1,5 @@ // WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first. -const SPACE_RULE = '" "?'; +const SPACE_RULE = '| " " | "\\n" [ \\t]{0,20}'; function _buildRepetition(itemRule, minItems, maxItems, opts={}) { if (minItems === 0 && maxItems === 1) { diff --git a/grammars/json.gbnf b/grammars/json.gbnf index 064a53f8..b6448c87 100644 --- a/grammars/json.gbnf +++ b/grammars/json.gbnf @@ -22,4 +22,4 @@ string ::= number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [0-9] [1-9]{0,15})? ws # Optional space: by convention, applied in this grammar after literal chars when allowed -ws ::= [ \t\n]{0,20} +ws ::= | " " | "\n" [ \t]{0,20} diff --git a/grammars/json_arr.gbnf b/grammars/json_arr.gbnf index bd1312d9..b3dc6f9b 100644 --- a/grammars/json_arr.gbnf +++ b/grammars/json_arr.gbnf @@ -31,4 +31,4 @@ string ::= number ::= ("-"? ([0-9] | [1-9] [0-9]{0,15})) ("." [0-9]+)? ([eE] [-+]? [1-9] [0-9]{0,15})? ws # Optional space: by convention, applied in this grammar after literal chars when allowed -ws ::= [ \t\n]{0,20} +ws ::= | " " | "\n" [ \t]{0,20} diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index bea876bd..a33104de 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -112,7 +112,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space root ::= object - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space value ::= object | array | string | number | boolean | null )""" @@ -135,7 +135,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase date-time ::= date "T" time date-time-string ::= "\"" date-time "\"" space root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ) time-string ::= "\"" time "\"" space tuple-0 ::= date-string @@ -154,7 +154,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char* "\"" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -168,7 +168,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char+ "\"" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -182,7 +182,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char{3,} "\"" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -196,7 +196,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char{0,3} "\"" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -211,7 +211,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "\"" char{1,4} "\"" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -223,7 +223,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase })""", R"""( root ::= ("true" | "false") space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -236,7 +236,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( integral-part ::= [0] | [1-9] [0-9]{0,15} root ::= ("-"? integral-part) space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -248,7 +248,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase })""", R"""( root ::= "\"foo\"" - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -260,7 +260,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase })""", R"""( root ::= "123" - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -272,7 +272,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase })""", R"""( root ::= "\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]" - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -285,7 +285,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "[" space string "]" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -302,7 +302,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase integral-part ::= [0] | [1-9] [0-9]{0,15} number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space root ::= "[" space string "," space number "]" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -317,7 +317,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase decimal-part ::= [0-9]{1,16} integral-part ::= [0] | [1-9] [0-9]{0,15} root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -333,7 +333,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( boolean ::= ("true" | "false") space root ::= "[" space boolean ("," space boolean)+ "]" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -349,7 +349,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( boolean ::= ("true" | "false") space root ::= "[" space boolean? "]" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -365,7 +365,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase R"""( boolean ::= ("true" | "false") space root ::= "[" space (boolean ("," space boolean)?)? "]" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -386,7 +386,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase item ::= number | integer number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space root ::= "[" space item ("," space item){2,4} "]" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -399,7 +399,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase })""", R"""( root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -412,7 +412,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase })""", R"""( root ::= "\"" "[]{}()|+*?" "\"" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -425,7 +425,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase })""", R"""( root ::= "\"" "\"" "\"" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -440,7 +440,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase dot ::= [^\x0A\x0D] root ::= "\"" ("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot "\"" space root-1 ::= [0-9] - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -468,7 +468,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase c-kv ::= "\"c\"" space ":" space string char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -488,7 +488,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase a-kv ::= "\"a\"" space ":" space string char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "{" space (a-kv )? "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -512,7 +512,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase c-kv ::= "\"c\"" space ":" space string char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -538,7 +538,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase d-kv ::= "\"d\"" space ":" space string d-rest ::= ( "," space c-kv )? root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -559,7 +559,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase integral-part ::= [0] | [1-9] [0-9]{0,15} number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space root ::= "{" space (additional-kvs )? "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -581,7 +581,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space root ::= object - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space value ::= object | array | string | number | boolean | null )""" @@ -603,7 +603,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space root ::= object - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space value ::= object | array | string | number | boolean | null )""" @@ -618,7 +618,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase })""", R"""( root ::= "{" space "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -642,7 +642,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase integral-part ::= [0] | [1-9] [0-9]{0,15} number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -667,7 +667,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase integral-part ::= [0] | [1-9] [0-9]{0,15} number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -695,7 +695,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase integral-part ::= [0] | [1-9] [0-9]{0,15} number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -725,7 +725,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase foo ::= "{" space foo-a-kv "}" space foo-a-kv ::= "\"a\"" space ":" space string root ::= foo - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} string ::= "\"" char* "\"" space )""" }); @@ -759,7 +759,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase integral-part ::= [0] | [1-9] [0-9]{0,15} number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space root ::= alternative-0 | alternative-1 - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -803,7 +803,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase integral-part ::= [0] | [1-9] [0-9]{0,15} number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); @@ -851,7 +851,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase number-number-kv ::= "\"number\"" space ":" space number-number number-number-root-kv ::= "\"root\"" space ":" space number root ::= "{" space number-kv "}" space - space ::= " "? + space ::= | " " | "\n" [ \t]{0,20} )""" }); } |