author | Olivier Chafik <ochafik@users.noreply.github.com> | 2024-03-21 11:50:43 +0000
---|---|---
committer | GitHub <noreply@github.com> | 2024-03-21 11:50:43 +0000
commit | 5b7b0ac8dfdd800c0fd0dc69b69991e8cb19fb46 (patch) |
tree | 6d06dbb6094671bc567f43acf9064bef69329204 /tests |
parent | 1943c0198125a0da1a200390e82cf461f9080d99 (diff) |
json-schema-to-grammar improvements (+ added to server) (#5978)
* json: fix arrays (disallow `[,1]`)
* json: support tuple types (`[number, string]`)
* json: support additionalProperties (`{[k: string]: [string,number][]}`)
* json: support required / optional properties
* json: add support for pattern
* json: resolve $ref (and support https schema urls)
* json: fix $ref resolution
* json: support union types (mostly for nullable types I think)
* json: support allOf + nested anyOf
* json: support any (`{}` or `{type: object}`)
* json: fix merge
* json: temp fix for escapes
* json: spaces in output and unrestricted output spaces
* json: add typings
* json: fix typo
* Create ts-type-to-grammar.sh
* json: fix _format_literal (json.dumps already escapes quotes)
* json: merge lit sequences and handle negatives
{"type": "string", "pattern": "^({\"question\": \"[^\"]+\", \"response\": \"[^\"]+\"}\\n)+$"}
* json: handle pattern repetitions
* Update json-schema-to-grammar.mjs
* Create regex-to-grammar.py
* json: extract repeated regexp patterns to subrule
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* json: handle schema from pydantic Optional fields
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* Update ts-type-to-grammar.sh
* Update ts-type-to-grammar.sh
* json: simplify nullable fields handling
* json: accept duplicate identical rules
* json: revert space to 1 at most
* json: reuse regexp pattern subrules
* json: handle uuid string format
* json: fix literal escapes
* json: add --allow-fetch
* json: simplify range escapes
* json: support negative ranges in patterns
* Delete commit.txt
* json: custom regex parser, adds dot support & JS-portable
* json: rm trailing spaces
* Update json-schema-to-grammar.mjs
* json: updated server & chat `( cd examples/server && ./deps.sh )`
* json: port fixes from mjs to python
* Update ts-type-to-grammar.sh
* json: support prefixItems alongside array items
* json: add date format + fix uuid
* json: add date, time, date-time formats
* json: preserve order of props from TS defs
* json: port schema converter to C++, wire in ./server
* json: nits
* Update json-schema-to-grammar.cpp
* Update json-schema-to-grammar.cpp
* Update json-schema-to-grammar.cpp
* json: fix mjs implementation + align outputs
* Update json-schema-to-grammar.mjs.hpp
* json: test C++, JS & Python versions
* json: nits + regen deps
* json: cleanup test
* json: revert from c++17 to 11
* json: nit fixes
* json: dirty include for test
* json: fix zig build
* json: pass static command to std::system in tests (fixed temp files)
* json: fix top-level $refs
* json: don't use c++20 designated initializers
* nit
* json: basic support for reserved names `{number:{number:{root:number}}}`
* Revamp test cmake to allow args (WORKING_DIRECTORY needed for JSON test)
* json: re-ran server deps.sh
* json: simplify test
* json: support mix of additional props & required/optional
* json: add tests for some expected failures
* json: fix type=const in c++, add failure expectations for non-str const&enum
* json: test (& simplify output of) empty schema
* json: check parsing in test + fix value & string refs
* json: add server tests for OAI JSON response_format
* json: test/fix top-level anyOf
* json: improve grammar parsing failures
* json: test/fix additional props corner cases
* json: fix string patterns (was missing quotes)
* json: ws nit
* json: fix json handling in server when there's no response_format
* json: catch schema conversion errors in server
* json: don't complain about unknown format type in server if unset
* json: cleaner build of test
* json: create examples/json-schema-pydantic-example.py
* json: fix date pattern
* json: move json.hpp & json-schema-to-grammar.{cpp,h} to common
* json: indent 4 spaces
* json: fix naming of top-level c++ function (+ drop unused one)
* json: avoid using namespace std
* json: fix zig build
* Update server.feature
* json: iostream -> fprintf
* json: space before & refs for consistency
* json: nits
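To make the feature list above concrete: the new cross-implementation test drives the Python converter as a plain CLI — write a schema to a file, run `examples/json-schema-to-grammar.py` on it, and read the printed GBNF. Below is a minimal sketch of that same flow, assuming it runs from the llama.cpp repo root (the temp file names here are illustrative, borrowed from the test harness):

```python
import json
import subprocess

# One of the schemas covered by the new tests; a {"type": "string"} schema
# converts to a grammar with just `root` and `space` rules.
schema = {"type": "string"}

with open("test-json-schema-input.tmp", "w") as f:
    json.dump(schema, f)

# Same invocation the C++ test harness issues via std::system() (see the
# diff below); the grammar is printed to stdout.
result = subprocess.run(
    ["python", "./examples/json-schema-to-grammar.py", "test-json-schema-input.tmp"],
    capture_output=True, text=True, check=True,
)
print(result.stdout)
# root ::= "\"" ( [^"\\] | "\\" (["\\/bfnrt] | ...) )* "\"" space
# space ::= " "?
```

The C++ and JavaScript implementations are exercised the same way in the new test, and all three are checked against shared expected grammars.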
Diffstat (limited to 'tests')
-rw-r--r-- | tests/CMakeLists.txt | 109
-rw-r--r-- | tests/run-json-schema-to-grammar.mjs | 10
-rwxr-xr-x | tests/test-json-schema-to-grammar.cpp | 824
3 files changed, 893 insertions, 50 deletions
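The commit message also mentions a new examples/json-schema-pydantic-example.py; that file is outside this tests-only diffstat, so the following is only a hedged sketch of the intended flow, assuming pydantic v2 (`model_json_schema()`) and the converter CLI used by the tests. The model class is hypothetical:

```python
import json
import subprocess
from pydantic import BaseModel

class QA(BaseModel):
    # Hypothetical model for illustration; mirrors the question/response
    # shape shown in the commit message's pattern example.
    question: str
    response: str

# pydantic emits a standard JSON schema (with $defs / required), which the
# converter can resolve thanks to the $ref support added in this commit.
with open("qa-schema.tmp.json", "w") as f:
    json.dump(QA.model_json_schema(), f)

grammar = subprocess.run(
    ["python", "./examples/json-schema-to-grammar.py", "qa-schema.tmp.json"],
    capture_output=True, text=True, check=True,
).stdout
print(grammar)  # GBNF that constrains generation to objects matching QA
```

The resulting grammar can then constrain llama.cpp generation, e.g. through the server's JSON `response_format` handling added in this commit.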
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 10326d53..a43439ae 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,66 +1,75 @@
-function(llama_build_executable source)
-    get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_executable(${TEST_TARGET} ${source} get-model.cpp)
-    install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE common)
-endfunction()
+# Builds and runs a test source file.
+# Optional args:
+# - NAME: name of the executable & test target (defaults to the source file name without extension)
+# - LABEL: label for the test (defaults to main)
+# - ARGS: arguments to pass to the test executable
+# - WORKING_DIRECTORY
+function(llama_test source)
+    include(CMakeParseArguments)
+    set(options)
+    set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
+    set(multiValueArgs ARGS)
+    cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

-function(llama_test_executable name source)
-    get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
-    set_property(TEST ${name} PROPERTY LABELS "main")
-endfunction()
+    if (NOT DEFINED LLAMA_TEST_LABEL)
+        set(LLAMA_TEST_LABEL "main")
+    endif()
+    if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
+        set(LLAMA_TEST_WORKING_DIRECTORY .)
+    endif()
+    if (DEFINED LLAMA_TEST_NAME)
+        set(TEST_TARGET ${LLAMA_TEST_NAME})
+    else()
+        get_filename_component(TEST_TARGET ${source} NAME_WE)
+    endif()

-function(llama_build_and_test_executable source)
-    llama_build_and_test_executable_with_label(${source} "main")
-endfunction()
-
-function(llama_build_and_test_executable_with_label source label)
-    get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source} get-model.cpp)
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE common)
-    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
-    set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
+    target_link_libraries(${TEST_TARGET} PRIVATE common json-schema-to-grammar)
+    add_test(
+        NAME ${TEST_TARGET}
+        WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
+        COMMAND $<TARGET_FILE:${TEST_TARGET}>
+        ${LLAMA_TEST_ARGS})
+
+    set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL})
 endfunction()

-# llama_build_and_test_executable(test-double-float.cpp) # SLOW
-llama_build_and_test_executable(test-quantize-fns.cpp)
-llama_build_and_test_executable(test-quantize-perf.cpp)
-llama_build_and_test_executable(test-sampling.cpp)
-llama_build_and_test_executable(test-chat-template.cpp)
+# llama_test(test-double-float.cpp) # SLOW
+llama_test(test-quantize-fns.cpp)
+llama_test(test-quantize-perf.cpp)
+llama_test(test-sampling.cpp)
+llama_test(test-chat-template.cpp)

-llama_build_executable(test-tokenizer-0-llama.cpp)
-llama_test_executable (test-tokenizer-0-llama test-tokenizer-0-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test(test-tokenizer-0-llama.cpp NAME test-tokenizer-0-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test(test-tokenizer-0-falcon.cpp NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)

-llama_build_executable(test-tokenizer-0-falcon.cpp)
-llama_test_executable (test-tokenizer-0-falcon test-tokenizer-0-falcon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
+llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)

-llama_build_executable(test-tokenizer-1-llama.cpp)
-llama_test_executable (test-tokenizer-1-llama test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
-llama_test_executable (test-tokenizer-1-baichuan test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-stablelm-3b-4e1t ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm-3b-4e1t.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-gpt2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
+#llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-bloom ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG

-llama_build_executable(test-tokenizer-1-bpe.cpp)
-llama_test_executable (test-tokenizer-1-falcon test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
-llama_test_executable (test-tokenizer-1-aquila test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-llama_test_executable (test-tokenizer-1-mpt test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
-llama_test_executable (test-tokenizer-1-stablelm-3b-4e1t test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm-3b-4e1t.gguf)
-llama_test_executable (test-tokenizer-1-gpt-neox test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
-llama_test_executable (test-tokenizer-1-refact test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
-llama_test_executable (test-tokenizer-1-starcoder test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
-llama_test_executable (test-tokenizer-1-gpt2 test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
-# llama_test_executable (test-tokenizer-1-bloom test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG
+llama_test(test-grammar-parser.cpp)
+llama_test(test-llama-grammar.cpp)
+llama_test(test-grad0.cpp)
+# llama_test(test-opt.cpp) # SLOW
+llama_test(test-backend-ops.cpp)

-llama_build_and_test_executable(test-grammar-parser.cpp)
-llama_build_and_test_executable(test-llama-grammar.cpp)
-llama_build_and_test_executable(test-grad0.cpp)
-# llama_build_and_test_executable(test-opt.cpp) # SLOW
-llama_build_and_test_executable(test-backend-ops.cpp)
+llama_test(test-rope.cpp)

-llama_build_and_test_executable(test-rope.cpp)
+llama_test(test-model-load-cancel.cpp LABEL "model")
+llama_test(test-autorelease.cpp LABEL "model")

-llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
-llama_build_and_test_executable_with_label(test-autorelease.cpp "model")
+llama_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)

 # dummy executable - not installed
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
diff --git a/tests/run-json-schema-to-grammar.mjs b/tests/run-json-schema-to-grammar.mjs
new file mode 100644
index 00000000..71bf62ed
--- /dev/null
+++ b/tests/run-json-schema-to-grammar.mjs
@@ -0,0 +1,10 @@
+import { readFileSync } from "fs"
+import { SchemaConverter } from "../examples/server/public/json-schema-to-grammar.mjs"
+
+const [, , file] = process.argv
+const url = `file://${file}`
+let schema = JSON.parse(readFileSync(file, "utf8"));
+const converter = new SchemaConverter({})
+schema = await converter.resolveRefs(schema, url)
+converter.visit(schema, '')
+console.log(converter.formatGrammar())
diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp
new file mode 100755
index 00000000..a13bb9d7
--- /dev/null
+++ b/tests/test-json-schema-to-grammar.cpp
@@ -0,0 +1,824 @@
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <fstream>
+#include <sstream>
+#include <regex>
+
+#include "json-schema-to-grammar.h"
+#include "grammar-parser.h"
+
+static std::string trim(const std::string & source) {
+    std::string s(source);
+    s.erase(0,s.find_first_not_of(" \n\r\t"));
+    s.erase(s.find_last_not_of(" \n\r\t")+1);
+    return std::regex_replace(s, std::regex("(^|\n)[ \t]+"), "$1");
+}
+
+enum TestCaseStatus {
+    SUCCESS,
+    FAILURE
+};
+
+struct TestCase {
+    TestCaseStatus expected_status;
+    std::string name;
+    std::string schema;
+    std::string expected_grammar;
+
+    void _print_failure_header() const {
+        fprintf(stderr, "#\n# Test '%s' failed.\n#\n%s\n", name.c_str(), schema.c_str());
+    }
+    void verify(const std::string & actual_grammar) const {
+        if (trim(actual_grammar) != trim(expected_grammar)) {
+            _print_failure_header();
+            fprintf(stderr, "# EXPECTED:\n%s\n# ACTUAL:\n%s\n", expected_grammar.c_str(), actual_grammar.c_str());
+            assert(false);
+        }
+    }
+    void verify_expectation_parseable() const {
+        try {
+            auto state = grammar_parser::parse(expected_grammar.c_str());
+            if (state.symbol_ids.find("root") == state.symbol_ids.end()) {
+                throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar);
+            }
+        } catch (const std::runtime_error & ex) {
+            _print_failure_header();
+            fprintf(stderr, "# GRAMMAR ERROR: %s\n", ex.what());
+            assert(false);
+        }
+    }
+    void verify_status(TestCaseStatus status) const {
+        if (status != expected_status) {
+            _print_failure_header();
+            fprintf(stderr, "# EXPECTED STATUS: %s\n", expected_status == SUCCESS ? "SUCCESS" : "FAILURE");
+            fprintf(stderr, "# ACTUAL STATUS: %s\n", status == SUCCESS ? "SUCCESS" : "FAILURE");
+            assert(false);
+        }
+    }
+};
+
+static void write(const std::string & file, const std::string & content) {
+    std::ofstream f;
+    f.open(file.c_str());
+    f << content.c_str();
+    f.close();
+}
+
+static std::string read(const std::string & file) {
+    std::ostringstream actuals;
+    actuals << std::ifstream(file.c_str()).rdbuf();
+    return actuals.str();
+}
+
+static void test_all(const std::string & lang, std::function<void(const TestCase &)> runner) {
+    fprintf(stderr, "#\n# Testing JSON schema conversion (%s)\n#\n", lang.c_str());
+    auto test = [&](const TestCase & tc) {
+        fprintf(stderr, "- %s%s\n", tc.name.c_str(), tc.expected_status == FAILURE ? " (failure expected)" : "");
+        runner(tc);
+    };
+
+    test({
+        FAILURE,
+        "unknown type",
+        R"""({
+            "type": "kaboom"
+        })""",
+        ""
+    });
+
+    test({
+        FAILURE,
+        "invalid type type",
+        R"""({
+            "type": 123
+        })""",
+        ""
+    });
+
+    test({
+        SUCCESS,
+        "empty schema (object)",
+        "{}",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            null ::= "null" space
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= object
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+            value ::= object | array | string | number | boolean
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "exotic formats",
+        R"""({
+            "items": [
+                { "format": "date" },
+                { "format": "uuid" },
+                { "format": "time" },
+                { "format": "date-time" }
+            ]
+        })""",
+        R"""(
+            date ::= [0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [0-1] )
+            date-string ::= "\"" date "\"" space
+            date-time ::= date "T" time
+            date-time-string ::= "\"" date-time "\"" space
+            root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space
+            space ::= " "?
+            time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
+            time-string ::= "\"" time "\"" space
+            uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "string",
+        R"""({
+            "type": "string"
+        })""",
+        R"""(
+            root ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "boolean",
+        R"""({
+            "type": "boolean"
+        })""",
+        R"""(
+            root ::= ("true" | "false") space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "integer",
+        R"""({
+            "type": "integer"
+        })""",
+        R"""(
+            root ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "string const",
+        R"""({
+            "const": "foo"
+        })""",
+        R"""(
+            root ::= "\"foo\""
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        FAILURE,
+        "non-string const",
+        R"""({
+            "const": 123
+        })""",
+        ""
+    });
+
+    test({
+        FAILURE,
+        "non-string enum",
+        R"""({
+            "enum": [123]
+        })""",
+        ""
+    });
+
+    test({
+        SUCCESS,
+        "tuple1",
+        R"""({
+            "prefixItems": [{ "type": "string" }]
+        })""",
+        R"""(
+            root ::= "[" space string "]" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "tuple2",
+        R"""({
+            "prefixItems": [{ "type": "string" }, { "type": "number" }]
+        })""",
+        R"""(
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "[" space string "," space number "]" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "number",
+        R"""({
+            "type": "number"
+        })""",
+        R"""(
+            root ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "minItems",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "minItems": 2
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space boolean ( "," space boolean )( "," space boolean )* "]" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "maxItems 1",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "maxItems": 1
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space ( boolean )? "]" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "maxItems 2",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "maxItems": 2
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space ( boolean ( "," space boolean )? )? "]" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "min + maxItems",
+        R"""({
+            "items": {
+                "type": ["number", "integer"]
+            },
+            "minItems": 3,
+            "maxItems": 5
+        })""",
+        R"""(
+            integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
+            item ::= number | integer
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "simple regexp",
+        R"""({
+            "type": "string",
+            "pattern": "^abc?d*efg+(hij)?kl$"
+        })""",
+        R"""(
+            root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "regexp escapes",
+        R"""({
+            "type": "string",
+            "pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$"
+        })""",
+        R"""(
+            root ::= "\"" "[]{}()|+*?" "\"" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "regexp quote",
+        R"""({
+            "type": "string",
+            "pattern": "^\"$"
+        })""",
+        R"""(
+            root ::= "\"" "\"" "\"" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "regexp",
+        R"""({
+            "type": "string",
+            "pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} and...$"
+        })""",
+        R"""(
+            dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF]
+            root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space
+            root-1 ::= [0-9]
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "required props",
+        R"""({
+            "type": "object",
+            "properties": {
+                "a": {
+                    "type": "string"
+                },
+                "b": {
+                    "type": "string"
+                }
+            },
+            "required": [
+                "a",
+                "b"
+            ],
+            "additionalProperties": false,
+            "definitions": {}
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space string
+            b-kv ::= "\"b\"" space ":" space string
+            root ::= "{" space a-kv "," space b-kv "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "1 optional prop",
+        R"""({
+            "properties": {
+                "a": {
+                    "type": "string"
+                }
+            },
+            "additionalProperties": false
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space string
+            root ::= "{" space (a-kv )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "N optional props",
+        R"""({
+            "properties": {
+                "a": {"type": "string"},
+                "b": {"type": "string"},
+                "c": {"type": "string"}
+            },
+            "additionalProperties": false
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space string
+            a-rest ::= ( "," space b-kv )? b-rest
+            b-kv ::= "\"b\"" space ":" space string
+            b-rest ::= ( "," space c-kv )?
+            c-kv ::= "\"c\"" space ":" space string
+            root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "required + optional props",
+        R"""({
+            "properties": {
+                "a": {"type": "string"},
+                "b": {"type": "string"},
+                "c": {"type": "string"},
+                "d": {"type": "string"}
+            },
+            "required": ["a", "b"],
+            "additionalProperties": false
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space string
+            b-kv ::= "\"b\"" space ":" space string
+            c-kv ::= "\"c\"" space ":" space string
+            c-rest ::= ( "," space d-kv )?
+            d-kv ::= "\"d\"" space ":" space string
+            root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "additional props",
+        R"""({
+            "type": "object",
+            "additionalProperties": {"type": "array", "items": {"type": "number"}}
+        })""",
+        R"""(
+            additional-kv ::= string ":" space additional-value
+            additional-kvs ::= additional-kv ( "," space additional-kv )*
+            additional-value ::= "[" space ( number ( "," space number )* )? "]" space
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space (additional-kvs )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "additional props (true)",
+        R"""({
+            "type": "object",
+            "additionalProperties": true
+        })""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            null ::= "null" space
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= object
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+            value ::= object | array | string | number | boolean
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "additional props (implicit)",
+        R"""({
+            "type": "object"
+        })""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            null ::= "null" space
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= object
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+            value ::= object | array | string | number | boolean
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "empty w/o additional props",
+        R"""({
+            "type": "object",
+            "additionalProperties": false
+        })""",
+        R"""(
+            root ::= "{" space "}" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "required + additional props",
+        R"""({
+            "type": "object",
+            "properties": {
+                "a": {"type": "number"}
+            },
+            "required": ["a"],
+            "additionalProperties": {"type": "string"}
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space number
+            additional-kv ::= string ":" space string
+            additional-kvs ::= additional-kv ( "," space additional-kv )*
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "optional + additional props",
+        R"""({
+            "type": "object",
+            "properties": {
+                "a": {"type": "number"}
+            },
+            "additionalProperties": {"type": "number"}
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space number
+            a-rest ::= additional-kvs
+            additional-kv ::= string ":" space number
+            additional-kvs ::= additional-kv ( "," space additional-kv )*
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "required + optional + additional props",
+        R"""({
+            "type": "object",
+            "properties": {
+                "a": {"type": "number"},
+                "b": {"type": "number"}
+            },
+            "required": ["a"],
+            "additionalProperties": {"type": "number"}
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space number
+            additional-kv ::= string ":" space number
+            additional-kvs ::= additional-kv ( "," space additional-kv )*
+            b-kv ::= "\"b\"" space ":" space number
+            b-rest ::= additional-kvs
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "top-level $ref",
+        R"""({
+            "$ref": "#/definitions/MyType",
+            "definitions": {
+                "MyType": {
+                    "type": "object",
+                    "properties": {
+                        "a": {
+                            "type": "string"
+                        }
+                    },
+                    "required": [
+                        "a"
+                    ],
+                    "additionalProperties": false
+                }
+            }
+        })""",
+        R"""(
+            MyType ::= "{" space MyType-a-kv "}" space
+            MyType-a-kv ::= "\"a\"" space ":" space string
+            root ::= MyType
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "anyOf",
+        R"""({
+            "anyOf": [
+                {"$ref": "#/definitions/foo"},
+                {"$ref": "#/definitions/bar"}
+            ],
+            "definitions": {
+                "foo": {
+                    "properties": {"a": {"type": "number"}}
+                },
+                "bar": {
+                    "properties": {"b": {"type": "number"}}
+                }
+            },
+            "type": "object"
+        })""",
+        R"""(
+            alternative-0 ::= foo
+            alternative-1 ::= bar
+            bar ::= "{" space (bar-b-kv )? "}" space
+            bar-b-kv ::= "\"b\"" space ":" space number
+            foo ::= "{" space (foo-a-kv )? "}" space
+            foo-a-kv ::= "\"a\"" space ":" space number
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= alternative-0 | alternative-1
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
+        R"""({
+            "allOf": [
+                {"$ref": "#/definitions/foo"},
+                {"$ref": "#/definitions/bar"},
+                {
+                    "anyOf": [
+                        {"$ref": "#/definitions/baz"},
+                        {"$ref": "#/definitions/bam"}
+                    ]
+                }
+            ],
+            "definitions": {
+                "foo": {
+                    "properties": {"a": {"type": "number"}}
+                },
+                "bar": {
+                    "properties": {"b": {"type": "number"}}
+                },
+                "bam": {
+                    "properties": {"c": {"type": "number"}}
+                },
+                "baz": {
+                    "properties": {"d": {"type": "number"}}
+                }
+            },
+            "type": "object"
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space number
+            b-kv ::= "\"b\"" space ":" space number
+            c-kv ::= "\"c\"" space ":" space number
+            d-kv ::= "\"d\"" space ":" space number
+            d-rest ::= ( "," space c-kv )?
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "conflicting names",
+        R"""({
+            "type": "object",
+            "properties": {
+                "number": {
+                    "type": "object",
+                    "properties": {
+                        "number": {
+                            "type": "object",
+                            "properties": {
+                                "root": {
+                                    "type": "number"
+                                }
+                            },
+                            "required": [
+                                "root"
+                            ],
+                            "additionalProperties": false
+                        }
+                    },
+                    "required": [
+                        "number"
+                    ],
+                    "additionalProperties": false
+                }
+            },
+            "required": [
+                "number"
+            ],
+            "additionalProperties": false,
+            "definitions": {}
+        })""",
+        R"""(
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            number- ::= "{" space number-number-kv "}" space
+            number-kv ::= "\"number\"" space ":" space number-
+            number-number ::= "{" space number-number-root-kv "}" space
+            number-number-kv ::= "\"number\"" space ":" space number-number
+            number-number-root-kv ::= "\"root\"" space ":" space number
+            root ::= "{" space number-kv "}" space
+            space ::= " "?
+        )"""
+    });
+}
+
+int main() {
+    test_all("C++", [](const TestCase & tc) {
+        try {
+            tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
+            tc.verify_status(SUCCESS);
+        } catch (const std::runtime_error & ex) {
+            fprintf(stderr, "Error: %s\n", ex.what());
+            tc.verify_status(FAILURE);
+        }
+    });
+    test_all("Python", [](const TestCase & tc) {
+        write("test-json-schema-input.tmp", tc.schema);
+        tc.verify_status(std::system(
+            "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
+        tc.verify(read("test-grammar-output.tmp"));
+    });
+    test_all("JavaScript", [](const TestCase & tc) {
+        write("test-json-schema-input.tmp", tc.schema);
+        tc.verify_status(std::system(
+            "node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
+        tc.verify(read("test-grammar-output.tmp"));
+    });
+
+    test_all("Check Expectations Validity", [](const TestCase & tc) {
+        if (tc.expected_status == SUCCESS) {
+            tc.verify_expectation_parseable();
+        }
+    });
+}