diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/CMakeLists.txt | 109 | ||||
-rw-r--r-- | tests/run-json-schema-to-grammar.mjs | 10 | ||||
-rwxr-xr-x | tests/test-json-schema-to-grammar.cpp | 824 |
3 files changed, 893 insertions, 50 deletions
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 10326d53..a43439ae 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,66 +1,75 @@ -function(llama_build_executable source) - get_filename_component(TEST_TARGET ${source} NAME_WE) - add_executable(${TEST_TARGET} ${source} get-model.cpp) - install(TARGETS ${TEST_TARGET} RUNTIME) - target_link_libraries(${TEST_TARGET} PRIVATE common) -endfunction() +# Builds and runs a test source file. +# Optional args: +# - NAME: name of the executable & test target (defaults to the source file name without extension) +# - LABEL: label for the test (defaults to main) +# - ARGS: arguments to pass to the test executable +# - WORKING_DIRECTORY +function(llama_test source) + include(CMakeParseArguments) + set(options) + set(oneValueArgs NAME LABEL WORKING_DIRECTORY) + set(multiValueArgs ARGS) + cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) -function(llama_test_executable name source) - get_filename_component(TEST_TARGET ${source} NAME_WE) - add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN}) - set_property(TEST ${name} PROPERTY LABELS "main") -endfunction() + if (NOT DEFINED LLAMA_TEST_LABEL) + set(LLAMA_TEST_LABEL "main") + endif() + if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY) + set(LLAMA_TEST_WORKING_DIRECTORY .) + endif() + if (DEFINED LLAMA_TEST_NAME) + set(TEST_TARGET ${LLAMA_TEST_NAME}) + else() + get_filename_component(TEST_TARGET ${source} NAME_WE) + endif() -function(llama_build_and_test_executable source) - llama_build_and_test_executable_with_label(${source} "main") -endfunction() - -function(llama_build_and_test_executable_with_label source label) - get_filename_component(TEST_TARGET ${source} NAME_WE) add_executable(${TEST_TARGET} ${source} get-model.cpp) install(TARGETS ${TEST_TARGET} RUNTIME) - target_link_libraries(${TEST_TARGET} PRIVATE common) - add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN}) - set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label}) + target_link_libraries(${TEST_TARGET} PRIVATE common json-schema-to-grammar) + add_test( + NAME ${TEST_TARGET} + WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY} + COMMAND $<TARGET_FILE:${TEST_TARGET}> + ${LLAMA_TEST_ARGS}) + + set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL}) endfunction() -# llama_build_and_test_executable(test-double-float.cpp) # SLOW -llama_build_and_test_executable(test-quantize-fns.cpp) -llama_build_and_test_executable(test-quantize-perf.cpp) -llama_build_and_test_executable(test-sampling.cpp) -llama_build_and_test_executable(test-chat-template.cpp) +# llama_test(test-double-float.cpp) # SLOW +llama_test(test-quantize-fns.cpp) +llama_test(test-quantize-perf.cpp) +llama_test(test-sampling.cpp) +llama_test(test-chat-template.cpp) -llama_build_executable(test-tokenizer-0-llama.cpp) -llama_test_executable (test-tokenizer-0-llama test-tokenizer-0-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf) +llama_test(test-tokenizer-0-llama.cpp NAME test-tokenizer-0-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf) +llama_test(test-tokenizer-0-falcon.cpp NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf) -llama_build_executable(test-tokenizer-0-falcon.cpp) -llama_test_executable (test-tokenizer-0-falcon test-tokenizer-0-falcon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf) +llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf) +llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf) -llama_build_executable(test-tokenizer-1-llama.cpp) -llama_test_executable (test-tokenizer-1-llama test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf) -llama_test_executable (test-tokenizer-1-baichuan test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf) +llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf) +llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf) +llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf) +llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-stablelm-3b-4e1t ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm-3b-4e1t.gguf) +llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf) +llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf) +llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf) +llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-gpt2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf) +#llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-bloom ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG -llama_build_executable(test-tokenizer-1-bpe.cpp) -llama_test_executable (test-tokenizer-1-falcon test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf) -llama_test_executable (test-tokenizer-1-aquila test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf) -llama_test_executable (test-tokenizer-1-mpt test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf) -llama_test_executable (test-tokenizer-1-stablelm-3b-4e1t test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm-3b-4e1t.gguf) -llama_test_executable (test-tokenizer-1-gpt-neox test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf) -llama_test_executable (test-tokenizer-1-refact test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf) -llama_test_executable (test-tokenizer-1-starcoder test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf) -llama_test_executable (test-tokenizer-1-gpt2 test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf) -# llama_test_executable (test-tokenizer-1-bloom test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG +llama_test(test-grammar-parser.cpp) +llama_test(test-llama-grammar.cpp) +llama_test(test-grad0.cpp) +# llama_test(test-opt.cpp) # SLOW +llama_test(test-backend-ops.cpp) -llama_build_and_test_executable(test-grammar-parser.cpp) -llama_build_and_test_executable(test-llama-grammar.cpp) -llama_build_and_test_executable(test-grad0.cpp) -# llama_build_and_test_executable(test-opt.cpp) # SLOW -llama_build_and_test_executable(test-backend-ops.cpp) +llama_test(test-rope.cpp) -llama_build_and_test_executable(test-rope.cpp) +llama_test(test-model-load-cancel.cpp LABEL "model") +llama_test(test-autorelease.cpp LABEL "model") -llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model") -llama_build_and_test_executable_with_label(test-autorelease.cpp "model") +llama_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..) +target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server) # dummy executable - not installed get_filename_component(TEST_TARGET test-c.c NAME_WE) diff --git a/tests/run-json-schema-to-grammar.mjs b/tests/run-json-schema-to-grammar.mjs new file mode 100644 index 00000000..71bf62ed --- /dev/null +++ b/tests/run-json-schema-to-grammar.mjs @@ -0,0 +1,10 @@ +import { readFileSync } from "fs" +import { SchemaConverter } from "../examples/server/public/json-schema-to-grammar.mjs" + +const [, , file] = process.argv +const url = `file://${file}` +let schema = JSON.parse(readFileSync(file, "utf8")); +const converter = new SchemaConverter({}) +schema = await converter.resolveRefs(schema, url) +converter.visit(schema, '') +console.log(converter.formatGrammar()) diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp new file mode 100755 index 00000000..a13bb9d7 --- /dev/null +++ b/tests/test-json-schema-to-grammar.cpp @@ -0,0 +1,824 @@ +#ifdef NDEBUG +#undef NDEBUG +#endif + +#include <fstream> +#include <sstream> +#include <regex> + +#include "json-schema-to-grammar.h" +#include "grammar-parser.h" + +static std::string trim(const std::string & source) { + std::string s(source); + s.erase(0,s.find_first_not_of(" \n\r\t")); + s.erase(s.find_last_not_of(" \n\r\t")+1); + return std::regex_replace(s, std::regex("(^|\n)[ \t]+"), "$1"); +} + +enum TestCaseStatus { + SUCCESS, + FAILURE +}; + +struct TestCase { + TestCaseStatus expected_status; + std::string name; + std::string schema; + std::string expected_grammar; + + void _print_failure_header() const { + fprintf(stderr, "#\n# Test '%s' failed.\n#\n%s\n", name.c_str(), schema.c_str()); + } + void verify(const std::string & actual_grammar) const { + if (trim(actual_grammar) != trim(expected_grammar)) { + _print_failure_header(); + fprintf(stderr, "# EXPECTED:\n%s\n# ACTUAL:\n%s\n", expected_grammar.c_str(), actual_grammar.c_str()); + assert(false); + } + } + void verify_expectation_parseable() const { + try { + auto state = grammar_parser::parse(expected_grammar.c_str()); + if (state.symbol_ids.find("root") == state.symbol_ids.end()) { + throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar); + } + } catch (const std::runtime_error & ex) { + _print_failure_header(); + fprintf(stderr, "# GRAMMAR ERROR: %s\n", ex.what()); + assert(false); + } + } + void verify_status(TestCaseStatus status) const { + if (status != expected_status) { + _print_failure_header(); + fprintf(stderr, "# EXPECTED STATUS: %s\n", expected_status == SUCCESS ? "SUCCESS" : "FAILURE"); + fprintf(stderr, "# ACTUAL STATUS: %s\n", status == SUCCESS ? "SUCCESS" : "FAILURE"); + assert(false); + } + } +}; + +static void write(const std::string & file, const std::string & content) { + std::ofstream f; + f.open(file.c_str()); + f << content.c_str(); + f.close(); +} + +static std::string read(const std::string & file) { + std::ostringstream actuals; + actuals << std::ifstream(file.c_str()).rdbuf(); + return actuals.str(); +} + +static void test_all(const std::string & lang, std::function<void(const TestCase &)> runner) { + fprintf(stderr, "#\n# Testing JSON schema conversion (%s)\n#\n", lang.c_str()); + auto test = [&](const TestCase & tc) { + fprintf(stderr, "- %s%s\n", tc.name.c_str(), tc.expected_status == FAILURE ? " (failure expected)" : ""); + runner(tc); + }; + + test({ + FAILURE, + "unknown type", + R"""({ + "type": "kaboom" + })""", + "" + }); + + test({ + FAILURE, + "invalid type type", + R"""({ + "type": 123 + })""", + "" + }); + + test({ + SUCCESS, + "empty schema (object)", + "{}", + R"""( + array ::= "[" space ( value ("," space value)* )? "]" space + boolean ::= ("true" | "false") space + null ::= "null" space + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space + root ::= object + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + value ::= object | array | string | number | boolean + )""" + }); + + test({ + SUCCESS, + "exotic formats", + R"""({ + "items": [ + { "format": "date" }, + { "format": "uuid" }, + { "format": "time" }, + { "format": "date-time" } + ] + })""", + R"""( + date ::= [0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [0-1] ) + date-string ::= "\"" date "\"" space + date-time ::= date "T" time + date-time-string ::= "\"" date-time "\"" space + root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space + space ::= " "? + time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ) + time-string ::= "\"" time "\"" space + uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space + )""" + }); + + test({ + SUCCESS, + "string", + R"""({ + "type": "string" + })""", + R"""( + root ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "boolean", + R"""({ + "type": "boolean" + })""", + R"""( + root ::= ("true" | "false") space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "integer", + R"""({ + "type": "integer" + })""", + R"""( + root ::= ("-"? ([0-9] | [1-9] [0-9]*)) space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "string const", + R"""({ + "const": "foo" + })""", + R"""( + root ::= "\"foo\"" + space ::= " "? + )""" + }); + + test({ + FAILURE, + "non-string const", + R"""({ + "const": 123 + })""", + "" + }); + + test({ + FAILURE, + "non-string enum", + R"""({ + "enum": [123] + })""", + "" + }); + + test({ + SUCCESS, + "tuple1", + R"""({ + "prefixItems": [{ "type": "string" }] + })""", + R"""( + root ::= "[" space string "]" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "tuple2", + R"""({ + "prefixItems": [{ "type": "string" }, { "type": "number" }] + })""", + R"""( + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + root ::= "[" space string "," space number "]" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "number", + R"""({ + "type": "number" + })""", + R"""( + root ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "minItems", + R"""({ + "items": { + "type": "boolean" + }, + "minItems": 2 + })""", + R"""( + boolean ::= ("true" | "false") space + root ::= "[" space boolean ( "," space boolean )( "," space boolean )* "]" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "maxItems 1", + R"""({ + "items": { + "type": "boolean" + }, + "maxItems": 1 + })""", + R"""( + boolean ::= ("true" | "false") space + root ::= "[" space ( boolean )? "]" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "maxItems 2", + R"""({ + "items": { + "type": "boolean" + }, + "maxItems": 2 + })""", + R"""( + boolean ::= ("true" | "false") space + root ::= "[" space ( boolean ( "," space boolean )? )? "]" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "min + maxItems", + R"""({ + "items": { + "type": ["number", "integer"] + }, + "minItems": 3, + "maxItems": 5 + })""", + R"""( + integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space + item ::= number | integer + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "simple regexp", + R"""({ + "type": "string", + "pattern": "^abc?d*efg+(hij)?kl$" + })""", + R"""( + root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "regexp escapes", + R"""({ + "type": "string", + "pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$" + })""", + R"""( + root ::= "\"" "[]{}()|+*?" "\"" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "regexp quote", + R"""({ + "type": "string", + "pattern": "^\"$" + })""", + R"""( + root ::= "\"" "\"" "\"" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "regexp", + R"""({ + "type": "string", + "pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} and...$" + })""", + R"""( + dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF] + root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space + root-1 ::= [0-9] + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "required props", + R"""({ + "type": "object", + "properties": { + "a": { + "type": "string" + }, + "b": { + "type": "string" + } + }, + "required": [ + "a", + "b" + ], + "additionalProperties": false, + "definitions": {} + })""", + R"""( + a-kv ::= "\"a\"" space ":" space string + b-kv ::= "\"b\"" space ":" space string + root ::= "{" space a-kv "," space b-kv "}" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "1 optional prop", + R"""({ + "properties": { + "a": { + "type": "string" + } + }, + "additionalProperties": false + })""", + R"""( + a-kv ::= "\"a\"" space ":" space string + root ::= "{" space (a-kv )? "}" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "N optional props", + R"""({ + "properties": { + "a": {"type": "string"}, + "b": {"type": "string"}, + "c": {"type": "string"} + }, + "additionalProperties": false + })""", + R"""( + a-kv ::= "\"a\"" space ":" space string + a-rest ::= ( "," space b-kv )? b-rest + b-kv ::= "\"b\"" space ":" space string + b-rest ::= ( "," space c-kv )? + c-kv ::= "\"c\"" space ":" space string + root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "required + optional props", + R"""({ + "properties": { + "a": {"type": "string"}, + "b": {"type": "string"}, + "c": {"type": "string"}, + "d": {"type": "string"} + }, + "required": ["a", "b"], + "additionalProperties": false + })""", + R"""( + a-kv ::= "\"a\"" space ":" space string + b-kv ::= "\"b\"" space ":" space string + c-kv ::= "\"c\"" space ":" space string + c-rest ::= ( "," space d-kv )? + d-kv ::= "\"d\"" space ":" space string + root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "additional props", + R"""({ + "type": "object", + "additionalProperties": {"type": "array", "items": {"type": "number"}} + })""", + R"""( + additional-kv ::= string ":" space additional-value + additional-kvs ::= additional-kv ( "," space additional-kv )* + additional-value ::= "[" space ( number ( "," space number )* )? "]" space + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + root ::= "{" space (additional-kvs )? "}" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "additional props (true)", + R"""({ + "type": "object", + "additionalProperties": true + })""", + R"""( + array ::= "[" space ( value ("," space value)* )? "]" space + boolean ::= ("true" | "false") space + null ::= "null" space + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space + root ::= object + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + value ::= object | array | string | number | boolean + )""" + }); + + test({ + SUCCESS, + "additional props (implicit)", + R"""({ + "type": "object" + })""", + R"""( + array ::= "[" space ( value ("," space value)* )? "]" space + boolean ::= ("true" | "false") space + null ::= "null" space + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space + root ::= object + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + value ::= object | array | string | number | boolean + )""" + }); + + test({ + SUCCESS, + "empty w/o additional props", + R"""({ + "type": "object", + "additionalProperties": false + })""", + R"""( + root ::= "{" space "}" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "required + additional props", + R"""({ + "type": "object", + "properties": { + "a": {"type": "number"} + }, + "required": ["a"], + "additionalProperties": {"type": "string"} + })""", + R"""( + a-kv ::= "\"a\"" space ":" space number + additional-kv ::= string ":" space string + additional-kvs ::= additional-kv ( "," space additional-kv )* + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "optional + additional props", + R"""({ + "type": "object", + "properties": { + "a": {"type": "number"} + }, + "additionalProperties": {"type": "number"} + })""", + R"""( + a-kv ::= "\"a\"" space ":" space number + a-rest ::= additional-kvs + additional-kv ::= string ":" space number + additional-kvs ::= additional-kv ( "," space additional-kv )* + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "required + optional + additional props", + R"""({ + "type": "object", + "properties": { + "a": {"type": "number"}, + "b": {"type": "number"} + }, + "required": ["a"], + "additionalProperties": {"type": "number"} + })""", + R"""( + a-kv ::= "\"a\"" space ":" space number + additional-kv ::= string ":" space number + additional-kvs ::= additional-kv ( "," space additional-kv )* + b-kv ::= "\"b\"" space ":" space number + b-rest ::= additional-kvs + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "top-level $ref", + R"""({ + "$ref": "#/definitions/MyType", + "definitions": { + "MyType": { + "type": "object", + "properties": { + "a": { + "type": "string" + } + }, + "required": [ + "a" + ], + "additionalProperties": false + } + } + })""", + R"""( + MyType ::= "{" space MyType-a-kv "}" space + MyType-a-kv ::= "\"a\"" space ":" space string + root ::= MyType + space ::= " "? + string ::= "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space + )""" + }); + + test({ + SUCCESS, + "anyOf", + R"""({ + "anyOf": [ + {"$ref": "#/definitions/foo"}, + {"$ref": "#/definitions/bar"} + ], + "definitions": { + "foo": { + "properties": {"a": {"type": "number"}} + }, + "bar": { + "properties": {"b": {"type": "number"}} + } + }, + "type": "object" + })""", + R"""( + alternative-0 ::= foo + alternative-1 ::= bar + bar ::= "{" space (bar-b-kv )? "}" space + bar-b-kv ::= "\"b\"" space ":" space number + foo ::= "{" space (foo-a-kv )? "}" space + foo-a-kv ::= "\"a\"" space ":" space number + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + root ::= alternative-0 | alternative-1 + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)", + R"""({ + "allOf": [ + {"$ref": "#/definitions/foo"}, + {"$ref": "#/definitions/bar"}, + { + "anyOf": [ + {"$ref": "#/definitions/baz"}, + {"$ref": "#/definitions/bam"} + ] + } + ], + "definitions": { + "foo": { + "properties": {"a": {"type": "number"}} + }, + "bar": { + "properties": {"b": {"type": "number"}} + }, + "bam": { + "properties": {"c": {"type": "number"}} + }, + "baz": { + "properties": {"d": {"type": "number"}} + } + }, + "type": "object" + })""", + R"""( + a-kv ::= "\"a\"" space ":" space number + b-kv ::= "\"b\"" space ":" space number + c-kv ::= "\"c\"" space ":" space number + d-kv ::= "\"d\"" space ":" space number + d-rest ::= ( "," space c-kv )? + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space + space ::= " "? + )""" + }); + + test({ + SUCCESS, + "conflicting names", + R"""({ + "type": "object", + "properties": { + "number": { + "type": "object", + "properties": { + "number": { + "type": "object", + "properties": { + "root": { + "type": "number" + } + }, + "required": [ + "root" + ], + "additionalProperties": false + } + }, + "required": [ + "number" + ], + "additionalProperties": false + } + }, + "required": [ + "number" + ], + "additionalProperties": false, + "definitions": {} + })""", + R"""( + number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space + number- ::= "{" space number-number-kv "}" space + number-kv ::= "\"number\"" space ":" space number- + number-number ::= "{" space number-number-root-kv "}" space + number-number-kv ::= "\"number\"" space ":" space number-number + number-number-root-kv ::= "\"root\"" space ":" space number + root ::= "{" space number-kv "}" space + space ::= " "? + )""" + }); +} + +int main() { + test_all("C++", [](const TestCase & tc) { + try { + tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema))); + tc.verify_status(SUCCESS); + } catch (const std::runtime_error & ex) { + fprintf(stderr, "Error: %s\n", ex.what()); + tc.verify_status(FAILURE); + } + }); + test_all("Python", [](const TestCase & tc) { + write("test-json-schema-input.tmp", tc.schema); + tc.verify_status(std::system( + "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE); + tc.verify(read("test-grammar-output.tmp")); + }); + test_all("JavaScript", [](const TestCase & tc) { + write("test-json-schema-input.tmp", tc.schema); + tc.verify_status(std::system( + "node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE); + tc.verify(read("test-grammar-output.tmp")); + }); + + test_all("Check Expectations Validity", [](const TestCase & tc) { + if (tc.expected_status == SUCCESS) { + tc.verify_expectation_parseable(); + } + }); +} |