author | Olivier Chafik <ochafik@users.noreply.github.com> | 2024-03-21 11:50:43 +0000
---|---|---
committer | GitHub <noreply@github.com> | 2024-03-21 11:50:43 +0000
commit | 5b7b0ac8dfdd800c0fd0dc69b69991e8cb19fb46 (patch) |
tree | 6d06dbb6094671bc567f43acf9064bef69329204 /tests |
parent | 1943c0198125a0da1a200390e82cf461f9080d99 (diff) |
json-schema-to-grammar improvements (+ added to server) (#5978)
* json: fix arrays (disallow `[,1]`)
* json: support tuple types (`[number, string]`)
* json: support additionalProperties (`{[k: string]: [string,number][]}`)
* json: support required / optional properties
* json: add support for pattern
* json: resolve $ref (and support https schema urls)
* json: fix $ref resolution
* json: support union types (mostly for nullable types I think)
* json: support allOf + nested anyOf
* json: support any (`{}` or `{type: object}`)
* json: fix merge
* json: temp fix for escapes
* json: spaces in output and unrestricted output spaces
* json: add typings
* json: fix typo
* Create ts-type-to-grammar.sh
* json: fix _format_literal (json.dumps already escapes quotes)
* json: merge lit sequences and handle negatives
{"type": "string", "pattern": "^({\"question\": \"[^\"]+\", \"response\": \"[^\"]+\"}\\n)+$"}
* json: handle pattern repetitions
* Update json-schema-to-grammar.mjs
* Create regex-to-grammar.py
* json: extract repeated regexp patterns to subrule
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* json: handle schema from pydantic Optional fields
* Update json-schema-to-grammar.py
* Update json-schema-to-grammar.py
* Update ts-type-to-grammar.sh
* Update ts-type-to-grammar.sh
* json: simplify nullable fields handling
* json: accept duplicate identical rules
* json: revert space to 1 at most
* json: reuse regexp pattern subrules
* json: handle uuid string format
* json: fix literal escapes
* json: add --allow-fetch
* json: simplify range escapes
* json: support negative ranges in patterns
* Delete commit.txt
* json: custom regex parser, adds dot support & JS-portable
* json: rm trailing spaces
* Update json-schema-to-grammar.mjs
* json: updated server & chat `( cd examples/server && ./deps.sh )`
* json: port fixes from mjs to python
* Update ts-type-to-grammar.sh
* json: support prefixItems alongside array items
* json: add date format + fix uuid
* json: add date, time, date-time formats
* json: preserve order of props from TS defs
* json: port schema converter to C++, wire in ./server
* json: nits
* Update json-schema-to-grammar.cpp
* Update json-schema-to-grammar.cpp
* Update json-schema-to-grammar.cpp
* json: fix mjs implementation + align outputs
* Update json-schema-to-grammar.mjs.hpp
* json: test C++, JS & Python versions
* json: nits + regen deps
* json: cleanup test
* json: revert from c++17 to 11
* json: nit fixes
* json: dirty include for test
* json: fix zig build
* json: pass static command to std::system in tests (fixed temp files)
* json: fix top-level $refs
* json: don't use c++20 designated initializers
* nit
* json: basic support for reserved names `{number:{number:{root:number}}}`
* Revamp test cmake to allow args (WORKING_DIRECTORY needed for JSON test)
* json: re-ran server deps.sh
* json: simplify test
* json: support mix of additional props & required/optional
* json: add tests for some expected failures
* json: fix type=const in c++, add failure expectations for non-str const&enum
* json: test (& simplify output of) empty schema
* json: check parsing in test + fix value & string refs
* json: add server tests for OAI JSON response_format
* json: test/fix top-level anyOf
* json: improve grammar parsing failures
* json: test/fix additional props corner cases
* json: fix string patterns (was missing quotes)
* json: ws nit
* json: fix json handling in server when there's no response_format
* json: catch schema conversion errors in server
* json: don't complain about unknown format type in server if unset
* json: cleaner build of test
* json: create examples/json-schema-pydantic-example.py
* json: fix date pattern
* json: move json.hpp & json-schema-to-grammar.{cpp,h} to common
* json: indent 4 spaces
* json: fix naming of top-level c++ function (+ drop unused one)
* json: avoid using namespace std
* json: fix zig build
* Update server.feature
* json: iostream -> fprintf
* json: space before & refs for consistency
* json: nits
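To make the feature list above concrete: the new cross-implementation test drives the Python converter as a plain CLI — write a schema to a file, run `examples/json-schema-to-grammar.py` on it, and read the printed GBNF. Below is a minimal sketch of that same flow, assuming it runs from the llama.cpp repo root (the temp file names here are illustrative, borrowed from the test harness):

```python
import json
import subprocess

# One of the schemas covered by the new tests; a {"type": "string"} schema
# converts to a grammar with just `root` and `space` rules.
schema = {"type": "string"}

with open("test-json-schema-input.tmp", "w") as f:
    json.dump(schema, f)

# Same invocation the C++ test harness issues via std::system() (see the
# diff below); the grammar is printed to stdout.
result = subprocess.run(
    ["python", "./examples/json-schema-to-grammar.py", "test-json-schema-input.tmp"],
    capture_output=True, text=True, check=True,
)
print(result.stdout)
# root ::= "\"" ( [^"\\] | "\\" (["\\/bfnrt] | ...) )* "\"" space
# space ::= " "?
```

The C++ and JavaScript implementations are exercised the same way in the new test, and all three are checked against shared expected grammars.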
Diffstat (limited to 'tests')
-rw-r--r-- | tests/CMakeLists.txt | 109
-rw-r--r-- | tests/run-json-schema-to-grammar.mjs | 10
-rwxr-xr-x | tests/test-json-schema-to-grammar.cpp | 824
3 files changed, 893 insertions, 50 deletions
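The commit message also mentions a new examples/json-schema-pydantic-example.py; that file is outside this tests-only diffstat, so the following is only a hedged sketch of the intended flow, assuming pydantic v2 (`model_json_schema()`) and the converter CLI used by the tests. The model class is hypothetical:

```python
import json
import subprocess
from pydantic import BaseModel

class QA(BaseModel):
    # Hypothetical model for illustration; mirrors the question/response
    # shape shown in the commit message's pattern example.
    question: str
    response: str

# pydantic emits a standard JSON schema (with $defs / required), which the
# converter can resolve thanks to the $ref support added in this commit.
with open("qa-schema.tmp.json", "w") as f:
    json.dump(QA.model_json_schema(), f)

grammar = subprocess.run(
    ["python", "./examples/json-schema-to-grammar.py", "qa-schema.tmp.json"],
    capture_output=True, text=True, check=True,
).stdout
print(grammar)  # GBNF that constrains generation to objects matching QA
```

The resulting grammar can then constrain llama.cpp generation, e.g. through the server's JSON `response_format` handling added in this commit.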
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 10326d53..a43439ae 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,66 +1,75 @@
-function(llama_build_executable source)
-    get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_executable(${TEST_TARGET} ${source} get-model.cpp)
-    install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE common)
-endfunction()
+# Builds and runs a test source file.
+# Optional args:
+# - NAME: name of the executable & test target (defaults to the source file name without extension)
+# - LABEL: label for the test (defaults to main)
+# - ARGS: arguments to pass to the test executable
+# - WORKING_DIRECTORY
+function(llama_test source)
+    include(CMakeParseArguments)
+    set(options)
+    set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
+    set(multiValueArgs ARGS)
+    cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

-function(llama_test_executable name source)
-    get_filename_component(TEST_TARGET ${source} NAME_WE)
-    add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
-    set_property(TEST ${name} PROPERTY LABELS "main")
-endfunction()
+    if (NOT DEFINED LLAMA_TEST_LABEL)
+        set(LLAMA_TEST_LABEL "main")
+    endif()
+    if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
+        set(LLAMA_TEST_WORKING_DIRECTORY .)
+    endif()
+    if (DEFINED LLAMA_TEST_NAME)
+        set(TEST_TARGET ${LLAMA_TEST_NAME})
+    else()
+        get_filename_component(TEST_TARGET ${source} NAME_WE)
+    endif()

-function(llama_build_and_test_executable source)
-    llama_build_and_test_executable_with_label(${source} "main")
-endfunction()
-
-function(llama_build_and_test_executable_with_label source label)
-    get_filename_component(TEST_TARGET ${source} NAME_WE)
     add_executable(${TEST_TARGET} ${source} get-model.cpp)
     install(TARGETS ${TEST_TARGET} RUNTIME)
-    target_link_libraries(${TEST_TARGET} PRIVATE common)
-    add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
-    set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${label})
+    target_link_libraries(${TEST_TARGET} PRIVATE common json-schema-to-grammar)
+    add_test(
+        NAME ${TEST_TARGET}
+        WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
+        COMMAND $<TARGET_FILE:${TEST_TARGET}>
+        ${LLAMA_TEST_ARGS})
+
+    set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL})
 endfunction()

-# llama_build_and_test_executable(test-double-float.cpp) # SLOW
-llama_build_and_test_executable(test-quantize-fns.cpp)
-llama_build_and_test_executable(test-quantize-perf.cpp)
-llama_build_and_test_executable(test-sampling.cpp)
-llama_build_and_test_executable(test-chat-template.cpp)
+# llama_test(test-double-float.cpp) # SLOW
+llama_test(test-quantize-fns.cpp)
+llama_test(test-quantize-perf.cpp)
+llama_test(test-sampling.cpp)
+llama_test(test-chat-template.cpp)

-llama_build_executable(test-tokenizer-0-llama.cpp)
-llama_test_executable (test-tokenizer-0-llama test-tokenizer-0-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test(test-tokenizer-0-llama.cpp NAME test-tokenizer-0-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test(test-tokenizer-0-falcon.cpp NAME test-tokenizer-0-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)

-llama_build_executable(test-tokenizer-0-falcon.cpp)
-llama_test_executable (test-tokenizer-0-falcon test-tokenizer-0-falcon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
+llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-llama ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
+llama_test(test-tokenizer-1-llama.cpp NAME test-tokenizer-1-baichuan ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)

-llama_build_executable(test-tokenizer-1-llama.cpp)
-llama_test_executable (test-tokenizer-1-llama test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
-llama_test_executable (test-tokenizer-1-baichuan test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-baichuan.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-falcon ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-aquila ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-mpt ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-stablelm-3b-4e1t ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm-3b-4e1t.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-gpt-neox ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-refact ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-starcoder ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
+llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-gpt2 ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
+#llama_test(test-tokenizer-1-bpe.cpp NAME test-tokenizer-1-bloom ARGS ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG

-llama_build_executable(test-tokenizer-1-bpe.cpp)
-llama_test_executable (test-tokenizer-1-falcon test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
-llama_test_executable (test-tokenizer-1-aquila test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
-llama_test_executable (test-tokenizer-1-mpt test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-mpt.gguf)
-llama_test_executable (test-tokenizer-1-stablelm-3b-4e1t test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-stablelm-3b-4e1t.gguf)
-llama_test_executable (test-tokenizer-1-gpt-neox test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt-neox.gguf)
-llama_test_executable (test-tokenizer-1-refact test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-refact.gguf)
-llama_test_executable (test-tokenizer-1-starcoder test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-starcoder.gguf)
-llama_test_executable (test-tokenizer-1-gpt2 test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-gpt2.gguf)
-# llama_test_executable (test-tokenizer-1-bloom test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-bloom.gguf) # BIG
+llama_test(test-grammar-parser.cpp)
+llama_test(test-llama-grammar.cpp)
+llama_test(test-grad0.cpp)
+# llama_test(test-opt.cpp) # SLOW
+llama_test(test-backend-ops.cpp)

-llama_build_and_test_executable(test-grammar-parser.cpp)
-llama_build_and_test_executable(test-llama-grammar.cpp)
-llama_build_and_test_executable(test-grad0.cpp)
-# llama_build_and_test_executable(test-opt.cpp) # SLOW
-llama_build_and_test_executable(test-backend-ops.cpp)
+llama_test(test-rope.cpp)

-llama_build_and_test_executable(test-rope.cpp)
+llama_test(test-model-load-cancel.cpp LABEL "model")
+llama_test(test-autorelease.cpp LABEL "model")

-llama_build_and_test_executable_with_label(test-model-load-cancel.cpp "model")
-llama_build_and_test_executable_with_label(test-autorelease.cpp "model")
+llama_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
+target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server)

 # dummy executable - not installed
 get_filename_component(TEST_TARGET test-c.c NAME_WE)
diff --git a/tests/run-json-schema-to-grammar.mjs b/tests/run-json-schema-to-grammar.mjs
new file mode 100644
index 00000000..71bf62ed
--- /dev/null
+++ b/tests/run-json-schema-to-grammar.mjs
@@ -0,0 +1,10 @@
+import { readFileSync } from "fs"
+import { SchemaConverter } from "../examples/server/public/json-schema-to-grammar.mjs"
+
+const [, , file] = process.argv
+const url = `file://${file}`
+let schema = JSON.parse(readFileSync(file, "utf8"));
+const converter = new SchemaConverter({})
+schema = await converter.resolveRefs(schema, url)
+converter.visit(schema, '')
+console.log(converter.formatGrammar())
diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp
new file mode 100755
index 00000000..a13bb9d7
--- /dev/null
+++ b/tests/test-json-schema-to-grammar.cpp
@@ -0,0 +1,824 @@
+#ifdef NDEBUG
+#undef NDEBUG
+#endif
+
+#include <fstream>
+#include <sstream>
+#include <regex>
+
+#include "json-schema-to-grammar.h"
+#include "grammar-parser.h"
+
+static std::string trim(const std::string & source) {
+    std::string s(source);
+    s.erase(0,s.find_first_not_of(" \n\r\t"));
+    s.erase(s.find_last_not_of(" \n\r\t")+1);
+    return std::regex_replace(s, std::regex("(^|\n)[ \t]+"), "$1");
+}
+
+enum TestCaseStatus {
+    SUCCESS,
+    FAILURE
+};
+
+struct TestCase {
+    TestCaseStatus expected_status;
+    std::string name;
+    std::string schema;
+    std::string expected_grammar;
+
+    void _print_failure_header() const {
+        fprintf(stderr, "#\n# Test '%s' failed.\n#\n%s\n", name.c_str(), schema.c_str());
+    }
+    void verify(const std::string & actual_grammar) const {
+        if (trim(actual_grammar) != trim(expected_grammar)) {
+            _print_failure_header();
+            fprintf(stderr, "# EXPECTED:\n%s\n# ACTUAL:\n%s\n", expected_grammar.c_str(), actual_grammar.c_str());
+            assert(false);
+        }
+    }
+    void verify_expectation_parseable() const {
+        try {
+            auto state = grammar_parser::parse(expected_grammar.c_str());
+            if (state.symbol_ids.find("root") == state.symbol_ids.end()) {
+                throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar);
+            }
+        } catch (const std::runtime_error & ex) {
+            _print_failure_header();
+            fprintf(stderr, "# GRAMMAR ERROR: %s\n", ex.what());
+            assert(false);
+        }
+    }
+    void verify_status(TestCaseStatus status) const {
+        if (status != expected_status) {
+            _print_failure_header();
+            fprintf(stderr, "# EXPECTED STATUS: %s\n", expected_status == SUCCESS ? "SUCCESS" : "FAILURE");
+            fprintf(stderr, "# ACTUAL STATUS: %s\n", status == SUCCESS ? "SUCCESS" : "FAILURE");
+            assert(false);
+        }
+    }
+};
+
+static void write(const std::string & file, const std::string & content) {
+    std::ofstream f;
+    f.open(file.c_str());
+    f << content.c_str();
+    f.close();
+}
+
+static std::string read(const std::string & file) {
+    std::ostringstream actuals;
+    actuals << std::ifstream(file.c_str()).rdbuf();
+    return actuals.str();
+}
+
+static void test_all(const std::string & lang, std::function<void(const TestCase &)> runner) {
+    fprintf(stderr, "#\n# Testing JSON schema conversion (%s)\n#\n", lang.c_str());
+    auto test = [&](const TestCase & tc) {
+        fprintf(stderr, "- %s%s\n", tc.name.c_str(), tc.expected_status == FAILURE ? " (failure expected)" : "");
+        runner(tc);
+    };
+
+    test({
+        FAILURE,
+        "unknown type",
+        R"""({
+            "type": "kaboom"
+        })""",
+        ""
+    });
+
+    test({
+        FAILURE,
+        "invalid type type",
+        R"""({
+            "type": 123
+        })""",
+        ""
+    });
+
+    test({
+        SUCCESS,
+        "empty schema (object)",
+        "{}",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            null ::= "null" space
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= object
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+            value ::= object | array | string | number | boolean
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "exotic formats",
+        R"""({
+            "items": [
+                { "format": "date" },
+                { "format": "uuid" },
+                { "format": "time" },
+                { "format": "date-time" }
+            ]
+        })""",
+        R"""(
+            date ::= [0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [0-1] )
+            date-string ::= "\"" date "\"" space
+            date-time ::= date "T" time
+            date-time-string ::= "\"" date-time "\"" space
+            root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space
+            space ::= " "?
+            time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
+            time-string ::= "\"" time "\"" space
+            uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "string",
+        R"""({
+            "type": "string"
+        })""",
+        R"""(
+            root ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "boolean",
+        R"""({
+            "type": "boolean"
+        })""",
+        R"""(
+            root ::= ("true" | "false") space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "integer",
+        R"""({
+            "type": "integer"
+        })""",
+        R"""(
+            root ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "string const",
+        R"""({
+            "const": "foo"
+        })""",
+        R"""(
+            root ::= "\"foo\""
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        FAILURE,
+        "non-string const",
+        R"""({
+            "const": 123
+        })""",
+        ""
+    });
+
+    test({
+        FAILURE,
+        "non-string enum",
+        R"""({
+            "enum": [123]
+        })""",
+        ""
+    });
+
+    test({
+        SUCCESS,
+        "tuple1",
+        R"""({
+            "prefixItems": [{ "type": "string" }]
+        })""",
+        R"""(
+            root ::= "[" space string "]" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "tuple2",
+        R"""({
+            "prefixItems": [{ "type": "string" }, { "type": "number" }]
+        })""",
+        R"""(
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "[" space string "," space number "]" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "number",
+        R"""({
+            "type": "number"
+        })""",
+        R"""(
+            root ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "minItems",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "minItems": 2
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space boolean ( "," space boolean )( "," space boolean )* "]" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "maxItems 1",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "maxItems": 1
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space ( boolean )? "]" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "maxItems 2",
+        R"""({
+            "items": {
+                "type": "boolean"
+            },
+            "maxItems": 2
+        })""",
+        R"""(
+            boolean ::= ("true" | "false") space
+            root ::= "[" space ( boolean ( "," space boolean )? )? "]" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "min + maxItems",
+        R"""({
+            "items": {
+                "type": ["number", "integer"]
+            },
+            "minItems": 3,
+            "maxItems": 5
+        })""",
+        R"""(
+            integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
+            item ::= number | integer
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "simple regexp",
+        R"""({
+            "type": "string",
+            "pattern": "^abc?d*efg+(hij)?kl$"
+        })""",
+        R"""(
+            root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "regexp escapes",
+        R"""({
+            "type": "string",
+            "pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$"
+        })""",
+        R"""(
+            root ::= "\"" "[]{}()|+*?" "\"" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "regexp quote",
+        R"""({
+            "type": "string",
+            "pattern": "^\"$"
+        })""",
+        R"""(
+            root ::= "\"" "\"" "\"" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "regexp",
+        R"""({
+            "type": "string",
+            "pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} and...$"
+        })""",
+        R"""(
+            dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF]
+            root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space
+            root-1 ::= [0-9]
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "required props",
+        R"""({
+            "type": "object",
+            "properties": {
+                "a": {
+                    "type": "string"
+                },
+                "b": {
+                    "type": "string"
+                }
+            },
+            "required": [
+                "a",
+                "b"
+            ],
+            "additionalProperties": false,
+            "definitions": {}
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space string
+            b-kv ::= "\"b\"" space ":" space string
+            root ::= "{" space a-kv "," space b-kv "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "1 optional prop",
+        R"""({
+            "properties": {
+                "a": {
+                    "type": "string"
+                }
+            },
+            "additionalProperties": false
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space string
+            root ::= "{" space (a-kv )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "N optional props",
+        R"""({
+            "properties": {
+                "a": {"type": "string"},
+                "b": {"type": "string"},
+                "c": {"type": "string"}
+            },
+            "additionalProperties": false
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space string
+            a-rest ::= ( "," space b-kv )? b-rest
+            b-kv ::= "\"b\"" space ":" space string
+            b-rest ::= ( "," space c-kv )?
+            c-kv ::= "\"c\"" space ":" space string
+            root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "required + optional props",
+        R"""({
+            "properties": {
+                "a": {"type": "string"},
+                "b": {"type": "string"},
+                "c": {"type": "string"},
+                "d": {"type": "string"}
+            },
+            "required": ["a", "b"],
+            "additionalProperties": false
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space string
+            b-kv ::= "\"b\"" space ":" space string
+            c-kv ::= "\"c\"" space ":" space string
+            c-rest ::= ( "," space d-kv )?
+            d-kv ::= "\"d\"" space ":" space string
+            root ::= "{" space a-kv "," space b-kv ( "," space ( c-kv c-rest | d-kv ) )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "additional props",
+        R"""({
+            "type": "object",
+            "additionalProperties": {"type": "array", "items": {"type": "number"}}
+        })""",
+        R"""(
+            additional-kv ::= string ":" space additional-value
+            additional-kvs ::= additional-kv ( "," space additional-kv )*
+            additional-value ::= "[" space ( number ( "," space number )* )? "]" space
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space (additional-kvs )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "additional props (true)",
+        R"""({
+            "type": "object",
+            "additionalProperties": true
+        })""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            null ::= "null" space
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= object
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+            value ::= object | array | string | number | boolean
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "additional props (implicit)",
+        R"""({
+            "type": "object"
+        })""",
+        R"""(
+            array ::= "[" space ( value ("," space value)* )? "]" space
+            boolean ::= ("true" | "false") space
+            null ::= "null" space
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
+            root ::= object
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+            value ::= object | array | string | number | boolean
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "empty w/o additional props",
+        R"""({
+            "type": "object",
+            "additionalProperties": false
+        })""",
+        R"""(
+            root ::= "{" space "}" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "required + additional props",
+        R"""({
+            "type": "object",
+            "properties": {
+                "a": {"type": "number"}
+            },
+            "required": ["a"],
+            "additionalProperties": {"type": "string"}
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space number
+            additional-kv ::= string ":" space string
+            additional-kvs ::= additional-kv ( "," space additional-kv )*
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "optional + additional props",
+        R"""({
+            "type": "object",
+            "properties": {
+                "a": {"type": "number"}
+            },
+            "additionalProperties": {"type": "number"}
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space number
+            a-rest ::= additional-kvs
+            additional-kv ::= string ":" space number
+            additional-kvs ::= additional-kv ( "," space additional-kv )*
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space (a-kv a-rest | additional-kvs )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "required + optional + additional props",
+        R"""({
+            "type": "object",
+            "properties": {
+                "a": {"type": "number"},
+                "b": {"type": "number"}
+            },
+            "required": ["a"],
+            "additionalProperties": {"type": "number"}
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space number
+            additional-kv ::= string ":" space number
+            additional-kvs ::= additional-kv ( "," space additional-kv )*
+            b-kv ::= "\"b\"" space ":" space number
+            b-rest ::= additional-kvs
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "top-level $ref",
+        R"""({
+            "$ref": "#/definitions/MyType",
+            "definitions": {
+                "MyType": {
+                    "type": "object",
+                    "properties": {
+                        "a": {
+                            "type": "string"
+                        }
+                    },
+                    "required": [
+                        "a"
+                    ],
+                    "additionalProperties": false
+                }
+            }
+        })""",
+        R"""(
+            MyType ::= "{" space MyType-a-kv "}" space
+            MyType-a-kv ::= "\"a\"" space ":" space string
+            root ::= MyType
+            space ::= " "?
+            string ::= "\"" (
+                [^"\\] |
+                "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
+            )* "\"" space
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "anyOf",
+        R"""({
+            "anyOf": [
+                {"$ref": "#/definitions/foo"},
+                {"$ref": "#/definitions/bar"}
+            ],
+            "definitions": {
+                "foo": {
+                    "properties": {"a": {"type": "number"}}
+                },
+                "bar": {
+                    "properties": {"b": {"type": "number"}}
+                }
+            },
+            "type": "object"
+        })""",
+        R"""(
+            alternative-0 ::= foo
+            alternative-1 ::= bar
+            bar ::= "{" space (bar-b-kv )? "}" space
+            bar-b-kv ::= "\"b\"" space ":" space number
+            foo ::= "{" space (foo-a-kv )? "}" space
+            foo-a-kv ::= "\"a\"" space ":" space number
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= alternative-0 | alternative-1
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
+        R"""({
+            "allOf": [
+                {"$ref": "#/definitions/foo"},
+                {"$ref": "#/definitions/bar"},
+                {
+                    "anyOf": [
+                        {"$ref": "#/definitions/baz"},
+                        {"$ref": "#/definitions/bam"}
+                    ]
+                }
+            ],
+            "definitions": {
+                "foo": {
+                    "properties": {"a": {"type": "number"}}
+                },
+                "bar": {
+                    "properties": {"b": {"type": "number"}}
+                },
+                "bam": {
+                    "properties": {"c": {"type": "number"}}
+                },
+                "baz": {
+                    "properties": {"d": {"type": "number"}}
+                }
+            },
+            "type": "object"
+        })""",
+        R"""(
+            a-kv ::= "\"a\"" space ":" space number
+            b-kv ::= "\"b\"" space ":" space number
+            c-kv ::= "\"c\"" space ":" space number
+            d-kv ::= "\"d\"" space ":" space number
+            d-rest ::= ( "," space c-kv )?
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
+            space ::= " "?
+        )"""
+    });
+
+    test({
+        SUCCESS,
+        "conflicting names",
+        R"""({
+            "type": "object",
+            "properties": {
+                "number": {
+                    "type": "object",
+                    "properties": {
+                        "number": {
+                            "type": "object",
+                            "properties": {
+                                "root": {
+                                    "type": "number"
+                                }
+                            },
+                            "required": [
+                                "root"
+                            ],
+                            "additionalProperties": false
+                        }
+                    },
+                    "required": [
+                        "number"
+                    ],
+                    "additionalProperties": false
+                }
+            },
+            "required": [
+                "number"
+            ],
+            "additionalProperties": false,
+            "definitions": {}
+        })""",
+        R"""(
+            number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
+            number- ::= "{" space number-number-kv "}" space
+            number-kv ::= "\"number\"" space ":" space number-
+            number-number ::= "{" space number-number-root-kv "}" space
+            number-number-kv ::= "\"number\"" space ":" space number-number
+            number-number-root-kv ::= "\"root\"" space ":" space number
+            root ::= "{" space number-kv "}" space
+            space ::= " "?
+        )"""
+    });
+}
+
+int main() {
+    test_all("C++", [](const TestCase & tc) {
+        try {
+            tc.verify(json_schema_to_grammar(nlohmann::json::parse(tc.schema)));
+            tc.verify_status(SUCCESS);
+        } catch (const std::runtime_error & ex) {
+            fprintf(stderr, "Error: %s\n", ex.what());
+            tc.verify_status(FAILURE);
+        }
+    });
+    test_all("Python", [](const TestCase & tc) {
+        write("test-json-schema-input.tmp", tc.schema);
+        tc.verify_status(std::system(
+            "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
+        tc.verify(read("test-grammar-output.tmp"));
+    });
+    test_all("JavaScript", [](const TestCase & tc) {
+        write("test-json-schema-input.tmp", tc.schema);
+        tc.verify_status(std::system(
+            "node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
+        tc.verify(read("test-grammar-output.tmp"));
+    });
+
+    test_all("Check Expectations Validity", [](const TestCase & tc) {
+        if (tc.expected_status == SUCCESS) {
+            tc.verify_expectation_parseable();
+        }
+    });
+}