diff options
author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-07-27 07:55:01 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-27 07:55:01 +0200 |
commit | 154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch) | |
tree | 81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /tests/test-llama-grammar.cpp | |
parent | 0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff) |
Merge mainline llama.cpp (#3)
* Merging mainline - WIP
* Merging mainline - WIP
AVX2 and CUDA appear to work.
CUDA performance seems slightly (~1-2%) lower as it is so often
the case with llama.cpp/ggml after some "improvements" have been made.
* Merging mainline - fix Metal
* Remove check
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'tests/test-llama-grammar.cpp')
-rw-r--r-- | tests/test-llama-grammar.cpp | 24 |
1 files changed, 15 insertions, 9 deletions
diff --git a/tests/test-llama-grammar.cpp b/tests/test-llama-grammar.cpp index 27ca4d26..1f3a267b 100644 --- a/tests/test-llama-grammar.cpp +++ b/tests/test-llama-grammar.cpp @@ -2,10 +2,12 @@ #undef NDEBUG #endif -#include "llama.cpp" // TODO: not great +#define LLAMA_API_INTERNAL +#include "llama.h" #include "grammar-parser.h" #include <cassert> +#include <stdexcept> int main() { @@ -112,10 +114,14 @@ int main() } } - llama_grammar *grammar = NULL; + llama_grammar * grammar = NULL; std::vector<const llama_grammar_element *> grammar_rules(parsed_grammar.c_rules()); - grammar = llama_grammar_init( - grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); + + grammar = llama_grammar_init(grammar_rules.data(), grammar_rules.size(), parsed_grammar.symbol_ids.at("root")); + if (grammar == nullptr) + { + throw std::runtime_error("Failed to initialize llama_grammar"); + } std::vector<std::vector<llama_grammar_element>> expected_stacks = { { @@ -168,7 +174,7 @@ int main() }}; auto index = 0; - for (auto stack : grammar->stacks) + for (auto stack : llama_grammar_get_stacks(grammar)) { // compare stack to expected_stack for (uint32_t i = 0; i < stack.size(); i++) @@ -370,13 +376,13 @@ int main() }, }; - std::vector<llama_grammar_candidate> rejects = llama_grammar_reject_candidates_for_stack(grammar->rules, grammar->stacks[0], next_candidates); + std::vector<llama_grammar_candidate> rejects = llama_grammar_reject_candidates_for_stack(llama_grammar_get_rules(grammar), llama_grammar_get_stacks(grammar)[0], next_candidates); std::vector<std::vector<llama_grammar_candidate>> all_rejects; - for (std::size_t count = 0; count < grammar->stacks.size(); ++count) + for (std::size_t count = 0; count < llama_grammar_get_stacks(grammar).size(); ++count) { - rejects = llama_grammar_reject_candidates_for_stack(grammar->rules, grammar->stacks[count], next_candidates); + rejects = llama_grammar_reject_candidates_for_stack(llama_grammar_get_rules(grammar), llama_grammar_get_stacks(grammar)[count], next_candidates); all_rejects.push_back(rejects); } @@ -397,6 +403,6 @@ int main() delete[] candidate.code_points; candidate.code_points = nullptr; } - delete grammar; + llama_grammar_free(grammar); return 0; } |