From 9b84ae1806cded4d6683c7b810925da5ead40607 Mon Sep 17 00:00:00 2001
From: Clint Herron
Date: Thu, 4 Apr 2024 03:44:28 -0400
Subject: examples : add GBNF validator program (#5948)

* Revising GBNF validator program to be much simpler.

* Changing from streams to using cstdio

* Adding final newline character.
---
 llama.cpp | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index 267ac4cc..9a1c1104 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -11621,28 +11621,10 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
 // grammar - internal
 //
 
-struct llama_partial_utf8 {
-    uint32_t value; // bit value so far (unshifted)
-    int n_remain; // num bytes remaining; -1 indicates invalid sequence
-};
-
-struct llama_grammar {
-    const std::vector<std::vector<llama_grammar_element>> rules;
-    std::vector<std::vector<const llama_grammar_element *>> stacks;
-
-    // buffer for partially generated UTF-8 sequence from accepted tokens
-    llama_partial_utf8 partial_utf8;
-};
-
-struct llama_grammar_candidate {
-    size_t index;
-    const uint32_t * code_points;
-    llama_partial_utf8 partial_utf8;
-};
 
 // Decodes a UTF-8 string which may end in an incomplete sequence. Adds a terminating 0 for use as
 // pointer. If an invalid sequence is encountered, returns `llama_partial_utf8.n_remain == -1`.
-static std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
+std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const std::string & src,
         llama_partial_utf8 partial_start) {
     static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4 };
@@ -11844,7 +11826,7 @@ static void llama_grammar_advance_stack(
 // be positioned at a character range (see `llama_grammar_advance_stack`), and
 // produces the N possible stacks if the given char is accepted at those
 // positions
-static std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
+std::vector<std::vector<const llama_grammar_element *>> llama_grammar_accept(
         const std::vector<std::vector<llama_grammar_element>> & rules,
         const std::vector<std::vector<const llama_grammar_element *>> & stacks,
         const uint32_t chr) {
-- 
cgit v1.2.3