summaryrefslogtreecommitdiff
path: root/examples/grammar-parser.h
diff options
context:
space:
mode:
author: Evan Jones <evan.q.jones@gmail.com> 2023-07-23 23:58:10 -0400
committer: GitHub <noreply@github.com> 2023-07-23 23:58:10 -0400
commit: 84e09a7d8bc4ab6d658b5cd81295ac0add60be78 (patch)
tree: 934c5480d917325ac8baa29f4edfae99137b56bb /examples/grammar-parser.h
parent: 2f9cf974a066ac0e03fbb235d834b01b0164d743 (diff)
llama : add grammar-based sampling (#1773)
* llama, main : constrain sampling to grammar
* allow loading grammar from file
* fix whitespace errors
* handle & print parser errors
* add comments to grammar syntax and allow newlines where unambiguous
* add missing include
* support alternates in root rule
* fix bugs with empty token and EOS
* adjust JSON grammar
* remove swp file
* rewrite ternary expressions

Co-authored-by: Henri Vasserman <henv@hot.ee>

* use struct for grammar elements and add Unicode support
* add unicode escapes
* add inverse char ranges
* only sample full tokens (no peeking or truncation)
* llama : minor style changes blindly applied in online editor - hopefully I didn't break something
* update help text
* add warning message if EOS is disabled

---------

Co-authored-by: Henri Vasserman <henv@hot.ee>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/grammar-parser.h')
-rw-r--r--examples/grammar-parser.h29
1 files changed, 29 insertions, 0 deletions
diff --git a/examples/grammar-parser.h b/examples/grammar-parser.h
new file mode 100644
index 00000000..9037d727
--- /dev/null
+++ b/examples/grammar-parser.h
@@ -0,0 +1,29 @@
+// Implements a parser for an extended Backus-Naur form (BNF), producing the
+// binary context-free grammar format specified by llama.h. Supports character
+// ranges, grouping, and repetition operators. As an example, a grammar for
+// arithmetic might look like:
+//
+// root ::= expr
+// expr ::= term ([-+*/] term)*
+// term ::= num | "(" space expr ")" space
+// num ::= [0-9]+ space
+// space ::= [ \t\n]*
+
+#pragma once
+#include "llama.h"
+#include <vector>
+#include <map>
+#include <cstdint>
+#include <string>
+
+namespace grammar_parser {
+ struct parse_state { // value returned by parse(): a name-to-id symbol map plus the rule table
+ std::map<std::string, uint32_t> symbol_ids; // symbol name -> numeric id (presumably the rule's index in `rules`; confirm in the .cpp)
+ std::vector<std::vector<llama_grammar_element>> rules; // one llama_grammar_element sequence per rule
+
+ std::vector<const llama_grammar_element *> c_rules(); // one raw pointer per rule; presumably points into `rules`, so keep this state alive and unmodified while using it (confirm)
+ };
+
+ parse_state parse(const char * src); // builds a parse_state from the extended-BNF grammar text in `src`; how errors are reported is not visible from this header
+ void print_grammar(FILE * file, const parse_state & state); // writes `state` to `file`; NOTE(review): FILE is not declared by a header included directly here, presumably it comes via llama.h (confirm)
+}