llama : support models without vocabulary (#5798)

* additional methods to read model and ctx parameters * vocab size as a part of a model metadata * models without vocabulary, convert.py part * models without vocabulary, llama.cpp part * PR clean up * converter script fixes * llama_vocab_type update (renamed the new key) * PR review fixes * revert function renaming * one more NoVocab assert
author: Michael Podvitskiy <podvitskiymichael@gmail.com> 2024-03-14 17:21:56 +0100
committer: GitHub <noreply@github.com> 2024-03-14 18:21:56 +0200
commit: 69ff61397d2b7b550dcdda4a35b35128892408b0 (patch)
tree: 70b5bac95a58dcf734e8035c5fd47b92ceb510a1 /gguf-py
parent: 044ec4b2a567f649459ccd20af2f387c784faa51 (diff)
2 files changed, 5 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 99f71f0a..2d7cf16c 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -32,6 +32,7 @@ class Keys:
         FILE_TYPE            = "general.file_type"
 
     class LLM:
+        VOCAB_SIZE            = "{arch}.vocab_size"
         CONTEXT_LENGTH        = "{arch}.context_length"
         EMBEDDING_LENGTH      = "{arch}.embedding_length"
         BLOCK_COUNT           = "{arch}.block_count"
@@ -752,6 +753,7 @@ KEY_GENERAL_SOURCE_HF_REPO       = Keys.General.SOURCE_HF_REPO
 KEY_GENERAL_FILE_TYPE            = Keys.General.FILE_TYPE
 
 # LLM
+KEY_VOCAB_SIZE            = Keys.LLM.VOCAB_SIZE
 KEY_CONTEXT_LENGTH        = Keys.LLM.CONTEXT_LENGTH
 KEY_EMBEDDING_LENGTH      = Keys.LLM.EMBEDDING_LENGTH
 KEY_BLOCK_COUNT           = Keys.LLM.BLOCK_COUNT
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 4d389be9..81b2eb88 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -321,6 +321,9 @@ class GGUFWriter:
         self.data_alignment = alignment
         self.add_uint32(Keys.General.ALIGNMENT, alignment)
 
+    def add_vocab_size(self, size: int) -> None:
+        self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size)
+
     def add_context_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length)
author	Michael Podvitskiy <podvitskiymichael@gmail.com>	2024-03-14 17:21:56 +0100
committer	GitHub <noreply@github.com>	2024-03-14 18:21:56 +0200
commit	69ff61397d2b7b550dcdda4a35b35128892408b0 (patch)
tree	70b5bac95a58dcf734e8035c5fd47b92ceb510a1 /gguf-py
parent	044ec4b2a567f649459ccd20af2f387c784faa51 (diff)