From 69ff61397d2b7b550dcdda4a35b35128892408b0 Mon Sep 17 00:00:00 2001
From: Michael Podvitskiy <podvitskiymichael@gmail.com>
Date: Thu, 14 Mar 2024 17:21:56 +0100
Subject: llama : support models without vocabulary (#5798)

* additional methods to read model and ctx parameters

* vocab size as a part of a model metadata

* models without vocabulary, convert.py part

* models without vocabulary, llama.cpp part

* PR clean up

* converter script fixes

* llama_vocab_type update (renamed the new key)

* pr review fixes

* revert function renaming

* one more NoVocab assert
---
 gguf-py/gguf/constants.py   | 2 ++
 gguf-py/gguf/gguf_writer.py | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'gguf-py')

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 99f71f0a..2d7cf16c 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -32,6 +32,7 @@ class Keys:
         FILE_TYPE            = "general.file_type"
 
     class LLM:
+        VOCAB_SIZE            = "{arch}.vocab_size"
         CONTEXT_LENGTH        = "{arch}.context_length"
         EMBEDDING_LENGTH      = "{arch}.embedding_length"
         BLOCK_COUNT           = "{arch}.block_count"
@@ -752,6 +753,7 @@ KEY_GENERAL_SOURCE_HF_REPO       = Keys.General.SOURCE_HF_REPO
 KEY_GENERAL_FILE_TYPE            = Keys.General.FILE_TYPE
 
 # LLM
+KEY_VOCAB_SIZE            = Keys.LLM.VOCAB_SIZE
 KEY_CONTEXT_LENGTH        = Keys.LLM.CONTEXT_LENGTH
 KEY_EMBEDDING_LENGTH      = Keys.LLM.EMBEDDING_LENGTH
 KEY_BLOCK_COUNT           = Keys.LLM.BLOCK_COUNT
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 4d389be9..81b2eb88 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -321,6 +321,9 @@ class GGUFWriter:
         self.data_alignment = alignment
         self.add_uint32(Keys.General.ALIGNMENT, alignment)
 
+    def add_vocab_size(self, size: int) -> None:
+        self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size)
+
     def add_context_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length)
 
-- 
cgit v1.2.3