 convert-falcon-hf-to-gguf.py    |  6 ------
 convert-starcoder-hf-to-gguf.py |  6 ------
 llama.cpp                       | 18 ++++++++----------
 3 files changed, 8 insertions(+), 22 deletions(-)
diff --git a/convert-falcon-hf-to-gguf.py b/convert-falcon-hf-to-gguf.py
index 88338d82..95835856 100755
--- a/convert-falcon-hf-to-gguf.py
+++ b/convert-falcon-hf-to-gguf.py
@@ -133,8 +133,6 @@ gguf_writer.add_file_type(ftype)
 print("gguf: get tokenizer metadata")
 
 tokens: list[bytearray] = []
-scores: list[float] = []
-toktypes: list[int] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
@@ -177,12 +175,8 @@ for i in range(vocab_size):
         text = bytearray(pad_token)
 
     tokens.append(text)
-    scores.append(0.0) # dymmy
-    toktypes.append(gguf.TokenType.NORMAL) # dummy
 
 gguf_writer.add_token_list(tokens)
-gguf_writer.add_token_scores(scores)
-gguf_writer.add_token_types(toktypes)
 
 special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
 special_vocab.add_to_gguf(gguf_writer)
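
With scores and token types optional, a converter only has to emit the token list. A minimal sketch of the writer side, using the gguf-py API these scripts already import; the output path, architecture string, and toy vocabulary are illustrative assumptions, not part of this commit:

    import gguf

    # hypothetical output file and toy vocabulary, for illustration only
    gguf_writer = gguf.GGUFWriter("model.gguf", "falcon")
    tokens = [bytearray(b"<s>"), bytearray(b"hello"), bytearray(b"world")]

    gguf_writer.add_tokenizer_model("gpt2")
    gguf_writer.add_token_list(tokens)
    # add_token_scores()/add_token_types() can now be omitted; the loader
    # falls back to score 0.0 and LLAMA_TOKEN_TYPE_NORMAL per token

    gguf_writer.write_header_to_file()
    gguf_writer.write_kv_data_to_file()
    gguf_writer.close()
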
diff --git a/convert-starcoder-hf-to-gguf.py b/convert-starcoder-hf-to-gguf.py
index 331e84e9..48e88a77 100755
--- a/convert-starcoder-hf-to-gguf.py
+++ b/convert-starcoder-hf-to-gguf.py
@@ -117,8 +117,6 @@ gguf_writer.add_file_type(ftype)
 print("gguf: get tokenizer metadata")
 
 tokens: list[bytearray] = []
-scores: list[float] = []
-toktypes: list[int] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
@@ -161,12 +159,8 @@ for i in range(vocab_size):
         text = bytearray(pad_token)
 
     tokens.append(text)
-    scores.append(0.0) # dymmy
-    toktypes.append(gguf.TokenType.NORMAL) # dummy
 
 gguf_writer.add_token_list(tokens)
-gguf_writer.add_token_scores(scores)
-gguf_writer.add_token_types(toktypes)
 
 special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
 special_vocab.add_to_gguf(gguf_writer)
diff --git a/llama.cpp b/llama.cpp
index 14053355..15de7600 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1931,20 +1931,18 @@ static void llm_load_vocab(
         throw std::runtime_error("cannot find tokenizer vocab in model file\n");
     }
 
+    const float * scores = nullptr;
     const int score_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_SCORES).c_str());
-    if (score_idx == -1) {
-        throw std::runtime_error("cannot find tokenizer scores in model file\n");
+    if (score_idx != -1) {
+        scores = (const float * ) gguf_get_arr_data(ctx, score_idx);
     }
 
-    const float * scores = (const float * ) gguf_get_arr_data(ctx, score_idx);
-
+    const int * toktypes = nullptr;
     const int toktype_idx = gguf_find_key(ctx, kv(LLM_KV_TOKENIZER_TOKEN_TYPE).c_str());
-    if (toktype_idx == -1) {
-        throw std::runtime_error("cannot find token type list in GGUF file\n");
+    if (toktype_idx != -1) {
+        toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx);
     }
 
-    const int * toktypes = (const int * ) gguf_get_arr_data(ctx, toktype_idx);
-
     // determine vocab type
     {
         std::string tokenizer_name;
@@ -2012,8 +2010,8 @@ static void llm_load_vocab(
 
         auto & token_data = vocab.id_to_token[i];
         token_data.text  = std::move(word);
-        token_data.score = scores[i];
-        token_data.type  = (llama_token_type) toktypes[i];
+        token_data.score = scores ? scores[i] : 0.0f;
+        token_data.type  = toktypes ? (llama_token_type) toktypes[i] : LLAMA_TOKEN_TYPE_NORMAL;
     }
 
     // determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
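
On the loader side, both arrays are now looked up with gguf_find_key() and simply left null when absent, with per-token defaults applied in the vocab loop. A small Python sketch of the same fallback semantics, assuming scores/toktypes are None when the GGUF keys are missing (the helper name is illustrative, not llama.cpp API):

    TOKEN_TYPE_NORMAL = 1  # mirrors LLAMA_TOKEN_TYPE_NORMAL in llama.h

    def token_metadata(i, scores=None, toktypes=None):
        # None stands in for the nullptr case in llm_load_vocab above
        score = scores[i] if scores is not None else 0.0
        ttype = toktypes[i] if toktypes is not None else TOKEN_TYPE_NORMAL
        return score, ttype
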