summaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp')
-rw-r--r--llama.cpp72
1 files changed, 53 insertions, 19 deletions
diff --git a/llama.cpp b/llama.cpp
index fa7c022f..8ca9650d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2120,7 +2120,7 @@ struct llama_vocab {
id special_prefix_id = -1;
id special_suffix_id = -1;
id special_middle_id = -1;
- id special_eot_id = -1;
+ id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
bool add_space_prefix = true;
@@ -3770,7 +3770,7 @@ static void llm_load_hparams(
switch (hparams.n_layer) {
case 22: model.type = e_model::MODEL_1B; break;
case 26: model.type = e_model::MODEL_3B; break;
- case 32: model.type = e_model::MODEL_7B; break;
+ case 32: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_7B : e_model::MODEL_8B; break; // LLaMa 8B v3 uses GQA
case 40: model.type = e_model::MODEL_13B; break;
case 48: model.type = e_model::MODEL_34B; break;
case 60: model.type = e_model::MODEL_30B; break;
@@ -4179,7 +4179,10 @@ static void llm_load_vocab(
vocab.special_prefix_id = 67;
vocab.special_suffix_id = 69;
vocab.special_middle_id = 68;
- vocab.special_eot_id = 70;
+ // TODO: this is not EOT, it is "file separator" token, needs fix
+ // https://huggingface.co/google/codegemma-7b-it/blob/9b1d9231388358c04d90bd003458f5070d97db44/tokenizer_config.json#L565-L572
+ //vocab.special_eot_id = 70;
+ vocab.special_eot_id = 107;
}
}
@@ -4308,6 +4311,7 @@ static void llm_load_vocab(
{ LLM_KV_TOKENIZER_MIDDLE_ID, vocab.special_middle_id },
{ LLM_KV_TOKENIZER_EOT_ID, vocab.special_eot_id },
};
+
for (const auto & it : special_token_types) {
const std::string & key = kv(std::get<0>(it));
int32_t & id = std::get<1>(it);
@@ -4322,7 +4326,6 @@ static void llm_load_vocab(
} else {
id = new_id;
}
-
}
// Handle add_bos_token and add_eos_token
@@ -4336,6 +4339,27 @@ static void llm_load_vocab(
vocab.special_add_eos = int(temp);
}
}
+
+ // find EOT token: "<|eot_id|>", "<|im_emd|>", "<end_of_turn>", etc.
+ //
+ // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOT_ID
+ // for now, we apply this workaround to find the EOT token based on its text
+ if (vocab.special_eot_id == -1) {
+ for (const auto & t : vocab.token_to_id) {
+ if (
+ // TODO: gemma "<end_of_turn>" is exported as a normal token, so the following check does not work
+ // need to fix convert script
+ //vocab.id_to_token[t.second].type == LLAMA_TOKEN_TYPE_CONTROL &&
+ (t.first == "<|eot_id|>" ||
+ t.first == "<|im_emd|>" ||
+ t.first == "<end_of_turn>"
+ )
+ ) {
+ vocab.special_eot_id = t.second;
+ break;
+ }
+ }
+ }
}
// build special tokens cache
@@ -4498,14 +4522,19 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, model.name.c_str());
// special tokens
- if (vocab.special_bos_id != -1) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].text.c_str() ); }
- if (vocab.special_eos_id != -1) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].text.c_str() ); }
- if (vocab.special_unk_id != -1) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].text.c_str() ); }
- if (vocab.special_sep_id != -1) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].text.c_str() ); }
- if (vocab.special_pad_id != -1) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].text.c_str() ); }
- if (vocab.special_cls_id != -1) { LLAMA_LOG_INFO( "%s: CLS token = %d '%s'\n", __func__, vocab.special_cls_id, vocab.id_to_token[vocab.special_cls_id].text.c_str() ); }
- if (vocab.special_mask_id != -1) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, vocab.special_mask_id, vocab.id_to_token[vocab.special_mask_id].text.c_str() ); }
- if (vocab.linefeed_id != -1) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, vocab.linefeed_id, vocab.id_to_token[vocab.linefeed_id].text.c_str() ); }
+ if (vocab.special_bos_id != -1) { LLAMA_LOG_INFO( "%s: BOS token = %d '%s'\n", __func__, vocab.special_bos_id, vocab.id_to_token[vocab.special_bos_id].text.c_str() ); }
+ if (vocab.special_eos_id != -1) { LLAMA_LOG_INFO( "%s: EOS token = %d '%s'\n", __func__, vocab.special_eos_id, vocab.id_to_token[vocab.special_eos_id].text.c_str() ); }
+ if (vocab.special_unk_id != -1) { LLAMA_LOG_INFO( "%s: UNK token = %d '%s'\n", __func__, vocab.special_unk_id, vocab.id_to_token[vocab.special_unk_id].text.c_str() ); }
+ if (vocab.special_sep_id != -1) { LLAMA_LOG_INFO( "%s: SEP token = %d '%s'\n", __func__, vocab.special_sep_id, vocab.id_to_token[vocab.special_sep_id].text.c_str() ); }
+ if (vocab.special_pad_id != -1) { LLAMA_LOG_INFO( "%s: PAD token = %d '%s'\n", __func__, vocab.special_pad_id, vocab.id_to_token[vocab.special_pad_id].text.c_str() ); }
+ if (vocab.special_cls_id != -1) { LLAMA_LOG_INFO( "%s: CLS token = %d '%s'\n", __func__, vocab.special_cls_id, vocab.id_to_token[vocab.special_cls_id].text.c_str() ); }
+ if (vocab.special_mask_id != -1) { LLAMA_LOG_INFO( "%s: MASK token = %d '%s'\n", __func__, vocab.special_mask_id, vocab.id_to_token[vocab.special_mask_id].text.c_str() ); }
+
+ if (vocab.linefeed_id != -1) { LLAMA_LOG_INFO( "%s: LF token = %d '%s'\n", __func__, vocab.linefeed_id, vocab.id_to_token[vocab.linefeed_id].text.c_str() ); }
+ if (vocab.special_prefix_id != -1) { LLAMA_LOG_INFO( "%s: PRE token = %d '%s'\n", __func__, vocab.special_prefix_id, vocab.id_to_token[vocab.special_prefix_id].text.c_str() ); }
+ if (vocab.special_suffix_id != -1) { LLAMA_LOG_INFO( "%s: SUF token = %d '%s'\n", __func__, vocab.special_suffix_id, vocab.id_to_token[vocab.special_suffix_id].text.c_str() ); }
+ if (vocab.special_middle_id != -1) { LLAMA_LOG_INFO( "%s: MID token = %d '%s'\n", __func__, vocab.special_middle_id, vocab.id_to_token[vocab.special_middle_id].text.c_str() ); }
+ if (vocab.special_eot_id != -1) { LLAMA_LOG_INFO( "%s: EOT token = %d '%s'\n", __func__, vocab.special_eot_id, vocab.id_to_token[vocab.special_eot_id].text.c_str() ); }
}
// Returns false if cancelled by progress_callback
@@ -13268,16 +13297,14 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
GGML_ASSERT(ctx);
const int64_t t_start_sample_us = ggml_time_us();
- bool allow_eos = false;
+ bool allow_eog = false;
for (const auto & stack : grammar->stacks) {
if (stack.empty()) {
- allow_eos = true;
+ allow_eog = true;
break;
}
}
- const llama_token eos = llama_token_eos(&ctx->model);
-
std::vector<std::pair<std::vector<uint32_t>, llama_partial_utf8>> candidates_decoded;
candidates_decoded.reserve(candidates->size);
std::vector<llama_grammar_candidate> candidates_grammar;
@@ -13286,8 +13313,8 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
for (size_t i = 0; i < candidates->size; ++i) {
const llama_token id = candidates->data[i].id;
const std::string piece = llama_token_to_piece(ctx, id);
- if (id == eos) {
- if (!allow_eos) {
+ if (llama_token_is_eog(&ctx->model, id)) {
+ if (!allow_eog) {
candidates->data[i].logit = -INFINITY;
}
} else if (piece.empty() || piece[0] == 0) {
@@ -13476,7 +13503,7 @@ llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_arra
void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar * grammar, llama_token token) {
const int64_t t_start_sample_us = ggml_time_us();
- if (token == llama_token_eos(&ctx->model)) {
+ if (llama_token_is_eog(&ctx->model, token)) {
for (const auto & stack : grammar->stacks) {
if (stack.empty()) {
return;
@@ -16880,6 +16907,13 @@ llama_token_type llama_token_get_type(const struct llama_model * model, llama_to
return model->vocab.id_to_token[token].type;
}
+bool llama_token_is_eog(const struct llama_model * model, llama_token token) {
+ return token != -1 && (
+ token == llama_token_eos(model) ||
+ token == llama_token_eot(model)
+ );
+}
+
llama_token llama_token_bos(const struct llama_model * model) {
return model->vocab.special_bos_id;
}