summary | refs | log | tree | commit | diff
path: root/llama.cpp
diff options
context:
space:
mode:
author: slaren <slarengh@gmail.com> 2024-05-10 14:28:01 +0200
committer: GitHub <noreply@github.com> 2024-05-10 14:28:01 +0200
commit: 25c6e82e7a1ad25a42b0894e87d9b5c557409516 (patch)
tree: 06475eb8fa3f183d2f5cb4aea8cf926a3c6ce036 /llama.cpp
parent: 4e3880978f8b1bf546dd4e6f3b524d6b8739c49c (diff)
llama : use n_vocab to differentiate between mistral 7B and llama3 8B (#7200)
Diffstat (limited to 'llama.cpp')
-rw-r--r-- llama.cpp 2
1 files changed, 1 insertions, 1 deletions
diff --git a/llama.cpp b/llama.cpp
index e7b3fd8b..2f1123d4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3860,7 +3860,7 @@ static void llm_load_hparams(
switch (hparams.n_layer) {
case 22: model.type = e_model::MODEL_1B; break;
case 26: model.type = e_model::MODEL_3B; break;
- case 32: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_7B : e_model::MODEL_8B; break; // LLaMa 8B v3 uses GQA
+ case 32: model.type = hparams.n_vocab < 40000 ? e_model::MODEL_7B : e_model::MODEL_8B; break;
case 40: model.type = e_model::MODEL_13B; break;
case 48: model.type = e_model::MODEL_34B; break;
case 60: model.type = e_model::MODEL_30B; break;