summary refs log tree commit diff
diff options
context:
space:
mode:
author slaren <slarengh@gmail.com> 2024-04-10 17:24:14 +0200
committer GitHub <noreply@github.com> 2024-04-10 17:24:14 +0200
commit 4f407a0a353dae4726c74cc33250b623a4911dd7 (patch)
tree 8d2b543e08fc20853fdd7a865936ace54d23455a
parent 65c64dc36f9bca5b3f100614cdd02bf12d6b3e49 (diff)
llama : add model types for mixtral (#6589)
-rw-r--r-- llama.cpp 30
1 files changed, 21 insertions, 9 deletions
diff --git a/llama.cpp b/llama.cpp
index 8dbf4748..9ad9b10c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1705,6 +1705,8 @@ enum e_model {
MODEL_MEDIUM,
MODEL_LARGE,
MODEL_XL,
+ MODEL_8x7B,
+ MODEL_8x22B,
};
static const size_t kiB = 1024;
@@ -3558,6 +3560,8 @@ static const char * llama_model_type_name(e_model type) {
case MODEL_MEDIUM: return "0.4B";
case MODEL_LARGE: return "0.8B";
case MODEL_XL: return "1.5B";
+ case MODEL_8x7B: return "8x7B";
+ case MODEL_8x22B: return "8x22B";
default: return "?B";
}
}
@@ -3672,15 +3676,23 @@ static void llm_load_hparams(
{
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
- switch (hparams.n_layer) {
- case 22: model.type = e_model::MODEL_1B; break;
- case 26: model.type = e_model::MODEL_3B; break;
- case 32: model.type = e_model::MODEL_7B; break;
- case 40: model.type = e_model::MODEL_13B; break;
- case 48: model.type = e_model::MODEL_34B; break;
- case 60: model.type = e_model::MODEL_30B; break;
- case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break;
- default: model.type = e_model::MODEL_UNKNOWN;
+ if (hparams.n_expert == 8) {
+ switch (hparams.n_layer) {
+ case 32: model.type = e_model::MODEL_8x7B; break;
+ case 56: model.type = e_model::MODEL_8x22B; break;
+ default: model.type = e_model::MODEL_UNKNOWN;
+ }
+ } else {
+ switch (hparams.n_layer) {
+ case 22: model.type = e_model::MODEL_1B; break;
+ case 26: model.type = e_model::MODEL_3B; break;
+ case 32: model.type = e_model::MODEL_7B; break;
+ case 40: model.type = e_model::MODEL_13B; break;
+ case 48: model.type = e_model::MODEL_34B; break;
+ case 60: model.type = e_model::MODEL_30B; break;
+ case 80: model.type = hparams.n_head == hparams.n_head_kv ? e_model::MODEL_65B : e_model::MODEL_70B; break;
+ default: model.type = e_model::MODEL_UNKNOWN;
+ }
}
} break;
case LLM_ARCH_MINICPM: