summary refs log tree commit diff
path: root/llama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp | 18
1 files changed, 15 insertions, 3 deletions
diff --git a/llama.cpp b/llama.cpp
index 1802d231..6ebe85d0 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -406,6 +406,7 @@ enum llama_file_version {
LLAMA_FILE_VERSION_GGMF_V1, // added version field and scores in vocab
LLAMA_FILE_VERSION_GGJT_V1, // added padding
LLAMA_FILE_VERSION_GGJT_V2, // changed quantization format
+ LLAMA_FILE_VERSION_GGJT_V3, // changed Q4 and Q8 quantization format
};
struct llama_file_loader {
@@ -438,6 +439,8 @@ struct llama_file_loader {
file_version = LLAMA_FILE_VERSION_GGJT_V1;
} else if (magic == 'ggjt' && version == 2) {
file_version = LLAMA_FILE_VERSION_GGJT_V2;
+ } else if (magic == 'ggjt' && version == 3) {
+ file_version = LLAMA_FILE_VERSION_GGJT_V3;
} else {
throw format("unknown (magic, version) combination: %08x, %08x; is this really a GGML file?",
magic, version);
@@ -844,7 +847,8 @@ static const char *llama_file_version_name(llama_file_version version) {
case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (pre #1405)";
- case LLAMA_FILE_VERSION_GGJT_V2: return "ggjt v2 (latest)";
+ case LLAMA_FILE_VERSION_GGJT_V2: return "ggjt v2 (pre #1508)";
+ case LLAMA_FILE_VERSION_GGJT_V3: return "ggjt v3 (latest)";
}
return "unknown";
@@ -924,11 +928,19 @@ static void llama_model_load_internal(
fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
}
- if (file_version != LLAMA_FILE_VERSION_GGJT_V2) {
+ if (file_version < LLAMA_FILE_VERSION_GGJT_V2) {
if (hparams.ftype != LLAMA_FTYPE_ALL_F32 &&
hparams.ftype != LLAMA_FTYPE_MOSTLY_F16 &&
hparams.ftype != LLAMA_FTYPE_MOSTLY_Q8_0) {
- throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1305)");
+ throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1405)");
+ }
+ }
+
+ if (file_version < LLAMA_FILE_VERSION_GGJT_V3) {
+ if (hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_0 ||
+ hparams.ftype == LLAMA_FTYPE_MOSTLY_Q4_1 ||
+ hparams.ftype == LLAMA_FTYPE_MOSTLY_Q8_0) {
+ throw format("this format is no longer supported (see https://github.com/ggerganov/llama.cpp/pull/1508)");
}
}