summaryrefslogtreecommitdiff
path: root/examples/quantize/quantize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'examples/quantize/quantize.cpp')
-rw-r--r--examples/quantize/quantize.cpp103
1 files changed, 59 insertions, 44 deletions
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index da1850df..432cc2b4 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -8,7 +8,6 @@
#include <unordered_map>
#include <fstream>
#include <cmath>
-#include <algorithm>
struct quant_option {
std::string name;
@@ -53,6 +52,10 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
{ "COPY", LLAMA_FTYPE_ALL_F32, "only copy tensors, no quantizing", },
};
+static const char * const LLM_KV_QUANTIZE_IMATRIX_FILE = "quantize.imatrix.file";
+static const char * const LLM_KV_QUANTIZE_IMATRIX_DATASET = "quantize.imatrix.dataset";
+static const char * const LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES = "quantize.imatrix.entries_count";
+static const char * const LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS = "quantize.imatrix.chunks_count";
static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std::string & ftype_str_out) {
std::string ftype_str;
@@ -113,7 +116,7 @@ static void usage(const char * executable) {
exit(1);
}
-static void load_imatrix(const std::string & imatrix_file, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
+static int load_imatrix(const std::string & imatrix_file, std::string & imatrix_dataset, std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
std::ifstream in(imatrix_file.c_str(), std::ios::binary);
if (!in) {
printf("%s: failed to open %s\n",__func__, imatrix_file.c_str());
@@ -160,18 +163,33 @@ static void load_imatrix(const std::string & imatrix_file, std::unordered_map<st
printf("%s: loaded data (size = %6d, ncall = %6d) for '%s'\n", __func__, int(e.size()), ncall, name.c_str());
}
}
- printf("%s: loaded %d importance matrix entries from %s\n", __func__, int(imatrix_data.size()), imatrix_file.c_str());
+
+ // latest imatrix version contains the dataset filename at the end of the file
+ int m_last_call = 0;
+ if (in.peek() != EOF) {
+ in.read((char *)&m_last_call, sizeof(m_last_call));
+ int dataset_len;
+ in.read((char *)&dataset_len, sizeof(dataset_len));
+ std::vector<char> dataset_as_vec(dataset_len);
+ in.read(dataset_as_vec.data(), dataset_len);
+ imatrix_dataset.assign(dataset_as_vec.begin(), dataset_as_vec.end());
+ printf("%s: imatrix dataset='%s'\n", __func__, imatrix_dataset.c_str());
+ }
+ printf("%s: loaded %d importance matrix entries from %s computed on %d chunks\n", __func__, int(imatrix_data.size()), imatrix_file.c_str(), m_last_call);
+ return m_last_call;
}
-static void prepare_imatrix(const std::string & imatrix_file,
+static int prepare_imatrix(const std::string & imatrix_file,
+ std::string & imatrix_dataset,
const std::vector<std::string> & included_weights,
const std::vector<std::string> & excluded_weights,
std::unordered_map<std::string, std::vector<float>> & imatrix_data) {
+ int m_last_call = -1;
if (!imatrix_file.empty()) {
- load_imatrix(imatrix_file, imatrix_data);
+ m_last_call = load_imatrix(imatrix_file, imatrix_dataset, imatrix_data);
}
if (imatrix_data.empty()) {
- return;
+ return m_last_call;
}
if (!excluded_weights.empty()) {
for (auto& name : excluded_weights) {
@@ -197,6 +215,7 @@ static void prepare_imatrix(const std::string & imatrix_file,
if (!imatrix_data.empty()) {
printf("%s: have %d importance matrix entries\n", __func__, int(imatrix_data.size()));
}
+ return m_last_call;
}
static ggml_type parse_ggml_type(const char * arg) {
@@ -211,43 +230,6 @@ static ggml_type parse_ggml_type(const char * arg) {
return result;
}
-static bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
- const char* sep = strchr(data, '=');
- if (sep == nullptr || sep - data >= 128) {
- fprintf(stderr, "%s: malformed KV override '%s'\n", __func__, data);
- return false;
- }
- llama_model_kv_override kvo;
- std::strncpy(kvo.key, data, sep - data);
- kvo.key[sep - data] = 0;
- sep++;
- if (strncmp(sep, "int:", 4) == 0) {
- sep += 4;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
- kvo.int_value = std::atol(sep);
- } else if (strncmp(sep, "float:", 6) == 0) {
- sep += 6;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
- kvo.float_value = std::atof(sep);
- } else if (strncmp(sep, "bool:", 5) == 0) {
- sep += 5;
- kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
- if (std::strcmp(sep, "true") == 0) {
- kvo.bool_value = true;
- } else if (std::strcmp(sep, "false") == 0) {
- kvo.bool_value = false;
- } else {
- fprintf(stderr, "%s: invalid boolean value for KV override '%s'\n", __func__, data);
- return false;
- }
- } else {
- fprintf(stderr, "%s: invalid type for KV override '%s'\n", __func__, data);
- return false;
- }
- overrides.emplace_back(std::move(kvo));
- return true;
-}
-
int main(int argc, char ** argv) {
if (argc < 3) {
usage(argv[0]);
@@ -316,10 +298,43 @@ int main(int argc, char ** argv) {
usage(argv[0]);
}
+ std::string imatrix_dataset;
std::unordered_map<std::string, std::vector<float>> imatrix_data;
- prepare_imatrix(imatrix_file, included_weights, excluded_weights, imatrix_data);
+ int m_last_call = prepare_imatrix(imatrix_file, imatrix_dataset, included_weights, excluded_weights, imatrix_data);
if (!imatrix_data.empty()) {
params.imatrix = &imatrix_data;
+ {
+ llama_model_kv_override kvo;
+ std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_FILE);
+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+ strncpy(kvo.val_str, imatrix_file.c_str(), 127);
+ kvo.val_str[127] = '\0';
+ kv_overrides.emplace_back(std::move(kvo));
+ }
+ if (!imatrix_dataset.empty()) {
+ llama_model_kv_override kvo;
+ std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_DATASET);
+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
+ strncpy(kvo.val_str, imatrix_dataset.c_str(), 127);
+ kvo.val_str[127] = '\0';
+ kv_overrides.emplace_back(std::move(kvo));
+ }
+
+ {
+ llama_model_kv_override kvo;
+ std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_ENTRIES);
+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+ kvo.val_i64 = imatrix_data.size();
+ kv_overrides.emplace_back(std::move(kvo));
+ }
+
+ if (m_last_call > 0) {
+ llama_model_kv_override kvo;
+ std::strcpy(kvo.key, LLM_KV_QUANTIZE_IMATRIX_N_CHUNKS);
+ kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
+ kvo.val_i64 = m_last_call;
+ kv_overrides.emplace_back(std::move(kvo));
+ }
}
if (!kv_overrides.empty()) {
kv_overrides.emplace_back();