Diffstat (limited to 'llama.cpp')
 llama.cpp | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 81829b13..d28382f7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8747,8 +8747,6 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     // placeholder for the meta data
     ::zeros(fout, meta_size);

-    std::set<ggml_type> used_iq2;
-
     for (int i = 0; i < ml.n_tensors; ++i) {
         struct ggml_tensor * tensor = ml.get_tensor_meta(i);
@@ -8801,11 +8799,6 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         } else {
             const size_t nelements = ggml_nelements(tensor);

-            if ((new_type == GGML_TYPE_IQ2_XXS || new_type == GGML_TYPE_IQ2_XS) && used_iq2.find(new_type) == used_iq2.end()) {
-                ggml_init_iq2_quantization(new_type);
-                used_iq2.insert(new_type);
-            }
-
             const float * imatrix = nullptr;
             if (imatrix_data) {
                 auto it = imatrix_data->find(tensor->name);
@@ -8931,10 +8924,6 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     fout.close();

-    for (auto type : used_iq2) {
-        ggml_deinit_iq2_quantization(type);
-    }
-
     gguf_free(ctx_out);

     LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
@@ -9342,6 +9331,7 @@ void llama_backend_free(void) {
 #ifdef GGML_USE_MPI
     ggml_mpi_backend_free();
 #endif
+    ggml_quantize_free();
 }

 int64_t llama_time_us(void) {
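
Taken together, the diff drops the manual per-type IQ2 init/deinit bookkeeping from llama_model_quantize_internal and instead releases quantization resources once, in llama_backend_free(), via ggml_quantize_free(). A minimal sketch of the resulting caller-side lifecycle follows, assuming the llama.h API of this vintage (llama_backend_init(bool numa), llama_model_quantize_default_params(), llama_model_quantize()); the file paths are placeholders:

    // Sketch: quantizing a model to IQ2_XS after this change.
    // Assumptions (not shown in the diff): llama_backend_init(bool) and
    // llama_model_quantize() as declared in llama.h at this point in time;
    // "input.gguf"/"output.gguf" are placeholder paths.
    #include "llama.h"

    int main(void) {
        llama_backend_init(false /* numa */);

        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype = LLAMA_FTYPE_MOSTLY_IQ2_XS;
        // Note: IQ2 types expect an importance matrix (params.imatrix) in
        // practice; omitted here to keep the sketch short.

        // No ggml_init_iq2_quantization() call is needed anymore: ggml now
        // sets up the IQ2 lookup tables lazily during quantization.
        const uint32_t rc = llama_model_quantize("input.gguf", "output.gguf", &params);

        // llama_backend_free() now also runs ggml_quantize_free(), releasing
        // those lazily allocated tables in one place.
        llama_backend_free();

        return rc == 0 ? 0 : 1;
    }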