summary | refs | log | tree | commit | diff
path: root/ggml-vulkan.cpp
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2024-03-09 15:53:59 +0200
committer: GitHub <noreply@github.com> 2024-03-09 15:53:59 +0200
commit: 5b09797321430f08caf0473143a962916ab2ea89 (patch)
tree: b6077558c8b25bbe073736c0f13c6ef169842300 /ggml-vulkan.cpp
parent: 97c09585d65a95864773b4d25d66d0f708baf38d (diff)
ggml : remove old quantization functions (#5942)
* ggml : remove old quantization functions ggml-ci
* ggml : simplify ggml_quantize_chunk ggml-ci
* ggml : restrict correctness ggml-ci
* ggml : remove hist data from the quantization API ggml-ci
* tests : remove hist usage in test-backend-ops ggml-ci
* vulkan : remove hist and fix typo
Diffstat (limited to 'ggml-vulkan.cpp')
-rw-r--r-- ggml-vulkan.cpp 40
1 file changed, 1 insertion, 39 deletions
diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp
index 5a1b3f47..d41aa7d2 100644
--- a/ggml-vulkan.cpp
+++ b/ggml-vulkan.cpp
@@ -4102,45 +4102,7 @@ static void ggml_vk_test_transfer(ggml_backend_vk_context * ctx, size_t ne, bool
}
static void ggml_vk_quantize_data(const float * from, void * to, size_t ne, ggml_type quant) {
- std::vector<int64_t> hist_cur(1 << 4, 0);
-
- switch(quant) {
- case GGML_TYPE_F32:
- memcpy(to, from, sizeof(float) * ne);
- break;
- case GGML_TYPE_Q4_0:
- ggml_quantize_q4_0(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q4_1:
- ggml_quantize_q4_1(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q5_0:
- ggml_quantize_q5_0(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q5_1:
- ggml_quantize_q5_1(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q8_0:
- ggml_quantize_q8_0(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q2_K:
- ggml_quantize_q2_K(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q3_K:
- ggml_quantize_q3_K(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q4_K:
- ggml_quantize_q4_K(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q5_K:
- ggml_quantize_q5_K(from, to, ne, ne, hist_cur.data());
- break;
- case GGML_TYPE_Q6_K:
- ggml_quantize_q6_K(from, to, ne, ne, hist_cur.data());
- break;
- default:
- GGML_ASSERT(false);
- }
+ ggml_quantize_chunk(quant, from, to, 0, 1, ne, nullptr);
}
static void ggml_vk_test_dequant(ggml_backend_vk_context * ctx, size_t ne, ggml_type quant) {