From 0bf4d99774aa3b6d00ef564acbc4dc211e45db33 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Fri, 4 Oct 2024 11:22:57 +0300
Subject: Do not quantize activations if not necessary (#79)

* Do not quantize activations if not necessary

* Do not quantize activations if not necessary also for MoE models

---------

Co-authored-by: Iwan Kawrakow
---
 ggml/include/ggml.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'ggml/include')

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index b1aebd21..13aaeafb 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -654,6 +654,7 @@ extern "C" {
     // since https://github.com/ggerganov/ggml/issues/287
     struct ggml_cplan {
         size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
+        size_t    q_size;
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
 
         int n_threads;
-- 
cgit v1.2.3