From 0bf4d99774aa3b6d00ef564acbc4dc211e45db33 Mon Sep 17 00:00:00 2001
From: Kawrakow <iwankawrakow@gmail.com>
Date: Fri, 4 Oct 2024 11:22:57 +0300
Subject: Do not quantize activations if not necessary (#79)

* Do not quantize activations if not necessary

* Do not quantize activations if not necessary also for MoE models

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
---
 ggml/include/ggml.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'ggml/include')

diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index b1aebd21..13aaeafb 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -654,6 +654,7 @@ extern "C" {
     // since https://github.com/ggerganov/ggml/issues/287
     struct ggml_cplan {
         size_t    work_size; // size of work buffer, calculated by `ggml_graph_plan()`
+        size_t    q_size;
         uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
 
         int n_threads;
-- 
cgit v1.2.3