summary | refs | log | tree | commit | diff
path: root/ggml/include
diff options
context:
space:
mode:
author Kawrakow <iwankawrakow@gmail.com> 2024-10-04 11:22:57 +0300
committer GitHub <noreply@github.com> 2024-10-04 11:22:57 +0300
commit 0bf4d99774aa3b6d00ef564acbc4dc211e45db33 (patch)
tree 01ee9fc68059f4eaa24a264576797af0431e42b7 /ggml/include
parent ba392802ef41d7e77092a0f7102fdacf73aaeacf (diff)
Do not quantize activations if not necessary (#79)
* Do not quantize activations if not necessary
* Do not quantize activations if not necessary also for MoE models

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/include')
-rw-r--r-- ggml/include/ggml.h | 1
1 file changed, 1 insertion, 0 deletions
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index b1aebd21..13aaeafb 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -654,6 +654,7 @@ extern "C" {
// since https://github.com/ggerganov/ggml/issues/287
struct ggml_cplan {
size_t work_size; // size of work buffer, calculated by `ggml_graph_plan()`
+ size_t q_size;
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
int n_threads;