imatrix : offload to GPU support (#4957)

* backend : add eval callback ggml-ci * backend : group nodes in a single compute when user doesn't need them * backend : clean-up the implementation ggml-ci * simple : do not perform tensor data copy if not needed * simple : fix * imatrix : offload to GPU support * imatrix : fix ggml_mul_mat_id handling ggml-ci * ci : add imatrix test ggml-ci * ci : rearrange output ggml-ci
author: Georgi Gerganov <ggerganov@gmail.com> 2024-01-17 18:46:30 +0200
committer: GitHub <noreply@github.com> 2024-01-17 18:46:30 +0200
commit: ba69bbc84ced580fe4fdb0713ca2d95634325b7a (patch)
tree: 75628460a0036db7ca1105bf3093f76c71ab3eed /ggml.c
parent: 44a1a4a41a4c0b03afaa7d9e06bcbc7cf95aa1e6 (diff)
1 files changed, 0 insertions, 14 deletions
diff --git a/ggml.c b/ggml.c
index d7e01b81..35fd29a9 100644
--- a/ggml.c
+++ b/ggml.c
@@ -394,12 +394,6 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float);
 static void ggml_vec_dot_f32(const int n, float * restrict s, const float * restrict x, const float * restrict y);
 static void ggml_vec_dot_f16(const int n, float * restrict s, ggml_fp16_t * restrict x, ggml_fp16_t * restrict y);
 
-ggml_collect_imatrix_t g_imatrix_collect = NULL;
-
-void ggml_set_imatrix_collection(ggml_collect_imatrix_t imatrix_collect) {
-    g_imatrix_collect = imatrix_collect;
-}
-
 static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
     [GGML_TYPE_I8] = {
         .type_name                = "i8",
@@ -9790,10 +9784,6 @@ static void ggml_compute_forward_mul_mat(
     const int ith = params->ith;
     const int nth = params->nth;
 
-    if (ith == 1 && g_imatrix_collect) {
-        g_imatrix_collect(src0, src1);
-    }
-
     const enum ggml_type type = src0->type;
 
     const bool src1_cont = ggml_is_contiguous(src1);
@@ -10097,10 +10087,6 @@ static void ggml_compute_forward_mul_mat_id(
 
         const struct ggml_tensor * src0_cur = dst->src[cur_a + 2];
 
-        if (ith == 1 && g_imatrix_collect) {
-            g_imatrix_collect(src0_cur, src1);
-        }
-
         const void * wdata    = (src1->type == vec_dot_type) ? src1->data : params->wdata;
         const size_t row_size = ggml_row_size(vec_dot_type, ne10);
author	Georgi Gerganov <ggerganov@gmail.com>	2024-01-17 18:46:30 +0200
committer	GitHub <noreply@github.com>	2024-01-17 18:46:30 +0200
commit	ba69bbc84ced580fe4fdb0713ca2d95634325b7a (patch)
tree	75628460a0036db7ca1105bf3093f76c71ab3eed /ggml.c
parent	44a1a4a41a4c0b03afaa7d9e06bcbc7cf95aa1e6 (diff)