sync : ggml (backend v2) (#3912)

* sync : ggml (backend v2) (wip)
* sync : migrate examples and llama.cpp to dynamic graphs (wip)
* sync : update tests + fix max op params to 64
  ggml-ci
* sync : ggml-cuda
  ggml-ci
* llama : fix save/load state context size
  ggml-ci
* sync : try to fix build on tvOS
* sync : pass custom graph sizes in training examples
* sync : update graph copies to new ggml API
* sync : update sync-ggml.sh with new files
* scripts : fix header in sync script
* train : fix context size calculations
* llama : increase inference graph size up to 4096 nodes
* train : allocate grads for backward graphs
* train : allocate grads for gb_tmp
author: Georgi Gerganov <ggerganov@gmail.com> 2023-11-13 14:16:23 +0200
committer: GitHub <noreply@github.com> 2023-11-13 14:16:23 +0200
commit: 4760e7cc0b68570d58f55e8dda469805d1759d0d (patch)
tree: cd983b1f2833f0094c0539f7943703c6787bf12b /ggml-metal.m
parent: bb50a792ec2a49944470c82694fa364345e95170 (diff)
1 files changed, 16 insertions, 9 deletions
diff --git a/ggml-metal.m b/ggml-metal.m
index 78ae4485..c2cda0bf 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -1,5 +1,6 @@
 #import "ggml-metal.h"
 
+#import "ggml-backend-impl.h"
 #import "ggml.h"
 
 #import <Foundation/Foundation.h>
@@ -23,7 +24,7 @@
 
 #define UNUSED(x) (void)(x)
 
-#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
+#define GGML_MAX_CONCUR (2*GGML_DEFAULT_GRAPH_SIZE)
 
 struct ggml_metal_buffer {
     const char * name;
@@ -744,6 +745,20 @@ void ggml_metal_graph_compute(
                 struct ggml_tensor * src1 = gf->nodes[i]->src[1];
                 struct ggml_tensor * dst  = gf->nodes[i];
 
+                switch (dst->op) {
+                    case GGML_OP_NONE:
+                    case GGML_OP_RESHAPE:
+                    case GGML_OP_VIEW:
+                    case GGML_OP_TRANSPOSE:
+                    case GGML_OP_PERMUTE:
+                        {
+                            // noop -> next node
+                        } continue;
+                    default:
+                        {
+                        } break;
+                }
+
                 const int64_t  ne00 = src0 ? src0->ne[0] : 0;
                 const int64_t  ne01 = src0 ? src0->ne[1] : 0;
                 const int64_t  ne02 = src0 ? src0->ne[2] : 0;
@@ -797,14 +812,6 @@ void ggml_metal_graph_compute(
                 //}
 
                 switch (dst->op) {
-                    case GGML_OP_NONE:
-                    case GGML_OP_RESHAPE:
-                    case GGML_OP_VIEW:
-                    case GGML_OP_TRANSPOSE:
-                    case GGML_OP_PERMUTE:
-                        {
-                            // noop
-                        } break;
                     case GGML_OP_CONCAT:
                         {
                             const int64_t nb = ne00;
author	Georgi Gerganov <ggerganov@gmail.com>	2023-11-13 14:16:23 +0200
committer	GitHub <noreply@github.com>	2023-11-13 14:16:23 +0200
commit	4760e7cc0b68570d58f55e8dda469805d1759d0d (patch)
tree	cd983b1f2833f0094c0539f7943703c6787bf12b /ggml-metal.m
parent	bb50a792ec2a49944470c82694fa364345e95170 (diff)