llama : offload to RPC in addition to other backends (#7640)

* llama : offload to RPC in addition to other backends

* - fix copy_tensor being called on the src buffer instead of the dst buffer
  - always initialize views in the view_src buffer
  - add RPC backend to Makefile build
  - add endpoint to all RPC object names

* add rpc-server to Makefile

* Update llama.cpp

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
author: Radoslav Gerganov <rgerganov@gmail.com> 2024-06-03 20:03:26 +0300
committer: GitHub <noreply@github.com> 2024-06-03 20:03:26 +0300
commit: bde7cd3cd949c1a85d3a199498ac98e78039d46f (patch)
tree: 738364c20669cea19d3ca5df2baec036429b9b40 /ggml-alloc.c
parent: a5735e4426b19a3ebd0c653ad8ac01420458ee95 (diff)
1 files changed, 3 insertions, 3 deletions
diff --git a/ggml-alloc.c b/ggml-alloc.c
index 0146946e..73a3c157 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -750,7 +750,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
                 // this tensor was allocated without ggml-backend
                 return;
             }
-            ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
+            ggml_backend_view_init(tensor);
         }
     } else {
         if (tensor->data == NULL) {
@@ -899,12 +899,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
             if (t->view_src == NULL) {
                 ggml_tallocr_alloc(&tallocr, t);
             } else if (t->buffer == NULL) {
-                ggml_backend_view_init(buffer, t);
+                ggml_backend_view_init(t);
             }
         } else {
             if (t->view_src != NULL && t->buffer == NULL) {
                 // view of a pre-allocated tensor
-                ggml_backend_view_init(buffer, t);
+                ggml_backend_view_init(t);
             }
         }
     }
author	Radoslav Gerganov <rgerganov@gmail.com>	2024-06-03 20:03:26 +0300
committer	GitHub <noreply@github.com>	2024-06-03 20:03:26 +0300
commit	bde7cd3cd949c1a85d3a199498ac98e78039d46f (patch)
tree	738364c20669cea19d3ca5df2baec036429b9b40 /ggml-alloc.c
parent	a5735e4426b19a3ebd0c653ad8ac01420458ee95 (diff)