llama : offload to RPC in addition to other backends (#7640)

* llama : offload to RPC in addition to other backends
* - fix copy_tensor being called on the src buffer instead of the dst buffer
  - always initialize views in the view_src buffer
  - add RPC backend to Makefile build
  - add endpoint to all RPC object names
* add rpc-server to Makefile
* Update llama.cpp

Co-authored-by: slaren <slarengh@gmail.com>

---------

Co-authored-by: slaren <slarengh@gmail.com>
author: Radoslav Gerganov <rgerganov@gmail.com> 2024-06-03 20:03:26 +0300
committer: GitHub <noreply@github.com> 2024-06-03 20:03:26 +0300
commit: bde7cd3cd949c1a85d3a199498ac98e78039d46f (patch)
tree: 738364c20669cea19d3ca5df2baec036429b9b40 /ggml-rpc.cpp
parent: a5735e4426b19a3ebd0c653ad8ac01420458ee95 (diff)
1 files changed, 2 insertions, 2 deletions
diff --git a/ggml-rpc.cpp b/ggml-rpc.cpp
index 49a20df4..679ce4f2 100644
--- a/ggml-rpc.cpp
+++ b/ggml-rpc.cpp
@@ -491,7 +491,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer
     if (remote_ptr != 0) {
         ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft,
             ggml_backend_rpc_buffer_interface,
-            new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC"},
+            new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC[" + std::string(buft_ctx->endpoint) + "]"},
             remote_size);
         return buffer;
     } else {
@@ -692,7 +692,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const
 GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint) {
     ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context {
         /* .endpoint  = */ endpoint,
-        /* .name      = */ "RPC",
+        /* .name      = */ "RPC[" + std::string(endpoint) + "]",
     };
 
     ggml_backend_t backend = new ggml_backend {
author	Radoslav Gerganov <rgerganov@gmail.com>	2024-06-03 20:03:26 +0300
committer	GitHub <noreply@github.com>	2024-06-03 20:03:26 +0300
commit	bde7cd3cd949c1a85d3a199498ac98e78039d46f (patch)
tree	738364c20669cea19d3ca5df2baec036429b9b40 /ggml-rpc.cpp
parent	a5735e4426b19a3ebd0c653ad8ac01420458ee95 (diff)