path: root/llama.h
author    Radoslav Gerganov <rgerganov@gmail.com>    2024-05-14 14:27:19 +0300
committer GitHub <noreply@github.com>                2024-05-14 14:27:19 +0300
commit    5e31828d3e35c76ecfee665bc23771a4bec1d130 (patch)
tree      7f5f2edc7c3fc3e7655904316897e32202edd5d6 /llama.h
parent    541600201e6480f54ae09e58d16b154d4b4b331d (diff)
ggml : add RPC backend (#6829)
* ggml : add RPC backend

  The RPC backend proxies all operations to a remote server which runs a
  regular backend (CPU, CUDA, Metal, etc.).

* set TCP_NODELAY
* add CI workflows
* Address review comments
* fix warning
* implement llama_max_devices() for RPC
* Address review comments
* Address review comments
* wrap sockfd into a struct
* implement get_alignment and get_max_size
* add get_device_memory
* fix warning
* win32 support
* add README
* readme : trim trailing whitespace
* Address review comments
* win32 fix
* Address review comments
* fix compile warnings on macos
Diffstat (limited to 'llama.h')
-rw-r--r--  llama.h | 3 +++
1 file changed, 3 insertions(+), 0 deletions(-)
diff --git a/llama.h b/llama.h
index 0b2e708d..612e32c4 100644
--- a/llama.h
+++ b/llama.h
@@ -242,6 +242,9 @@ extern "C" {
         // proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
         const float * tensor_split;
 
+        // comma separated list of RPC servers to use for offloading
+        const char * rpc_servers;
+
         // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
         // If the provided progress_callback returns true, model loading continues.
         // If it returns false, model loading is immediately aborted.
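
For context, a minimal sketch (not part of this commit) of how the new
rpc_servers field might be used through the llama.h loading API of this
period; the model path and server endpoints below are hypothetical:

#include "llama.h"

int main(void) {
    struct llama_model_params params = llama_model_default_params();

    // Hypothetical endpoints: a comma separated "host:port" list,
    // one entry per remote RPC server.
    params.rpc_servers = "192.168.1.10:50052,192.168.1.11:50052";

    // "model.gguf" is a placeholder path; all tensor operations for the
    // offloaded portion of the model are proxied to the listed servers.
    struct llama_model * model = llama_load_model_from_file("model.gguf", params);
    if (model == NULL) {
        return 1;
    }

    llama_free_model(model);
    return 0;
}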