From 5cb04dbc16d1da38c8fdcc0111b40e67d00dd1c3 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 31 Jan 2024 17:30:17 +0200
Subject: llama : remove LLAMA_MAX_DEVICES and LLAMA_SUPPORTS_GPU_OFFLOAD (#5240)

* llama : remove LLAMA_MAX_DEVICES from llama.h

ggml-ci

* Update llama.cpp

Co-authored-by: slaren

* server : remove LLAMA_MAX_DEVICES

ggml-ci

* llama : remove LLAMA_SUPPORTS_GPU_OFFLOAD

ggml-ci

* train : remove LLAMA_SUPPORTS_GPU_OFFLOAD

* readme : add deprecation notice

* readme : change deprecation notice to "remove" and fix url

* llama : remove gpu includes from llama.h

ggml-ci

---------

Co-authored-by: slaren
---
 examples/batched-bench/batched-bench.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'examples/batched-bench')

diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp
index 7924db26..b52d6845 100644
--- a/examples/batched-bench/batched-bench.cpp
+++ b/examples/batched-bench/batched-bench.cpp
@@ -88,7 +88,7 @@ int main(int argc, char ** argv) {
 
     llama_model_params model_params = llama_model_default_params();
 
-    const std::vector<float> t_split (LLAMA_MAX_DEVICES, 0.0f);
+    const std::vector<float> t_split(llama_max_devices(), 0.0f);
 
     model_params.n_gpu_layers = n_gpu_layers;
     model_params.tensor_split = t_split.data();
-- 
cgit v1.2.3
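
A minimal caller-side sketch (not part of the patch) of what this change means for code that sizes a tensor-split buffer: the compile-time LLAMA_MAX_DEVICES constant is replaced by the runtime query llama_max_devices(). The helper name make_model_params below is hypothetical; the sketch only assumes the llama.h API as it stands after this commit.

// Sketch only: sizing a per-device tensor-split buffer after this commit.
// Assumes llama.h from a build that already contains this change, where
// llama_max_devices() is a runtime function rather than a macro.
#include <vector>
#include "llama.h"

// Hypothetical helper; t_split must outlive the model load because
// model_params.tensor_split keeps a pointer into its data.
static llama_model_params make_model_params(int n_gpu_layers, std::vector<float> & t_split) {
    // One entry per device the build supports; all zeros lets llama.cpp
    // decide how to split tensors across devices.
    t_split.assign(llama_max_devices(), 0.0f);

    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = n_gpu_layers;
    mparams.tensor_split = t_split.data();
    return mparams;
}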