author    Kunshang Ji <kunshang.ji@intel.com>  2023-09-08 09:46:56 +0800
committer GitHub <noreply@github.com>          2023-09-08 03:46:56 +0200
commit    7f412dab9c8801f5d37904f7dce1faf4c2b43b42 (patch)
tree      bc1b629e787a261378a7b123b1e7299464ab2c7f /llama.cpp
parent    6336d834ec7bff3e93e24182c0f609d2f2bdce26 (diff)
enable CPU HBM (#2603)
* add cpu hbm support
* add memalign 0 byte check
* Update ggml.c
* Update llama.cpp
* ggml : allow ggml_init with 0 size
* retrigger ci
* fix code style

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
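The memalign zero-byte check and the ggml_init-with-0-size change mentioned above land in ggml.c, which the diff on this page (limited to llama.cpp) does not show. As a hedged illustration only, not the actual ggml.c code from this PR, a zero-byte guard of that kind could look like the following, assuming memkind's hbw_posix_memalign() from <hbwmalloc.h> (link with -lmemkind):

// Hedged sketch only; not the ggml.c code from this PR. Shows the kind of
// zero-byte guard the "memalign 0 byte check" bullet refers to.
#include <cstddef>
#include <hbwmalloc.h>

static void * aligned_hbm_malloc(size_t alignment, size_t n) {
    if (n == 0) {
        return NULL;  // avoid a 0-byte memalign, whose result is implementation-defined
    }
    void * ptr = NULL;
    if (hbw_posix_memalign(&ptr, alignment, n) != 0) {
        return NULL;  // allocation failed, e.g. no suitable memory nodes
    }
    return ptr;
}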
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp | 12
1 file changed, 11 insertions(+), 1 deletion(-)
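The second hunk below routes llama_host_malloc/llama_host_free through memkind's hbw_malloc/hbw_free when GGML_USE_CPU_HBM is defined. A minimal standalone sketch of that compile-time dispatch, assuming libmemkind and its <hbwmalloc.h> header are installed (build with -DGGML_USE_CPU_HBM and link -lmemkind to take the HBM path); the main() driver is illustrative only:

// Minimal sketch of the allocator dispatch this patch adds; the macro
// definitions mirror the patch, the driver below is not from the PR.
#include <cstdio>
#include <cstdlib>

#ifdef GGML_USE_CPU_HBM
#include <hbwmalloc.h>
#   define llama_host_malloc(n)  hbw_malloc(n)
#   define llama_host_free(data) if (data != NULL) hbw_free(data)
#else
#   define llama_host_malloc(n)  malloc(n)
#   define llama_host_free(data) free(data)
#endif

int main() {
    void * buf = llama_host_malloc(1024);   // from HBM when available, else heap
    if (buf == NULL) {
        fprintf(stderr, "host allocation failed\n");
        return 1;
    }
    llama_host_free(buf);
    return 0;
}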
diff --git a/llama.cpp b/llama.cpp
index 208dcef0..cab7156f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -126,6 +126,9 @@ void replace_all(std::string & s, const std::string & search, const std::string
     }
     s = std::move(result);
 }
+#ifdef GGML_USE_CPU_HBM
+#include <hbwmalloc.h>
+#endif
 
 static void zeros(std::ofstream & file, size_t n) {
     char zero = 0;
@@ -450,6 +453,9 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 #elif GGML_USE_METAL
 #   define llama_host_malloc(n)  ggml_metal_host_malloc(n)
 #   define llama_host_free(data) ggml_metal_host_free(data)
+#elif GGML_USE_CPU_HBM
+#   define llama_host_malloc(n)  hbw_malloc(n)
+#   define llama_host_free(data) if (data != NULL) hbw_free(data)
 #else
 #   define llama_host_malloc(n)  malloc(n)
 #   define llama_host_free(data) free(data)
@@ -1489,7 +1495,11 @@ struct llama_model_loader {
             // allocate temp buffer if not using mmap
             if (!use_mmap && cur->data == NULL) {
                 GGML_ASSERT(cur->backend != GGML_BACKEND_CPU);
-                cur->data = malloc(ggml_nbytes(cur));
+                #ifdef GGML_USE_CPU_HBM
+                cur->data = (uint8_t*)hbw_malloc(ggml_nbytes(cur));
+                #else
+                cur->data = (uint8_t*)malloc(ggml_nbytes(cur));
+                #endif
             }
 
             load_data_for(cur);
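Not part of this patch: under memkind's default preferred policy, hbw_malloc() silently falls back to regular DRAM when no high-bandwidth NUMA nodes exist, so a caller can probe availability up front. A hedged sketch using memkind's hbw_check_available(), which returns 0 when HBM nodes are detected:

// Hedged sketch, not from this PR: probe for HBM before relying on hbw_malloc().
#include <cstdio>
#include <hbwmalloc.h>

int main() {
    if (hbw_check_available() != 0) {
        // no high-bandwidth memory nodes detected; hbw_malloc() would fall
        // back to regular DRAM under the default preferred policy
        printf("no HBM nodes detected\n");
        return 0;
    }
    void * p = hbw_malloc(1 << 20);  // 1 MiB, served from an HBM NUMA node
    if (p == NULL) {
        fprintf(stderr, "hbw_malloc failed\n");
        return 1;
    }
    hbw_free(p);
    return 0;
}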