| author    | Kunshang Ji <kunshang.ji@intel.com>                 | 2023-09-08 09:46:56 +0800 |
|-----------|-----------------------------------------------------|---------------------------|
| committer | GitHub <noreply@github.com>                         | 2023-09-08 03:46:56 +0200 |
| commit    | 7f412dab9c8801f5d37904f7dce1faf4c2b43b42 (patch)    |                           |
| tree      | bc1b629e787a261378a7b123b1e7299464ab2c7f /llama.cpp |                           |
| parent    | 6336d834ec7bff3e93e24182c0f609d2f2bdce26 (diff)     |                           |
enable CPU HBM (#2603)
* add cpu hbm support
* add memalign 0 byte check (see the sketch after this message)
* Update ggml.c
* Update llama.cpp
* ggml : allow ggml_init with 0 size
* retrigger ci
* fix code style
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
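
The memalign and ggml_init items above refer to changes in ggml.c, which the diffstat below (limited to llama.cpp) does not show. As a rough sketch only of what a zero-size guard in front of an aligned HBM allocation can look like: hbw_posix_memalign() is the real memkind API from <hbwmalloc.h>, while the aligned_hbm_malloc() helper and the 64-byte alignment are illustrative stand-ins, not the actual ggml.c code.

```cpp
#include <cstddef>
#include <cstdio>
#include <hbwmalloc.h> // memkind's high-bandwidth-memory allocation API

// Hypothetical helper; the real change lives in ggml.c and is not in this diff.
static void * aligned_hbm_malloc(size_t size) {
    // posix_memalign-style allocators may fail or return an unusable pointer
    // for size == 0, so bail out before calling into memkind.
    if (size == 0) {
        fprintf(stderr, "warning: 0-byte aligned allocation requested\n");
        return NULL;
    }
    void * ptr = NULL;
    // 64-byte alignment is an illustrative choice (must be a power of two
    // and a multiple of sizeof(void *)).
    if (hbw_posix_memalign(&ptr, 64, size) != 0) {
        return NULL;
    }
    return ptr;
}
```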
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 12 |
1 file changed, 11 insertions, 1 deletion
@@ -126,6 +126,9 @@ void replace_all(std::string & s, const std::string & search, const std::string
     }
     s = std::move(result);
 }
+#ifdef GGML_USE_CPU_HBM
+#include <hbwmalloc.h>
+#endif
 
 static void zeros(std::ofstream & file, size_t n) {
     char zero = 0;
@@ -450,6 +453,9 @@ static void ggml_graph_compute_helper(std::vector<uint8_t> & buf, ggml_cgraph *
 #elif GGML_USE_METAL
 #  define llama_host_malloc(n)  ggml_metal_host_malloc(n)
 #  define llama_host_free(data) ggml_metal_host_free(data)
+#elif GGML_USE_CPU_HBM
+#  define llama_host_malloc(n)  hbw_malloc(n)
+#  define llama_host_free(data) if (data != NULL) hbw_free(data)
 #else
 #  define llama_host_malloc(n)  malloc(n)
 #  define llama_host_free(data) free(data)
@@ -1489,7 +1495,11 @@ struct llama_model_loader {
         // allocate temp buffer if not using mmap
         if (!use_mmap && cur->data == NULL) {
             GGML_ASSERT(cur->backend != GGML_BACKEND_CPU);
-            cur->data = malloc(ggml_nbytes(cur));
+            #ifdef GGML_USE_CPU_HBM
+            cur->data = (uint8_t*)hbw_malloc(ggml_nbytes(cur));
+            #else
+            cur->data = (uint8_t*)malloc(ggml_nbytes(cur));
+            #endif
         }
 
         load_data_for(cur);
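
For context, hbw_malloc() and hbw_free() come from the memkind library's hbwmalloc API, which serves allocations from high-bandwidth-memory NUMA nodes on CPUs that expose HBM. Below is a minimal, self-contained sketch of the dispatch pattern this diff adds; the llama_host_malloc/llama_host_free macro bodies are taken from the diff, and the main() harness around them is illustrative.

```cpp
#include <cstdio>
#include <cstdlib>

// HBM-vs-default branch of the llama_host_malloc dispatch from the diff.
// Compile with -DGGML_USE_CPU_HBM and link -lmemkind to take the HBM path;
// without the define it falls back to plain malloc/free.
#ifdef GGML_USE_CPU_HBM
#include <hbwmalloc.h>
#  define llama_host_malloc(n)  hbw_malloc(n)
#  define llama_host_free(data) if (data != NULL) hbw_free(data)
#else
#  define llama_host_malloc(n)  malloc(n)
#  define llama_host_free(data) free(data)
#endif

int main() {
    // Allocate a 1 MiB host buffer through the same macro the loader uses.
    void * buf = llama_host_malloc(1024 * 1024);
    if (buf == NULL) {
        fprintf(stderr, "allocation failed\n");
        return 1;
    }
    llama_host_free(buf);
    return 0;
}
```

A note on behavior: memkind's default hbwmalloc policy is "preferred", so hbw_malloc() can silently fall back to ordinary DRAM when no high-bandwidth nodes are present; calling hbw_set_policy(HBW_POLICY_BIND) before the first allocation makes such requests fail instead.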