From 5bf3953d7e9831ea22b0bc017ce97409b801ccf1 Mon Sep 17 00:00:00 2001
From: slaren
Date: Sun, 24 Dec 2023 14:34:22 +0100
Subject: cuda : improve cuda pool efficiency using virtual memory (#4606)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* cuda : improve cuda pool efficiency using virtual memory

* fix mixtral

* fix cmake build

* check for vmm support, disable for hip

ggml-ci

* fix hip build

* clarify granularity

* move all caps to g_device_caps

* refactor error checking

* add cuda_pool_alloc, refactor most pool allocations

ggml-ci

* fix hip build

* CUBLAS_TF32_TENSOR_OP_MATH is not a macro

* more hip crap

* llama : fix msvc warnings

* ggml : fix msvc warnings

* minor

* minor

* cuda : fallback to CPU on host buffer alloc fail

* Update ggml-cuda.cu

Co-authored-by: Johannes Gäßler

* Update ggml-cuda.cu

Co-authored-by: Johannes Gäßler

* ensure allocations are always aligned

* act_size -> actual_size

---------

Co-authored-by: Johannes Gäßler
---
 ggml.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'ggml.c')

diff --git a/ggml.c b/ggml.c
index 3656422d..73600ab0 100644
--- a/ggml.c
+++ b/ggml.c
@@ -19351,7 +19351,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
                     data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
                 }
                 gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
-                free(data);
+                free((void *)data);
             } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
                 GGML_ASSERT(false && "nested arrays not supported");
             } else {
-- 
cgit v1.2.3