path: root/ggml.c
author    slaren <slarengh@gmail.com>    2023-12-24 14:34:22 +0100
committer GitHub <noreply@github.com>    2023-12-24 14:34:22 +0100
commit    5bf3953d7e9831ea22b0bc017ce97409b801ccf1 (patch)
tree      48c0136d9943fb9cca22209894464970549c24b5 /ggml.c
parent    708e179e8562c2604240df95a2241dea17fd808b (diff)
cuda : improve cuda pool efficiency using virtual memory (#4606)
* cuda : improve cuda pool efficiency using virtual memory
* fix mixtral
* fix cmake build
* check for vmm support, disable for hip
  ggml-ci
* fix hip build
* clarify granularity
* move all caps to g_device_caps
* refactor error checking
* add cuda_pool_alloc, refactor most pool allocations
  ggml-ci
* fix hip build
* CUBLAS_TF32_TENSOR_OP_MATH is not a macro
* more hip crap
* llama : fix msvc warnings
* ggml : fix msvc warnings
* minor
* minor
* cuda : fallback to CPU on host buffer alloc fail
* Update ggml-cuda.cu
  Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
* Update ggml-cuda.cu
  Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
* ensure allocations are always aligned
* act_size -> actual_size
---------
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
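
For readers not familiar with the CUDA virtual memory management (VMM) driver API that the headline change builds on: instead of allocating a fixed device buffer and reallocating (and copying) whenever it turns out to be too small, a VMM-based pool reserves a large virtual address range once and backs it with physical memory only as demand grows, so pointers handed out earlier stay valid while the pool expands. The sketch below is a minimal illustration of that pattern, not the ggml-cuda.cu implementation; the names (vmm_pool, POOL_VA_SIZE) and the 256-byte alignment are hypothetical, error checking is omitted, and a current CUDA context is assumed to exist.

// vmm_pool.c -- illustrative only; compile against the CUDA driver API (-lcuda)
#include <cuda.h>
#include <stdint.h>
#include <stddef.h>

// Size of the reserved virtual address range (arbitrary for this sketch).
#define POOL_VA_SIZE (64ull * 1024 * 1024 * 1024)

typedef struct {
    CUdeviceptr base;        // start of the reserved virtual address range
    size_t      mapped;      // bytes currently backed by physical memory
    size_t      used;        // bytes handed out so far
    size_t      granularity; // minimum mapping granularity for this device
    int         device;
} vmm_pool;

static void vmm_pool_init(vmm_pool * pool, int device) {
    pool->device = device;
    pool->mapped = 0;
    pool->used   = 0;

    CUmemAllocationProp prop = {0};
    prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
    prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    prop.location.id   = device;
    cuMemGetAllocationGranularity(&pool->granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);

    // Reserve address space only -- no physical memory is committed yet.
    cuMemAddressReserve(&pool->base, POOL_VA_SIZE, 0, 0, 0);
}

static void * vmm_pool_alloc(vmm_pool * pool, size_t size) {
    // Keep every returned pointer aligned (cf. "ensure allocations are always aligned").
    const size_t align = 256;
    size = (size + align - 1) / align * align;

    if (pool->used + size > pool->mapped) {
        // Grow the pool: commit physical memory and map it at the end of the range.
        size_t grow = pool->used + size - pool->mapped;
        grow = (grow + pool->granularity - 1) / pool->granularity * pool->granularity;

        CUmemAllocationProp prop = {0};
        prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
        prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
        prop.location.id   = pool->device;

        CUmemGenericAllocationHandle handle;
        cuMemCreate(&handle, grow, &prop, 0);
        cuMemMap(pool->base + pool->mapped, grow, 0, handle, 0);

        CUmemAccessDesc access = {0};
        access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
        access.location.id   = pool->device;
        access.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
        cuMemSetAccess(pool->base + pool->mapped, grow, &access, 1);

        pool->mapped += grow;
    }

    void * ptr = (void *)(uintptr_t)(pool->base + pool->used);
    pool->used += size;
    return ptr;
}

Because the base address never moves, growing the pool does not invalidate earlier allocations, which is where the efficiency gain over a resize-and-copy pool comes from. Per the commit message, the VMM path is only used when the device reports support for it and is disabled for HIP builds ("check for vmm support, disable for hip").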
Diffstat (limited to 'ggml.c')
-rw-r--r--  ggml.c  |  2
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ggml.c b/ggml.c
index 3656422d..73600ab0 100644
--- a/ggml.c
+++ b/ggml.c
@@ -19351,7 +19351,7 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
                             data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
                         }
                         gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
-                        free(data);
+                        free((void *)data);
                     } else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
                         GGML_ASSERT(false && "nested arrays not supported");
                     } else {
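
The ggml.c hunk itself is a warning fix rather than a behavior change (it falls under the "ggml : fix msvc warnings" bullet): in gguf_set_kv the temporary array is declared as const char ** (its elements point to const char), and MSVC warns about the differing const qualifiers when such a pointer is passed straight to free(), whose parameter is a plain void *; gcc and clang accept the conversion silently. Casting to void * first leaves the call identical at runtime and silences the warning. The snippet below is a standalone illustration of the pattern, not code taken from ggml.c.

#include <stdlib.h>

int main(void) {
    /* element type is const-qualified, as in gguf_set_kv */
    const char ** data = malloc(4 * sizeof(char *));
    if (data == NULL) {
        return 1;
    }

    /* free(data);       -- MSVC warns about the const qualifier here */
    free((void *)data);  /* explicit cast: same behavior, no warning  */
    return 0;
}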