Diffstat (limited to 'ggml-alloc.c')
-rw-r--r--  ggml-alloc.c  106
1 file changed, 82 insertions, 24 deletions
diff --git a/ggml-alloc.c b/ggml-alloc.c
index 95a93c99..dfe5ba2e 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -778,38 +778,26 @@ size_t ggml_allocr_alloc_graph(ggml_allocr_t alloc, struct ggml_cgraph * graph)
 }
 // utils
-ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
-    GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
-
-    size_t alignment = ggml_backend_buft_get_alignment(buft);
-
-    size_t nbytes = 0;
-    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-        if (t->data == NULL && t->view_src == NULL) {
-            nbytes += GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
-        }
-    }
-
-    if (nbytes == 0) {
-        // all the tensors in the context are already allocated
-#ifndef NDEBUG
-        fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__);
-#endif
-        return NULL;
-    }
-    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, nbytes);
+static bool alloc_tensor_range(struct ggml_context * ctx,
+        struct ggml_tensor * first, struct ggml_tensor * last,
+        ggml_backend_buffer_type_t buft, size_t size,
+        ggml_backend_buffer_t ** buffers, size_t * n_buffers) {
+    ggml_backend_buffer_t buffer = ggml_backend_buft_alloc_buffer(buft, size);
     if (buffer == NULL) {
-        // failed to allocate buffer
 #ifndef NDEBUG
-        fprintf(stderr, "%s: failed to allocate buffer\n", __func__);
+        fprintf(stderr, "%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(buft), size);
 #endif
-        return NULL;
+        for (size_t i = 0; i < *n_buffers; i++) {
+            ggml_backend_buffer_free((*buffers)[i]);
+        }
+        free(*buffers);
+        return false;
     }
     ggml_tallocr_t tallocr = ggml_tallocr_new_from_buffer(buffer);
-    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+    for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
         if (t->data == NULL) {
             if (t->view_src == NULL) {
                 ggml_tallocr_alloc(tallocr, t);
@@ -826,6 +814,76 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
     ggml_tallocr_free(tallocr);
+    *buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
+    (*buffers)[(*n_buffers)++] = buffer;
+
+    return true;
+}
+
+ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_context * ctx, ggml_backend_buffer_type_t buft) {
+    GGML_ASSERT(ggml_get_no_alloc(ctx) == true);
+
+    size_t alignment = ggml_backend_buft_get_alignment(buft);
+    size_t max_size = ggml_backend_buft_get_max_size(buft);
+
+    ggml_backend_buffer_t * buffers = NULL;
+    size_t n_buffers = 0;
+
+    size_t cur_buf_size = 0;
+    struct ggml_tensor * first = ggml_get_first_tensor(ctx);
+    for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        size_t this_size = 0;
+        if (t->data == NULL && t->view_src == NULL) {
+            this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
+        }
+
+        if (this_size > max_size) {
+            // tensor is too large to fit in a single buffer
+            fprintf(stderr, "%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
+                    __func__, t->name,
+                    ggml_backend_buft_name(buft),
+                    this_size, max_size);
+            for (size_t i = 0; i < n_buffers; i++) {
+                ggml_backend_buffer_free(buffers[i]);
+            }
+            free(buffers);
+            return NULL;
+        }
+
+        if ((cur_buf_size + this_size) > max_size) {
+            // allocate tensors in the current buffer
+            if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
+                return NULL;
+            }
+            first = t;
+            cur_buf_size = this_size;
+        } else {
+            cur_buf_size += this_size;
+        }
+    }
+
+    // allocate remaining tensors
+    if (cur_buf_size > 0) {
+        if (!alloc_tensor_range(ctx, first, NULL, buft, cur_buf_size, &buffers, &n_buffers)) {
+            return NULL;
+        }
+    }
+
+    if (n_buffers == 0) {
+        // all the tensors in the context are already allocated
+#ifndef NDEBUG
+        fprintf(stderr, "%s: all tensors in the context are already allocated\n", __func__);
+#endif
+        return NULL;
+    }
+
+    ggml_backend_buffer_t buffer;
+    if (n_buffers == 1) {
+        buffer = buffers[0];
+    } else {
+        buffer = ggml_backend_multi_buffer_alloc_buffer(buffers, n_buffers);
+    }
+    free(buffers);
     return buffer;
 }
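
For reference, a minimal usage sketch of the function changed above. It assumes the public headers ggml.h, ggml-alloc.h and ggml-backend.h and the CPU buffer type; the tensor count and shapes are made up for illustration. With the CPU buffer type the reported max buffer size is effectively unlimited, so a single buffer is returned; a backend buffer type that reports a smaller maximum would transparently receive several buffers wrapped in a multi-buffer.

#include <stdio.h>

#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

int main(void) {
    // create a context that only holds tensor metadata (no_alloc = true is
    // required by the GGML_ASSERT at the top of ggml_backend_alloc_ctx_tensors_from_buft)
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 8,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    // placeholder tensors; shapes are illustrative only
    ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 512, 512);

    // allocate backing memory for every unallocated tensor in the context;
    // internally this may split the allocation across multiple backend buffers
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, ggml_backend_cpu_buffer_type());
    if (buf == NULL) {
        fprintf(stderr, "tensor allocation failed\n");
        ggml_free(ctx);
        return 1;
    }

    printf("allocated %zu bytes of tensor data\n", ggml_backend_buffer_get_size(buf));

    ggml_backend_buffer_free(buf);
    ggml_free(ctx);
    return 0;
}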