diff options
author | slaren <slarengh@gmail.com> | 2024-03-18 11:03:04 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-18 11:03:04 +0100 |
commit | 2bf8d0f7c4cc1235755ad06961ca761e458c5e55 (patch) | |
tree | d2a462deb3c0e34cfb26eab6881a65bfb9fc3b28 /ggml-alloc.c | |
parent | 496bc79bc2b79bfd6124b8687a8dbd6a646e9b06 (diff) |
backend : offload large batches to GPU (#6083)
* backend : offload large batches to GPU
* fix hip
* code cleanup
* fix CUDA split buffers
* Update ggml-backend-impl.h
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
* cuda : fix memset without set_device
* imatrix : remove sched affix from weight names
* sched : add a new split if the current one has too many inputs
reduce max inputs per split
more cleanup
* update backends
ggml-ci
---------
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
Diffstat (limited to 'ggml-alloc.c')
-rw-r--r-- | ggml-alloc.c | 10 |
1 files changed, 7 insertions, 3 deletions
diff --git a/ggml-alloc.c b/ggml-alloc.c index 8ac1d3e5..643b2e55 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -548,7 +548,11 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr for (int i = 0; i < graph->n_nodes; i++) { struct ggml_tensor * node = graph->nodes[i]; - if (ggml_is_view(node)) { + // TODO: better way to add external dependencies + // GGML_OP_NONE does not appear normally in the graph nodes, but is used by ggml-backend to add dependencies to + // control when some tensors are allocated and freed. in this case, the dependencies are in `src`, but the node + // itself is never used and should not be considered a dependency + if (ggml_is_view(node) && node->op != GGML_OP_NONE) { struct ggml_tensor * view_src = node->view_src; ggml_gallocr_hash_get(galloc, view_src)->n_views += 1; } @@ -565,8 +569,8 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr ggml_gallocr_hash_get(galloc, src)->n_children += 1; - // allocate explicit inputs and leafs - if (src->flags & GGML_TENSOR_FLAG_INPUT || src->op == GGML_OP_NONE) { + // allocate explicit inputs + if (src->flags & GGML_TENSOR_FLAG_INPUT) { ggml_gallocr_allocate_node(galloc, src, get_node_buffer_id(node_buffer_ids, i)); } } |