author | Georgi Gerganov <ggerganov@gmail.com> | 2024-02-17 23:04:16 +0200
---|---|---
committer | GitHub <noreply@github.com> | 2024-02-17 23:04:16 +0200
commit | 8f1be0d42f23016cb6819dbae01126699c4bd9bc (patch) |
tree | 4a142e745a73307190e9c5ef5c41aeb4aadaca7a /ggml-alloc.c |
parent | 6e4e973b2615f8d390b1c4f4a7e05a119078bb0f (diff) |
ggml : add ALiBi support for ggml_soft_max_ext (#5488)
* ggml : avoid recomputing alibi slopes (CPU)
* llama : reuse hparams.f_max_alibi_bias in all cases
ggml-ci
* ggml : support alibi bias in ggml_soft_max_ext (CPU + Metal)
ggml-ci
* ggml : handle all SRCs (do not break on first null)
ggml-ci
* tests : do not use slope for large soft_max
it accumulates too much error
ggml-ci
* ggml : alternative ALiBi without extra tensor
We compute the slopes in the kernel
ggml-ci
* cuda : add ALiBi support in ggml_soft_max_ext
ggml-ci
* ggml : deprecate ggml_alibi
* ggml : support multi-sequence ALiBi (Metal)
ggml-ci
* cuda : add multi-seq ALiBi + remove F16 soft_max
ggml-ci
* ggml : update deprecation message
* ggml : fix pos ptr when no ALiBi
ggml-ci
* cuda : fix performance (pow -> powf)
* cuda : precompute ALiBi constants
* metal : pre-compute ALiBi slopes
ggml-ci
* llama : init kq_pos only if needed
ggml-ci
* test-backend-ops : add null pos test to soft_max
test-backend-ops : replace soft_max tests
ggml-ci
---------
Co-authored-by: slaren <slarengh@gmail.com>
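
The bullets above about computing the slopes in the kernel and precomputing the ALiBi constants refer to deriving each attention head's slope from the maximum bias instead of passing a precomputed slope tensor. Below is a minimal C sketch of that per-head formula; `alibi_slope`, `max_bias`, `n_head` and `h` are illustrative names rather than identifiers from this diff, and the actual computation lives in the CPU/CUDA/Metal soft_max kernels:

    #include <math.h>
    #include <stdint.h>

    // Sketch only: per-head ALiBi slope derived from the maximum bias.
    // Heads below the largest power of two <= n_head use base m0 with
    // exponents 1..n_head_log2; the remaining heads use base m1 with odd
    // exponents, i.e. the usual ALiBi slope schedule.
    static float alibi_slope(float max_bias, uint32_t n_head, uint32_t h) {
        const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head));

        const float m0 = powf(2.0f, -(max_bias       ) / n_head_log2);
        const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);

        // note powf, not pow, per the "pow -> powf" performance fix above
        return h < n_head_log2 ? powf(m0, (float) (h + 1))
                               : powf(m1, (float) (2*(h - n_head_log2) + 1));
    }

With max_bias set to 0.0f the kernels skip the ALiBi term entirely (compare the "fix pos ptr when no ALiBi" bullet); otherwise the bias added to each logit is roughly slope * position.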
Diffstat (limited to 'ggml-alloc.c')
-rw-r--r-- | ggml-alloc.c | 6
1 file changed, 3 insertions, 3 deletions
diff --git a/ggml-alloc.c b/ggml-alloc.c
index c28c37c4..d4123564 100644
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@@ -551,7 +551,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
         }
         for (int j = 0; j < GGML_MAX_SRC; j++) {
             if (graph->nodes[i]->src[j] == NULL) {
-                break;
+                continue;
             }
             if (graph->nodes[i]->src[j]->flags & GGML_TENSOR_FLAG_INPUT) {
                 ggml_gallocr_allocate_node(galloc, graph->nodes[i]->src[j], get_node_buffer_id(node_buffer_ids, i));
@@ -787,7 +787,7 @@ static bool ggml_gallocr_needs_realloc(ggml_gallocr_t galloc, struct ggml_cgraph
         for (int j = 0; j < GGML_MAX_SRC; j++) {
             struct ggml_tensor * src = node->src[j];
             if (src == NULL) {
-                break;
+                continue;
             }
             if (!ggml_gallocr_node_needs_realloc(galloc, src, node_alloc, &node_alloc->src[j])) {
 #ifndef NDEBUG
@@ -833,7 +833,7 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
         for (int j = 0; j < GGML_MAX_SRC; j++) {
             struct ggml_tensor * src = node->src[j];
             if (src == NULL) {
-                break;
+                continue;
             }
             ggml_gallocr_init_tensor(galloc, src, node_alloc, &node_alloc->src[j]);
         }
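
The break -> continue change above matters because, after this commit, a node's src array can contain gaps: a ggml_soft_max_ext node may have a NULL mask in src[1] while the ALiBi pos tensor sits in src[2], so stopping at the first NULL source would make the allocator skip real tensors. A minimal, self-contained toy sketch of the difference (the names and MAX_SRC value are illustrative, not the ggml API):

    #include <stdio.h>
    #include <stddef.h>

    #define MAX_SRC 4   // stand-in for GGML_MAX_SRC

    int main(void) {
        // toy source list with a gap, like soft_max with no mask but a pos tensor
        const char * src[MAX_SRC] = { "logits", NULL, "pos", NULL };

        for (int j = 0; j < MAX_SRC; j++) {
            if (src[j] == NULL) {
                continue;   // with `break` here, "pos" would never be visited
            }
            printf("visiting src[%d] = %s\n", j, src[j]);
        }
        return 0;
    }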