summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>2024-02-05 06:13:57 -0500
committerGitHub <noreply@github.com>2024-02-05 13:13:57 +0200
commitabb61944a5f64dec62c893ed0db10790169b672a (patch)
tree9ba841d2219cc0ea717a792e6c2e63a3fa4baa9d
parent89503dcb5f764a5cc7093db1f395f5121876a2cc (diff)
ggml : avoid duplicating function calls using MIN/MAX macros (#5325)
* Avoid duplicating function calls when using MIN/MAX macros. Since these macros duplicate "a" and "b" in their expansion, they ask the compiler to evaluate one of them twice. The compiler doesn't have a problem with removing the duplication in something like MAX(0, x + 2), but in some cases we're calling functions, and those calls just happen twice. By explicitly evaluating the expression once, before passing it to the macro, we get smaller and faster code without duplicate calls. See ggml_rope_yarn_corr_dims in Compiler Explorer: https://godbolt.org/z/Ee4KMrvKh The code behaves exactly the same. * Update ggml.c --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
-rw-r--r--ggml.c9
1 files changed, 6 insertions, 3 deletions
diff --git a/ggml.c b/ggml.c
index ee994c87..b9ec0c98 100644
--- a/ggml.c
+++ b/ggml.c
@@ -2470,7 +2470,8 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
size_t max_size = 0;
for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
- max_size = MAX(max_size, ggml_nbytes(tensor));
+ size_t bytes = ggml_nbytes(tensor);
+ max_size = MAX(max_size, bytes);
}
return max_size;
@@ -11887,8 +11888,10 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
) {
// start and end correction dims
- dims[0] = MAX(0, floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base)));
- dims[1] = MIN(n_dims - 1, ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base)));
+ float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base));
+ float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base));
+ dims[0] = MAX(0, start);
+ dims[1] = MIN(n_dims - 1, end);
}
static void ggml_compute_forward_rope_f32(