From c4f496648c1e32efeb714200e7eae7fc7cfbb223 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 7 Sep 2023 15:49:09 +0300 Subject: metal : fix kernel_norm (fixes Falcon on Metal) (#3057) * metal : fix kernel_norm ggml-ci * metal : put warning in kernel_norm to not combine the loops * metal : restore original F16 mat-vec multiplication It works after the norm fixes * common : don't do warm-up with more than n_batch tokens (close #3058) ggml-ci * metal : minor --- common/common.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'common/common.cpp') diff --git a/common/common.cpp b/common/common.cpp index 22f65ac4..28b7c630 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -773,7 +773,7 @@ std::tuple llama_init_from_gpt_par LOG("warming up the model with an empty run\n"); const std::vector tmp = { llama_token_bos(lctx), llama_token_eos(lctx), }; - llama_eval(lctx, tmp.data(), tmp.size(), 0, params.n_threads); + llama_eval(lctx, tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, params.n_threads); llama_reset_timings(lctx); } -- cgit v1.2.3