author     Georgi Gerganov <ggerganov@gmail.com>    2023-09-07 15:49:09 +0300
committer  GitHub <noreply@github.com>              2023-09-07 15:49:09 +0300
commit     c4f496648c1e32efeb714200e7eae7fc7cfbb223 (patch)
tree       876320eb5fa8b02682e0b0d88fe325b40da2f23a /common
parent     fec2fb19e4229aac58c98171c46e77144b99f8a3 (diff)
metal : fix kernel_norm (fixes Falcon on Metal) (#3057)
* metal : fix kernel_norm
ggml-ci
* metal : put warning in kernel_norm not to combine the loops (see the norm sketch below)
* metal : restore original F16 mat-vec multiplication
It works after the norm fixes
* common : don't do warm-up with more than n_batch tokens (close #3058; see the sketch after the diff)
ggml-ci
* metal : minor
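The warning about not combining the loops concerns ggml's Metal kernel_norm, which implements a plain normalization: one pass computes the mean, a second pass computes the variance of the centered values, and a final pass rescales. The sketch below is a minimal CPU-side illustration of that two-pass structure, not the Metal kernel from the patch; the function name norm_ref and the eps parameter are placeholders. In the GPU kernel the passes are presumably separated by threadgroup barriers, which is why fusing them into one loop is unsafe there.

// Conceptual two-pass normalization (CPU sketch, illustrative only).
#include <cmath>
#include <cstddef>

static void norm_ref(const float * x, float * y, size_t n, float eps) {
    // pass 1: mean of the input
    float mean = 0.0f;
    for (size_t i = 0; i < n; ++i) {
        mean += x[i];
    }
    mean /= n;

    // pass 2: variance of the centered values
    float sum2 = 0.0f;
    for (size_t i = 0; i < n; ++i) {
        const float v = x[i] - mean;
        y[i]  = v;      // keep the centered value for the final scale
        sum2 += v * v;
    }
    const float scale = 1.0f / std::sqrt(sum2 / n + eps);

    // pass 3: rescale
    for (size_t i = 0; i < n; ++i) {
        y[i] *= scale;
    }
}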
Diffstat (limited to 'common')
-rw-r--r--  common/common.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/common.cpp b/common/common.cpp
index 22f65ac4..28b7c630 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -773,7 +773,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         LOG("warming up the model with an empty run\n");
 
         const std::vector<llama_token> tmp = { llama_token_bos(lctx), llama_token_eos(lctx), };
-        llama_eval(lctx, tmp.data(), tmp.size(), 0, params.n_threads);
+        llama_eval(lctx, tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, params.n_threads);
         llama_reset_timings(lctx);
     }
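The one-line change clamps the number of warm-up tokens to the batch size: the warm-up feeds a BOS and an EOS token (two tokens), but a context configured with n_batch = 1 can only accept one token per llama_eval call, so #3058 was hit. Below is a minimal sketch of the clamp in isolation; the token ids and the n_batch value are hypothetical stand-ins, not the real llama.cpp API.

// Sketch: never pass more tokens to one eval call than fit in a batch.
#include <algorithm>
#include <cstddef>
#include <vector>

int main() {
    std::vector<int> tmp = { 1, 2 };  // stand-ins for the BOS and EOS token ids
    const int n_batch = 1;            // hypothetical small batch size

    // same clamp as the patch: min(token count, batch size)
    const size_t n_eval = std::min(tmp.size(), (size_t) n_batch);

    // llama_eval(lctx, tmp.data(), n_eval, 0, n_threads);  // as in the diff
    return n_eval == 1 ? 0 : 1;
}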