From 154e0d75fccf1784fe9ff6fd76a630b66563da3d Mon Sep 17 00:00:00 2001
From: Kawrakow <48489457+ikawrakow@users.noreply.github.com>
Date: Sat, 27 Jul 2024 07:55:01 +0200
Subject: Merge mainline llama.cpp (#3)

* Merging mainline - WIP

* Merging mainline - WIP

AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%)
lower, as is so often the case with llama.cpp/ggml after some
"improvements" have been made.

* Merging mainline - fix Metal

* Remove check

---------

Co-authored-by: Iwan Kawrakow
---
 common/ngram-cache.h | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'common/ngram-cache.h')

diff --git a/common/ngram-cache.h b/common/ngram-cache.h
index e4fa4cbd..ab4c9b37 100644
--- a/common/ngram-cache.h
+++ b/common/ngram-cache.h
@@ -37,11 +37,18 @@ struct llama_ngram {
     }
 };
 
+struct llama_token_hash_function {
+    size_t operator()(const llama_token token) const {
+        // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+        return token * 11400714819323198485llu;
+    }
+};
+
 struct llama_ngram_hash_function {
     size_t operator()(const llama_ngram & ngram) const {
-        size_t hash = 0;
-        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
-            hash ^= std::hash<llama_token>{}(ngram.tokens[i]);
+        size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
+        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
+            hash ^= llama_token_hash_function{}(ngram.tokens[i]);
         }
         return hash;
     }
--
cgit v1.2.3
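
Note on the hashing change: the constant 11400714819323198485 is 2^64 divided by
the golden ratio, the multiplier used in Fibonacci hashing. Multiplying by it
scatters consecutive integers across the full 64-bit range, whereas std::hash
for integers is the identity function on many standard library implementations,
so XOR-combining identity hashes over an n-gram's tokens collides heavily for
nearby token ids. Below is a minimal standalone sketch of the same scheme, not
part of the patch; it assumes llama_token is int32_t and LLAMA_NGRAM_MAX is 4,
matching their definitions elsewhere in llama.cpp.

    // Minimal sketch of the Fibonacci-hashing scheme from the patch above.
    // Assumptions (not taken from the patch): llama_token is int32_t and
    // LLAMA_NGRAM_MAX is 4, as defined elsewhere in llama.cpp.
    #include <cstdint>
    #include <cstdio>

    using llama_token = int32_t;
    constexpr int LLAMA_NGRAM_MAX = 4;

    // 11400714819323198485 is 2^64 / phi (the golden ratio); multiplying
    // by it spreads consecutive integers far apart in the 64-bit range.
    static uint64_t token_hash(llama_token token) {
        return (uint64_t) token * 11400714819323198485llu;
    }

    // XOR-combine the per-token hashes, seeding with the first token's
    // hash, as llama_ngram_hash_function does in the patch.
    static uint64_t ngram_hash(const llama_token (&tokens)[LLAMA_NGRAM_MAX]) {
        uint64_t hash = token_hash(tokens[0]);
        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
            hash ^= token_hash(tokens[i]);
        }
        return hash;
    }

    int main() {
        const llama_token ngram[LLAMA_NGRAM_MAX] = {15, 16, 17, 18};
        // Consecutive token ids no longer hash to consecutive values,
        // unlike std::hash<int>, which is commonly the identity function.
        printf("h(15) = %llu\n", (unsigned long long) token_hash(15));
        printf("h(16) = %llu\n", (unsigned long long) token_hash(16));
        printf("ngram = %llu\n", (unsigned long long) ngram_hash(ngram));
        return 0;
    }

Note that XOR combination is order-insensitive, so permutations of the same
tokens still collide; the multiply only fixes the clustering of nearby ids.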