author     Kawrakow <48489457+ikawrakow@users.noreply.github.com>  2024-07-27 07:55:01 +0200
committer  GitHub <noreply@github.com>  2024-07-27 07:55:01 +0200
commit     154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch)
tree       81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /common/ngram-cache.h
parent     0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff)
Merge mainline llama.cpp (#3)
* Merging mainline - WIP

* Merging mainline - WIP

  AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower, as is so often the case with llama.cpp/ggml after some "improvements" have been made.

* Merging mainline - fix Metal

* Remove check

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'common/ngram-cache.h')
-rw-r--r--  common/ngram-cache.h | 13
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/common/ngram-cache.h b/common/ngram-cache.h
index e4fa4cbd..ab4c9b37 100644
--- a/common/ngram-cache.h
+++ b/common/ngram-cache.h
@@ -37,11 +37,18 @@ struct llama_ngram {
     }
 };
 
+struct llama_token_hash_function {
+    size_t operator()(const llama_token token) const {
+        // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+        return token * 11400714819323198485llu;
+    }
+};
+
 struct llama_ngram_hash_function {
     size_t operator()(const llama_ngram & ngram) const {
-        size_t hash = 0;
-        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
-            hash ^= std::hash<llama_token>{}(ngram.tokens[i]);
+        size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
+        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
+            hash ^= llama_token_hash_function{}(ngram.tokens[i]);
         }
         return hash;
     }
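
The new hasher follows the Fibonacci-hashing idea from the linked article: rather than XOR-combining std::hash<llama_token> values (which for integer types is often just the identity, so small token IDs only touch the low bits), each token is first multiplied by 11400714819323198485 (approximately 2^64 divided by the golden ratio), which spreads consecutive IDs across the full 64-bit range before the per-token hashes are XORed together.

Below is a minimal standalone sketch of how these hashers can be used, e.g. as the Hash parameter of a std::unordered_map keyed by llama_ngram. The typedef of llama_token as int32_t, the value 4 for LLAMA_NGRAM_MAX, and the trimmed-down llama_ngram and main() are assumptions made for the sake of a self-contained example, not the actual contents of ngram-cache.h:

    // hash_sketch.cpp -- illustrative only, not part of the repository
    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>

    typedef int32_t llama_token;           // assumption: matches llama.h
    static const int LLAMA_NGRAM_MAX = 4;  // assumption: mainline value

    struct llama_ngram {
        llama_token tokens[LLAMA_NGRAM_MAX];
        bool operator==(const llama_ngram & other) const {
            for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
                if (tokens[i] != other.tokens[i]) {
                    return false;
                }
            }
            return true;
        }
    };

    struct llama_token_hash_function {
        size_t operator()(const llama_token token) const {
            // Fibonacci hashing: multiply by ~2^64/phi so consecutive token IDs
            // end up far apart in the upper bits of the result.
            return token * 11400714819323198485llu;
        }
    };

    struct llama_ngram_hash_function {
        size_t operator()(const llama_ngram & ngram) const {
            // XOR-combine the per-token Fibonacci hashes, as in the patch above.
            size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
            for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
                hash ^= llama_token_hash_function{}(ngram.tokens[i]);
            }
            return hash;
        }
    };

    int main() {
        // Use the hasher as the Hash parameter of an unordered_map keyed by
        // llama_ngram; here we just count two hypothetical 3-grams padded with -1.
        std::unordered_map<llama_ngram, int, llama_ngram_hash_function> counts;
        llama_ngram a = {{1, 2, 3, -1}};
        llama_ngram b = {{2, 3, 4, -1}};
        counts[a]++;
        counts[b]++;
        counts[a]++;
        printf("count(a) = %d, count(b) = %d\n", counts[a], counts[b]);
        return 0;
    }

Note that XOR is commutative, so n-grams that are permutations of one another still hash identically; the multiplication only improves how hashes are distributed across buckets.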