author     slaren <slarengh@gmail.com>    2023-12-24 14:34:22 +0100
committer  GitHub <noreply@github.com>    2023-12-24 14:34:22 +0100
commit     5bf3953d7e9831ea22b0bc017ce97409b801ccf1 (patch)
tree       48c0136d9943fb9cca22209894464970549c24b5 /llama.cpp
parent     708e179e8562c2604240df95a2241dea17fd808b (diff)
cuda : improve cuda pool efficiency using virtual memory (#4606)
* cuda : improve cuda pool efficiency using virtual memory
* fix mixtral
* fix cmake build
* check for vmm support, disable for hip

ggml-ci

* fix hip build
* clarify granularity
* move all caps to g_device_caps
* refactor error checking
* add cuda_pool_alloc, refactor most pool allocations

ggml-ci

* fix hip build
* CUBLAS_TF32_TENSOR_OP_MATH is not a macro
* more hip crap
* llama : fix msvc warnings
* ggml : fix msvc warnings
* minor
* minor
* cuda : fallback to CPU on host buffer alloc fail
* Update ggml-cuda.cu

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* Update ggml-cuda.cu

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

* ensure allocations are always aligned
* act_size -> actual_size

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
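The headline change backs the per-device CUDA memory pool with virtual memory: a large virtual address range is reserved once, and physical memory is mapped into it in granularity-sized chunks as the pool grows, so the pool can expand in place instead of freeing and re-allocating buffers. The following is a minimal standalone sketch of that mechanism using the CUDA driver API; the 32 GiB reservation size, device index 0, and single-chunk growth are assumptions for illustration, not the layout of the actual pool in ggml-cuda.cu.

    // Sketch: reserve a large virtual range once, then back it with physical
    // memory chunk by chunk as the pool grows.
    #include <cuda.h>
    #include <cstdio>
    #include <cstdlib>

    #define CU_CHECK(call) do { CUresult err_ = (call); if (err_ != CUDA_SUCCESS) { \
            fprintf(stderr, "CUDA error %d at %s:%d\n", (int) err_, __FILE__, __LINE__); exit(1); } } while (0)

    static const size_t POOL_VMM_MAX_SIZE = 1ull << 35; // assumed 32 GiB reservation

    int main() {
        CU_CHECK(cuInit(0));
        CUdevice dev; CU_CHECK(cuDeviceGet(&dev, 0));
        CUcontext ctx; CU_CHECK(cuCtxCreate(&ctx, 0, dev));

        CUmemAllocationProp prop = {};
        prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
        prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
        prop.location.id   = 0;

        // smallest unit the driver will map; chunk sizes must be multiples of it
        size_t granularity = 0;
        CU_CHECK(cuMemGetAllocationGranularity(&granularity, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));

        // reserve the address range once; this consumes no physical memory
        CUdeviceptr base = 0;
        CU_CHECK(cuMemAddressReserve(&base, POOL_VMM_MAX_SIZE, 0, 0, 0));

        // back the first chunk of the reservation with physical memory
        size_t chunk = granularity;
        CUmemGenericAllocationHandle handle;
        CU_CHECK(cuMemCreate(&handle, chunk, &prop, 0));
        CU_CHECK(cuMemMap(base, chunk, 0, handle, 0));

        CUmemAccessDesc access = {};
        access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
        access.location.id   = 0;
        access.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
        CU_CHECK(cuMemSetAccess(base, chunk, &access, 1));

        // base now behaves like a normal device pointer for chunk bytes;
        // growing the pool later means mapping more chunks at base + mapped_size,
        // without moving or copying anything already allocated.
        printf("reserved %zu bytes, mapped %zu bytes at %p\n", POOL_VMM_MAX_SIZE, chunk, (void *) base);

        CU_CHECK(cuMemUnmap(base, chunk));
        CU_CHECK(cuMemRelease(handle));
        CU_CHECK(cuMemAddressFree(base, POOL_VMM_MAX_SIZE));
        CU_CHECK(cuCtxDestroy(ctx));
        return 0;
    }

The commit message also notes a VMM support check that disables this path on HIP builds and falls back to the previous pool behavior.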
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp  6
1 file changed, 3 insertions, 3 deletions
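The commit message also adds a cuda_pool_alloc helper and refactors most temporary pool allocations to use it. The idea is an RAII wrapper: the buffer goes back to the pool when the wrapper leaves scope, so every early return and error path releases it. Below is a minimal self-contained sketch of that pattern; pool_malloc and pool_free are stand-ins for the real device-pool functions, and the actual struct in ggml-cuda.cu differs in detail.

    #include <cstdio>
    #include <cstdlib>

    // stand-in pool: the real pool hands out device memory and may round sizes up
    static void * pool_malloc(size_t size, size_t * actual_size) {
        *actual_size = size;
        return malloc(size);
    }
    static void pool_free(void * ptr, size_t /*size*/) {
        free(ptr);
    }

    template <typename T>
    struct cuda_pool_alloc {
        T *    ptr         = nullptr;
        size_t actual_size = 0;   // pool may return more than requested

        explicit cuda_pool_alloc(size_t count) {
            ptr = (T *) pool_malloc(count * sizeof(T), &actual_size);
        }

        ~cuda_pool_alloc() {
            if (ptr != nullptr) {
                pool_free(ptr, actual_size);   // runs on every exit path
            }
        }

        // non-copyable: the destructor must release the allocation exactly once
        cuda_pool_alloc(const cuda_pool_alloc &) = delete;
        cuda_pool_alloc & operator=(const cuda_pool_alloc &) = delete;

        T * get() { return ptr; }
    };

    int main() {
        cuda_pool_alloc<float> tmp(1024);   // freed automatically on scope exit
        printf("got %zu bytes at %p\n", tmp.actual_size, (void *) tmp.get());
        return 0;
    }

The "act_size -> actual_size" bullet refers to renaming this size field; tracking the actual size handed out is what lets the wrapper return the buffer to the pool correctly.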
diff --git a/llama.cpp b/llama.cpp
index 5699a0fc..a2462153 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1281,7 +1281,7 @@ struct llama_hparams {
if (this->rope_finetuned != other.rope_finetuned) return true;
if (this->n_yarn_orig_ctx != other.n_yarn_orig_ctx) return true;
- const float EPSILON = 1e-9;
+ const float EPSILON = 1e-9f;
if (!is_float_close(this->f_norm_eps, other.f_norm_eps, EPSILON)) return true;
if (!is_float_close(this->f_norm_rms_eps, other.f_norm_rms_eps, EPSILON)) return true;
@@ -10300,7 +10300,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
std::string result = model->vocab.id_to_token[token].text;
llama_unescape_whitespace(result);
if (length < (int) result.length()) {
- return -result.length();
+ return -(int) result.length();
}
memcpy(buf, result.c_str(), result.length());
return result.length();
@@ -10330,7 +10330,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
std::string result = model->vocab.id_to_token[token].text;
result = llama_decode_text(result);
if (length < (int) result.length()) {
- return -result.length();
+ return -(int) result.length();
}
memcpy(buf, result.c_str(), result.length());
return result.length();
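The llama.cpp half of the change, shown above, is limited to silencing MSVC warnings. std::string::length() returns size_t, so negating it directly yields a large unsigned value (and an MSVC warning about unary minus applied to an unsigned type); casting to int first gives the intended negative "buffer too small" return code. Likewise, 1e-9 is a double literal, so initializing a const float from it narrows; the 1e-9f suffix keeps the constant as a float. A small standalone illustration, not taken from the commit (needed_size is a hypothetical helper for demonstration):

    #include <cstdio>
    #include <string>

    // hypothetical helper mirroring the llama_token_to_piece return convention:
    // a negative "needed size" when the destination buffer is too small
    static int needed_size(const std::string & piece, int length) {
        if (length < (int) piece.length()) {
            return -(int) piece.length();   // cast first, then negate: well-defined -5 for "hello"
        }
        return (int) piece.length();
    }

    int main() {
        const float EPSILON = 1e-9f;        // float literal: no double-to-float truncation warning
        std::string piece = "hello";
        printf("eps = %g, needed = %d\n", (double) EPSILON, needed_size(piece, 2)); // needed = -5
        return 0;
    }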