summaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
authorGeorgi Gerganov <ggerganov@gmail.com>2024-01-07 11:21:53 +0200
committerGitHub <noreply@github.com>2024-01-07 11:21:53 +0200
commit3c36213df850a2353e95572b3636797c79b7c815 (patch)
tree40d3edf06d566e8960a1d64476ee9ae795a80056 /llama.cpp
parent72d8407b3696dd1293bd233b6db392be108bc377 (diff)
llama : remove redundant GQA check (#4796)
Diffstat (limited to 'llama.cpp')
-rw-r--r--llama.cpp8
1 files changed, 0 insertions, 8 deletions
diff --git a/llama.cpp b/llama.cpp
index 06db4030..021e79a8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4776,7 +4776,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
- GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -4900,7 +4899,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
- GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * pos;
@@ -5001,7 +4999,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
- GGML_ASSERT(n_embd_gqa == n_embd);
const int64_t n_rot = n_embd_head_k / 2;
@@ -5215,7 +5212,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
- GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5308,7 +5304,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
- GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5404,7 +5399,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
- GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * inpL;
@@ -5731,7 +5725,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
- GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * attn_norm_output;
@@ -5955,7 +5948,6 @@ struct llm_build_context {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
- GGML_ASSERT(n_embd_gqa == n_embd);
struct ggml_tensor * cur;
struct ggml_tensor * pos;