summaryrefslogtreecommitdiff
path: root/common/common.h
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com>, 2024-01-08 11:18:32 +0200
committer: GitHub <noreply@github.com>, 2024-01-08 11:18:32 +0200
commit: 52531fdff88764282c1b233174721aab8347252d (patch)
tree: d4aeec20b4b634f5de3bd9839df507dee85c9e1f /common/common.h
parent: b0034d93ce2949ce7d9c098ca02e56f66cd484e2 (diff)
main : add self-extend support (#4815)
* examples : add passkey test
* passkey : better prints
* passkey : select pass key pos from CLI
* passkey : simplify n_past logic
* llama : "self-extend"-like context extension
* passkey : add comment
* main : add Self-Extend support
* llama : add comment about llama_kv_cache_seq_div
Diffstat (limited to 'common/common.h')
-rw-r--r-- common/common.h | 2
1 files changed, 2 insertions, 0 deletions
diff --git a/common/common.h b/common/common.h
index 9659aa04..e2bbfc25 100644
--- a/common/common.h
+++ b/common/common.h
@@ -62,6 +62,8 @@ struct gpt_params {
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
float tensor_split[LLAMA_MAX_DEVICES] = {0}; // how split tensors should be distributed across GPUs
int32_t n_beams = 0; // if non-zero then use beam search of given width.
+ int32_t grp_attn_n = 1; // group-attention factor
+ int32_t grp_attn_w = 512; // group-attention width
float rope_freq_base = 0.0f; // RoPE base frequency
float rope_freq_scale = 0.0f; // RoPE frequency scaling factor
float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor