summary | refs | log | tree | commit | diff
path: root/common/common.cpp
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2024-01-08 11:18:32 +0200
committer: GitHub <noreply@github.com> 2024-01-08 11:18:32 +0200
commit: 52531fdff88764282c1b233174721aab8347252d (patch)
tree: d4aeec20b4b634f5de3bd9839df507dee85c9e1f /common/common.cpp
parent: b0034d93ce2949ce7d9c098ca02e56f66cd484e2 (diff)
main : add self-extend support (#4815)
* examples : add passkey test
* passkey : better prints
* passkey : select pass key pos from CLI
* passkey : simplify n_past logic
* llama : "self-extend"-like context extension
* passkey : add comment
* main : add Self-Extend support
* llama : add comment about llama_kv_cache_seq_div
Diffstat (limited to 'common/common.cpp')
-rw-r--r-- common/common.cpp 18
1 files changed, 18 insertions, 0 deletions
diff --git a/common/common.cpp b/common/common.cpp
index eacaee18..6b4913a6 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -220,6 +220,20 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
break;
}
params.n_ctx = std::stoi(argv[i]);
+ } else if (arg == "--grp-attn-n" || arg == "-gan") {
+ if (++i >= argc) {
+ invalid_param = true;
+ break;
+ }
+
+ params.grp_attn_n = std::stoi(argv[i]);
+ } else if (arg == "--grp-attn-w" || arg == "-gaw") {
+ if (++i >= argc) {
+ invalid_param = true;
+ break;
+ }
+
+ params.grp_attn_w = std::stoi(argv[i]);
} else if (arg == "--rope-freq-base") {
if (++i >= argc) {
invalid_param = true;
@@ -904,6 +918,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" Not recommended since this is both slower and uses more VRAM.\n");
#endif // GGML_USE_CUBLAS
#endif
+ printf(" -gan N, --grp-attn-n N\n");
+ printf(" group-attention factor (default: %d)\n", params.grp_attn_n);
+ printf(" -gaw N, --grp-attn-w N\n"); // short flag must read "-gaw": that is the spelling the argument parser accepts
+ printf(" group-attention width (default: %.1f)\n", (double)params.grp_attn_w);
printf(" --verbose-prompt print prompt before generation\n");
printf(" -dkvc, --dump-kv-cache\n");
printf(" verbose print of the KV cache\n");