From 52531fdff88764282c1b233174721aab8347252d Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 8 Jan 2024 11:18:32 +0200 Subject: main : add self-extend support (#4815) * examples : add passkey test * passkey : better prints * passkey : select pass key pos from CLI * passkey : simplify n_past logic * llama : "self-extend"-like context extension * passkey : add comment * main : add Self-Extend support * llama : add comment about llama_kv_cache_seq_div --- common/common.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'common/common.cpp') diff --git a/common/common.cpp b/common/common.cpp index eacaee18..6b4913a6 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -220,6 +220,20 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { break; } params.n_ctx = std::stoi(argv[i]); + } else if (arg == "--grp-attn-n" || arg == "-gan") { + if (++i >= argc) { + invalid_param = true; + break; + } + + params.grp_attn_n = std::stoi(argv[i]); + } else if (arg == "--grp-attn-w" || arg == "-gaw") { + if (++i >= argc) { + invalid_param = true; + break; + } + + params.grp_attn_w = std::stoi(argv[i]); } else if (arg == "--rope-freq-base") { if (++i >= argc) { invalid_param = true; @@ -904,6 +918,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" Not recommended since this is both slower and uses more VRAM.\n"); #endif // GGML_USE_CUBLAS #endif + printf(" -gan N, --grp-attn-n N\n"); + printf(" group-attention factor (default: %d)\n", params.grp_attn_n); + printf(" -gat N, --grp-attn-w N\n"); + printf(" group-attention width (default: %.1f)\n", (double)params.grp_attn_w); printf(" --verbose-prompt print prompt before generation\n"); printf(" -dkvc, --dump-kv-cache\n"); printf(" verbose print of the KV cache\n"); -- cgit v1.2.3