llama : expose model's rope_freq_scale in the API (#3418)

This lets callers read the model's training-time RoPE frequency scale so it can be scaled further before creating a context.
author: Alex Klinkhamer <git@grencez.dev> 2023-10-03 10:09:28 -0700
committer: GitHub <noreply@github.com> 2023-10-03 20:09:28 +0300
commit: 48be797ffbd80b062f55778e09e97180eb25d2ab (patch)
tree: 9ba8d8ed5d596e0c95de4c7b87c43fd979641008
parent: f56e1baec361b5381e32ee6b6e56e4f00e002dfe (diff)
2 files changed, 7 insertions, 0 deletions
diff --git a/llama.cpp b/llama.cpp
index 4a61eecd..aa1b4732 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7038,6 +7038,10 @@ int llama_n_embd(const struct llama_model * model) {
     return model->hparams.n_embd;
 }
 
+float llama_rope_freq_scale_train(const struct llama_model * model) {
+    return model->hparams.rope_freq_scale_train;
+}
+
 int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
     return snprintf(buf, buf_size, "%s %s %s",
             llama_model_arch_name(model->arch).c_str(),
diff --git a/llama.h b/llama.h
index fd215840..0177d07a 100644
--- a/llama.h
+++ b/llama.h
@@ -282,6 +282,9 @@ extern "C" {
     LLAMA_API int llama_n_ctx_train(const struct llama_model * model);
     LLAMA_API int llama_n_embd     (const struct llama_model * model);
 
+    // Get the model's training-time RoPE frequency scaling factor (hparams.rope_freq_scale_train)
+    LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
+
     // Get a string describing the model type
     LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
author	Alex Klinkhamer <git@grencez.dev>	2023-10-03 10:09:28 -0700
committer	GitHub <noreply@github.com>	2023-10-03 20:09:28 +0300
commit	48be797ffbd80b062f55778e09e97180eb25d2ab (patch)
tree	9ba8d8ed5d596e0c95de4c7b87c43fd979641008
parent	f56e1baec361b5381e32ee6b6e56e4f00e002dfe (diff)