author     Matt Pulver <matt.pulver@heavy.ai>       2023-08-25 11:18:48 -0400
committer  GitHub <noreply@github.com>              2023-08-25 18:18:48 +0300
commit     c82742ac9cd96fd34aa961978805c1d8a361d589 (patch)
tree       ee377f2559d967955ce1dde65b698504a33e2928 /common/common.h
parent     28b2c996ca0ab90a5669946084f13443ec98e241 (diff)
llama : add llama_beam_search() (#2267)
* Add llama_beam_search().
* Add '// Beam search' heading to llama.{h,cpp} after llama_grammar_accept_token().
* Add space around * pointers and & references.
* Add spaces around comparison and assignment operators.
* Prefer west const.
* Use llama_ prefix for structs in global namespace.
* Delete obsolete comment from an earlier revision.
* Change eos to eob in llama_beam and llama_beam_view structs.
Diffstat (limited to 'common/common.h')
-rw-r--r--  common/common.h | 1 +
1 file changed, 1 insertion(+), 0 deletions(-)
diff --git a/common/common.h b/common/common.h
index 17d271e6..ce61265f 100644
--- a/common/common.h
+++ b/common/common.h
@@ -28,6 +28,7 @@ struct gpt_params {
     int32_t main_gpu                        = 0;    // the GPU that is used for scratch and small tensors
     float   tensor_split[LLAMA_MAX_DEVICES] = {0};  // how split tensors should be distributed across GPUs
     int32_t n_probs                         = 0;    // if greater than 0, output the probabilities of top n_probs tokens.
+    int32_t n_beams                         = 0;    // if non-zero then use beam search of given width.
     float   rope_freq_base                  = 10000.0f; // RoPE base frequency
     float   rope_freq_scale                 = 1.0f;     // RoPE frequency scaling factor
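The new n_beams field is a beam width: at each generation step, only the n_beams highest-scoring partial sequences are kept and extended. As a conceptual sketch only, not the llama.cpp API (the beam struct, the score_next() toy scorer, and the token ids below are hypothetical), a minimal standalone beam search of a given width might look like this:

```cpp
// Conceptual sketch of beam search with a fixed width, mirroring what the
// new gpt_params::n_beams controls. All names and the scoring model here are
// illustrative and are not part of llama.cpp.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

struct beam {
    std::vector<int> tokens;  // Token ids generated so far.
    float logp = 0.0f;        // Cumulative log-probability of the sequence.
};

// Hypothetical next-token scorer: returns (token, log-prob) candidates.
static std::vector<std::pair<int, float>> score_next(const std::vector<int> & tokens) {
    const int last = tokens.empty() ? 0 : tokens.back();
    return { { last + 1, -0.2f }, { last + 2, -1.0f }, { last + 3, -2.3f } };
}

int main() {
    const int n_beams = 2;  // Beam width, analogous to gpt_params::n_beams.
    const int n_steps = 4;  // Number of tokens to generate.

    std::vector<beam> beams(1);  // Start from a single empty beam.

    for (int step = 0; step < n_steps; ++step) {
        // Expand every surviving beam with every candidate continuation.
        std::vector<beam> candidates;
        for (const beam & b : beams) {
            for (const auto & [token, logp] : score_next(b.tokens)) {
                beam nb = b;
                nb.tokens.push_back(token);
                nb.logp += logp;
                candidates.push_back(std::move(nb));
            }
        }
        // Keep only the n_beams highest-scoring candidates.
        std::sort(candidates.begin(), candidates.end(),
                  [](const beam & a, const beam & b) { return a.logp > b.logp; });
        if ((int) candidates.size() > n_beams) {
            candidates.resize(n_beams);
        }
        beams = std::move(candidates);
    }

    // The best beam is the final result.
    for (int t : beams.front().tokens) {
        printf("%d ", t);
    }
    printf("(logp = %.2f)\n", beams.front().logp);
}
```

A width of 1 degenerates to greedy decoding; larger widths explore more continuations per step at proportionally higher cost, which is why the default of 0 leaves beam search disabled.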