summaryrefslogtreecommitdiff
path: root/src/llama-impl.h
diff options
context:
space:
mode:
authorfirecoperana <xuqiaowei1124@gmail.com>2025-06-19 02:24:53 -0500
committerGitHub <noreply@github.com>2025-06-19 10:24:53 +0300
commit3f111ad7bbb2d4f721332f9b2b344e48b3bbf9aa (patch)
treea3a17ee74e0436253e17f0d322320ed554d34b0a /src/llama-impl.h
parentc5368148cf3af7a3694e0eb03d24a08326c01d12 (diff)
add dry sampler (#513)
* add dry sampler * use vocab instead of model in dry_init function * fix compile error for build test --------- Co-authored-by: firecoperana <firecoperana>
Diffstat (limited to 'src/llama-impl.h')
-rw-r--r--src/llama-impl.h114
1 files changed, 114 insertions, 0 deletions
diff --git a/src/llama-impl.h b/src/llama-impl.h
index a9cbe0df..a50f60cf 100644
--- a/src/llama-impl.h
+++ b/src/llama-impl.h
@@ -9,6 +9,7 @@
#define LLAMA_API_INTERNAL
#include "llama.h"
+#include <stdexcept>
#ifdef __GNUC__
#ifdef __MINGW32__
@@ -20,6 +21,7 @@
#define LLAMA_ATTRIBUTE_FORMAT(...)
#endif
+
//
// logging
//
@@ -52,3 +54,115 @@ static void replace_all(std::string & s, const std::string & search, const std::
builder.append(s, last_pos, std::string::npos);
s = std::move(builder);
}
+
+
// Fixed-capacity ring buffer with an interface similar to std::deque.
//
// Elements are kept in a std::vector<T> of `capacity` slots that is allocated
// once in the constructor. Once `sz == capacity`, each push_back() overwrites
// the oldest element instead of growing the storage.
//
// Accessors throw std::runtime_error when the buffer is empty or when an index
// is out of bounds; push_back() throws when the capacity is zero.
template<typename T>
struct ring_buffer {
    // Creates an empty buffer able to hold up to `cap` elements.
    ring_buffer(size_t cap) : capacity(cap), data(cap) {}

    // Returns the oldest stored element.
    // Throws std::runtime_error if the buffer is empty.
    T & front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    const T & front() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    // Returns the most recently pushed element.
    // Throws std::runtime_error if the buffer is empty.
    T & back() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // `pos` is the slot the *next* push_back() will write to, so the newest
        // element is one slot before it, i.e. the same slot rat(0) refers to.
        // sz > 0 implies capacity > 0, so the modulo is well defined.
        return data[(first + sz - 1) % capacity];
    }

    const T & back() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // see the non-const overload: `pos` is the next write slot, not the newest element
        return data[(first + sz - 1) % capacity];
    }

    // Appends `value`; when the buffer is full the oldest element is overwritten.
    // Throws std::runtime_error if the buffer was created with capacity zero.
    void push_back(const T & value) {
        if (capacity == 0) {
            throw std::runtime_error("ring buffer: capacity is zero");
        }

        if (sz == capacity) {
            // advance the start when buffer is full
            first = (first + 1) % capacity;
        } else {
            sz++;
        }
        data[pos] = value;
        pos = (pos + 1) % capacity;
    }

    // Removes the oldest element and returns a copy of it.
    // Throws std::runtime_error if the buffer is empty.
    T pop_front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        T value = data[first];
        first = (first + 1) % capacity;
        sz--;
        return value;
    }

    //T & operator[](size_t i) {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    //const T & at(size_t i) const {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    // Reverse access: rat(0) is the newest element, rat(size() - 1) the oldest.
    // Throws std::runtime_error if `i >= size()`.
    const T & rat(size_t i) const {
        if (i >= sz) {
            throw std::runtime_error("ring buffer: index out of bounds");
        }
        return data[(first + sz - i - 1) % capacity];
    }

    // Copies the stored elements, oldest first, into a new vector.
    std::vector<T> to_vector() const {
        std::vector<T> result;
        result.reserve(sz);
        for (size_t i = 0; i < sz; i++) {
            result.push_back(data[(first + i) % capacity]);
        }
        return result;
    }

    // Makes the buffer empty again.
    void clear() {
        // here only reset the status of the buffer; the slots in `data` are left untouched
        sz = 0;
        first = 0;
        pos = 0;
    }

    bool empty() const {
        return sz == 0;
    }

    size_t size() const {
        return sz;
    }

    size_t capacity = 0; // maximum number of elements
    size_t sz       = 0; // number of elements currently stored
    size_t first    = 0; // index of the oldest element in `data`
    size_t pos      = 0; // index in `data` the next push_back() writes to
    std::vector<T> data; // backing storage, `capacity` slots
};