diff options
author | firecoperana <xuqiaowei1124@gmail.com> | 2025-06-19 02:24:53 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-06-19 10:24:53 +0300 |
commit | 3f111ad7bbb2d4f721332f9b2b344e48b3bbf9aa (patch) | |
tree | a3a17ee74e0436253e17f0d322320ed554d34b0a /src/llama-impl.h | |
parent | c5368148cf3af7a3694e0eb03d24a08326c01d12 (diff) |
add dry sampler (#513)
* add dry sampler
* use vocab instead of model in dry_init function
* fix compile error for build test
---------
Co-authored-by: firecoperana <firecoperana>
Diffstat (limited to 'src/llama-impl.h')
-rw-r--r-- | src/llama-impl.h | 114 |
1 files changed, 114 insertions, 0 deletions
// Fixed-capacity FIFO ring buffer. It works similarly to std::deque, but once
// `capacity` elements are stored, every further push_back() overwrites the
// oldest element instead of growing.
//
// Requires <stdexcept> (std::runtime_error) and <vector> (std::vector).
//
// Layout: the live elements occupy `sz` consecutive slots (modulo `capacity`)
// starting at index `first`; `pos` is the slot the NEXT push_back() writes to.
template<typename T>
struct ring_buffer {
    // Allocates `cap` value-initialized slots. A capacity of zero is allowed,
    // but push_back() will then throw.
    ring_buffer(size_t cap) : capacity(cap), data(cap) {}

    // Oldest element. Throws std::runtime_error if the buffer is empty.
    T & front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    const T & front() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    // Most recently pushed element. Throws std::runtime_error if the buffer is empty.
    T & back() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // `pos` is the next write slot, not the last written one, so the newest
        // element is addressed from `first` (same slot as rat(0)).
        return data[(first + sz - 1) % capacity];
    }

    const T & back() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        // see the non-const overload: `pos` must not be used here
        return data[(first + sz - 1) % capacity];
    }

    // Appends `value`; when the buffer is full the oldest element is dropped.
    // Throws std::runtime_error if the capacity is zero.
    void push_back(const T & value) {
        if (capacity == 0) {
            throw std::runtime_error("ring buffer: capacity is zero");
        }

        if (sz == capacity) {
            // advance the start when buffer is full
            first = (first + 1) % capacity;
        } else {
            sz++;
        }
        data[pos] = value;
        pos = (pos + 1) % capacity;
    }

    // Removes and returns (by value) the oldest element.
    // Throws std::runtime_error if the buffer is empty.
    T pop_front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        T value = data[first];
        first = (first + 1) % capacity;
        sz--;
        return value;
    }

    //T & operator[](size_t i) {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    //const T & at(size_t i) const {
    //    if (i >= sz) {
    //        throw std::runtime_error("ring buffer: index out of bounds");
    //    }
    //    return data[(first + i) % capacity];
    //}

    // Reverse access: rat(0) is the newest element, rat(size() - 1) the oldest.
    // Throws std::runtime_error if `i` is not smaller than size().
    const T & rat(size_t i) const {
        if (i >= sz) {
            throw std::runtime_error("ring buffer: index out of bounds");
        }
        return data[(first + sz - i - 1) % capacity];
    }

    // Copies the live elements into a vector, ordered oldest to newest.
    std::vector<T> to_vector() const {
        std::vector<T> result;
        result.reserve(sz);
        for (size_t i = 0; i < sz; i++) {
            result.push_back(data[(first + i) % capacity]);
        }
        return result;
    }

    void clear() {
        // here only reset the status of the buffer; the slots keep their old values
        sz = 0;
        first = 0;
        pos = 0;
    }

    bool empty() const {
        return sz == 0;
    }

    size_t size() const {
        return sz;
    }

    size_t capacity = 0; // maximum number of stored elements
    size_t sz = 0;       // number of live elements
    size_t first = 0;    // index of the oldest element
    size_t pos = 0;      // index the next push_back() writes to
    std::vector<T> data; // backing storage, always `capacity` slots
};