// // Copyright (C) 2023-2025 The llama.cpp authors // Copyright (C) 2024-2025 Iwan Kawrakow // MIT license // SPDX-License-Identifier: MIT // #pragma once #define LLAMA_API_INTERNAL #include "llama.h" #include #ifdef __GNUC__ #ifdef __MINGW32__ #define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) #else #define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) #endif #else #define LLAMA_ATTRIBUTE_FORMAT(...) #endif // // logging // LLAMA_ATTRIBUTE_FORMAT(2, 3) void llama_log_internal (ggml_log_level level, const char * format, ...); void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data); #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) #define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) // // helpers // static void replace_all(std::string & s, const std::string & search, const std::string & replace) { if (search.empty()) { return; } std::string builder; builder.reserve(s.length()); size_t pos = 0; size_t last_pos = 0; while ((pos = s.find(search, last_pos)) != std::string::npos) { builder.append(s, last_pos, pos - last_pos); builder.append(replace); last_pos = pos + search.length(); } builder.append(s, last_pos, std::string::npos); s = std::move(builder); } // the ring buffer works similarly to std::deque, but with a fixed capacity template struct ring_buffer { ring_buffer(size_t cap) : capacity(cap), data(cap) {} T& front() { if (sz == 0) { throw std::runtime_error("ring buffer is empty"); } return data[first]; } const T& front() const { if (sz == 0) { throw std::runtime_error("ring buffer is empty"); } return data[first]; } T& back() { if (sz == 0) { throw std::runtime_error("ring buffer is empty"); } return data[pos]; } const T& back() const { if (sz == 0) { throw std::runtime_error("ring buffer is empty"); } return data[pos]; } void push_back(const T& value) { if (capacity == 0) { throw std::runtime_error("ring buffer: capacity is zero"); } if (sz == capacity) { // advance the start when buffer is full first = (first + 1) % capacity; } else { sz++; } data[pos] = value; pos = (pos + 1) % capacity; } T pop_front() { if (sz == 0) { throw std::runtime_error("ring buffer is empty"); } T value = data[first]; first = (first + 1) % capacity; sz--; return value; } //T & operator[](size_t i) { // if (i >= sz) { // throw std::runtime_error("ring buffer: index out of bounds"); // } // return data[(first + i) % capacity]; //} //const T & at(size_t i) const { // if (i >= sz) { // throw std::runtime_error("ring buffer: index out of bounds"); // } // return data[(first + i) % capacity]; //} const T& rat(size_t i) const { if (i >= sz) { throw std::runtime_error("ring buffer: index out of bounds"); } return data[(first + sz - i - 1) % capacity]; } std::vector to_vector() const { std::vector result; result.reserve(sz); for (size_t i = 0; i < sz; i++) { result.push_back(data[(first + i) % capacity]); } return result; } void clear() { // here only reset the status of the buffer sz = 0; first = 0; pos = 0; } bool empty() const { return sz == 0; } size_t size() const { return sz; } size_t capacity = 0; size_t sz = 0; size_t first = 0; size_t pos = 0; std::vector data; };