| #pragma once |
|
|
| #include "llama.h" |
|
|
| #include <string> |
| #include <vector> |
| #include <stdexcept> |
|
|
| #ifdef __GNUC__ |
| #ifdef __MINGW32__ |
| #define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) |
| #else |
| #define LLAMA_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) |
| #endif |
| #else |
| #define LLAMA_ATTRIBUTE_FORMAT(...) |
| #endif |
|
|
| |
| |
| |
|
|
| LLAMA_ATTRIBUTE_FORMAT(2, 3) |
| void llama_log_internal (ggml_log_level level, const char * format, ...); |
| void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data); |
|
|
| #define LLAMA_LOG(...) llama_log_internal(GGML_LOG_LEVEL_NONE , __VA_ARGS__) |
| #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) |
| #define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) |
| #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) |
| #define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__) |
| #define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__) |
|
|
| |
| |
| |
|
|
| struct time_meas { |
| time_meas(int64_t & t_acc, bool disable = false) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {} |
|
|
| ~time_meas() { |
| if (t_start_us >= 0) { |
| t_acc += ggml_time_us() - t_start_us; |
| } |
| } |
|
|
| const int64_t t_start_us; |
|
|
| int64_t & t_acc; |
| }; |
|
|
// Replace every non-overlapping occurrence of `search` in `s` with `replace`,
// scanning left to right. A match inside freshly inserted replacement text is
// never re-matched. No-op when `search` is empty (avoids an infinite loop).
static void replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }
    size_t offset = 0;
    while ((offset = s.find(search, offset)) != std::string::npos) {
        s.replace(offset, search.length(), replace);
        // continue scanning after the inserted text so replacements
        // cannot cascade into each other
        offset += replace.length();
    }
}
|
|
| const std::vector<std::pair<std::string, struct ggml_tensor *>> & llama_internal_get_tensor_map( |
| struct llama_context * ctx |
| ); |
|
|
| |
// Fixed-capacity FIFO ring buffer backed by a std::vector.
// push_back() appends at the logical end; once the buffer is full it
// overwrites the oldest element. Empty-access and zero-capacity push
// raise std::runtime_error.
//
// Invariants: `first` indexes the oldest element, `pos` indexes the next
// write slot, `sz` is the current element count (0 <= sz <= capacity).
template<typename T>
struct ring_buffer {
    ring_buffer(size_t cap) : capacity(cap), data(cap) {}

    // oldest element; throws if empty
    T & front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    const T & front() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[first];
    }

    // newest element; throws if empty
    // fix: previously returned data[pos], but `pos` is the *next* write slot
    // (push_back writes data[pos] and then advances pos), so it pointed at an
    // uninitialized slot before the buffer wrapped, or at the oldest element
    // after it wrapped — never at the most recently pushed value
    T & back() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[(first + sz - 1) % capacity];
    }

    const T & back() const {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        return data[(first + sz - 1) % capacity];
    }

    // append `value`; when full, the oldest element is overwritten
    void push_back(const T & value) {
        if (capacity == 0) {
            throw std::runtime_error("ring buffer: capacity is zero");
        }

        if (sz == capacity) {
            // buffer is full: the slot at `pos` holds the oldest element and
            // is about to be overwritten, so advance `first` past it
            first = (first + 1) % capacity;
        } else {
            sz++;
        }
        data[pos] = value;
        pos = (pos + 1) % capacity;
    }

    // remove and return the oldest element; throws if empty
    T pop_front() {
        if (sz == 0) {
            throw std::runtime_error("ring buffer is empty");
        }
        T value = data[first];
        first = (first + 1) % capacity;
        sz--;
        return value;
    }

    // i-th element counting back from the newest: rat(0) == back(),
    // rat(sz - 1) == front(); throws on out-of-bounds index
    const T & rat(size_t i) const {
        if (i >= sz) {
            throw std::runtime_error("ring buffer: index out of bounds");
        }
        return data[(first + sz - i - 1) % capacity];
    }

    // copy of the contents in oldest-to-newest order
    std::vector<T> to_vector() const {
        std::vector<T> result;
        result.reserve(sz);
        for (size_t i = 0; i < sz; i++) {
            result.push_back(data[(first + i) % capacity]);
        }
        return result;
    }

    // reset to empty; stored elements are not destroyed (storage is reused)
    void clear() {
        sz    = 0;
        first = 0;
        pos   = 0;
    }

    bool empty() const {
        return sz == 0;
    }

    size_t size() const {
        return sz;
    }

    size_t capacity = 0; // maximum number of elements
    size_t sz       = 0; // current number of elements
    size_t first    = 0; // index of the oldest element
    size_t pos      = 0; // index of the next write slot
    std::vector<T> data;
};
|
|