summary refs log tree commit diff
path: root/ggml/include
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/include')
-rw-r--r-- ggml/include/ggml-cuda.h 3
-rw-r--r-- ggml/include/ggml-metal.h 2
-rw-r--r-- ggml/include/ggml.h 47
3 files changed, 32 insertions, 20 deletions
diff --git a/ggml/include/ggml-cuda.h b/ggml/include/ggml-cuda.h
index d7903c66..71bb6dcf 100644
--- a/ggml/include/ggml-cuda.h
+++ b/ggml/include/ggml-cuda.h
@@ -6,6 +6,9 @@
#ifdef GGML_USE_HIPBLAS
#define GGML_CUDA_NAME "ROCm"
#define GGML_CUBLAS_NAME "hipBLAS"
+#elif defined(GGML_USE_MUSA)
+#define GGML_CUDA_NAME "MUSA"
+#define GGML_CUBLAS_NAME "muBLAS"
#else
#define GGML_CUDA_NAME "CUDA"
#define GGML_CUBLAS_NAME "cuBLAS"
diff --git a/ggml/include/ggml-metal.h b/ggml/include/ggml-metal.h
index 6c3226c3..d483cf1a 100644
--- a/ggml/include/ggml-metal.h
+++ b/ggml/include/ggml-metal.h
@@ -50,6 +50,8 @@ GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void
GGML_API void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb);
+GGML_API void ggml_backend_metal_set_abort_callback(ggml_backend_t backend, ggml_abort_callback abort_callback, void * user_data);
+
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
// helper to check if the device supports a specific family
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index a0bcc67f..b9b0284b 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -254,18 +254,8 @@
#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
-#define GGML_ASSERT(x) \
- do { \
- if (!(x)) { \
- fflush(stdout); \
- fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
- ggml_print_backtrace(); \
- abort(); \
- } \
- } while (0)
-
#ifndef NDEBUG
-#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
+#define GGML_UNREACHABLE() do { fprintf(stderr, "statement should be unreachable\n"); abort(); } while(0)
#elif defined(__GNUC__)
#define GGML_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
@@ -274,6 +264,17 @@
#define GGML_UNREACHABLE() ((void) 0)
#endif
+#ifdef __cplusplus
+#define GGML_NORETURN [[noreturn]]
+#elif defined(_MSC_VER)
+#define GGML_NORETURN __declspec(noreturn)
+#else
+#define GGML_NORETURN _Noreturn
+#endif
+
+#define GGML_ABORT(...) ggml_abort(__FILE__, __LINE__, __VA_ARGS__) // forwards the call-site file/line plus a format string and its arguments to ggml_abort()
+#define GGML_ASSERT(x) do { if (!(x)) GGML_ABORT("GGML_ASSERT(%s) failed", #x); } while (0) // do/while(0): expands to one statement, so a following `else` cannot bind to the hidden `if`
+
// used to copy the number of elements and stride in bytes of tensors into local variables.
// main purpose is to reduce code duplication and improve readability.
//
@@ -322,6 +323,9 @@
extern "C" {
#endif
+ GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4)
+ GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...);
+
enum ggml_status {
GGML_STATUS_ALLOC_FAILED = -2,
GGML_STATUS_FAILED = -1,
@@ -345,6 +349,7 @@ extern "C" {
GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
GGML_API float ggml_bf16_to_fp32(ggml_bf16_t); // consider just doing << 16
GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+ GGML_API void ggml_fp32_to_bf16_row_ref(const float *, ggml_bf16_t *, int64_t);
GGML_API void ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
struct ggml_object;
@@ -653,8 +658,11 @@ extern "C" {
GGML_CGRAPH_EVAL_ORDER_COUNT
};
+ typedef uint32_t ggml_bitset_t;
+
struct ggml_hash_set {
size_t size;
+ ggml_bitset_t * used;
struct ggml_tensor ** keys;
};
@@ -668,7 +676,7 @@ extern "C" {
struct ggml_tensor ** grads;
struct ggml_tensor ** leafs;
- struct ggml_hash_set visited_hash_table;
+ struct ggml_hash_set visited_hash_set;
enum ggml_cgraph_eval_order order;
};
@@ -715,8 +723,6 @@ extern "C" {
GGML_API int64_t ggml_cycles(void);
GGML_API int64_t ggml_cycles_per_ms(void);
- GGML_API void ggml_print_backtrace(void);
-
// accepts a UTF-8 path, even on Windows
GGML_API FILE * ggml_fopen(const char * fname, const char * mode);
@@ -1151,16 +1157,17 @@ extern "C" {
// group normalize along ne0*ne1*n_groups
// used in stable-diffusion
- // TODO: eps is hardcoded to 1e-6 for now
GGML_API struct ggml_tensor * ggml_group_norm(
struct ggml_context * ctx,
struct ggml_tensor * a,
- int n_groups);
+ int n_groups,
+ float eps);
GGML_API struct ggml_tensor * ggml_group_norm_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
- int n_groups);
+ int n_groups,
+ float eps);
// a - x
// b - dy
@@ -1467,7 +1474,6 @@ extern "C" {
// if (mode & 2) != 0, GPT-NeoX style
//
// b is an int32 vector with size a->ne[2], it contains the positions
- // c is freq factors (e.g. phi3-128k), (optional)
GGML_API struct ggml_tensor * ggml_rope(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -1484,6 +1490,7 @@ extern "C" {
int mode);
// custom RoPE
+ // c is freq factors (e.g. phi3-128k), (optional)
GGML_API struct ggml_tensor * ggml_rope_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
@@ -2022,8 +2029,8 @@ extern "C" {
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
- GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
- GGML_API enum ggml_status ggml_graph_compute ( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
+ GGML_API struct ggml_cplan ggml_graph_plan (const struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
+ GGML_API enum ggml_status ggml_graph_compute( struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
// same as ggml_graph_compute() but the work data is allocated as a part of the context
// note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
GGML_API enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads);