Diffstat (limited to 'ggml.h')
-rw-r--r--   ggml.h   89
1 file changed, 50 insertions, 39 deletions
--- a/ggml.h
+++ b/ggml.h
@@ -58,7 +58,8 @@
 //    {
 //        ...
 //
-//        struct ggml_cgraph gf = ggml_build_forward(f);
+//        struct ggml_cgraph * gf = ggml_new_graph(ctx);
+//        ggml_build_forward_expand(gf, f);
 //
 //        // set the input variable and parameter values
 //        ggml_set_f32(x, 2.0f);
@@ -213,15 +214,14 @@
 #define GGML_QNT_VERSION        2    // bump this on quantization format changes
 #define GGML_QNT_VERSION_FACTOR 1000 // do not change this
 
-#define GGML_MAX_DIMS          4
-#define GGML_MAX_NODES         16384
-#define GGML_MAX_PARAMS        1024
-#define GGML_MAX_CONTEXTS      64
-#define GGML_MAX_SRC           6
-#define GGML_MAX_NAME          64
-#define GGML_MAX_OP_PARAMS     64
-#define GGML_DEFAULT_N_THREADS 4
-
+#define GGML_MAX_DIMS           4
+#define GGML_MAX_PARAMS         1024
+#define GGML_MAX_CONTEXTS       64
+#define GGML_MAX_SRC            6
+#define GGML_MAX_NAME           64
+#define GGML_MAX_OP_PARAMS      64
+#define GGML_DEFAULT_N_THREADS  4
+#define GGML_DEFAULT_GRAPH_SIZE 2048
 #if UINTPTR_MAX == 0xFFFFFFFF
     #define GGML_MEM_ALIGN 4
 #else
@@ -245,7 +245,10 @@
     do { \
         if (!(x)) { \
             fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
-            abort(); \
+            fflush(stderr); \
+            fflush(stdout); \
+            ggml_print_backtrace(); \
+            exit(1); \
         } \
     } while (0)
 
@@ -451,6 +454,7 @@ extern "C" {
         GGML_UNARY_OP_GELU,
         GGML_UNARY_OP_GELU_QUICK,
         GGML_UNARY_OP_SILU,
+        GGML_UNARY_OP_LEAKY
     };
 
     enum ggml_object_type {
@@ -531,37 +535,33 @@ extern "C" {
 
         int n_threads;
 
-        // the `n_tasks` of nodes, 1:1 mapping to cgraph nodes
-        int n_tasks[GGML_MAX_NODES];
-
         // abort ggml_graph_compute when true
         bool (*abort_callback)(void * data);
         void * abort_callback_data;
     };
 
-    // next prime after GGML_MAX_NODES
-    // #define GGML_GRAPH_HASHTABLE_SIZE 4099
-    // next prime after GGML_MAX_NODES * 2 (nodes + leafs)
-    // #define GGML_GRAPH_HASHTABLE_SIZE 8273
-    // #define GGML_GRAPH_HASHTABLE_SIZE 16411
-    #define GGML_GRAPH_HASHTABLE_SIZE 32771
-
     enum ggml_cgraph_eval_order {
         GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT = 0,
         GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT,
         GGML_CGRAPH_EVAL_ORDER_COUNT
     };
 
+    struct ggml_hash_set {
+        size_t size;
+        struct ggml_tensor ** keys;
+    };
+
     // computation graph
     struct ggml_cgraph {
+        int size;
         int n_nodes;
         int n_leafs;
 
-        struct ggml_tensor * nodes[GGML_MAX_NODES];
-        struct ggml_tensor * grads[GGML_MAX_NODES];
-        struct ggml_tensor * leafs[GGML_MAX_NODES];
+        struct ggml_tensor ** nodes;
+        struct ggml_tensor ** grads;
+        struct ggml_tensor ** leafs;
 
-        void * visited_hash_table[GGML_GRAPH_HASHTABLE_SIZE];
+        struct ggml_hash_set visited_hash_table;
 
         enum ggml_cgraph_eval_order order;
 
@@ -571,8 +571,6 @@ extern "C" {
         int64_t perf_time_us;
     };
 
-    static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
-
     // scratch buffer
     struct ggml_scratch {
         size_t offs;
@@ -617,6 +615,8 @@ extern "C" {
     GGML_API int64_t ggml_cycles(void);
    GGML_API int64_t ggml_cycles_per_ms(void);
 
+    GGML_API void    ggml_print_backtrace(void);
+
     GGML_API void    ggml_numa_init(void); // call once for better performance on NUMA systems
     GGML_API bool    ggml_is_numa(void);   // true if init detected that system has >1 NUMA node
 
@@ -709,7 +709,7 @@ extern "C" {
     // Context tensor enumeration and lookup
     GGML_API struct ggml_tensor * ggml_get_first_tensor(struct ggml_context * ctx);
     GGML_API struct ggml_tensor * ggml_get_next_tensor (struct ggml_context * ctx, struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_get_tensor      (struct ggml_context * ctx, const char * name);
+    GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
 
     GGML_API struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor);
     GGML_API struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value);
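The hunks above replace the old stack-allocated `struct ggml_cgraph` (with its fixed `GGML_MAX_NODES` arrays) with a graph allocated inside the `ggml_context`, sized per graph. A minimal sketch of the updated flow, assuming the post-change API; the tensor shapes, sizes, and the use of `ggml_graph_compute_with_ctx()` here are illustrative and not part of this diff:

#include <stdio.h>
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024, // one arena holds tensors and the graph
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * x = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
    struct ggml_tensor * f = ggml_mul(ctx, x, a); // f = x*a

    // before this change: struct ggml_cgraph gf = ggml_build_forward(f);
    struct ggml_cgraph * gf = ggml_new_graph(ctx); // allocated in ctx, default capacity
    ggml_build_forward_expand(gf, f);

    ggml_set_f32(x, 2.0f);
    ggml_set_f32(a, 3.0f);

    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 1);

    printf("f = %f\n", ggml_get_f32_1d(f, 0)); // expect 6.0

    ggml_free(ctx);
    return 0;
}

Allocating the graph in the context is what lets `GGML_MAX_NODES` disappear from the header: capacity becomes a per-graph choice (`GGML_DEFAULT_GRAPH_SIZE` by default) rather than a compile-time global.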
@@ -943,6 +943,10 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
 
+    GGML_API struct ggml_tensor * ggml_leaky(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_relu_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);
@@ -1482,6 +1486,8 @@ extern "C" {
             int                   s0, // stride
             int                   p0); // padding
 
+    // the result will have 2*p0 padding for the first dimension
+    // and 2*p1 padding for the second dimension
     GGML_API struct ggml_tensor * ggml_pool_2d(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -1490,8 +1496,8 @@ extern "C" {
             int                   k1,
             int                   s0,
             int                   s1,
-            int                   p0,
-            int                   p1);
+            float                 p0,
+            float                 p1);
 
     // nearest interpolate
     // used in stable-diffusion
@@ -1732,19 +1738,22 @@ extern "C" {
     GGML_API void ggml_build_forward_expand (struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
     GGML_API void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph * gf, struct ggml_cgraph * gb, bool keep);
 
-    GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
-    GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
-
     // graph allocation in a context
-    GGML_API struct ggml_cgraph * ggml_new_graph        (struct ggml_context * ctx);
-    GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API struct ggml_cgraph * ggml_new_graph        (struct ggml_context * ctx); // size = GGML_DEFAULT_GRAPH_SIZE, grads = false
+    GGML_API struct ggml_cgraph * ggml_new_graph_custom (struct ggml_context * ctx, size_t size, bool grads);
+    GGML_API struct ggml_cgraph * ggml_graph_dup        (struct ggml_context * ctx, struct ggml_cgraph * cgraph);
+    GGML_API struct ggml_cgraph * ggml_graph_view       (struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i0, int i1);
+    GGML_API void                 ggml_graph_cpy        (struct ggml_cgraph * src, struct ggml_cgraph * dst);
+    GGML_API void                 ggml_graph_reset      (struct ggml_cgraph * cgraph); // zero grads
+    GGML_API void                 ggml_graph_clear      (struct ggml_cgraph * cgraph);
+
+    GGML_API size_t ggml_graph_overhead(void);
+    GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
 
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
     GGML_API struct ggml_cplan ggml_graph_plan   (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);
-    GGML_API               int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
-    GGML_API              void ggml_graph_reset  (struct ggml_cgraph * cgraph);
+    GGML_API int               ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
 
     // same as ggml_graph_compute() but the work data is allocated as a part of the context
     // note: the drawback of this API is that you must have ensured that the context has enough memory for the work data
@@ -1752,8 +1761,8 @@ extern "C" {
 
     GGML_API struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name);
 
-    GGML_API void               ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
+    GGML_API void                 ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
+    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
 
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
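Since graph capacity is now a runtime parameter, the new `ggml_graph_overhead_custom()` lets callers size a context exactly for a graph of a given capacity. A hedged sketch of that pattern, assuming the declarations above; the 4096-node capacity and the `make_big_graph` helper are hypothetical:

#include "ggml.h"

// hypothetical helper: the caller must keep *out_ctx alive for as long as
// the returned graph is used, and free it with ggml_free()
struct ggml_cgraph * make_big_graph(struct ggml_context ** out_ctx) {
    const size_t graph_size = 4096; // per-graph capacity, replaces GGML_MAX_NODES

    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_graph_overhead_custom(graph_size, /*grads =*/ true),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true, // context holds only metadata, no tensor data
    };
    struct ggml_context * ctx = ggml_init(params);

    // grads = true also reserves the grads array, as needed for training
    struct ggml_cgraph * gf = ggml_new_graph_custom(ctx, graph_size, /*grads =*/ true);

    *out_ctx = ctx;
    return gf;
}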
@@ -1816,6 +1825,8 @@ extern "C" {
     struct ggml_opt_params {
         enum ggml_opt_type type;
 
+        size_t graph_size;
+
         int n_threads;
 
         // delta-based convergence test
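The new `graph_size` field lets the optimizer build its internal forward/backward graphs with a caller-chosen capacity instead of the removed fixed maximum. A speculative usage sketch, assuming `ggml_opt_default_params()` fills `graph_size` with a sane default; the `optimize` wrapper is hypothetical:

#include "ggml.h"

// hypothetical wrapper: minimize f with ADAM, setting the new graph_size field
enum ggml_opt_result optimize(struct ggml_context * ctx, struct ggml_tensor * f) {
    struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM);

    opt_params.graph_size = GGML_DEFAULT_GRAPH_SIZE; // raise for deeper graphs
    opt_params.n_threads  = 4;

    // ggml_opt() builds its forward/backward graphs using opt_params.graph_size
    return ggml_opt(ctx, opt_params, f);
}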