summaryrefslogtreecommitdiff
path: root/ggml/src/ggml.c
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src/ggml.c')
-rw-r--r--ggml/src/ggml.c985
1 files changed, 543 insertions, 442 deletions
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index b5fdb96d..73054bfe 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -47,6 +47,9 @@
#include <unistd.h>
#endif
+#if defined(__ARM_FEATURE_SVE)
+int ggml_sve_cnt_b = 0;
+#endif
#if defined(__ARM_FEATURE_SVE) || defined(__ARM_FEATURE_MATMUL_INT8)
#undef GGML_USE_LLAMAFILE
#endif
@@ -63,6 +66,9 @@
// disable POSIX deprecation warnings
// these functions are never going away, anyway
#pragma warning(disable: 4996)
+
+// unreachable code because of multiple instances of code after GGML_ABORT
+#pragma warning(disable: 4702)
#endif
#if defined(_WIN32)
@@ -151,23 +157,69 @@ typedef pthread_t ggml_thread_t;
#include <sys/wait.h>
-void ggml_print_backtrace(void) {
- /*
- #include <execinfo.h>
- #include <dlfcn.h>
+#if defined(__ANDROID__)
+#include <unwind.h>
+#include <dlfcn.h>
+#include <stdio.h>
- void * trace[100];
+struct backtrace_state {
+ void ** current;
+ void ** end;
+};
- int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
+static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
+ struct backtrace_state * state = (struct backtrace_state *)arg;
+ uintptr_t pc = _Unwind_GetIP(context);
+ if (pc) {
+ if (state->current == state->end) {
+ return _URC_END_OF_STACK;
+ } else {
+ *state->current++ = (void*)pc;
+ }
+ }
+ return _URC_NO_REASON;
+}
+
+static void ggml_print_backtrace_symbols(void) {
+ const int max = 100;
+ void* buffer[max];
+ struct backtrace_state state = {buffer, buffer + max};
+ _Unwind_Backtrace(unwind_callback, &state);
+
+ int count = state.current - buffer;
+
+ for (int idx = 0; idx < count; ++idx) {
+ const void * addr = buffer[idx];
+ const char * symbol = "";
+
+ Dl_info info;
+ if (dladdr(addr, &info) && info.dli_sname) {
+ symbol = info.dli_sname;
+ }
+
+ fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
+ }
+}
+#elif defined(__linux__) && defined(__GLIBC__)
+#include <execinfo.h>
+static void ggml_print_backtrace_symbols(void) {
+ void * trace[100];
+ int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
- */
+}
+#else
+static void ggml_print_backtrace_symbols(void) {
+ // platform not supported
+}
+#endif
- // backtrack_symbols does not show line numbers, use gdb instead
+static void ggml_print_backtrace(void) {
char attach[32];
snprintf(attach, sizeof(attach), "attach %d", getpid());
int pid = fork();
if (pid == 0) {
+ // try gdb
execlp("gdb", "gdb", "--batch",
"-ex", "set style enabled on",
"-ex", attach,
@@ -175,16 +227,46 @@ void ggml_print_backtrace(void) {
"-ex", "detach",
"-ex", "quit",
(char *) NULL);
+ // try lldb
+ execlp("lldb", "lldb", "--batch",
+ "-o", "bt",
+ "-o", "quit",
+ "-p", attach,
+ (char *) NULL);
+ exit(EXIT_FAILURE);
} else {
- waitpid(pid, NULL, 0);
+ int wstatus;
+ waitpid(pid, &wstatus, 0);
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == EXIT_FAILURE) {
+ // gdb failed, fallback to backtrace_symbols
+ ggml_print_backtrace_symbols();
+ }
+ }
}
}
#else
-void ggml_print_backtrace(void) {
+static void ggml_print_backtrace(void) {
// platform not supported
}
#endif
+void ggml_abort(const char * file, int line, const char * fmt, ...) {
+ fflush(stdout);
+
+ fprintf(stderr, "%s:%d: ", file, line);
+
+ va_list args;
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ fprintf(stderr, "\n");
+
+ ggml_print_backtrace();
+ abort();
+}
+
#define GGML_DEBUG 0
#define GGML_GELU_FP16
#define GGML_GELU_QUICK_FP16
@@ -256,7 +338,7 @@ inline static void * ggml_aligned_malloc(size_t size) {
break;
}
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
return NULL;
}
return aligned_memory;
@@ -277,7 +359,7 @@ inline static void * ggml_malloc(size_t size) {
void * result = malloc(size);
if (result == NULL) {
GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
return result;
}
@@ -291,7 +373,7 @@ inline static void * ggml_calloc(size_t num, size_t size) {
void * result = calloc(num, size);
if (result == NULL) {
GGML_PRINT("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
return result;
}
@@ -414,9 +496,16 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
}
}
+void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
+ for (int i = 0; i < n; i++) {
+ y[i] = ggml_compute_fp32_to_bf16(x[i]);
+ }
+}
+
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
int i = 0;
#if defined(__AVX512BF16__)
+ // subnormals are flushed to zero on this platform
for (; i + 32 <= n; i += 32) {
_mm512_storeu_si512(
(__m512i *)(y + i),
@@ -939,7 +1028,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.is_quantized = false,
.to_float = (ggml_to_float_t) ggml_bf16_to_fp32_row,
.from_float = (ggml_from_float_t) ggml_fp32_to_bf16_row,
- .from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row,
+ .from_float_ref = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
.vec_dot = (ggml_vec_dot_t) ggml_vec_dot_bf16,
.vec_dot_type = GGML_TYPE_BF16,
.nrows = 1,
@@ -2339,7 +2428,7 @@ inline static void ggml_vec_abs_f32 (const int n, float * y, const float * x) {
inline static void ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
//inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
-inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
+inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
inline static void ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); }
@@ -3544,7 +3633,7 @@ static inline int ggml_up(int n, int m) {
}
// assert that pointer is aligned to GGML_MEM_ALIGN
-#define ggml_assert_aligned(ptr) \
+#define GGML_ASSERT_ALIGNED(ptr) \
GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
////////////////////////////////////////////////////////////////////////////////
@@ -3645,7 +3734,13 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
GGML_ASSERT(ctx->mem_buffer != NULL);
- ggml_assert_aligned(ctx->mem_buffer);
+ GGML_ASSERT_ALIGNED(ctx->mem_buffer);
+
+#if defined(__ARM_FEATURE_SVE)
+ if (!ggml_sve_cnt_b) {
+ ggml_sve_cnt_b = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
+ }
+#endif
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
@@ -3777,7 +3872,7 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
.type = type,
};
- ggml_assert_aligned(mem_buffer + obj_new->offs);
+ GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
if (obj_cur != NULL) {
obj_cur->next = obj_new;
@@ -3801,7 +3896,8 @@ static struct ggml_tensor * ggml_new_tensor_impl(
struct ggml_tensor * view_src,
size_t view_offs) {
- assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
+ GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
+ GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
// find the base tensor and absolute offset
if (view_src != NULL && view_src->view_src != NULL) {
@@ -3878,7 +3974,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
#endif
// TODO: this should not be needed as long as we don't rely on aligned SIMD loads
- //ggml_assert_aligned(result->data);
+ //GGML_ASSERT_ALIGNED(result->data);
for (int i = 0; i < n_dims; i++) {
result->ne[i] = ne[i];
@@ -4051,8 +4147,8 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) {
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
return tensor;
@@ -4110,8 +4206,8 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) {
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
return tensor;
@@ -4180,11 +4276,9 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
}
default:
{
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
-
- return 0.0f;
}
void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
@@ -4227,8 +4321,8 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) {
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -4248,10 +4342,8 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
case GGML_TYPE_F32:
return ((float *) data)[0];
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
-
- return 0.0f;
}
void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, int32_t value) {
@@ -4283,8 +4375,8 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -4321,11 +4413,9 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
}
default:
{
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
-
- return 0.0f;
}
void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
@@ -4362,8 +4452,8 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) {
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -4383,10 +4473,8 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
case GGML_TYPE_F32:
return ((float *) data)[0];
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
-
- return 0.0f;
}
void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, int i3, float value) {
@@ -4418,8 +4506,8 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -4442,8 +4530,11 @@ const char * ggml_get_name(const struct ggml_tensor * tensor) {
}
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
- strncpy(tensor->name, name, sizeof(tensor->name) - 1);
- tensor->name[sizeof(tensor->name) - 1] = '\0';
+ size_t i;
+ for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
+ tensor->name[i] = name[i];
+ }
+ tensor->name[i] = '\0';
return tensor;
}
@@ -5014,7 +5105,7 @@ struct ggml_tensor * ggml_mean(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
is_node = true;
}
@@ -5037,7 +5128,7 @@ struct ggml_tensor * ggml_argmax(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
is_node = true;
}
@@ -5360,7 +5451,7 @@ static struct ggml_tensor * ggml_norm_impl(
bool is_node = false;
if (!inplace && (a->grad)) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -5459,17 +5550,19 @@ static struct ggml_tensor * ggml_group_norm_impl(
struct ggml_context * ctx,
struct ggml_tensor * a,
int n_groups,
+ float eps,
bool inplace) {
bool is_node = false;
if (!inplace && (a->grad)) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
- result->op_params[0] = n_groups;
+ ggml_set_op_params_i32(result, 0, n_groups);
+ ggml_set_op_params_f32(result, 1, eps);
result->op = GGML_OP_GROUP_NORM;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
@@ -5481,15 +5574,17 @@ static struct ggml_tensor * ggml_group_norm_impl(
struct ggml_tensor * ggml_group_norm(
struct ggml_context * ctx,
struct ggml_tensor * a,
- int n_groups) {
- return ggml_group_norm_impl(ctx, a, n_groups, false);
+ int n_groups,
+ float eps) {
+ return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
}
struct ggml_tensor * ggml_group_norm_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
- int n_groups) {
- return ggml_group_norm_impl(ctx, a, n_groups, true);
+ int n_groups,
+ float eps) {
+ return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
}
// ggml_mul_mat
@@ -5877,7 +5972,7 @@ struct ggml_tensor * ggml_reshape(
if (b->grad) {
// gradient propagation is not supported
- //GGML_ASSERT(false);
+ //GGML_ABORT("fatal error");
}
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
@@ -6660,7 +6755,7 @@ struct ggml_tensor * ggml_clamp(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -6736,7 +6831,7 @@ GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
bool is_node = false;
if (a->grad || b->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -6808,7 +6903,7 @@ struct ggml_tensor * ggml_im2col(
bool is_node = false;
if (a->grad || b->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -6894,7 +6989,7 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
bool is_node = false;
if (a->grad || b->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -6935,7 +7030,7 @@ struct ggml_tensor * ggml_pool_1d(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -6973,7 +7068,7 @@ struct ggml_tensor * ggml_pool_2d(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -7006,7 +7101,7 @@ static struct ggml_tensor * ggml_upscale_impl(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -7056,7 +7151,7 @@ struct ggml_tensor * ggml_pad(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -7105,7 +7200,7 @@ struct ggml_tensor * ggml_timestep_embedding(
bool is_node = false;
if (timesteps->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -7231,7 +7326,7 @@ struct ggml_tensor * ggml_flash_attn_back(
struct ggml_tensor * v,
struct ggml_tensor * d,
bool masked) {
- GGML_ASSERT(false && "TODO: adapt to ggml_flash_attn_ext() changes");
+ GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
GGML_ASSERT(ggml_can_mul_mat(k, q));
// TODO: check if vT can be multiplied by (k*qT)
@@ -7330,7 +7425,7 @@ struct ggml_tensor * ggml_ssm_conv(
bool is_node = false;
if (s->grad || x->grad || c->grad || sq->grad) {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
is_node = true;
}
@@ -7384,7 +7479,7 @@ struct ggml_tensor * ggml_ssm_scan(
bool is_node = false;
if (s->grad || x->grad || dt->grad || A->grad || B->grad || C->grad || sq->grad) {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
is_node = true;
}
@@ -7416,7 +7511,7 @@ struct ggml_tensor * ggml_win_part(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -7454,7 +7549,7 @@ struct ggml_tensor * ggml_win_unpart(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -7484,7 +7579,7 @@ struct ggml_tensor * ggml_get_rel_pos(
bool is_node = false;
if (a->grad) {
- GGML_ASSERT(false); // TODO: implement backward
+ GGML_ABORT("fatal error"); // TODO: implement backward
is_node = true;
}
@@ -8174,7 +8269,7 @@ static void ggml_compute_forward_dup_f16(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
} else {
//printf("%s: this is not optimal - fix me\n", __func__);
@@ -8216,7 +8311,7 @@ static void ggml_compute_forward_dup_f16(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
return;
@@ -8333,7 +8428,7 @@ static void ggml_compute_forward_dup_f16(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
@@ -8460,7 +8555,7 @@ static void ggml_compute_forward_dup_bf16(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
} else {
//printf("%s: this is not optimal - fix me\n", __func__);
@@ -8520,7 +8615,7 @@ static void ggml_compute_forward_dup_bf16(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
return;
@@ -8689,7 +8784,7 @@ static void ggml_compute_forward_dup_bf16(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
@@ -8775,7 +8870,7 @@ static void ggml_compute_forward_dup_f32(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
} else {
//printf("%s: this is not optimal - fix me\n", __func__);
@@ -8835,7 +8930,7 @@ static void ggml_compute_forward_dup_f32(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
@@ -9006,7 +9101,7 @@ static void ggml_compute_forward_dup_f32(
}
}
} else {
- GGML_ASSERT(false); // TODO: implement
+ GGML_ABORT("fatal error"); // TODO: implement
}
}
@@ -9184,8 +9279,8 @@ static void ggml_compute_forward_dup(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -9337,7 +9432,7 @@ static void ggml_compute_forward_add_f16_f32(
}
else {
// src1 is not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
@@ -9412,7 +9507,7 @@ static void ggml_compute_forward_add_bf16_f32(
}
else {
// src1 is not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
@@ -9464,7 +9559,7 @@ static void ggml_compute_forward_add_f16_f16(
}
else {
// src1 is not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
@@ -9516,7 +9611,7 @@ static void ggml_compute_forward_add_bf16_bf16(
}
else {
// src1 is not contiguous
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
}
@@ -9610,7 +9705,7 @@ static void ggml_compute_forward_add(
ggml_compute_forward_add_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_F16:
@@ -9622,7 +9717,7 @@ static void ggml_compute_forward_add(
ggml_compute_forward_add_f16_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_BF16:
@@ -9634,7 +9729,7 @@ static void ggml_compute_forward_add(
ggml_compute_forward_add_bf16_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_Q4_0:
@@ -9672,8 +9767,8 @@ static void ggml_compute_forward_add(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10007,7 +10102,7 @@ static void ggml_compute_forward_add1(
ggml_compute_forward_add1_f16_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_BF16:
@@ -10019,7 +10114,7 @@ static void ggml_compute_forward_add1(
ggml_compute_forward_add1_bf16_f32(params, dst);
}
else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_TYPE_Q4_0:
@@ -10058,8 +10153,8 @@ static void ggml_compute_forward_add1(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10191,8 +10286,8 @@ static void ggml_compute_forward_acc(
case GGML_TYPE_Q4_0_8_8:
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10272,8 +10367,8 @@ static void ggml_compute_forward_sub(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10383,8 +10478,8 @@ static void ggml_compute_forward_mul(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10474,8 +10569,8 @@ static void ggml_compute_forward_div(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10519,8 +10614,8 @@ static void ggml_compute_forward_sqr(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10564,8 +10659,8 @@ static void ggml_compute_forward_sqrt(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10609,8 +10704,8 @@ static void ggml_compute_forward_log(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10738,8 +10833,8 @@ static void ggml_compute_forward_sum(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10791,8 +10886,8 @@ static void ggml_compute_forward_sum_rows(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10848,8 +10943,8 @@ static void ggml_compute_forward_mean(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -10896,8 +10991,8 @@ static void ggml_compute_forward_argmax(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11014,8 +11109,8 @@ static void ggml_compute_forward_repeat(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11092,8 +11187,8 @@ static void ggml_compute_forward_repeat_back(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11161,8 +11256,8 @@ static void ggml_compute_forward_concat(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11205,8 +11300,8 @@ static void ggml_compute_forward_abs(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11249,8 +11344,8 @@ static void ggml_compute_forward_sgn(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11293,8 +11388,8 @@ static void ggml_compute_forward_neg(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11337,8 +11432,8 @@ static void ggml_compute_forward_step(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11380,8 +11475,8 @@ static void ggml_compute_forward_tanh(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11424,8 +11519,8 @@ static void ggml_compute_forward_elu(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11468,8 +11563,8 @@ static void ggml_compute_forward_relu(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11512,8 +11607,8 @@ static void ggml_compute_forward_sigmoid(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11571,8 +11666,8 @@ static void ggml_compute_forward_gelu(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11630,8 +11725,8 @@ static void ggml_compute_forward_gelu_quick(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11689,8 +11784,8 @@ static void ggml_compute_forward_silu(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
// ggml_compute_forward_leaky_relu
@@ -11738,8 +11833,8 @@ static void ggml_compute_forward_leaky_relu(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11801,8 +11896,8 @@ static void ggml_compute_forward_silu_back(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11843,8 +11938,8 @@ static void ggml_compute_forward_hardswish(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11885,8 +11980,8 @@ static void ggml_compute_forward_hardsigmoid(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -11957,8 +12052,8 @@ static void ggml_compute_forward_norm(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -12025,8 +12120,8 @@ static void ggml_compute_forward_rms_norm(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -12198,8 +12293,8 @@ static void ggml_compute_forward_rms_norm_back(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -12220,10 +12315,11 @@ static void ggml_compute_forward_group_norm_f32(
GGML_TENSOR_UNARY_OP_LOCALS
- const float eps = 1e-6f; // TODO: make this a parameter
-
// TODO: optimize
+ float eps;
+ memcpy(&eps, dst->op_params + 1, sizeof(float));
+
int n_channels = src0->ne[2];
int n_groups = dst->op_params[0];
int n_channels_per_group = (n_channels + n_groups - 1) / n_groups;
@@ -12292,8 +12388,8 @@ static void ggml_compute_forward_group_norm(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -12544,6 +12640,8 @@ UseGgmlGemm1:;
IQK_MulMat_Not_Available2:;
#endif
+ ggml_barrier(params->shared);
+
#if GGML_USE_LLAMAFILE
if (src1->type != vec_dot_type) {
const size_t row_size = ggml_row_size(vec_dot_type, ne10);
@@ -12802,6 +12900,34 @@ IQK_MulMat_Not_Available:;
continue;
}
+ if (((ggml_n_dims(src0) - 1) == 2) && gemv) {
+ int64_t src0_cur_start = (ith * ne01) / nth;
+ int64_t src0_cur_end = ((ith + 1) * ne01) / nth;
+ src0_cur_start = (src0_cur_start % matmul_num_cols) ? src0_cur_start + matmul_num_cols - (src0_cur_start % matmul_num_cols): src0_cur_start;
+ src0_cur_end = (src0_cur_end % matmul_num_cols) ? src0_cur_end + matmul_num_cols - (src0_cur_end % matmul_num_cols): src0_cur_end;
+ if (src0_cur_start >= src0_cur_end) return;
+
+ for (int ir1 = 0; ir1 < nr1; ir1++) {
+ struct mmid_row_mapping row_mapping = MMID_MATRIX_ROW(cur_a, ir1);
+ const int id = row_mapping.i1; // selected expert index
+
+ const int64_t i11 = id % ne11;
+ const int64_t i12 = row_mapping.i2; // row index in src1
+
+ const int64_t i1 = id; // selected expert index
+ const int64_t i2 = i12; // row
+
+ const char * src1_col = (const char *) wdata +
+ (src1_cont || src1->type != vec_dot_type
+ ? (i11 + i12 * ne11) * row_size
+ : (i11 * nb11 + i12 * nb12));
+
+ gemv(ne00, (float *)((char *) dst->data + (i1 * nb1 + i2 * nb2)) + src0_cur_start, ne01,
+ (const char *) src0_cur + src0_cur_start * nb01, src1_col, 1, src0_cur_end - src0_cur_start);
+ }
+ continue;
+ }
+
// distribute the thread work across the inner or outer loop based on which one is larger
const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
@@ -13119,17 +13245,17 @@ static void ggml_compute_forward_out_prod(
} break;
case GGML_TYPE_F16:
{
- GGML_ASSERT(false); // todo
+ GGML_ABORT("fatal error"); // todo
// ggml_compute_forward_out_prod_f16_f32(params, dst);
- } break;
+ }
case GGML_TYPE_F32:
{
ggml_compute_forward_out_prod_f32(params, dst);
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -13188,8 +13314,8 @@ static void ggml_compute_forward_scale(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -13312,8 +13438,8 @@ static void ggml_compute_forward_set(
case GGML_TYPE_Q4_0_8_8:
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -13598,8 +13724,8 @@ static void ggml_compute_forward_get_rows(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
//static bool first = true;
@@ -13706,8 +13832,8 @@ static void ggml_compute_forward_get_rows_back(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
//static bool first = true;
@@ -13784,8 +13910,8 @@ static void ggml_compute_forward_diag(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -13854,8 +13980,8 @@ static void ggml_compute_forward_diag_mask_inf(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -13872,8 +13998,8 @@ static void ggml_compute_forward_diag_mask_zero(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -13990,8 +14116,8 @@ static void ggml_compute_forward_soft_max(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -14086,8 +14212,8 @@ static void ggml_compute_forward_soft_max_back(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -14186,8 +14312,8 @@ static void ggml_compute_forward_clamp(
case GGML_TYPE_F64:
case GGML_TYPE_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -14516,8 +14642,8 @@ static void ggml_compute_forward_rope(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -14540,8 +14666,8 @@ static void ggml_compute_forward_rope_back(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -14740,8 +14866,8 @@ static void ggml_compute_forward_conv_transpose_1d(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -14912,8 +15038,8 @@ static void ggml_compute_forward_im2col(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -15024,7 +15150,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
const struct ggml_tensor * src = dst->src[0];
- assert(src->type == GGML_TYPE_F32);
+ assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
if (params->ith != 0) {
return;
@@ -15037,28 +15163,27 @@ static void ggml_compute_forward_pool_1d_sk_p0(
const int64_t rs = dst->ne[0];
while (cdata < data_end) {
- const float * const srow = (const float *)cdata;
-
+ const void * srow = (const void *)cdata;
int j = 0;
-
for (int64_t i = 0; i < rs; ++i) {
switch (op) {
case GGML_OP_POOL_AVG: drow[i] = 0; break;
case GGML_OP_POOL_MAX: drow[i] = -FLT_MAX; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
for (int ki = 0; ki < k; ++ki) {
+ const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
switch (op) {
- case GGML_OP_POOL_AVG: drow[i] += srow[j]; break;
- case GGML_OP_POOL_MAX: if (srow[j] > drow[i]) drow[i] = srow[j]; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_AVG: drow[i] += srow_j; break;
+ case GGML_OP_POOL_MAX: if (srow_j > drow[i]) drow[i] = srow_j; break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
++j;
}
switch (op) {
case GGML_OP_POOL_AVG: drow[i] /= k; break;
case GGML_OP_POOL_MAX: break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
}
@@ -15092,7 +15217,7 @@ static void ggml_compute_forward_pool_2d(
const struct ggml_tensor * src = dst->src[0];
- GGML_ASSERT(src->type == GGML_TYPE_F32);
+ assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
if (params->ith != 0) {
return;
@@ -15127,7 +15252,7 @@ static void ggml_compute_forward_pool_2d(
switch (op) {
case GGML_OP_POOL_AVG: *out = 0; break;
case GGML_OP_POOL_MAX: *out = -FLT_MAX; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
const int ix = offset0 + ox * s0;
@@ -15135,21 +15260,22 @@ static void ggml_compute_forward_pool_2d(
for (int ky = 0; ky < k1; ++ky) {
if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
- const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
+ const void * srow = (const void *)(cdata + src->nb[1] * (iy + ky));
for (int kx = 0; kx < k0; ++kx) {
int j = ix + kx;
if (j < 0 || j >= src->ne[0]) continue;
+ const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
switch (op) {
- case GGML_OP_POOL_AVG: *out += srow[j]; break;
- case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_AVG: *out += srow_j; break;
+ case GGML_OP_POOL_MAX: if (srow_j > *out) *out = srow_j; break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
}
}
switch (op) {
case GGML_OP_POOL_AVG: *out /= ka; break;
case GGML_OP_POOL_MAX: break;
- case GGML_OP_POOL_COUNT: GGML_ASSERT(false); break;
+ case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
}
}
}
@@ -15213,8 +15339,8 @@ static void ggml_compute_forward_upscale(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -15271,8 +15397,8 @@ static void ggml_compute_forward_pad(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -15312,8 +15438,8 @@ static void ggml_compute_forward_arange(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -15363,8 +15489,8 @@ static void ggml_compute_forward_timestep_embedding(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -15422,8 +15548,8 @@ static void ggml_compute_forward_argsort(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -15645,8 +15771,8 @@ static void ggml_compute_forward_flash_attn_ext(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -15981,8 +16107,8 @@ static void ggml_compute_forward_flash_attn_back(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16103,8 +16229,8 @@ static void ggml_compute_forward_ssm_conv(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16224,8 +16350,8 @@ static void ggml_compute_forward_ssm_scan(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16287,8 +16413,8 @@ static void ggml_compute_forward_win_part(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16348,8 +16474,8 @@ static void ggml_compute_forward_win_unpart(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16416,8 +16542,8 @@ static void ggml_compute_forward_unary(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16463,8 +16589,8 @@ static void ggml_compute_forward_get_rel_pos(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16544,8 +16670,8 @@ static void ggml_compute_forward_add_rel_pos(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16590,8 +16716,8 @@ static void ggml_compute_forward_map_unary(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16639,8 +16765,8 @@ static void ggml_compute_forward_map_binary(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16838,8 +16964,8 @@ static void ggml_compute_forward_cross_entropy_loss(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -16925,8 +17051,8 @@ static void ggml_compute_forward_cross_entropy_loss_back(
} break;
default:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
@@ -17261,14 +17387,32 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
} break;
case GGML_OP_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
}
////////////////////////////////////////////////////////////////////////////////
-static size_t ggml_hash_size(size_t min_sz) {
+struct ggml_hash_set ggml_hash_set_new(size_t size) {
+ size = ggml_hash_size(size);
+ struct ggml_hash_set result;
+ result.size = size;
+ result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
+ result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
+ return result;
+}
+
+void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
+ memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
+}
+
+void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
+ GGML_FREE(hash_set->used);
+ GGML_FREE(hash_set->keys);
+}
+
+size_t ggml_hash_size(size_t min_sz) {
// next primes after powers of two
static const size_t primes[] = {
2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
@@ -17279,7 +17423,7 @@ static size_t ggml_hash_size(size_t min_sz) {
};
static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
- // find the smallest prime that is larger or equal to min_sz
+ // find the smallest prime that is larger or equal than min_sz
size_t l = 0;
size_t r = n_primes;
while (l < r) {
@@ -17294,67 +17438,6 @@ static size_t ggml_hash_size(size_t min_sz) {
return sz;
}
-static size_t ggml_hash(const void * p) {
- return (size_t)p;
-}
-
-size_t ggml_hash_find(const struct ggml_hash_set hash_set, struct ggml_tensor * key) {
- size_t h = ggml_hash(key) % hash_set.size;
-
- // linear probing
- size_t i = h;
- while (hash_set.keys[i] != NULL && hash_set.keys[i] != key) {
- i = (i + 1) % hash_set.size;
- if (i == h) {
- // visited all hash table entries -> not found
- return GGML_HASHTABLE_FULL;
- }
- }
- return i;
-}
-
-bool ggml_hash_contains(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
- size_t i = ggml_hash_find(hash_set, key);
- return i != GGML_HASHTABLE_FULL && hash_set.keys[i] == key;
-}
-
-size_t ggml_hash_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
- size_t i = ggml_hash_find(hash_set, key);
-
- GGML_ASSERT(i != GGML_HASHTABLE_FULL);
-
- if (hash_set.keys[i] == key) {
- return GGML_HASHTABLE_ALREADY_EXISTS;
- }
-
- // insert
- GGML_ASSERT(hash_set.keys[i] == NULL);
- hash_set.keys[i] = key;
- return i;
-}
-
-size_t ggml_hash_find_or_insert(struct ggml_hash_set hash_set, struct ggml_tensor * key) {
- size_t i = ggml_hash_find(hash_set, key);
-
- GGML_ASSERT(i != GGML_HASHTABLE_FULL);
-
- hash_set.keys[i] = key;
- return i;
-}
-
-struct ggml_hash_set ggml_hash_set_new(size_t size) {
- size = ggml_hash_size(size);
- struct ggml_hash_set result;
- result.size = size;
- result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
- memset(result.keys, 0, sizeof(struct ggml_tensor *) * size);
- return result;
-}
-
-static void ggml_hash_set_free(struct ggml_hash_set hash_set) {
- GGML_FREE(hash_set.keys);
-}
-
struct hash_map {
struct ggml_hash_set set;
struct ggml_tensor ** vals;
@@ -17363,13 +17446,12 @@ struct hash_map {
static struct hash_map * ggml_new_hash_map(size_t size) {
struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
result->set = ggml_hash_set_new(size);
- result->vals = GGML_MALLOC(sizeof(struct ggml_tensor *) * result->set.size);
- memset(result->vals, 0, sizeof(struct ggml_tensor *) * result->set.size);
+ result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
return result;
}
static void ggml_hash_map_free(struct hash_map * map) {
- ggml_hash_set_free(map->set);
+ ggml_hash_set_free(&map->set);
GGML_FREE(map->vals);
GGML_FREE(map);
}
@@ -17390,7 +17472,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
return node;
}
- if (!ggml_hash_contains(graph->visited_hash_table, node)) {
+ if (!ggml_hash_contains(&graph->visited_hash_set, node)) {
return node;
}
@@ -17405,8 +17487,8 @@ static struct ggml_tensor * ggml_recompute_graph_node(
return node;
}
- size_t i = ggml_hash_find(replacements->set, node);
- GGML_ASSERT(i != GGML_HASHTABLE_FULL); // assert that not full
+ size_t i = ggml_hash_find(&replacements->set, node);
+ GGML_ASSERT(i != GGML_HASHSET_FULL); // assert that not full
if (replacements->set.keys[i] == node) {
return replacements->vals[i];
}
@@ -17464,8 +17546,8 @@ void ggml_build_backward_gradient_checkpointing(
// insert checkpoints in replacements
for (int i = 0; i < n_checkpoints; ++i) {
- size_t k = ggml_hash_find(replacements->set, checkpoints[i]);
- GGML_ASSERT(k != GGML_HASHTABLE_FULL); // assert that not full
+ size_t k = ggml_hash_find(&replacements->set, checkpoints[i]);
+ GGML_ASSERT(k != GGML_HASHSET_FULL); // assert that not full
GGML_ASSERT(replacements->set.keys[k] == NULL); // assert that we don't overwrite
replacements->set.keys[k] = checkpoints[i];
replacements->vals[k] = checkpoints[i];
@@ -17493,7 +17575,7 @@ void ggml_build_backward_gradient_checkpointing(
// functions to change gradients considering the case that input a might be initial gradient with zero value
-static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) {
return b;
} else {
@@ -17501,7 +17583,7 @@ static struct ggml_tensor * ggml_add_or_set(struct ggml_context * ctx, struct gg
}
}
-static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, size_t nb1, size_t nb2, size_t nb3, size_t offset, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) {
struct ggml_tensor * a_zero = ggml_scale(ctx, a, 0.0f);
return ggml_acc_impl(ctx, a_zero, b, nb1, nb2, nb3, offset, false);
@@ -17510,7 +17592,7 @@ static struct ggml_tensor * ggml_acc_or_set(struct ggml_context * ctx, struct gg
}
}
-static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) {
return ggml_repeat(ctx, b, a);
} else {
@@ -17518,7 +17600,7 @@ static struct ggml_tensor * ggml_add1_or_set(struct ggml_context * ctx, struct g
}
}
-static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set zero_table) {
+static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b, struct ggml_hash_set * zero_table) {
if (ggml_hash_contains(zero_table, a)) {
return ggml_neg(ctx, b);
} else {
@@ -17526,7 +17608,7 @@ static struct ggml_tensor * ggml_sub_or_set(struct ggml_context * ctx, struct gg
}
}
-static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set zero_table) {
+static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, struct ggml_hash_set * zero_table) {
struct ggml_tensor * src0 = tensor->src[0];
struct ggml_tensor * src1 = tensor->src[1];
struct ggml_tensor * src2 = tensor->src[2];
@@ -17695,8 +17777,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
case GGML_OP_MEAN:
case GGML_OP_ARGMAX:
{
- GGML_ASSERT(false); // TODO: implement
- } break;
+ GGML_ABORT("fatal error"); // TODO: implement
+ }
case GGML_OP_REPEAT:
{
// necessary for llama
@@ -17719,16 +17801,16 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_CONCAT:
{
- GGML_ASSERT(false); // TODO: implement
- } break;
+ GGML_ABORT("fatal error"); // TODO: implement
+ }
case GGML_OP_SILU_BACK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_NORM:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_RMS_NORM:
{
// necessary for llama
@@ -17744,12 +17826,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_RMS_NORM_BACK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_GROUP_NORM:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_MUL_MAT:
{
// https://cs231n.github.io/optimization-2/#staged
@@ -17810,12 +17892,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_MUL_MAT_ID:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_OUT_PROD:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_SCALE:
{
// necessary for llama
@@ -17991,12 +18073,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_GET_ROWS_BACK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_DIAG:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_DIAG_MASK_INF:
{
// necessary for llama
@@ -18034,8 +18116,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_SOFT_MAX_BACK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_ROPE:
{
// necessary for llama
@@ -18110,52 +18192,52 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_CLAMP:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_CONV_TRANSPOSE_1D:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_IM2COL:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_CONV_TRANSPOSE_2D:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_POOL_1D:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_POOL_2D:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_UPSCALE:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_PAD:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_ARANGE:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_TIMESTEP_EMBEDDING:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_ARGSORT:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_LEAKY_RELU:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_FLASH_ATTN_EXT:
{
struct ggml_tensor * flash_grad = NULL;
@@ -18211,13 +18293,13 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_FLASH_ATTN_BACK:
{
- GGML_ASSERT(false); // not supported
- } break;
+ GGML_ABORT("fatal error"); // not supported
+ }
case GGML_OP_SSM_CONV:
case GGML_OP_SSM_SCAN:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_OP_WIN_PART:
case GGML_OP_WIN_UNPART:
case GGML_OP_UNARY:
@@ -18255,12 +18337,12 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_UNARY_OP_TANH:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_ELU:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_RELU:
{
if (src0->grad) {
@@ -18274,16 +18356,16 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_UNARY_OP_SIGMOID:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_GELU:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_GELU_QUICK:
{
- GGML_ASSERT(false); // TODO: not implemented
- } break;
+ GGML_ABORT("fatal error"); // TODO: not implemented
+ }
case GGML_UNARY_OP_SILU:
{
// necessary for llama
@@ -18295,7 +18377,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
}
} break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_OP_GET_REL_POS:
@@ -18309,8 +18391,8 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
case GGML_OP_MAP_CUSTOM2:
case GGML_OP_MAP_CUSTOM3:
{
- GGML_ASSERT(false); // not supported
- } break;
+ GGML_ABORT("fatal error"); // not supported
+ }
case GGML_OP_CROSS_ENTROPY_LOSS:
{
if (src0->grad) {
@@ -18325,16 +18407,16 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
} break;
case GGML_OP_CROSS_ENTROPY_LOSS_BACK:
{
- GGML_ASSERT(false); // not supported
- } break;
+ GGML_ABORT("fatal error"); // not supported
+ }
case GGML_OP_NONE:
{
// nop
} break;
case GGML_OP_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
for (int i = 0; i < GGML_MAX_SRC; ++i) {
@@ -18354,7 +18436,7 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor *
}
// check if already visited
- if (ggml_hash_insert(cgraph->visited_hash_table, node) == GGML_HASHTABLE_ALREADY_EXISTS) {
+ if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) {
return;
}
@@ -18400,7 +18482,6 @@ static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_ten
}
const int n0 = cgraph->n_nodes;
- UNUSED(n0);
ggml_visit_parents(cgraph, tensor);
@@ -18436,7 +18517,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
struct ggml_hash_set zero_table = ggml_hash_set_new(gf->size);
for (int i = 0; i < gf->n_nodes; i++) {
if (gf->grads[i]) {
- ggml_hash_insert(zero_table, gf->grads[i]);
+ ggml_hash_insert(&zero_table, gf->grads[i]);
}
}
@@ -18446,7 +18527,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
// inplace operations to add gradients are not created by ggml_compute_backward
// use allocator to automatically make inplace operations
if (node->grad) {
- ggml_compute_backward(ctx, node, zero_table);
+ ggml_compute_backward(ctx, node, &zero_table);
}
}
@@ -18459,16 +18540,29 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
}
}
- ggml_hash_set_free(zero_table);
+ ggml_hash_set_free(&zero_table);
+}
+
+static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
+ void * ptr = *p;
+ ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
+ *p = (void *) ((char *) ptr + size);
+ return ptr;
}
static size_t ggml_graph_nbytes(size_t size, bool grads) {
- size_t nbytes = sizeof(struct ggml_cgraph);
- nbytes += size * sizeof(struct ggml_tensor *) * 2; // leafs + nodes
+ size_t hash_size = ggml_hash_size(size * 2);
+ void * p = 0;
+ incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
+ incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
+ incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
+ incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
if (grads) {
- nbytes += size * sizeof(struct ggml_tensor *); // grads
+ incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
}
- nbytes += ggml_hash_size(size * 2) * sizeof(struct ggml_tensor *); // hash set
+ incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
+
+ size_t nbytes = (size_t) p;
return nbytes;
}
@@ -18485,19 +18579,19 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
- struct ggml_tensor ** data_start = (struct ggml_tensor **) (cgraph + 1);
-
+ // the size of the hash table is doubled since it needs to hold both nodes and leafs
size_t hash_size = ggml_hash_size(size * 2);
- struct ggml_tensor ** nodes_ptr = data_start;
- struct ggml_tensor ** leafs_ptr = nodes_ptr + size;
- struct ggml_tensor ** hash_keys_ptr = leafs_ptr + size;
- struct ggml_tensor ** grads_ptr = grads ? hash_keys_ptr + hash_size : NULL;
- // check that we allocated the correct amount of memory
- assert(obj_size == (size_t) (
- (grads ? (char *)(grads_ptr + size) : (char *)(hash_keys_ptr + hash_size)) - (char *)cgraph));
+ void * p = cgraph + 1;
- memset(hash_keys_ptr, 0, hash_size * sizeof(struct ggml_tensor *));
+ struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+ struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+ struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
+ struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
+ ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
+
+ // check that we allocated the correct amount of memory
+ assert(obj_size == (size_t)((char *)p - (char *)cgraph));
*cgraph = (struct ggml_cgraph) {
/*.size =*/ size,
@@ -18506,10 +18600,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
/*.nodes =*/ nodes_ptr,
/*.grads =*/ grads_ptr,
/*.leafs =*/ leafs_ptr,
- /*.hash_table =*/ { hash_size, hash_keys_ptr },
+ /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
};
+ ggml_hash_set_reset(&cgraph->visited_hash_set);
+
return cgraph;
}
@@ -18525,7 +18621,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
/*.nodes =*/ cgraph0->nodes + i0,
/*.grads =*/ cgraph0->grads ? cgraph0->grads + i0 : NULL,
/*.leafs =*/ NULL,
- /*.hash_table =*/ { 0, NULL },
+ /*.hash_table =*/ { 0, NULL, NULL },
/*.order =*/ cgraph0->order,
};
@@ -18535,7 +18631,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
GGML_ASSERT(dst->size >= src->n_leafs);
GGML_ASSERT(dst->size >= src->n_nodes);
- GGML_ASSERT(dst->visited_hash_table.size >= src->visited_hash_table.size);
+ GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
dst->n_leafs = src->n_leafs;
dst->n_nodes = src->n_nodes;
@@ -18556,9 +18652,9 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
}
}
- for (size_t i = 0; i < src->visited_hash_table.size; ++i) {
- if (src->visited_hash_table.keys[i]) {
- ggml_hash_insert(dst->visited_hash_table, src->visited_hash_table.keys[i]);
+ for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
+ if (src->visited_hash_set.keys[i]) {
+ ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
}
}
}
@@ -18584,7 +18680,7 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
cgraph->n_leafs = 0;
cgraph->n_nodes = 0;
- memset(cgraph->visited_hash_table.keys, 0, cgraph->visited_hash_table.size * sizeof(struct ggml_tensor *));
+ ggml_hash_set_reset(&cgraph->visited_hash_set);
}
//
@@ -18779,7 +18875,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
n_tasks = MIN(ggml_nrows(node), n_threads);
} break;
default:
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
break;
case GGML_OP_SILU_BACK:
@@ -18906,8 +19002,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
} break;
case GGML_OP_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
default:
{
fprintf(stderr, "%s: op not implemented: ", __func__);
@@ -18916,8 +19012,8 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
} else {
fprintf(stderr, "%d\n", node->op);
}
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
}
assert(n_tasks > 0);
@@ -19027,7 +19123,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
cur += sizeof(float)*ne00*ne01*ne02;
cur += sizeof(float)*ne10*ne11;
} else {
- GGML_ASSERT(false);
+ GGML_ABORT("fatal error");
}
} break;
case GGML_OP_CONV_TRANSPOSE_2D:
@@ -19073,8 +19169,8 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
} break;
case GGML_OP_COUNT:
{
- GGML_ASSERT(false);
- } break;
+ GGML_ABORT("fatal error");
+ }
default:
break;
}
@@ -20308,9 +20404,9 @@ static enum ggml_opt_result linesearch_backtracking(
(*step) *= width;
}
- GGML_ASSERT(false && "line search failed");
+ GGML_ABORT("line search failed");
- return GGML_LINESEARCH_FAIL;
+ //return GGML_LINESEARCH_FAIL;
}
static enum ggml_opt_result ggml_opt_lbfgs(
@@ -20578,9 +20674,9 @@ static enum ggml_opt_result ggml_opt_lbfgs(
step[0] = 1.0;
}
- GGML_ASSERT(false && "lbfgs failed");
+ GGML_ABORT("lbfgs failed");
- return GGML_OPT_RESULT_DID_NOT_CONVERGE;
+ //return GGML_OPT_RESULT_DID_NOT_CONVERGE;
}
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
@@ -20925,7 +21021,7 @@ size_t ggml_quantize_chunk(
case GGML_TYPE_BF16:
{
size_t elemsize = sizeof(ggml_bf16_t);
- ggml_fp32_to_bf16_row(src + start, (ggml_bf16_t *)dst + start, n);
+ ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
result = n * elemsize;
} break;
case GGML_TYPE_F32:
@@ -21283,10 +21379,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
}
} break;
case GGUF_TYPE_ARRAY:
- default: GGML_ASSERT(false && "invalid type"); break;
+ default: GGML_ABORT("invalid type");
}
} break;
- default: GGML_ASSERT(false && "invalid type");
+ default: GGML_ABORT("invalid type");
}
if (!ok) {
@@ -21453,7 +21549,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
ctx->infos[i].ne[3],
};
- struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, ctx->infos[i].n_dims, ne);
+ int n_dims = ctx->infos[i].n_dims;
+ if (n_dims == 0 || n_dims > 4) {
+ n_dims = 4;
+ for (; n_dims > 1; --n_dims) if (ne[n_dims-1] > 1) break;
+ }
+ struct ggml_tensor * cur = ggml_new_tensor(ctx_data, ctx->infos[i].type, n_dims, ne);
ok = ok && cur != NULL;
@@ -21867,12 +21968,12 @@ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
GGML_FREE((void *)data);
} else if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
- GGML_ASSERT(false && "nested arrays not supported");
+ GGML_ABORT("nested arrays not supported");
} else {
gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
}
} break;
- default: GGML_ASSERT(false && "invalid type"); break;
+ default: GGML_ABORT("invalid type");
}
}
}
@@ -21881,7 +21982,7 @@ void gguf_add_tensor(
struct gguf_context * ctx,
const struct ggml_tensor * tensor) {
if (gguf_find_tensor(ctx, tensor->name) != -1) {
- GGML_ASSERT(false && "duplicated tensor name");
+ GGML_ABORT("duplicated tensor name");
}
const int idx = ctx->header.n_tensors;
@@ -21914,7 +22015,7 @@ void gguf_add_tensor(
void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggml_type type) {
const int idx = gguf_find_tensor(ctx, name);
if (idx < 0) {
- GGML_ASSERT(false && "tensor not found");
+ GGML_ABORT("tensor not found");
}
ctx->infos[idx].type = type;
@@ -21923,7 +22024,7 @@ void gguf_set_tensor_type(struct gguf_context * ctx, const char * name, enum ggm
void gguf_set_tensor_data(struct gguf_context * ctx, const char * name, const void * data, size_t size) {
const int idx = gguf_find_tensor(ctx, name);
if (idx < 0) {
- GGML_ASSERT(false && "tensor not found");
+ GGML_ABORT("tensor not found");
}
ctx->infos[idx].data = data;
@@ -22052,10 +22153,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
}
} break;
case GGUF_TYPE_ARRAY:
- default: GGML_ASSERT(false && "invalid type"); break;
+ default: GGML_ABORT("invalid type");
}
} break;
- default: GGML_ASSERT(false && "invalid type");
+ default: GGML_ABORT("invalid type");
}
}
@@ -22116,7 +22217,7 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
void gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
FILE * file = ggml_fopen(fname, "wb");
if (!file) {
- GGML_ASSERT(false && "failed to open file for writing");
+ GGML_ABORT("failed to open file for writing");
}
struct gguf_buf buf = gguf_buf_init(16*1024);