Diffstat (limited to 'tests/test-grad0.cpp')
-rw-r--r--  tests/test-grad0.cpp  165
1 file changed, 119 insertions(+), 46 deletions(-)
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp
index 7b0c0fcd..4f49dc55 100644
--- a/tests/test-grad0.cpp
+++ b/tests/test-grad0.cpp
@@ -251,18 +251,20 @@ static bool check_gradient(
printf("GGML_N_THREADS = %d\n", n_threads);
}
- struct ggml_cgraph gf = ggml_build_forward (f);
- struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false);
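+ // graphs are now heap-allocated in ctx0: build the forward graph, start the backward
+ // graph as a copy of it, then expand that copy with the gradient nodes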
+ struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx0, f);
+ struct ggml_cgraph * gb = ggml_new_graph(ctx0);
+ *gb = *gf;
+ ggml_build_backward_expand(ctx0, gf, gb, false);
- ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
- ggml_graph_reset (&gf);
+ ggml_graph_reset (gf);
ggml_set_f32 (f->grad, 1.0f);
- ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);
+ ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
- // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot");
- // ggml_graph_dump_dot(&gb, &gf, "test-grad0-backward.dot");
+ // ggml_graph_dump_dot(gf, NULL, "test-grad0-forward.dot");
+ // ggml_graph_dump_dot(gb, gf, "test-grad0-backward.dot");
for (int i = 0; i < nargs; ++i) {
const int nelements = ggml_nelements(x[i]);
@@ -273,13 +275,13 @@ static bool check_gradient(
const float xp = x0 + eps;
ggml_set_f32_1d(x[i], k, xp);
- ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
const double f0 = ggml_get_f32_1d(f, 0);
ggml_set_f32_1d(x[i], k, xm);
- ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
const double f1 = ggml_get_f32_1d(f, 0);
const double g0 = (f0 - f1)/(2.0*(double) eps);
@@ -287,10 +289,10 @@ static bool check_gradient(
ggml_set_f32_1d(x[i], k, x0);
// compute gradient using backward graph
- ggml_graph_reset (&gf);
+ ggml_graph_reset (gf);
ggml_set_f32 (f->grad, 1.0f);
- ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);
+ ggml_graph_compute_with_ctx(ctx0, gb, n_threads);
const double g1 = ggml_get_f32_1d(x[i]->grad, k);
@@ -373,7 +375,7 @@ static bool check_mat_mul(
int main(int argc, const char ** argv) {
struct ggml_init_params params = {
- /* .mem_size = */ 128*1024*1024,
+ /* .mem_size = */ 256*1024*1024,
/* .mem_buffer = */ NULL,
/* .no_alloc = */ false,
};
@@ -405,6 +407,7 @@ int main(int argc, const char ** argv) {
}
}
+ unsigned seed_iter = 1;
// original loop: 1000
int niter = 4;
@@ -416,6 +419,10 @@ int main(int argc, const char ** argv) {
niter = atoi(argv[1]);
}
for (int iter = 0; iter < niter; ++iter) {
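+ // advance the iteration seed deterministically and derive a per-iteration seed;
+ // each test section below calls srand(seed) so that it draws the same random dims
+ // and data regardless of how many rand() calls the preceding sections made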
+ srand(seed_iter);
+ seed_iter = rand();
+ unsigned seed = rand();
+
printf("test-grad0: iter:%d/%d\n", iter, niter);
struct ggml_context * ctx0 = ggml_init(params);
@@ -425,6 +432,7 @@ int main(int argc, const char ** argv) {
// add f32
{
+ srand(seed);
const int nargs = 2;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -441,6 +449,7 @@ int main(int argc, const char ** argv) {
// add f16
{
+ srand(seed);
const int nargs = 2;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -457,6 +466,7 @@ int main(int argc, const char ** argv) {
// sub
{
+ srand(seed);
const int nargs = 2;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -473,6 +483,7 @@ int main(int argc, const char ** argv) {
// mul
{
+ srand(seed);
const int nargs = 2;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -489,6 +500,7 @@ int main(int argc, const char ** argv) {
// div
{
+ srand(seed);
const int nargs = 2;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -505,6 +517,7 @@ int main(int argc, const char ** argv) {
// sqr
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -521,6 +534,7 @@ int main(int argc, const char ** argv) {
// sqrt
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -537,6 +551,7 @@ int main(int argc, const char ** argv) {
// log
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -553,6 +568,7 @@ int main(int argc, const char ** argv) {
// sum
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -570,6 +586,7 @@ int main(int argc, const char ** argv) {
// sum_rows
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -587,6 +604,7 @@ int main(int argc, const char ** argv) {
// mean, not yet fully implemented
if(0)
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -604,6 +622,7 @@ int main(int argc, const char ** argv) {
// argmax
if (0)
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -620,6 +639,7 @@ int main(int argc, const char ** argv) {
// repeat
{
+ srand(seed);
int64_t ne2[4];
get_random_dims(ne2, 4);
@@ -642,6 +662,7 @@ int main(int argc, const char ** argv) {
// repeat back
{
+ srand(seed);
int64_t ne2[4];
get_random_dims(ne2, 4);
@@ -680,6 +701,7 @@ int main(int argc, const char ** argv) {
// sgn
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -696,6 +718,7 @@ int main(int argc, const char ** argv) {
// neg
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -712,6 +735,7 @@ int main(int argc, const char ** argv) {
// step
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -729,6 +753,7 @@ int main(int argc, const char ** argv) {
// tanh, not yet fully implemented
if(0)
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -745,33 +770,45 @@ int main(int argc, const char ** argv) {
// mul_mat
{
+ srand(seed);
const int nargs = 2;
- for (int ndims = 2; ndims <= 2; ++ndims) {
+ for (int ndims = 2; ndims <= 4; ++ndims) {
+ int max_nrep = (ndims >= 3) ? 2 : 1;
x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
- {
- int64_t ne2[4];
- get_random_dims(ne2, 4);
- ne2[0] = ne[0];
- x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
- }
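+ // x[1]'s dims 2 and 3 are set to nrep2/nrep3 multiples of x[0]'s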
+ for (int nrep2 = 1; nrep2 < max_nrep; ++nrep2) {
+ for (int nrep3 = 1; nrep3 < max_nrep; ++nrep3) {
+ {
+ int64_t ne2[4];
+ get_random_dims(ne2, 4);
+ ne2[0] = ne[0];
+ ne2[2] = nrep2 * ne[2];
+ ne2[3] = nrep3 * ne[3];
+ x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
+ }
- ggml_set_param(ctx0, x[0]);
- ggml_set_param(ctx0, x[1]);
+ ggml_set_param(ctx0, x[0]);
+ ggml_set_param(ctx0, x[1]);
- struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
- struct ggml_tensor * f = ggml_sum(ctx0, m);
+ struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]);
+ struct ggml_tensor * f = ggml_sum(ctx0, m);
- GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);
+ GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims);
- check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
- check_mat_mul(m, x[1], x[0]);
+ check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+ if (ndims == 2) {
+ // check_mat_mul does not support ndims > 2
+ check_mat_mul(m, x[1], x[0]);
+ }
+ }
+ }
}
}
// elu, not yet fully implemented
if(0)
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -788,6 +825,7 @@ int main(int argc, const char ** argv) {
// relu
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -805,6 +843,7 @@ int main(int argc, const char ** argv) {
// gelu, not yet fully implemented
if(0)
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -821,6 +860,7 @@ int main(int argc, const char ** argv) {
// silu
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -842,6 +882,7 @@ int main(int argc, const char ** argv) {
// rms_norm
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -858,6 +899,7 @@ int main(int argc, const char ** argv) {
// scale
{
+ srand(seed);
const int nargs = 2;
int64_t ne2[4];
@@ -878,6 +920,7 @@ int main(int argc, const char ** argv) {
// cpy f32
{
+ srand(seed);
const int nargs = 2;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -895,6 +938,7 @@ int main(int argc, const char ** argv) {
// cpy f16
{
+ srand(seed);
const int nargs = 2;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -912,6 +956,7 @@ int main(int argc, const char ** argv) {
// reshape (1d->nd)
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -935,6 +980,7 @@ int main(int argc, const char ** argv) {
// reshape (nd->1d)
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 2; ++ndims) {
@@ -958,6 +1004,7 @@ int main(int argc, const char ** argv) {
// acc 1d
{
+ srand(seed);
int64_t ne2[4] = { 1, 1, 1, 1 };
const int nargs = 2;
@@ -985,6 +1032,7 @@ int main(int argc, const char ** argv) {
// acc 2d
{
+ srand(seed);
int64_t ne2[4] = { 1, 1, 1, 1 };
int64_t max_offsets[4] = { 0, 0, 0, 0 };
int64_t offsets[4] = { 0, 0, 0, 0 };
@@ -1017,6 +1065,7 @@ int main(int argc, const char ** argv) {
// acc 3d
{
+ srand(seed);
int64_t ne2[4] = { 1, 1, 1, 1 };
int64_t max_offsets[4] = { 0, 0, 0, 0 };
int64_t offsets[4] = { 0, 0, 0, 0 };
@@ -1051,6 +1100,7 @@ int main(int argc, const char ** argv) {
// acc 4d
{
+ srand(seed);
int64_t ne2[4] = { 1, 1, 1, 1 };
int64_t max_offsets[4] = { 0, 0, 0, 0 };
int64_t offsets[4] = { 0, 0, 0, 0 };
@@ -1087,6 +1137,7 @@ int main(int argc, const char ** argv) {
// set_1d
{
+ srand(seed);
int64_t ne2[4];
const int nargs = 2;
@@ -1114,6 +1165,7 @@ int main(int argc, const char ** argv) {
// set_2d
{
+ srand(seed);
int64_t ne2[4];
int64_t max_offsets[4] = { 0, 0, 0, 0 };
int64_t offsets[4] = { 0, 0, 0, 0 };
@@ -1146,6 +1198,7 @@ int main(int argc, const char ** argv) {
// view_1d
{
+ srand(seed);
const int nargs = 1;
for (int ndims = 1; ndims <= 4; ++ndims) {
@@ -1169,6 +1222,7 @@ int main(int argc, const char ** argv) {
// view_2d
{
+ srand(seed);
int64_t ne2[4];
int64_t nb2[4];
@@ -1199,6 +1253,7 @@ int main(int argc, const char ** argv) {
// view_3d
{
+ srand(seed);
int64_t ne2[4] = {1,1,1,1};
int64_t nb2[4] = {0,0,0,0};
@@ -1230,6 +1285,7 @@ int main(int argc, const char ** argv) {
// permute
{
+ srand(seed);
int64_t ne2[4];
const int nargs = 1;
@@ -1263,6 +1319,7 @@ int main(int argc, const char ** argv) {
// transpose
{
+ srand(seed);
int64_t ne2[4];
const int nargs = 1;
@@ -1290,6 +1347,7 @@ int main(int argc, const char ** argv) {
// get_rows
{
+ srand(seed);
int64_t ne2[4] = {ne[0], ne[1], 1, 1};
int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1};
const int nargs = 1;
@@ -1306,6 +1364,7 @@ int main(int argc, const char ** argv) {
// diag_mask_inf
{
+ srand(seed);
const int nargs = 1;
const int ndims = 2;
@@ -1321,6 +1380,7 @@ int main(int argc, const char ** argv) {
// diag_mask_zero
{
+ srand(seed);
const int nargs = 1;
const int ndims = 2;
@@ -1336,6 +1396,7 @@ int main(int argc, const char ** argv) {
// softmax
{
+ srand(seed);
const int nargs = 1;
int64_t ne2[4];
@@ -1357,11 +1418,16 @@ int main(int argc, const char ** argv) {
ggml_new_f32(ctx0, eps))));
check_gradient("softmax", ctx0, x, f, ndims, nargs, 1e-3f, 2e-1f, INFINITY);
+ // NOTE: softmax forward is computed using f16 table lookup instead of using actual expf, but backward assumes actual expf.
+ // this may result in gradients that differ from the finite differences.
+ // when this test reports errors, first try to replace the table lookup with actual expf and test again to see if just that was the cause.
+ // if only the table lookup causes the gradients to differ, this is acceptable.
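+ // a rough way to gauge the size of that discrepancy (a sketch, assuming the exp table is
+ // built as fp16(expf(fp16(x))) and using the public ggml_fp32_to_fp16/ggml_fp16_to_fp32 helpers):
+ //     const float x      = 0.5f;
+ //     const float exact  = expf(x);
+ //     const float approx = ggml_fp16_to_fp32(ggml_fp32_to_fp16(expf(ggml_fp16_to_fp32(ggml_fp32_to_fp16(x)))));
+ //     // |approx - exact| / exact is on the order of fp16 precision (~1e-3)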
}
}
// cross_entropy_loss
{
+ srand(seed);
const int nargs = 1;
int64_t ne2[4];
@@ -1392,6 +1458,7 @@ int main(int argc, const char ** argv) {
// rope f32
{
+ srand(seed);
const int nargs = 1;
int64_t ne2[4];
@@ -1431,6 +1498,7 @@ int main(int argc, const char ** argv) {
// rope f16
{
+ srand(seed);
const int nargs = 1;
int64_t ne2[4];
@@ -1470,6 +1538,7 @@ int main(int argc, const char ** argv) {
// flash_attn f32
{
+ srand(seed);
const int nargs = 3;
int64_t ne2[4];
@@ -1482,28 +1551,31 @@ int main(int argc, const char ** argv) {
for (int masked = 0; masked <= 1; ++masked) {
for (int ndims = 2; ndims <= 4; ++ndims) {
- int64_t neq[4] = { D, N, B, ne[3] };
- int64_t nek[4] = { D, M, B, ne[3] };
- int64_t nev[4] = { M, D, B, ne[3] };
- if (ndims == 2) {
- neq[2] = 1; neq[3] = 1;
- nek[2] = 1; nek[3] = 1;
- nev[2] = 1; nev[3] = 1;
- } else if (ndims == 3) {
- neq[3] = 1;
- nek[3] = 1;
- nev[3] = 1;
- }
- x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
- x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
- x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
- ggml_set_param(ctx0, x[0]);
- ggml_set_param(ctx0, x[1]);
- ggml_set_param(ctx0, x[2]);
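+ // q (x[0]) gets nrep times as many entries in dim 2 as k (x[1]) and v (x[2])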
+ int max_nrep = (ndims >= 3) ? 2 : 1;
+ for (int nrep = 1; nrep < max_nrep; ++nrep) {
+ int64_t neq[4] = { D, N, B*nrep, ne[3] };
+ int64_t nek[4] = { D, M, B, ne[3] };
+ int64_t nev[4] = { M, D, B, ne[3] };
+ if (ndims == 2) {
+ neq[2] = 1; neq[3] = 1;
+ nek[2] = 1; nek[3] = 1;
+ nev[2] = 1; nev[3] = 1;
+ } else if (ndims == 3) {
+ neq[3] = 1;
+ nek[3] = 1;
+ nev[3] = 1;
+ }
+ x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f);
+ x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f);
+ x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f);
+ ggml_set_param(ctx0, x[0]);
+ ggml_set_param(ctx0, x[1]);
+ ggml_set_param(ctx0, x[2]);
- struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
+ struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0)));
- check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY);
+ check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, 1e-3f, INFINITY);
+ }
}
}
}
@@ -1511,6 +1583,7 @@ int main(int argc, const char ** argv) {
// flash_attn f16, not yet fully implemented
if(0)
{
+ srand(seed);
const int nargs = 3;
int64_t ne2[4];