Diffstat (limited to 'tests/test-backend-ops.cpp')
 tests/test-backend-ops.cpp | 87
 1 file changed, 66 insertions(+), 21 deletions(-)
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 21adba42..02daad24 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -101,7 +101,7 @@ static std::vector<float> tensor_to_float(const ggml_tensor * t) {
                     } else if (t->type == GGML_TYPE_I8) {
                         tv.push_back((float)*(int8_t *) &buf[i]);
                     } else if (quantized) {
-                        tt.to_float(&buf[i], vq.data(), ggml_blck_size(t->type));
+                        tt.to_float(&buf[i], vq.data(), bs);
                         tv.insert(tv.end(), vq.begin(), vq.end());
                     } else {
                         GGML_ASSERT(false);
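
The cached `bs` presumably holds ggml_blck_size(t->type), computed once per tensor instead of once per block. A minimal sketch of the resulting pattern, reusing the hunk's names; the simple linear byte traversal below is an illustrative assumption, not the file's actual index math:

    // Sketch: hoist the block size out of the dequantization loop.
    // t, tt, buf, vq, tv are the names used in the hunk above; the
    // linear traversal is an assumption for illustration only.
    const size_t bs = ggml_blck_size(t->type);        // elements per block
    std::vector<float> vq(bs);                        // one dequantized block
    for (size_t i = 0; i < ggml_nbytes(t); i += ggml_type_size(t->type)) {
        tt.to_float(&buf[i], vq.data(), bs);          // bs values per call
        tv.insert(tv.end(), vq.begin(), vq.end());
    }
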
@@ -948,14 +948,14 @@ struct test_mul_mat_id : public test_case {
     const ggml_type type_a;
     const ggml_type type_b;
     const int n_mats;
-    const int id;
+    const int n_used;
+    const bool b; // broadcast b matrix
     const int64_t m;
     const int64_t n;
     const int64_t k;
-    const bool v; // view (non-contiguous ids)

     std::string vars() override {
-        return VARS_TO_STR8(type_a, type_b, n_mats, id, m, n, k, v);
+        return VARS_TO_STR8(type_a, type_b, n_mats, n_used, b, m, n, k);
     }

     double max_nmse_err() override {
@@ -972,20 +972,22 @@ struct test_mul_mat_id : public test_case {
     }

     test_mul_mat_id(ggml_type type_a = GGML_TYPE_F32, ggml_type type_b = GGML_TYPE_F32,
-            int n_mats = 2, int id = 0,
-            int64_t m = 32, int64_t n = 32, int64_t k = 32, bool v = false)
-        : type_a(type_a), type_b(type_b), n_mats(n_mats), id(id),
-          m(m), n(n), k(k), v(v) {}
+            int n_mats = 8, int n_used = 2, bool b = false,
+            int64_t m = 32, int64_t n = 32, int64_t k = 32)
+        : type_a(type_a), type_b(type_b), n_mats(n_mats), n_used(n_used), b(b),
+          m(m), n(n), k(k) {
+        GGML_ASSERT(n_used <= n_mats);
+    }

     ggml_tensor * build_graph(ggml_context * ctx) override {
         // C^T = A * B^T: (k, m) * (k, n) => (m, n)
-        ggml_tensor * mats = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
+        ggml_tensor * as = ggml_new_tensor_3d(ctx, type_a, k, m, n_mats);
         ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
-        if (v) {
-            ids = ggml_view_2d(ctx, ids, n_mats/2, ids->ne[1], ids->nb[1], 0);
+        if (n_used != n_mats) {
+            ids = ggml_view_2d(ctx, ids, n_used, n, ids->nb[1], 0);
         }
-        ggml_tensor * b = ggml_new_tensor_2d(ctx, type_b, k, n);
-        ggml_tensor * out = ggml_mul_mat_id(ctx, mats, ids, v ? id/2 : id, b);
+        ggml_tensor * b = ggml_new_tensor_3d(ctx, type_b, k, this->b ? 1 : n_used, n);
+        ggml_tensor * out = ggml_mul_mat_id(ctx, as, b, ids);
         return out;
     }

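The rewritten build_graph captures the new ggml_mul_mat_id contract: the experts are stacked in a single 3D tensor `as`, `b` carries one k-length column per used expert per token (or a single broadcast column when this->b is set), and `ids` selects the experts, replacing the old per-call expert index argument. A self-contained restatement of those shapes; the helper name is hypothetical:

    // Hypothetical helper mirroring build_graph above (F32, no broadcast).
    static ggml_tensor * sketch_mul_mat_id(ggml_context * ctx) {
        const int64_t k = 256, m = 512, n = 32;   // shapes used by the sweep below
        const int     n_mats = 8, n_used = 2;

        // all experts stacked in one tensor: ne = (k, m, n_mats)
        ggml_tensor * as  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, k, m, n_mats);
        // per-token expert ids, narrowed by a view to the n_used columns
        ggml_tensor * ids = ggml_new_tensor_2d(ctx, GGML_TYPE_I32, n_mats, n);
        ids = ggml_view_2d(ctx, ids, n_used, n, ids->nb[1], 0);
        // activations: one k-length column per used expert per token
        ggml_tensor * b   = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, k, n_used, n);

        return ggml_mul_mat_id(ctx, as, b, ids);  // new argument order
    }
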
@@ -1611,7 +1613,6 @@ public:
     }
 };

-
 // Llama
 struct test_llama : public test_llm {
     static constexpr float freq_base = 10000.0f;
@@ -1875,6 +1876,25 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
     };

+    const ggml_type base_types[] = {
+        GGML_TYPE_F32, GGML_TYPE_F16,
+        GGML_TYPE_Q4_0,
+        GGML_TYPE_Q4_K,
+        GGML_TYPE_IQ2_XXS
+    };
+
+    const ggml_type other_types[] = {
+        GGML_TYPE_Q4_1,
+        GGML_TYPE_Q5_0, GGML_TYPE_Q5_1,
+        GGML_TYPE_Q8_0,
+        GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
+        GGML_TYPE_Q5_K,
+        GGML_TYPE_Q6_K,
+        GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
+        GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S, GGML_TYPE_IQ1_M,
+        GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S, GGML_TYPE_IQ4_XS,
+    };
+
     // unary ops
     for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) {
         test_cases.emplace_back(new test_unary((ggml_unary_op) op));
@@ -1983,7 +2003,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 10, 10, 10}, eps));
     }

-    for (ggml_type type_a : all_types) {
+    for (ggml_type type_a : base_types) {
         for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) {
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
             test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {10, 1}, {1, 1}));
@@ -2003,6 +2023,12 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         }
     }

+    for (ggml_type type_a : other_types) {
+        for (ggml_type type_b : {GGML_TYPE_F32}) {
+            test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, { 1, 1}, {1, 1}));
+        }
+    }
+
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 128, { 8, 1}, {1, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 83, 2, 128, { 8, 1}, {4, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 2, 64, { 8, 1}, {4, 1}));
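
Splitting all_types into base_types and other_types presumably trades exhaustiveness for runtime: the five base_types keep the full mul_mat shape sweep against both F32 and F16 activations, while the remaining 16 quantization formats each get a single m=16, n=1, k=256 smoke test against F32.
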
@@ -2010,13 +2036,32 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 64, 45, 128, { 8, 1}, {4, 1}));
     test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128, 45, 64, { 8, 1}, {4, 1}));

-    for (ggml_type type_a : all_types) {
+    for (ggml_type type_a : base_types) {
         for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) {
-            for (int n_mats : {2, 4, 8}) {
-                for (int id = 0; id < n_mats; id++) {
-                    for (bool v : {false, true}) {
-                        test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, id, 16, 1, 256, v));
-                        test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, id, 16, 16, 256, v));
+            for (int n_mats : {4, 8}) {
+                for (int n_used : {1, 2, 4}) {
+                    for (bool b : {false, true}) {
+                        for (int n : {1, 32}) {
+                            int m = 512;
+                            int k = 256;
+                            test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, n_used, b, m, n, k));
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    for (ggml_type type_a : other_types) {
+        for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) {
+            for (int n_mats : {4}) {
+                for (int n_used : {2}) {
+                    for (bool b : {false}) {
+                        for (int n : {1}) {
+                            int m = 512;
+                            int k = 256;
+                            test_cases.emplace_back(new test_mul_mat_id(type_a, type_b, n_mats, n_used, b, m, n, k));
+                        }
                     }
                 }
             }
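
For scale, the base sweep above generates 5 base_types × 2 n_mats × 3 n_used × 2 b × 2 n = 120 mul_mat_id cases, and the other_types loop adds one case per remaining format, 16 more. Assuming all_types held the same 21 formats, the old loops produced 21 × (2 + 4 + 8) ids × 2 v × 2 shapes = 1176 cases, so the new sweep is roughly an order of magnitude smaller while adding coverage for multiple used experts and a broadcast b matrix.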