summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com>, 2024-01-17 18:54:56 +0200
committer: GitHub <noreply@github.com>, 2024-01-17 18:54:56 +0200
commit: 38566680cdfe982a495562332c25b9227de9cf8d (patch)
tree: 3936732879d0a3146577745232feadb80e5917c9 /tests
parent: ba69bbc84ced580fe4fdb0713ca2d95634325b7a (diff)
ggml : add IQ2 to test-backend-ops + refactoring (#4990)
* ggml : add IQ2 to test-backend-ops + refactoring
  ggml-ci
* cuda : update supports_op for IQ2
  ggml-ci
* ci : enable LLAMA_CUBLAS=1 for CUDA nodes
  ggml-ci
* cuda : fix out-of-bounds-access in `mul_mat_vec_q`
  ggml-ci
* tests : avoid creating RNGs for each Q tensor
  ggml-ci
* tests : avoid creating RNGs for each tensor
  ggml-ci
Diffstat (limited to 'tests')
-rw-r--r-- tests/test-backend-ops.cpp 46
1 files changed, 28 insertions, 18 deletions
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 22a7856d..55ce14e0 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -16,39 +16,37 @@
#include <vector>
static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
+ // static RNG initialization (revisit if n_threads stops being constant)
+ static const size_t n_threads = std::thread::hardware_concurrency();
+ static std::vector<std::default_random_engine> generators = []() {
+ std::random_device rd;
+ std::vector<std::default_random_engine> vec;
+ vec.reserve(n_threads);
+ //for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed
+ for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
+ return vec;
+ }();
+
size_t size = ggml_nelements(tensor);
std::vector<float> data(size);
-#if 0
- static std::default_random_engine generator(1234);
- std::uniform_real_distribution<float> distribution(min, max);
-
- for (size_t i = 0; i < size; i++) {
- data[i] = distribution(generator);
- }
-#else
- auto init_thread = [&](size_t start, size_t end) {
- std::random_device rd;
- std::default_random_engine generator(rd());
+ auto init_thread = [&](size_t ith, size_t start, size_t end) {
std::uniform_real_distribution<float> distribution(min, max);
-
for (size_t i = start; i < end; i++) {
- data[i] = distribution(generator);
+ data[i] = distribution(generators[ith]);
}
};
- size_t n_threads = std::thread::hardware_concurrency();
std::vector<std::thread> threads;
threads.reserve(n_threads);
for (size_t i = 0; i < n_threads; i++) {
size_t start = i*size/n_threads;
size_t end = (i+1)*size/n_threads;
- threads.emplace_back(init_thread, start, end);
+ threads.emplace_back(init_thread, i, start, end);
}
for (auto & t : threads) {
t.join();
}
-#endif
if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
@@ -56,7 +54,16 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
int64_t hist[16];
- ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], hist, nullptr);
+ std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
+ const float * im = imatrix.data();
+ if (!ggml_quantize_requires_imatrix(tensor->type)) {
+ // when the imatrix is optional, we want to test both quantization with and without imatrix
+ // use one of the random numbers to decide
+ if (data[0] > 0.5f*(min + max)) {
+ im = nullptr;
+ }
+ }
+ ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], hist, im);
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
// This is going to create some weird integers though.
@@ -1472,7 +1479,8 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
GGML_TYPE_Q8_0,
GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
GGML_TYPE_Q4_K, GGML_TYPE_Q5_K,
- GGML_TYPE_Q6_K
+ GGML_TYPE_Q6_K,
+ GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS,
};
// unary ops
@@ -1752,6 +1760,8 @@ int main(int argc, char ** argv) {
return 1;
}
+ ggml_quantize_free();
+
printf("\033[1;32mOK\033[0m\n");
return 0;
}