Diffstat (limited to 'tests/test-backend-ops.cpp')
-rw-r--r-- | tests/test-backend-ops.cpp | 46
1 file changed, 28 insertions, 18 deletions
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 22a7856d..55ce14e0 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -16,39 +16,37 @@
 #include <vector>
 
 static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
+    // static RNG initialization (revisit if n_threads stops being constant)
+    static const size_t n_threads = std::thread::hardware_concurrency();
+    static std::vector<std::default_random_engine> generators = []() {
+        std::random_device rd;
+        std::vector<std::default_random_engine> vec;
+        vec.reserve(n_threads);
+        //for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(1234 + i); } // fixed seed
+        for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
+        return vec;
+    }();
+
     size_t size = ggml_nelements(tensor);
     std::vector<float> data(size);
 
-#if 0
-    static std::default_random_engine generator(1234);
-    std::uniform_real_distribution<float> distribution(min, max);
-
-    for (size_t i = 0; i < size; i++) {
-        data[i] = distribution(generator);
-    }
-#else
-    auto init_thread = [&](size_t start, size_t end) {
-        std::random_device rd;
-        std::default_random_engine generator(rd());
+    auto init_thread = [&](size_t ith, size_t start, size_t end) {
         std::uniform_real_distribution<float> distribution(min, max);
-
         for (size_t i = start; i < end; i++) {
-            data[i] = distribution(generator);
+            data[i] = distribution(generators[ith]);
         }
     };
 
-    size_t n_threads = std::thread::hardware_concurrency();
     std::vector<std::thread> threads;
     threads.reserve(n_threads);
     for (size_t i = 0; i < n_threads; i++) {
         size_t start = i*size/n_threads;
         size_t end   = (i+1)*size/n_threads;
-        threads.emplace_back(init_thread, start, end);
+        threads.emplace_back(init_thread, i, start, end);
     }
     for (auto & t : threads) {
         t.join();
     }
-#endif
 
     if (tensor->type == GGML_TYPE_F32 || tensor->type == GGML_TYPE_I32) {
         ggml_backend_tensor_set(tensor, data.data(), 0, size * sizeof(float));
@@ -56,7 +54,16 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
         GGML_ASSERT(size % ggml_blck_size(tensor->type) == 0);
         std::vector<uint8_t> dataq(ggml_row_size(tensor->type, size));
         int64_t hist[16];
-        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], hist, nullptr);
+        std::vector<float> imatrix(tensor->ne[0], 1.0f); // dummy importance matrix
+        const float * im = imatrix.data();
+        if (!ggml_quantize_requires_imatrix(tensor->type)) {
+            // when the imatrix is optional, we want to test both quantization with and without imatrix
+            // use one of the random numbers to decide
+            if (data[0] > 0.5f*(min + max)) {
+                im = nullptr;
+            }
+        }
+        ggml_quantize_chunk(tensor->type, data.data(), dataq.data(), 0, size/tensor->ne[0], tensor->ne[0], hist, im);
         ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
     } else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
         // This is going to create some weird integers though.
@@ -1472,7 +1479,8 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
         GGML_TYPE_Q8_0,
         GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
         GGML_TYPE_Q4_K, GGML_TYPE_Q5_K,
-        GGML_TYPE_Q6_K
+        GGML_TYPE_Q6_K,
+        GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS,
     };
 
     // unary ops
@@ -1752,6 +1760,8 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    ggml_quantize_free();
+
     printf("\033[1;32mOK\033[0m\n");
     return 0;
 }
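
A note on the first hunk: previously each worker thread constructed a fresh std::default_random_engine from std::random_device on every call, so tensor initialization paid for engine construction every time and there was no single place to control seeding. The patch hoists the engines into a function-local static vector, one per hardware thread, built exactly once; each thread then indexes its own engine, so nothing is shared and no locking is needed. The commented-out line preserves a fixed-seed variant for reproducible runs. A minimal standalone sketch of the same pattern, outside ggml (fill_uniform and main are illustrative names, not part of the patch):

#include <algorithm>
#include <cstdio>
#include <random>
#include <thread>
#include <vector>

static void fill_uniform(std::vector<float> & data, float min, float max) {
    // hardware_concurrency() may report 0 on some platforms; fall back to 1
    static const size_t n_threads = std::max<size_t>(1, std::thread::hardware_concurrency());
    // one engine per thread, seeded once; the static locals are usable inside
    // the capture-less lambda because they have static storage duration
    static std::vector<std::default_random_engine> generators = []() {
        std::random_device rd;
        std::vector<std::default_random_engine> vec;
        vec.reserve(n_threads);
        for (size_t i = 0; i < n_threads; i++) { vec.emplace_back(rd()); }
        return vec;
    }();

    const size_t size = data.size();
    auto init_thread = [&](size_t ith, size_t start, size_t end) {
        std::uniform_real_distribution<float> distribution(min, max);
        for (size_t i = start; i < end; i++) {
            data[i] = distribution(generators[ith]); // each thread uses only its own engine
        }
    };

    std::vector<std::thread> threads;
    threads.reserve(n_threads);
    for (size_t i = 0; i < n_threads; i++) {
        threads.emplace_back(init_thread, i, i*size/n_threads, (i+1)*size/n_threads);
    }
    for (auto & t : threads) {
        t.join();
    }
}

int main() {
    std::vector<float> data(1 << 20);
    fill_uniform(data, -1.0f, 1.0f);
    printf("%f %f %f\n", data[0], data[1], data[2]);
    return 0;
}

If size < n_threads some ranges come out empty (start == end), which is harmless; the i*size/n_threads split also guarantees the ranges tile [0, size) exactly.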
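The second hunk follows the new quantization API: ggml_quantize_chunk now takes an importance matrix ("imatrix") as its last argument, and ggml_quantize_requires_imatrix reports the types (the newly tested GGML_TYPE_IQ2_XXS and GGML_TYPE_IQ2_XS) that cannot quantize without one. The test passes a dummy all-ones imatrix, and when the type makes it optional it uses the first random sample as a coin flip so both code paths get exercised. The ggml_quantize_free() added to main() releases state the quantizers initialize lazily (the new IQ2 types build lookup grids on first use). A rough sketch of the call pattern, assuming the ggml API as called in this diff (quantize_rows is an illustrative helper, not a ggml function; the hist argument was later dropped from the API):

#include "ggml.h"
#include <vector>

// quantize nrows rows of n_per_row floats each; imatrix may be nullptr
// only for types where ggml_quantize_requires_imatrix() is false
static std::vector<uint8_t> quantize_rows(ggml_type type, const float * src,
                                          int nrows, int n_per_row,
                                          const float * imatrix) {
    if (ggml_quantize_requires_imatrix(type)) {
        GGML_ASSERT(imatrix != nullptr); // e.g. IQ2_XXS / IQ2_XS
    }
    std::vector<uint8_t> dst(ggml_row_size(type, (int64_t) nrows * n_per_row));
    int64_t hist[16];
    ggml_quantize_chunk(type, src, dst.data(), 0, nrows, n_per_row, hist, imatrix);
    return dst;
}

Pairing calls like this with a single ggml_quantize_free() at program exit, as the last hunk does, keeps leak checkers quiet across repeated quantization runs.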