diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2024-10-10 18:21:24 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-10 18:21:24 +0300 |
commit | 70aca0b75c7b6658cdb5fe9d098d1fbda629e907 (patch) | |
tree | 71b066ffce556d9249dc0584a39900ac5db452da /examples/quantize-stats/quantize-stats.cpp | |
parent | b30c9e10d8710a49b2d2ab98d086b9f11bfaa228 (diff) |
Better model info (#84)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'examples/quantize-stats/quantize-stats.cpp')
-rw-r--r-- | examples/quantize-stats/quantize-stats.cpp | 45 |
1 file changed, 45 insertions, 0 deletions
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 6264deb4..88a7d2b9 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -17,6 +17,7 @@
 #include <vector>
 #include <thread>
 #include <mutex>
+#include <array>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -227,6 +228,31 @@ static void test_roundtrip_on_layer(
     }
 }
 
+static void print_fp_stats(const char * msg, const uint64_t * counts) {
+    printf("===== %s\n", msg);
+    uint64_t tot = 0; for (int i = 0; i < 32; ++i) tot += counts[i];
+    double norm = 1./tot;
+    for (int i = 0; i < 32; ++i) {
+        if (!counts[i]) continue;
+        uint16_t val = i << 10;
+        float f = ggml_fp16_to_fp32(val);
+        printf("%2d %f %g\n", i, norm*counts[i], f);
+    }
+}
+
+static void analyze_tensor_fp(const ggml_tensor * t, uint64_t * H) {
+    if (t->type != GGML_TYPE_F16) return;
+    if (!ggml_is_contiguous(t)) return;
+    int n = ggml_nelements(t);
+    const uint16_t * x = (const uint16_t *)t->data;
+    std::array<uint64_t, 32> counts = {};
+    for (int j = 0; j < n; ++j) {
+        ++counts[(x[j] >> 10) & 31];
+    }
+    for (int i = 0; i < 32; ++i) H[i] += counts[i];
+    print_fp_stats(t->name, counts.data());
+}
+
 int main(int argc, char ** argv) {
     ggml_time_init();
 
@@ -236,6 +262,7 @@ int main(int argc, char ** argv) {
     int max_thread = 0;
     bool invalid_param = false;
+    bool analyze_fp = false;
     std::string arg;
     for (int i = 1; i < argc; i++) {
         arg = argv[i];
 
@@ -249,6 +276,8 @@ int main(int argc, char ** argv) {
             params.verbose = true;
         } else if (arg == "-p" || arg == "--per-layer-stats") {
             params.per_layer_stats = true;
+        } else if (arg == "-afp" || arg == "--analyze-fp") {
+            analyze_fp = true;
         } else if (arg == "--histogram") {
             params.print_histogram = true;
         } else if (arg == "-m" || arg == "--model") {
@@ -375,6 +404,22 @@ int main(int argc, char ** argv) {
     std::vector<char> quantized_scratch;
     std::vector<float> output_scratch;
 
+    if (analyze_fp) {
+        for (const auto& kv_tensor : tensors) {
+            if (!layer_included(params, kv_tensor.first)) {
+                continue;
+            }
+            if (kv_tensor.second->ne[0] == 1 || kv_tensor.second->ne[1] == 1) {
+                // we never quantize those
+                continue;
+            }
+            std::array<uint64_t, 32> H = {};
+            analyze_tensor_fp(kv_tensor.second, H.data());
+            print_fp_stats("Total", H.data());
+        }
+        return 0;
+    }
+
     // loop throught quantization types
     for (int i = 0; i < GGML_TYPE_COUNT; i++) {
         const ggml_type type = (ggml_type) i;