summaryrefslogtreecommitdiff
path: root/examples/imatrix
diff options
context:
space:
mode:
authorKawrakow <iwankawrakow@gmail.com>2025-02-12 07:20:38 +0200
committerGitHub <noreply@github.com>2025-02-12 07:20:38 +0200
commite974fc9e6691825611c781525ae16d77eed3cbc0 (patch)
tree6c39ac6d5edc6473842fe5c727c890b322f3f9d7 /examples/imatrix
parent3c98bfb33d149a0d9d3bb91604dd12709721e3cf (diff)
Fix imatrix overprotectiveness (#202)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'examples/imatrix')
-rw-r--r--examples/imatrix/imatrix.cpp39
1 files changed, 37 insertions, 2 deletions
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 497c9d14..8006988c 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -39,6 +39,7 @@ struct Stats {
std::vector<float> values;
std::vector<int> counts;
int ncall = 0;
+ int n_as = 1;
};
class IMatrixCollector {
@@ -132,11 +133,15 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
if (e.values.empty()) {
e.values.resize(src1->ne[0]*n_as, 0);
e.counts.resize(src1->ne[0]*n_as, 0);
+ e.n_as = n_as;
}
else if (e.values.size() != (size_t)src1->ne[0]*n_as) {
fprintf(stderr, "Oops: inconsistent size for %s (%d vs %d)\n", wname.c_str(), (int)e.values.size(), (int)src1->ne[0]*n_as);
exit(1); //GGML_ABORT("fatal error");
}
+ else if (e.n_as != n_as) {
+ fprintf(stderr, "Oops: inconsistent n_as for %s (%d vs %d)\n", wname.c_str(), e.n_as, n_as);
+ }
if (m_params.verbosity > 1) {
printf("%s[%d]: %32s, %s, %5d x %5d, %d\n", __func__, m_last_call, wname.c_str(), ggml_op_name(t->op), (int)src1->ne[0], (int)src1->ne[2], (int)src1->type);
}
@@ -258,8 +263,38 @@ void IMatrixCollector::save_imatrix(int ncall) const {
}
if (n_zeros > 0) {
- fprintf(stderr, "%s: entry '%40s' has partial data (%.2f%%) - skipping\n", __func__, kv.first.c_str(), 100.0f * (n_all - n_zeros) / n_all);
- continue;
+ fprintf(stderr, "%s: entry '%40s' has partial data (%.2f%%)", __func__, kv.first.c_str(), 100.0f * (n_all - n_zeros) / n_all);
+ bool store_it = false;
+ if (kv.second.n_as > 1) {
+ int n_per_expert = n_all / kv.second.n_as;
+ std::vector<int> bad_experts;
+ bad_experts.reserve(kv.second.n_as);
+ for (int i = 0; i < kv.second.n_as; ++i) {
+ auto counts = kv.second.counts.data() + i*n_per_expert;
+ int nz_i = 0;
+ for (int j = 0; j < n_per_expert; ++j) {
+ if (counts[j] == 0) ++nz_i;
+ }
+ if (nz_i > 0) bad_experts.push_back(i);
+ }
+ fprintf(stderr, " %d out of %d experts are missing data", int(bad_experts.size()), kv.second.n_as);
+ if (bad_experts.size() < round(kv.second.n_as * 0.05)) {
+ fprintf(stderr, " Storing **but be aware**\n");
+ store_it = true;
+ for (auto i : bad_experts) {
+ auto counts = (int *)kv.second.counts.data() + i*n_per_expert;
+ auto values = (float *)kv.second.values.data() + i*n_per_expert;
+ for (int j = 0; j < n_per_expert; ++j) {
+ counts[j] = 1;
+ values[j] = 1;
+ }
+ }
+ }
+ }
+ if (!store_it) {
+ fprintf(stderr, " - skipping\n");
+ continue;
+ }
}
n_entries++;