Diffstat (limited to 'ggml/src/ggml.c')
-rw-r--r--   ggml/src/ggml.c   84
1 file changed, 64 insertions, 20 deletions
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 08b292b7..2804accd 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -651,24 +651,28 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .blck_size = 1,
         .type_size = sizeof(int8_t),
         .is_quantized = false,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_I16] = {
         .type_name = "i16",
         .blck_size = 1,
         .type_size = sizeof(int16_t),
         .is_quantized = false,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_I32] = {
         .type_name = "i32",
         .blck_size = 1,
         .type_size = sizeof(int32_t),
         .is_quantized = false,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_I64] = {
         .type_name = "i64",
         .blck_size = 1,
         .type_size = sizeof(int64_t),
         .is_quantized = false,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_F64] = {
         .type_name = "f64",
@@ -676,6 +680,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .type_size = sizeof(double),
         .is_quantized = false,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_F32] = {
         .type_name = "f32",
@@ -685,6 +690,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32,
         .vec_dot_type = GGML_TYPE_F32,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_F16] = {
         .type_name = "f16",
@@ -697,6 +703,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f16,
         .vec_dot_type = GGML_TYPE_F16,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q4_0] = {
         .type_name = "q4_0",
@@ -717,6 +724,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 #else
         .nrows = 1,
 #endif
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q4_1] = {
         .type_name = "q4_1",
@@ -733,6 +741,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 #else
         .nrows = 1,
 #endif
+        .row_meta_size = 0,
     },
     [4] = { // GGML_TYPE_Q4_2
         .type_name = "DEPRECATED",
@@ -745,6 +754,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = NULL,
         .vec_dot_type = GGML_TYPE_COUNT,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [5] = { // GGML_TYPE_Q4_3
         .type_name = "DEPRECATED",
@@ -757,6 +767,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = NULL,
         .vec_dot_type = GGML_TYPE_COUNT,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q5_0] = {
         .type_name = "q5_0",
@@ -773,6 +784,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot_type = GGML_TYPE_Q8_0,
 #endif
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q5_1] = {
         .type_name = "q5_1",
@@ -785,6 +797,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q5_1_q8_1,
         .vec_dot_type = GGML_TYPE_Q8_1,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q8_0] = {
         .type_name = "q8_0",
@@ -806,6 +819,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 #else
         .nrows = 1,
 #endif
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q8_1] = {
         .type_name = "q8_1",
@@ -816,6 +830,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .from_float_ref = (ggml_from_float_t) quantize_row_q8_1_ref,
         .vec_dot_type = GGML_TYPE_Q8_1,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q2_K] = {
         .type_name = "q2_K",
@@ -828,6 +843,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q2_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q3_K] = {
         .type_name = "q3_K",
@@ -840,6 +856,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q3_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q4_K] = {
         .type_name = "q4_K",
@@ -852,6 +869,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q4_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q5_K] = {
         .type_name = "q5_K",
@@ -864,6 +882,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q5_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q6_K] = {
         .type_name = "q6_K",
@@ -876,6 +895,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_q6_K_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ2_XXS] = {
         .type_name = "iq2_xxs",
@@ -888,6 +908,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq2_xxs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ2_XS] = {
         .type_name = "iq2_xs",
@@ -900,6 +921,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq2_xs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ3_XXS] = {
         .type_name = "iq3_xxs",
@@ -912,6 +934,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq3_xxs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ3_S] = {
         .type_name = "iq3_s",
@@ -924,6 +947,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq3_s_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ2_S] = {
         .type_name = "iq2_s",
@@ -936,6 +960,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq2_s_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ1_S] = {
         .type_name = "iq1_s",
@@ -948,6 +973,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq1_s_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ1_M] = {
         .type_name = "iq1_m",
@@ -960,6 +986,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq1_m_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ1_BN] = {
         .type_name = "iq1_bn",
@@ -972,6 +999,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq1_bn_q8_K64,
         .vec_dot_type = GGML_TYPE_Q8_K64,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ2_BN] = {
         .type_name = "iq2_bn",
@@ -984,6 +1012,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq2_bn_q8_K64,
         .vec_dot_type = GGML_TYPE_Q8_K64,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ2_TN] = {
         .type_name = "iq2_tn",
@@ -996,6 +1025,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq2_tn_q8_k,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 4,
     },
     [GGML_TYPE_IQ1_TN] = {
         .type_name = "iq1_tn",
@@ -1008,6 +1038,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq1_tn_q8_k,
         .vec_dot_type = GGML_TYPE_Q8_K64,
         .nrows = 1,
+        .row_meta_size = 2,
     },
     [GGML_TYPE_IQ4_NL] = {
         .type_name = "iq4_nl",
@@ -1020,6 +1051,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq4_nl_q8_0,
         .vec_dot_type = GGML_TYPE_Q8_0,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ4_XS] = {
         .type_name = "iq4_xs",
@@ -1032,6 +1064,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = ggml_vec_dot_iq4_xs_q8_K,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q8_K] = {
         .type_name = "q8_K",
@@ -1039,6 +1072,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .type_size = sizeof(block_q8_K),
         .is_quantized = true,
         .from_float = quantize_row_q8_K,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q8_K64] = {
         .type_name = "q8_K64",
@@ -1046,6 +1080,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .type_size = sizeof(block_q8_K64),
         .is_quantized = true,
         .from_float = quantize_row_q8_K64,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_BF16] = {
         .type_name = "bf16",
@@ -1058,6 +1093,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_bf16,
         .vec_dot_type = GGML_TYPE_BF16,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q4_0_4_4] = {
         .type_name = "q4_0_4x4",
@@ -1074,6 +1110,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .ncols = 4,
         .gemv = ggml_gemv_q4_0_4x4_q8_0,
         .gemm = ggml_gemm_q4_0_4x4_q8_0,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q4_0_4_8] = {
         .type_name = "q4_0_4x8",
@@ -1090,6 +1127,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .ncols = 4,
         .gemv = ggml_gemv_q4_0_4x8_q8_0,
         .gemm = ggml_gemm_q4_0_4x8_q8_0,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_Q4_0_8_8] = {
         .type_name = "q4_0_8x8",
@@ -1106,6 +1144,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .ncols = 8,
         .gemv = ggml_gemv_q4_0_8x8_q8_0,
         .gemm = ggml_gemm_q4_0_8x8_q8_0,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ2_K] = {
         .type_name = "iq2_k",
@@ -1118,6 +1157,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq2_k_q8_k,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ3_K] = {
         .type_name = "iq3_k",
@@ -1130,6 +1170,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq3_k_q8_k,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ4_K] = {
         .type_name = "iq4_k",
@@ -1142,6 +1183,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq4_k_q8_k,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ5_K] = {
         .type_name = "iq5_k",
@@ -1154,6 +1196,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq5_k_q8_k,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
     [GGML_TYPE_IQ6_K] = {
         .type_name = "iq6_k",
@@ -1166,6 +1209,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
         .vec_dot = vec_dot_iq6_k_q8_k,
         .vec_dot_type = GGML_TYPE_Q8_K,
         .nrows = 1,
+        .row_meta_size = 0,
     },
 };
 
@@ -3585,6 +3629,10 @@ GGML_CALL int64_t ggml_nrows(const struct ggml_tensor * tensor) {
     return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
 }
 
+GGML_CALL int64_t ggml_blck_size(enum ggml_type type) {
+    return type_traits[type].blck_size;
+}
+
 GGML_CALL size_t ggml_nbytes(const struct ggml_tensor * tensor) {
     size_t nbytes;
     size_t blck_size = ggml_blck_size(tensor->type);
@@ -3595,7 +3643,7 @@ GGML_CALL size_t ggml_nbytes(const struct ggml_tensor * tensor) {
         }
     }
     else {
-        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
+        nbytes = tensor->nb[1]; //tensor->ne[0]*tensor->nb[0]/blck_size;
         for (int i = 1; i < GGML_MAX_DIMS; ++i) {
             nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
         }
     }
@@ -3608,17 +3656,13 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
     return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
 }
 
-GGML_CALL int64_t ggml_blck_size(enum ggml_type type) {
-    return type_traits[type].blck_size;
-}
-
 GGML_CALL size_t ggml_type_size(enum ggml_type type) {
     return type_traits[type].type_size;
 }
 
 GGML_CALL size_t ggml_row_size(enum ggml_type type, int64_t ne) {
     assert(ne % ggml_blck_size(type) == 0);
-    return ggml_type_size(type)*ne/ggml_blck_size(type);
+    return type_traits[type].row_meta_size + ggml_type_size(type)*ne/ggml_blck_size(type);
 }
 
 double ggml_type_sizef(enum ggml_type type) {
@@ -3764,7 +3808,7 @@ static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
     if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
         return false;
     }
-    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
+    next_nb = ggml_row_size(tensor->type, tensor->ne[0]); //next_nb*tensor->ne[0]/ggml_blck_size(tensor->type) + type_traits[tensor->type].row_meta_size;
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
         if (tensor->ne[i] != 1) {
             if (i > n) {
@@ -4227,7 +4271,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     }
 
     result->nb[0] = ggml_type_size(type);
-    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
+    result->nb[1] = ggml_row_size(type, ne[0]);
     for (int i = 2; i < GGML_MAX_DIMS; i++) {
         result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
     }
@@ -13023,8 +13067,8 @@ static void ggml_compute_forward_mul_mat(
             for (int64_t i12 = 0; i12 < ne12; i12++) {
                 if (counter++ % nth == ith) {
                     if (!iqk_mul_mat(ne01, ne11, ne00,
-                                src0->type, (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03, nb01/ggml_type_size(src0->type),
-                                src1->type, (const char *)src1->data + i12*nb12 + i13*nb13, nb11/ggml_type_size(src1->type),
+                                src0->type, (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03, nb01, ///ggml_type_size(src0->type),
+                                src1->type, (const char *)src1->data + i12*nb12 + i13*nb13, nb11, ///ggml_type_size(src1->type),
                                 (float *)((char *)dst->data + i12*nb2 + i13*nb3), nb1/ggml_type_size(dst->type),
                                 0, 1)) goto IQK_MulMat_Not_Available1;
                 }
@@ -13036,8 +13080,8 @@ static void ggml_compute_forward_mul_mat(
         for (int64_t i13 = 0; i13 < ne13; i13++)
             for (int64_t i12 = 0; i12 < ne12; i12++)
                 if (!iqk_mul_mat(ne01, ne11, ne00,
-                            src0->type, (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03, nb01/ggml_type_size(src0->type),
-                            src1->type, (const char *)src1->data + i12*nb12 + i13*nb13, nb11/ggml_type_size(src1->type),
+                            src0->type, (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03, nb01, ///ggml_type_size(src0->type),
+                            src1->type, (const char *)src1->data + i12*nb12 + i13*nb13, nb11, ///ggml_type_size(src1->type),
                             (float *)((char *)dst->data + i12*nb2 + i13*nb3), nb1/ggml_type_size(dst->type),
                             ith, nth)) goto IQK_MulMat_Not_Available1;
         return;
@@ -13123,8 +13167,8 @@ UseGgmlGemm1:;
         for (int64_t i13 = 0; i13 < ne13; i13++)
            for (int64_t i12 = 0; i12 < ne12; i12++)
                 if (!iqk_mul_mat(ne01, ne11, ne00,
-                            src0->type, (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03, nb01/ggml_type_size(src0->type),
-                            vec_dot_type, (const char *)wdata + (i12*ne11 + i13*ne12*ne11)*row_size, row_size/ggml_type_size(vec_dot_type),
+                            src0->type, (const char *)src0->data + i12/r2*nb02 + i13/r3*nb03, nb01, ///ggml_type_size(src0->type),
+                            vec_dot_type, (const char *)wdata + (i12*ne11 + i13*ne12*ne11)*row_size, row_size, ///ggml_type_size(vec_dot_type),
                             (float *)((char *)dst->data + i12*nb2 + i13*nb3), nb1/ggml_type_size(dst->type),
                             ith, nth)) goto IQK_MulMat_Not_Available2;
         return;
@@ -13353,8 +13397,8 @@ static void ggml_compute_forward_mul_mat_id(
 #if GGML_USE_IQK_MULMAT
         if (ne13 == 1 && dst->type == GGML_TYPE_F32) {
             if (!iqk_mul_mat_moe(nr0, nr1, ne00, ne11,
-                        src0->type, (const char *)src0_cur, nb01/ggml_type_size(src0->type),
-                        vec_dot_type, (const char *)wdata, row_size/ggml_type_size(vec_dot_type),
+                        src0->type, (const char *)src0_cur, nb01, ///ggml_type_size(src0->type),
+                        vec_dot_type, (const char *)wdata, row_size, ///ggml_type_size(vec_dot_type),
                         (float *)dst->data, nb1, nb2, matrix_rows + cur_a*ne12,
                         ith, nth)) goto IQK_MulMat_Not_Available;
             continue;
@@ -13870,7 +13914,7 @@ static void ggml_compute_forward_softcap(
         default:
             {
                 GGML_ASSERT(false);
-            } break;
+            }
     }
 }
 
@@ -13986,7 +14030,7 @@ static void ggml_compute_forward_softcap_max(
         default:
            {
                 GGML_ASSERT(false);
-            } break;
+            }
     }
 }
 
@@ -18652,11 +18696,11 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
         case GGML_OP_SOFTCAP:
             {
                 GGML_ASSERT(false); // TODO: not implemented
-            } break;
+            }
         case GGML_OP_SOFT_CAP_MAX:
             {
                 GGML_ASSERT(false); // TODO: not implemented
-            } break;
+            }
         case GGML_OP_SET:
            {
                 const size_t nb1 = ((int32_t *) tensor->op_params)[0];
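For readers following the change: the commit threads a new row_meta_size field through every type_traits entry and folds it into ggml_row_size(), so a quantized row may now carry a few bytes of per-row metadata in front of its blocks (4 bytes for iq2_tn, 2 for iq1_tn, 0 everywhere else). Since nb[1] is now computed with ggml_row_size(), the row strides handed to iqk_mul_mat/iqk_mul_mat_moe become plain byte strides (nb01, nb11, row_size) instead of the old element/block counts obtained by dividing by ggml_type_size(). The standalone sketch below is not part of the commit; it just reproduces the new row-size arithmetic with a trimmed stand-in struct and hypothetical block numbers.

/* row_size_sketch.c - minimal sketch of the patched ggml_row_size() math.
 * struct row_traits and the numbers in main() are illustrative assumptions,
 * not the real ggml type_traits values. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct row_traits {
    int64_t blck_size;      /* elements per block                        */
    size_t  type_size;      /* bytes per block                           */
    size_t  row_meta_size;  /* new: per-row metadata bytes (e.g. a scale) */
};

/* Mirrors the patched ggml_row_size(): metadata bytes + block payload. */
static size_t row_size(struct row_traits t, int64_t ne) {
    assert(ne % t.blck_size == 0);
    return t.row_meta_size + t.type_size*(size_t)(ne/t.blck_size);
}

int main(void) {
    /* Hypothetical iq2_tn-like type: 256-element blocks, 64 bytes per block,
     * 4 bytes of per-row metadata. */
    struct row_traits iq2_tn_like = { 256, 64, 4 };
    printf("%zu\n", row_size(iq2_tn_like, 4096));
    return 0;
}

Running it prints 1028 for a 4096-element row: 16 blocks of 64 bytes plus the 4 metadata bytes, which is exactly the quantity ggml_nbytes() and ggml_new_tensor_impl() now pick up through nb[1].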