1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
//
// Copyright (C) 2024 Iwan Kawrakow
// MIT license
// SPDX-License-Identifier: MIT
//
#pragma once
#include <stdint.h>
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
bool iqk_mul_mat(long Nx, long Ny, long ne00,
int typeA, const void * A, long strideA,
int typeB, const void * B, long strideB,
float * C, long stride_C, int ith, int nth);
bool iqk_mul_mat_4d(long Nx, long Ny, long ne00,
long ne02, long ne03, long ne12, long ne13,
long nb02, long nb03, long nb12, long nb13, long nb2, long nb3,
int typeA, const void * A, long strideA,
int typeB, const void * B, long strideB,
float * C, long stride_C, int ith, int nth);
bool iqk_mul_mat_moe(long Nx, long Ny, long ne00, int ne11,
int typeA, const void * A, long strideA,
int typeB, const void * B, long strideB,
float * C, long nb1, long nb2, const void * vrow_mapping, int ith, int nth);
bool iqk_flash_attn_noalibi(int type_k, // type of k
int type_v, // type of v
int Dk, // K head size
int Dv, // V head size
int nq, // number of columns in q
int nk, // number of rows in k
int stride_q, // distance between q columns in bytes
int stride_k, // distance between k rows in bytes
int stride_v, // distance between v rows in bytes
int stride_m, // distance between mask rows (in bytes
int stride_qkv, // distance between rows in mask (in bytes)
const float * q, // q matrix.
const void * k, // k matrix. Assumed to be fp16, nq x nk elements
const void * v, // v matrix. Assumed to be fp16, nq x nk elements
const void * mask, // mask. If not null, assumed to be fp16. nq x nk elements
float scale, // scale applied before softmax
float softcap, // if > 0, a "soft-cap" operation is applied before softmax
float * qkv); // v*softmax(scale*(k*q))
#ifdef __cplusplus
}
#endif
|