From 514ae086200a8cfd78af6a71b6c6ee14931ddc0e Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Tue, 17 Dec 2024 14:16:34 +0100
Subject: Be able to repack tensors at run time (#147)

* Be able to repack tensors at run time

* Repack: also add bf16 as repackable type

* Repack: make sure number of rows is a multiple of the packing

---------

Co-authored-by: Iwan Kawrakow
---
 common/common.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'common/common.h')

diff --git a/common/common.h b/common/common.h
index 486017ef..73d7d650 100644
--- a/common/common.h
+++ b/common/common.h
@@ -187,6 +187,7 @@ struct gpt_params {
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
     bool check_tensors = false; // validate tensor data
+    bool repack_tensors = false; // repack tensors if interleaved variant is available
 
     std::string cache_type_k = "f16"; // KV cache data type for the K
     std::string cache_type_v = "f16"; // KV cache data type for the V
-- 
cgit v1.2.3