From 73bdcb395ef9a997d9c02950c7cd4249546162cd Mon Sep 17 00:00:00 2001
From: Andrew Godfrey
Date: Wed, 1 Nov 2023 04:49:04 -0700
Subject: finetune : add -ngl parameter (#3762)

* Add '-ngl' support to finetune.cpp

* Add fprintf in ggml_cuda_op_add

  When I tried CUDA offloading during finetuning following the readme, I got
  an assert here. This probably isn't an important case because inference
  later gives a warning saying you should use f16 or f32 instead when using lora

* Add 'finetune.sh', which currently fails when using GPU
  "error: operator (): Finetuning on tensors with type 'f16' is not yet supported"

* tweak finetune.sh

* Suppress some warnings in ggml.c

* Add f16 implementation to ggml_compute_forward_add_f16_f32

* Add an f16 case to ggml_add_cast_impl and llama_build_lora_finetune_graphs

* finetune.sh: Edit comments

* Add "add_f16_f32_f32_cuda"

* Tweak an error message

* finetune.sh: Add an optional LLAMA_MODEL_DIR variable

* finetune.sh: Add an optional LLAMA_TRAINING_DIR variable

* train : minor

* tabs to spaces

---------

Co-authored-by: Georgi Gerganov
Co-authored-by: cebtenzzre
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index ead1d421..42cedc7a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8003,7 +8003,7 @@ static int llama_apply_lora_from_file_internal(
         if (dest_t->backend == GGML_BACKEND_GPU || dest_t->backend == GGML_BACKEND_GPU_SPLIT) {
             if (dest_t->type != GGML_TYPE_F16) {
                 throw std::runtime_error(format(
-                    "%s: error: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models", __func__));
+                    "%s: error: the simultaneous use of LoRAs and GPU acceleration is only supported for f16 models. dest_t->type: %d", __func__, dest_t->type));
             }
             offload_func = ggml_cuda_assign_buffers;
             offload_func_force_inplace = ggml_cuda_assign_buffers_force_inplace;
--
cgit v1.2.3
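
The "add_f16_f32_f32_cuda" kernel mentioned in the commit message presumably lives in
ggml-cuda.cu and is not visible here, since this view is limited to 'llama.cpp'. As a
rough sketch of what such an element-wise f16 + f32 -> f32 kernel could look like (the
kernel name, block size, and launcher signature below are assumptions for illustration,
not the actual change):

    // Sketch only: assumed shape of a CUDA add kernel taking an f16 and an f32
    // input and producing an f32 output, as the commit message describes.
    #include <cuda_fp16.h>
    #include <cuda_runtime.h>

    #define ADD_BLOCK_SIZE 256  // hypothetical launch block size

    static __global__ void add_f16_f32_f32(const half * x, const float * y, float * dst, const int k) {
        const int i = blockDim.x * blockIdx.x + threadIdx.x;
        if (i >= k) {
            return;
        }
        // widen the f16 operand to f32 so the sum is computed and stored in f32
        dst[i] = __half2float(x[i]) + y[i];
    }

    // host-side launcher, rounding the element count up to whole blocks
    static void add_f16_f32_f32_cuda(const half * x, const float * y, float * dst, const int k, cudaStream_t stream) {
        const int num_blocks = (k + ADD_BLOCK_SIZE - 1) / ADD_BLOCK_SIZE;
        add_f16_f32_f32<<<num_blocks, ADD_BLOCK_SIZE, 0, stream>>>(x, y, dst, k);
    }

An f32-output variant like this presumably complements the pre-existing f16-output add
path so that finetuning graphs, which mix f16 weights with f32 tensors, can be offloaded
with -ngl.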