author     Andrew Godfrey <AndrewGodfrey@users.noreply.github.com>    2023-11-01 04:49:04 -0700
committer  GitHub <noreply@github.com>                                2023-11-01 13:49:04 +0200
commit     73bdcb395ef9a997d9c02950c7cd4249546162cd
tree       9cace5e626d13541dda1798fbee2d74b57874952 /common
parent     f0e209324a7f663225791897877bf610f1af152d
finetune : add -ngl parameter (#3762)
* Add '-ngl' support to finetune.cpp
* Add fprintf in ggml_cuda_op_add
When I tried CUDA offloading during finetuning, following the README, I hit an assert here.
This probably isn't an important case, because inference later warns that you should use f16 or f32 instead when using LoRA.
* Add 'finetune.sh', which currently fails when using GPU
"error: operator (): Finetuning on tensors with type 'f16' is not yet supported"
* tweak finetune.sh
* Suppress some warnings in ggml.c
* Add f16 implementation to ggml_compute_forward_add_f16_f32 (a conceptual sketch of this f16 + f32 add appears after this list)
* Add an f16 case to ggml_add_cast_impl and llama_build_lora_finetune_graphs
* finetune.sh: Edit comments
* Add "add_f16_f32_f32_cuda"
* Tweak an error message
* finetune.sh: Add an optional LLAMA_MODEL_DIR variable
* finetune.sh: Add an optional LLAMA_TRAINING_DIR variable
* train : minor
* tabs to spaces
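The f16-related items above amount to supporting element-wise addition where src0 is f16, src1 is f32, and the result is written as f32 (the "add_f16_f32_f32" case). Below is a minimal conceptual sketch of that operation, assuming only the ggml_fp16_t type and the ggml_fp16_to_fp32() helper declared in ggml.h; it is not the actual ggml_compute_forward_add_f16_f32 or add_f16_f32_f32_cuda code, which additionally deals with ggml's tensor layout.

    // Conceptual sketch only (not ggml's implementation): dst = src0 (f16) + src1 (f32),
    // with the result stored as f32. Assumes ggml_fp16_t and ggml_fp16_to_fp32() from ggml.h.
    #include "ggml.h"

    static void add_f16_f32_f32_ref(const ggml_fp16_t * src0, const float * src1,
                                    float * dst, int64_t n) {
        for (int64_t i = 0; i < n; ++i) {
            // promote the half-precision operand to f32, then add in f32
            dst[i] = ggml_fp16_to_fp32(src0[i]) + src1[i];
        }
    }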
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
Diffstat (limited to 'common')
-rw-r--r--  common/train.cpp  2
-rw-r--r--  common/train.h    1
2 files changed, 3 insertions, 0 deletions
diff --git a/common/train.cpp b/common/train.cpp
index 3cce5da2..bc15b7a0 100644
--- a/common/train.cpp
+++ b/common/train.cpp
@@ -1045,6 +1045,7 @@ struct train_params_common get_default_train_params_common() {
     params.n_batch = 8;
     params.n_gradient_accumulation = 1;
     params.n_epochs = -1;
+    params.n_gpu_layers = 0;
 
     params.custom_n_ctx = false;
 
@@ -1080,6 +1081,7 @@ struct train_params_common get_default_train_params_common() {
     params.adam_beta2 = 0.999f;
     params.adam_gclip = 1.0f;
     params.adam_eps_f = 0.0f;
+
     return params;
 }
 
diff --git a/common/train.h b/common/train.h
index 42fa704b..d86c93cc 100644
--- a/common/train.h
+++ b/common/train.h
@@ -44,6 +44,7 @@ struct train_params_common {
     int n_batch;
     int n_gradient_accumulation;
     int n_epochs;
+    int n_gpu_layers;
 
     bool custom_n_ctx;
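Note that the change to 'common' above only adds the n_gpu_layers field and its default value; the -ngl flag itself is parsed in finetune.cpp, which is outside the 'common' filter and therefore not shown. As a rough illustration (not the actual finetune.cpp code), a value parsed into train_params_common::n_gpu_layers would typically be forwarded to llama.cpp's model parameters as sketched here, assuming the llama_model_default_params() / llama_load_model_from_file() API:

    #include "llama.h"
    #include "train.h"

    // Hypothetical illustration of the intended flow: forward the new
    // train_params_common::n_gpu_layers value (set via -ngl) to llama.cpp's
    // model params so that layers can be offloaded to the GPU.
    // The real finetune.cpp wiring may differ.
    static struct llama_model * load_model_with_offload(const char * model_path,
                                                        const struct train_params_common & params) {
        struct llama_model_params mparams = llama_model_default_params();
        mparams.n_gpu_layers = params.n_gpu_layers; // 0 (the new default) keeps everything on the CPU
        return llama_load_model_from_file(model_path, mparams);
    }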