Diffstat (limited to 'common/common.cpp')
 common/common.cpp | 43 +++++++++++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 6 deletions(-)
diff --git a/common/common.cpp b/common/common.cpp
index 7c3e1187..8764a7be 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -78,7 +78,7 @@ int32_t get_num_physical_cores() {
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
 
-static void process_escapes(std::string& input) {
+void process_escapes(std::string& input) {
     std::size_t input_len = input.length();
     std::size_t output_idx = 0;
 
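Note: dropping the static qualifier gives process_escapes external linkage so
other translation units can call it. Presumably the matching declaration is
added to common/common.h (hypothetical placement, not shown in this diff),
roughly:

    void process_escapes(std::string& input);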
@@ -352,7 +352,19 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-            params.lora_adapter = argv[i];
+            params.lora_adapter.push_back({argv[i], 1.0f});
+            params.use_mmap = false;
+        } else if (arg == "--lora-scaled") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            const char * lora_adapter = argv[i];
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.lora_adapter.push_back({lora_adapter, std::stof(argv[i])});
             params.use_mmap = false;
         } else if (arg == "--lora-base") {
             if (++i >= argc) {
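For reference, a sketch of how the new flag is invoked (binary and adapter
file names are hypothetical); each --lora-scaled consumes two arguments, a
path followed by a scale parsed with std::stof, and either flag disables mmap:

    ./main -m models/ggml-model.bin \
        --lora lora-style.bin \
        --lora-scaled lora-domain.bin 0.5

Note that the parser does not guard the std::stof conversion, so a non-numeric
scale argument will throw std::invalid_argument rather than set invalid_param.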
@@ -703,6 +715,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" --verbose-prompt print prompt before generation\n");
fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles\n");
printf(" --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
+ printf(" --lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)\n");
printf(" --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
printf(" -m FNAME, --model FNAME\n");
printf(" model path (default: %s)\n", params.model.c_str());
@@ -776,10 +789,15 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         return std::make_tuple(nullptr, nullptr);
     }
 
-    if (!params.lora_adapter.empty()) {
+    for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
+        const std::string& lora_adapter = std::get<0>(params.lora_adapter[i]);
+        float lora_scale = std::get<1>(params.lora_adapter[i]);
         int err = llama_model_apply_lora_from_file(model,
-                                                   params.lora_adapter.c_str(),
-                                                   params.lora_base.empty() ? NULL : params.lora_base.c_str(),
+                                                   lora_adapter.c_str(),
+                                                   lora_scale,
+                                                   ((i > 0) || params.lora_base.empty())
+                                                        ? NULL
+                                                        : params.lora_base.c_str(),
                                                    params.n_threads);
         if (err != 0) {
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
@@ -1225,7 +1243,20 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
fprintf(stream, " %d: %f", lb.first, lb.second);
}
- fprintf(stream, "lora: %s\n", params.lora_adapter.c_str());
+ fprintf(stream, "lora:\n");
+ for (std::tuple<std::string, float> la : params.lora_adapter) {
+ if (std::get<1>(la) != 1.0f) {
+ continue;
+ }
+ fprintf(stream, " - %s\n", std::get<0>(la).c_str());
+ }
+ fprintf(stream, "lora_scaled:\n");
+ for (std::tuple<std::string, float> la : params.lora_adapter) {
+ if (std::get<1>(la) == 1.0f) {
+ continue;
+ }
+ fprintf(stream, " - %s: %f\n", std::get<0>(la).c_str(), std::get<1>(la));
+ }
fprintf(stream, "lora_base: %s\n", params.lora_base.c_str());
fprintf(stream, "low_vram: %s # default: false\n", params.low_vram ? "true" : "false");
fprintf(stream, "main_gpu: %d # default: 0\n", params.main_gpu);