author     uint256_t <konndennsa@gmail.com>  2023-03-14 01:33:43 +0900
committer  GitHub <noreply@github.com>       2023-03-13 18:33:43 +0200
commit     63fd76fbb06f9b723ca11505352387a3148b1814 (patch)
tree       10e4d2c002837c522339ecc2a6742b5ea9bcd0a3 /main.cpp
parent     2a20f48efad692a8c2744f10c673bbdbe0c751b7 (diff)
Reduce model loading time (#43)
* Use buffering
* Use vector
* Minor

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'main.cpp')
-rw-r--r--  main.cpp  4
1 file changed, 4 insertions, 0 deletions
diff --git a/main.cpp b/main.cpp
index d068761e..ee0952f7 100644
--- a/main.cpp
+++ b/main.cpp
@@ -87,7 +87,10 @@ struct llama_model {
 bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
     printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
 
+    std::vector<char> f_buf(1024*1024);
+
     auto fin = std::ifstream(fname, std::ios::binary);
+    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
         return false;
@@ -325,6 +328,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
         printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());
 
         fin = std::ifstream(fname_part, std::ios::binary);
+        fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
         fin.seekg(file_offset);
 
         // load weights
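
Note: the speedup comes from std::basic_streambuf::pubsetbuf, which swaps the stream's default (typically small) internal buffer for a caller-supplied one, so large sequential reads go to the OS less often. Below is a minimal self-contained sketch of the same technique; the file name "model.bin" is illustrative, and whether a non-null pubsetbuf request takes effect is implementation-defined (it must be issued before the first read to have a chance of working).

#include <cstdint>
#include <cstdio>
#include <fstream>
#include <string>
#include <vector>

int main() {
    const std::string fname = "model.bin"; // illustrative file name

    // 1 MiB caller-supplied buffer, as in the patch above.
    // Declared before the stream so it outlives all reads.
    std::vector<char> f_buf(1024*1024);

    auto fin = std::ifstream(fname, std::ios::binary);
    // Must run before the first read; the effect of a non-null
    // buffer request is implementation-defined, but mainstream
    // implementations honor it and route reads through f_buf.
    fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
    if (!fin) {
        std::fprintf(stderr, "failed to open '%s'\n", fname.c_str());
        return 1;
    }

    // Reads are now serviced from the large buffer,
    // reducing the number of underlying syscalls.
    uint32_t magic = 0;
    fin.read(reinterpret_cast<char *>(&magic), sizeof(magic));
    std::printf("first 4 bytes: 0x%08x\n", (unsigned)magic);
    return 0;
}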