diff options
author | Herman Semenov <GermanAizek@yandex.ru> | 2024-05-20 07:33:21 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-20 10:33:21 +0300 |
commit | 213e90ed73f8ac3cd3026dc3f086beae0d414f96 (patch) | |
tree | 08fd8c354635baac81319b2d36816b7947351ab5 | |
parent | 65c58207ece92ad213f4bfd0f91dcb2dfb664f5b (diff) |
ggml-opencl, llama: using reserve() if count already known (#7272)
-rw-r--r-- | ggml-opencl.cpp | 7 |
-rw-r--r-- | llama.cpp | 3 |
2 files changed, 8 insertions, 2 deletions
```diff
diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 880a1495..922f2483 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -1,4 +1,4 @@
-#include "ggml.h"
+#include "ggml.h"
 #include "ggml-opencl.h"
 #include "ggml-backend-impl.h"
 
@@ -1835,7 +1835,10 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
                     CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, &offset, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
                 }
 
-                for (int64_t i12 = i02 * r2, e12 = i12 + r2; i12 < e12; i12++) {
+                int64_t i12 = i02 * r2;
+                int64_t e12 = i12 + r2;
+                events.reserve(e12 - i12);
+                for (; i12 < e12; i12++) {
                     if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
                         // copy src1 to device
                         events.emplace_back();
diff --git a/llama.cpp b/llama.cpp
--- a/llama.cpp
+++ b/llama.cpp
@@ -16162,6 +16162,7 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }
 
     // make tensors
+    cvec.tensors.reserve(model.hparams.n_layer);
     cvec.tensors.push_back(nullptr); // there's never a tensor for layer 0
     for (size_t il = 1; il < model.hparams.n_layer; il++) {
         struct ggml_context * ctx = ctx_map.at(model.buft_layer[il].buft);
@@ -16170,6 +16171,8 @@ static bool llama_control_vector_init(struct llama_control_vector & cvec, const
     }
 
     // allocate tensors / buffers and zero
+    cvec.ctxs.reserve(ctx_map.size());
+    cvec.bufs.reserve(ctx_map.size());
     for (auto it : ctx_map) {
         ggml_backend_buffer_type_t buft = it.first;
         ggml_context * ctx = it.second;
```