summaryrefslogtreecommitdiff
path: root/ggml-backend.c
diff options
context:
space:
mode:
authorslaren <slarengh@gmail.com>2023-12-24 14:34:22 +0100
committerGitHub <noreply@github.com>2023-12-24 14:34:22 +0100
commit5bf3953d7e9831ea22b0bc017ce97409b801ccf1 (patch)
tree48c0136d9943fb9cca22209894464970549c24b5 /ggml-backend.c
parent708e179e8562c2604240df95a2241dea17fd808b (diff)
cuda : improve cuda pool efficiency using virtual memory (#4606)
* cuda : improve cuda pool efficiency using virtual memory * fix mixtral * fix cmake build * check for vmm support, disable for hip ggml-ci * fix hip build * clarify granularity * move all caps to g_device_caps * refactor error checking * add cuda_pool_alloc, refactor most pool allocations ggml-ci * fix hip build * CUBLAS_TF32_TENSOR_OP_MATH is not a macro * more hip crap * llama : fix msvc warnings * ggml : fix msvc warnings * minor * minor * cuda : fallback to CPU on host buffer alloc fail * Update ggml-cuda.cu Co-authored-by: Johannes Gäßler <johannesg@5d6.de> * Update ggml-cuda.cu Co-authored-by: Johannes Gäßler <johannesg@5d6.de> * ensure allocations are always aligned * act_size -> actual_size --------- Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
Diffstat (limited to 'ggml-backend.c')
-rw-r--r--ggml-backend.c16
1 files changed, 6 insertions, 10 deletions
diff --git a/ggml-backend.c b/ggml-backend.c
index 0c8c9ec4..526ce732 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -297,7 +297,7 @@ static void ggml_backend_registry_init(void) {
void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG);
- int id = ggml_backend_registry_count;
+ size_t id = ggml_backend_registry_count;
ggml_backend_registry[id] = (struct ggml_backend_reg) {
/* .name = */ {0},
@@ -330,6 +330,8 @@ size_t ggml_backend_reg_find_by_name(const char * name) {
return i;
}
}
+
+ // not found
return SIZE_MAX;
}
@@ -340,15 +342,15 @@ ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str)
const char * params = strchr(backend_str, ':');
char backend_name[128];
if (params == NULL) {
- strcpy(backend_name, backend_str);
+ snprintf(backend_name, sizeof(backend_name), "%s", backend_str);
params = "";
} else {
- strncpy(backend_name, backend_str, params - backend_str);
- backend_name[params - backend_str] = '\0';
+ snprintf(backend_name, sizeof(backend_name), "%.*s", (int)(params - backend_str), backend_str);
params++;
}
size_t backend_i = ggml_backend_reg_find_by_name(backend_name);
+
if (backend_i == SIZE_MAX) {
fprintf(stderr, "%s: backend %s not found\n", __func__, backend_name);
return NULL;
@@ -396,18 +398,12 @@ static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
}
static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
- GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
- GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
-
memcpy((char *)tensor->data + offset, data, size);
GGML_UNUSED(buffer);
}
static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
- GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
- GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
-
memcpy(data, (const char *)tensor->data + offset, size);
GGML_UNUSED(buffer);