summaryrefslogtreecommitdiff
path: root/ggml-cuda.cu
diff options
context:
space:
mode:
Diffstat (limited to 'ggml-cuda.cu')
-rw-r--r-- ggml-cuda.cu | 17
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 2d9ffffb..f87f1880 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -5790,6 +5790,11 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
CUDA_CHECK(cudaFree(ptr));
}
+static bool g_cublas_loaded = false;
+
+bool ggml_cublas_loaded(void) {
+ return g_cublas_loaded;
+}
void ggml_init_cublas() {
static bool initialized = false;
@@ -5803,7 +5808,12 @@ void ggml_init_cublas() {
CUDA_CHECK(cudaDeviceSynchronize());
#endif
- CUDA_CHECK(cudaGetDeviceCount(&g_device_count));
+ if (cudaGetDeviceCount(&g_device_count) != cudaSuccess) {
+ initialized = true;
+ g_cublas_loaded = false;
+ return;
+ }
+
GGML_ASSERT(g_device_count <= GGML_CUDA_MAX_DEVICES);
int64_t total_vram = 0;
#if defined(GGML_CUDA_FORCE_MMQ)
@@ -5851,6 +5861,7 @@ void ggml_init_cublas() {
// CUBLAS_CHECK(cublasLoggerConfigure(1, 1, 0, nullptr));
initialized = true;
+ g_cublas_loaded = true;
}
}
@@ -7158,6 +7169,8 @@ static void ggml_cuda_rms_norm(const ggml_tensor * src0, const ggml_tensor * src
}
bool ggml_cuda_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
+ if (!g_cublas_loaded) return false;
+
const int64_t ne10 = src1->ne[0];
const int64_t ne0 = dst->ne[0];
@@ -7843,6 +7856,8 @@ void ggml_cuda_free_scratch() {
}
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
+ if (!g_cublas_loaded) return false;
+
ggml_cuda_func_t func;
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))