summary | refs | log | tree | commit | diff
path: root/ggml-cuda.cu
diff options
context:
space:
mode:
author: hydai <z54981220@gmail.com> 2023-12-30 00:31:19 +0800
committer: GitHub <noreply@github.com> 2023-12-29 17:31:19 +0100
commit: 91bb39cec7e4dfb9e2293509ef60298a67f0b1b7 (patch)
tree: 9a0085ff74960585798ca43e70be5e65a14bf8c8 /ggml-cuda.cu
parent: 04ac0607e913ab91234dfb240e12a76509e30982 (diff)
cuda: fix vmm oom issue on NVIDIA AGX Orin (#4687)
Signed-off-by: hydai <hydai@secondstate.io>
Diffstat (limited to 'ggml-cuda.cu')
-rw-r--r-- ggml-cuda.cu 2
1 files changed, 1 insertions, 1 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 9a9effcf..09585b07 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6662,7 +6662,7 @@ static void ggml_cuda_pool_free_leg(int device, void * ptr, size_t size) {
// pool with virtual memory
static CUdeviceptr g_cuda_pool_addr[GGML_CUDA_MAX_DEVICES] = {0};
static size_t g_cuda_pool_used[GGML_CUDA_MAX_DEVICES] = {0};
-static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 36; // 64 GB
+static const size_t CUDA_POOL_VMM_MAX_SIZE = 1ull << 35; // 32 GB
static void * ggml_cuda_pool_malloc_vmm(int device, size_t size, size_t * actual_size) {
scoped_spin_lock lock(g_cuda_pool_lock);