summaryrefslogtreecommitdiff
path: root/ggml-cuda.cu
diff options
context:
space:
mode:
Diffstat (limited to 'ggml-cuda.cu')
-rw-r--r--ggml-cuda.cu6
1 files changed, 4 insertions, 2 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 64d3b674..593fa4cd 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -188,13 +188,15 @@ static ggml_cuda_device_info ggml_cuda_init() {
info.default_tensor_split[id] = total_vram;
total_vram += prop.totalGlobalMem;
+ info.devices[id].nsm = prop.multiProcessorCount;
+ info.devices[id].smpb = prop.sharedMemPerBlock;
#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
+ info.devices[id].smpbo = prop.sharedMemPerBlock;
info.devices[id].cc = 100*prop.major + 10*prop.minor + CC_OFFSET_AMD;
#else
+ info.devices[id].smpbo = prop.sharedMemPerBlockOptin;
info.devices[id].cc = 100*prop.major + 10*prop.minor;
#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
- info.devices[id].smpb = prop.sharedMemPerBlock;
- info.devices[id].nsm = prop.multiProcessorCount;
}
for (int id = 0; id < info.device_count; ++id) {