diff options
Diffstat (limited to 'ggml-cuda/common.cuh')
-rw-r--r-- | ggml-cuda/common.cuh | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/ggml-cuda/common.cuh b/ggml-cuda/common.cuh index 7f4764d6..de7c2e43 100644 --- a/ggml-cuda/common.cuh +++ b/ggml-cuda/common.cuh @@ -331,6 +331,10 @@ static __device__ __forceinline__ half2 __shfl_xor(half2 var, int laneMask, int #define FP16_AVAILABLE #endif // (defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) || __CUDA_ARCH__ >= CC_PASCAL +#if defined(FP16_AVAILABLE) && __CUDA_ARCH__ != 610 +#define FAST_FP16_AVAILABLE +#endif // defined(FP16_AVAILABLE) && __CUDA_ARCH__ != 610 + #if !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_VOLTA #define FP16_MMA_AVAILABLE #endif // !(defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= CC_VOLTA @@ -661,6 +665,7 @@ struct ggml_cuda_device_info { int cc; // compute capability int nsm; // number of streaming multiprocessors size_t smpb; // max. shared memory per block + size_t smpbo; // max. shared memory per block (with opt-in) bool vmm; // virtual memory support size_t vmm_granularity; // granularity of virtual memory size_t total_vram; |