update HIP_UMA #7399 (#7414)

* update HIP_UMA #7399 add use of hipMemAdviseSetCoarseGrain when LLAMA_HIP_UMA is enabled. - get x2 on prompt eval and x1.5 on token gen with rocm6.0 on ryzen 7940HX iGPU (780M/gfx1103) * simplify code, more consistent style --------- Co-authored-by: slaren <slarengh@gmail.com>
author: Djip007 <djip.perois@free.fr> 2024-05-28 01:40:47 +0200
committer: GitHub <noreply@github.com> 2024-05-28 01:40:47 +0200
commit: 852aafb163d32d5bad63c10bc323a02c28fec59d (patch)
tree: 48c9bf7791952889c83da7ee3085818d7a61fa25 /ggml-cuda
parent: 0136966dafb452601c23f30395878d5a65ddc559 (diff)
1 files changed, 0 insertions, 5 deletions
diff --git a/ggml-cuda/common.cuh b/ggml-cuda/common.cuh
index 8f6fd71c..22872ca5 100644
--- a/ggml-cuda/common.cuh
+++ b/ggml-cuda/common.cuh
@@ -79,13 +79,8 @@
 #define cudaHostRegisterReadOnly hipHostRegisterReadOnly
 #define cudaHostUnregister hipHostUnregister
 #define cudaLaunchHostFunc hipLaunchHostFunc
-#ifdef GGML_HIP_UMA
-#define cudaMalloc hipMallocManaged
-#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size)
-#else
 #define cudaMalloc hipMalloc
 #define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size, hipHostMallocDefault)
-#endif
 #define cudaMemcpy hipMemcpy
 #define cudaMemcpyAsync hipMemcpyAsync
 #define cudaMemcpyPeerAsync hipMemcpyPeerAsync
author	Djip007 <djip.perois@free.fr>	2024-05-28 01:40:47 +0200
committer	GitHub <noreply@github.com>	2024-05-28 01:40:47 +0200
commit	852aafb163d32d5bad63c10bc323a02c28fec59d (patch)
tree	48c9bf7791952889c83da7ee3085818d7a61fa25 /ggml-cuda
parent	0136966dafb452601c23f30395878d5a65ddc559 (diff)