| author | Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> | 2024-04-09 09:16:13 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-09 11:16:13 +0300 |
| commit | 5dc9dd7152dedc6046b646855585bd070c91e8c8 (patch) | |
| tree | d2bae3652d91cdd9327e28fa85d167a67e050c53 /ggml.c | |
| parent | e11a8999b5690f810c2c99c14347f0834e68c524 (diff) | |
llama : add Command R Plus support (#6491)
* Add Command R Plus GGUF
* Loading works up to LayerNorm2D
* Export new tensors in 1D so they are not quantized.
* Fix embedding layer based on Noeda's example
* Whitespace
* Add line
* Fix unexpected tokens on MPS. Re-add F16 fix. (Noeda)
* dranger003: Fix block index overflow in CUDA dequantizing.
* Reverted blocked multiplication code as it still has issues and could affect other Llama arches
* export norms as f32
* fix overflow issues during quant and other cleanup
* Type convention
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* dranger003: Fix more int overflow during quant.
---------
Co-authored-by: S <seast@Ss-Mac-Studio.local>
Co-authored-by: S <s@example.com>
Co-authored-by: slaren <slarengh@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml.c')
-rw-r--r-- | ggml.c | 16 |
1 file changed, 8 insertions, 8 deletions
```diff
@@ -338,14 +338,14 @@ ggml_fp16_t ggml_fp32_to_fp16(float x) {
     return GGML_FP32_TO_FP16(x);
 }
 
-void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int n) {
-    for (int i = 0; i < n; i++) {
+void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
+    for (int64_t i = 0; i < n; i++) {
         y[i] = GGML_FP16_TO_FP32(x[i]);
     }
 }
 
-void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int n) {
-    int i = 0;
+void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
+    int64_t i = 0;
 #if defined(__F16C__)
     for (; i + 7 < n; i += 8) {
         __m256 x_vec = _mm256_loadu_ps(x + i);
@@ -20331,11 +20331,11 @@ size_t ggml_quantize_chunk(
         enum ggml_type type,
         const float * src,
         void * dst,
-        int start,
-        int nrows,
-        int n_per_row,
+        int64_t start,
+        int64_t nrows,
+        int64_t n_per_row,
         const float * imatrix) {
-    const int n = nrows * n_per_row;
+    const int64_t n = (int64_t) nrows * n_per_row;
 
     if (ggml_quantize_requires_imatrix(type)) {
         GGML_ASSERT(imatrix != NULL);
```
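For context on the first hunk: the diff cuts off just inside the `__F16C__` fast path, which converts eight floats per iteration. The sketch below shows the general shape of such a vectorized fp32-to-fp16 row conversion, not ggml.c's exact continuation: the function name, the `fp16_bits` typedef, and the `_cvtss_sh` tail (ggml's real tail uses its portable `GGML_FP32_TO_FP16` macro) are illustrative assumptions, and it assumes an x86-64 compiler with F16C enabled (e.g. `gcc -mf16c`).

```c
#include <stdint.h>
#include <immintrin.h>  // AVX + F16C intrinsics; build with -mf16c

typedef uint16_t fp16_bits;  // assumption: half floats stored as raw 16-bit patterns

// Sketch of the vectorized fp32 -> fp16 row conversion pattern.
// Note the int64_t count and index, matching the widened signatures above.
void fp32_to_fp16_row_sketch(const float * x, fp16_bits * y, int64_t n) {
    int64_t i = 0;
    // Vector body: 8 floats -> 8 half floats per iteration.
    for (; i + 7 < n; i += 8) {
        __m256  x_vec = _mm256_loadu_ps(x + i);
        __m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
        _mm_storeu_si128((__m128i *)(y + i), y_vec);
    }
    // Scalar tail for the remaining 0..7 elements.
    for (; i < n; i++) {
        y[i] = (fp16_bits) _cvtss_sh(x[i], _MM_FROUND_TO_NEAREST_INT);
    }
}
```

The `i + 7 < n` guard keeps the vector loop from reading past the end of the row; whatever is left over (0 to 7 elements) is handled by the scalar tail.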
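The second hunk carries the actual overflow fix: with the old `int` parameters, `nrows * n_per_row` was computed in 32-bit arithmetic, which is signed overflow (undefined behavior) once a tensor holds more than 2^31 - 1 elements; widening one operand with `(int64_t)` before the multiply forces a 64-bit product. A minimal standalone C demonstration, with hypothetical dimensions chosen only to exceed `INT_MAX` (they are not taken from the model):

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
    // Hypothetical dimensions, chosen only so the element count
    // exceeds INT_MAX (2147483647).
    int nrows     = 256000;
    int n_per_row = 12288;   // 256000 * 12288 = 3145728000 > INT_MAX

    // What the old code did: `nrows * n_per_row` multiplies two ints,
    // so the product is computed (and mangled) in 32 bits before any
    // widening. Simulated here with unsigned wraparound to avoid the
    // signed-overflow UB of writing it directly.
    int64_t n_bad = (int32_t) ((uint32_t) nrows * (uint32_t) n_per_row);

    // What the commit does: widen one operand first, so the whole
    // multiplication happens in 64-bit arithmetic.
    int64_t n_good = (int64_t) nrows * n_per_row;

    printf("32-bit product: %lld\n", (long long) n_bad);   // -1149239296
    printf("64-bit product: %lld\n", (long long) n_good);  // 3145728000
    return 0;
}
```

The `int64_t` loop counters and row-length parameters in the first hunk follow the same convention, so element counts stay 64-bit end to end.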