summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Cebtenzzre <cebtenzzre@gmail.com> 2023-09-28 17:41:44 -0400
committer GitHub <noreply@github.com> 2023-09-28 17:41:44 -0400
commit bc39553c901a91cfcb757863586250838c83eeab (patch)
tree 507d1aedf8ad63e4ed84e37154de9abf31ba358a
parent 0ccfc62a96a6b59a8faa14d1b350493f4cd51ae2 (diff)
build : enable more non-default compiler warnings (#3200)
-rw-r--r-- .gitignore | 1
-rw-r--r-- CMakeLists.txt | 51
-rw-r--r-- Makefile | 73
-rw-r--r-- common/common.cpp | 3
-rw-r--r-- common/log.h | 74
-rw-r--r-- examples/baby-llama/baby-llama.cpp | 13
-rw-r--r-- examples/llama-bench/llama-bench.cpp | 4
-rw-r--r-- examples/main/main.cpp | 2
-rw-r--r-- examples/quantize/quantize.cpp | 1
-rw-r--r-- examples/train-text-from-scratch/train-text-from-scratch.cpp | 6
-rw-r--r-- ggml.c | 288
-rw-r--r-- ggml.h | 8
-rw-r--r-- llama.cpp | 14
-rw-r--r-- pocs/vdot/q8dot.cpp | 8
-rw-r--r-- tests/test-grad0.cpp | 6
-rw-r--r-- tests/test-opt.cpp | 4
16 files changed, 287 insertions, 269 deletions
diff --git a/.gitignore b/.gitignore
index 8ba3b9f4..f98132a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@ models-mnt
/main
/metal
/perplexity
+/q8dot
/quantize
/quantize-stats
/result
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c4a649a9..d5acf854 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -414,37 +414,38 @@ endif()
if (LLAMA_ALL_WARNINGS)
if (NOT MSVC)
- set(c_flags
- -Wall
- -Wextra
- -Wpedantic
- -Wcast-qual
- -Wdouble-promotion
- -Wshadow
- -Wstrict-prototypes
- -Wpointer-arith
- -Wmissing-prototypes
- -Werror=implicit-int
- -Wno-unused-function
- )
- set(cxx_flags
- -Wall
- -Wextra
- -Wpedantic
- -Wcast-qual
- -Wmissing-declarations
- -Wno-unused-function
- -Wno-multichar
- )
- if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- # g++ only
- set(cxx_flags ${cxx_flags} -Wno-format-truncation -Wno-array-bounds)
+ set(warning_flags -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
+ set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
+ -Werror=implicit-function-declaration)
+ set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
+
+ if (CMAKE_C_COMPILER_ID MATCHES "Clang")
+ set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
+ set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)
+
+ if (
+ (CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
+ (CMAKE_C_COMPILER_ID STREQUAL "AppleClang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 7.3.0)
+ )
+ set(c_flags ${c_flags} -Wdouble-promotion)
+ endif()
+ elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
+ set(c_flags ${c_flags} -Wdouble-promotion)
+ set(cxx_flags ${cxx_flags} -Wno-array-bounds)
+
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
+ set(cxx_flags ${cxx_flags} -Wno-format-truncation)
+ endif()
+ if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
+ set(cxx_flags ${cxx_flags} -Wextra-semi)
+ endif()
endif()
else()
# todo : msvc
endif()
add_compile_options(
+ ${warning_flags}
"$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
)
diff --git a/Makefile b/Makefile
index 53af3c69..08b83ca7 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
# Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative parallel finetune export-lora tests/test-c.o
+BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative benchmark-matmult parallel finetune export-lora tests/test-c.o
# Binaries only useful for tests
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
@@ -19,6 +19,20 @@ ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif
+ifeq '' '$(findstring clang,$(shell $(CC) --version))'
+ CC_IS_GCC=1
+ CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+else
+ CC_IS_CLANG=1
+ ifeq '' '$(findstring Apple LLVM,$(shell $(CC) --version))'
+ CC_IS_LLVM_CLANG=1
+ else
+ CC_IS_APPLE_CLANG=1
+ endif
+ CC_VER := $(shell $(CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
+ | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
+endif
+
# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
@@ -87,9 +101,6 @@ CC := riscv64-unknown-linux-gnu-gcc
CXX := riscv64-unknown-linux-gnu-g++
endif
-CCV := $(shell $(CC) --version | head -n 1)
-CXXV := $(shell $(CXX) --version | head -n 1)
-
#
# Compile flags
#
@@ -173,20 +184,33 @@ ifdef LLAMA_DISABLE_LOGS
endif # LLAMA_DISABLE_LOGS
# warnings
-MK_CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
- -Wmissing-prototypes -Werror=implicit-int -Wno-unused-function
-MK_CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wmissing-declarations -Wno-unused-function -Wno-multichar
-
-# TODO(cebtenzzre): remove this once PR #2632 gets merged
-TTFS_CXXFLAGS = $(CXXFLAGS) -Wno-missing-declarations
-
-ifneq '' '$(findstring clang,$(shell $(CXX) --version))'
- # clang++ only
- MK_CXXFLAGS += -Wmissing-prototypes
- TTFS_CXXFLAGS += -Wno-missing-prototypes
+WARN_FLAGS = -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
+MK_CFLAGS += $(WARN_FLAGS) -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int \
+ -Werror=implicit-function-declaration
+MK_CXXFLAGS += $(WARN_FLAGS) -Wmissing-declarations -Wmissing-noreturn
+
+ifeq ($(CC_IS_CLANG), 1)
+ # clang options
+ MK_CFLAGS += -Wunreachable-code-break -Wunreachable-code-return
+ MK_HOST_CXXFLAGS += -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
+
+ ifneq '' '$(and $(CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 030800)))'
+ MK_CFLAGS += -Wdouble-promotion
+ endif
+ ifneq '' '$(and $(CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(CC_VER) \>= 070300)))'
+ MK_CFLAGS += -Wdouble-promotion
+ endif
else
- # g++ only
- MK_CXXFLAGS += -Wno-format-truncation -Wno-array-bounds
+ # gcc options
+ MK_CFLAGS += -Wdouble-promotion
+ MK_HOST_CXXFLAGS += -Wno-array-bounds
+
+ ifeq ($(shell expr $(CC_VER) \>= 070100), 1)
+ MK_HOST_CXXFLAGS += -Wno-format-truncation
+ endif
+ ifeq ($(shell expr $(CC_VER) \>= 080100), 1)
+ MK_HOST_CXXFLAGS += -Wextra-semi
+ endif
endif
# OS specific
@@ -382,7 +406,7 @@ ifdef LLAMA_CUDA_CCBIN
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
endif
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
- $(NVCC) $(NVCCFLAGS) -Wno-pedantic -c $< -o $@
+ $(NVCC) $(NVCCFLAGS) -c $< -o $@
endif # LLAMA_CUBLAS
ifdef LLAMA_CLBLAST
@@ -472,8 +496,8 @@ $(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I NVCCFLAGS: $(NVCCFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
-$(info I CC: $(CCV))
-$(info I CXX: $(CXXV))
+$(info I CC: $(shell $(CC) --version | head -n 1))
+$(info I CXX: $(shell $(CXX) --version | head -n 1))
$(info )
#
@@ -554,7 +578,7 @@ gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
- $(CXX) $(TTFS_CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+ $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
@@ -601,11 +625,18 @@ tests: $(TEST_TARGETS)
benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+run-benchmark-matmult: benchmark-matmult
./$@
+.PHONY: run-benchmark-matmult
+
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
+q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
+ $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
+
tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
diff --git a/common/common.cpp b/common/common.cpp
index 6e8c08cb..ec181c6b 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -755,10 +755,9 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
case 7: return "He";
case 8: return "She";
case 9: return "They";
- default: return "To";
}
- return "The";
+ GGML_UNREACHABLE();
}
//
diff --git a/common/log.h b/common/log.h
index 18f3b976..b8953fdc 100644
--- a/common/log.h
+++ b/common/log.h
@@ -225,31 +225,31 @@ enum LogTriState
// USE LOG() INSTEAD
//
#ifndef _MSC_VER
- #define LOG_IMPL(str, ...) \
- { \
+ #define LOG_IMPL(str, ...) \
+ do { \
if (LOG_TARGET != nullptr) \
{ \
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
fflush(LOG_TARGET); \
} \
- }
+ } while (0)
#else
- #define LOG_IMPL(str, ...) \
- { \
+ #define LOG_IMPL(str, ...) \
+ do { \
if (LOG_TARGET != nullptr) \
{ \
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
fflush(LOG_TARGET); \
} \
- }
+ } while (0)
#endif
// INTERNAL, DO NOT USE
// USE LOG_TEE() INSTEAD
//
#ifndef _MSC_VER
- #define LOG_TEE_IMPL(str, ...) \
- { \
+ #define LOG_TEE_IMPL(str, ...) \
+ do { \
if (LOG_TARGET != nullptr) \
{ \
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
@@ -260,10 +260,10 @@ enum LogTriState
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL, __VA_ARGS__); \
fflush(LOG_TEE_TARGET); \
} \
- }
+ } while (0)
#else
- #define LOG_TEE_IMPL(str, ...) \
- { \
+ #define LOG_TEE_IMPL(str, ...) \
+ do { \
if (LOG_TARGET != nullptr) \
{ \
fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
@@ -274,7 +274,7 @@ enum LogTriState
fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL "", ##__VA_ARGS__); \
fflush(LOG_TEE_TARGET); \
} \
- }
+ } while (0)
#endif
// The '\0' as a last argument, is a trick to bypass the silly
@@ -435,41 +435,41 @@ inline FILE *log_handler() { return log_handler1_impl(); }
inline void log_test()
{
log_disable();
- LOG("01 Hello World to nobody, because logs are disabled!\n")
+ LOG("01 Hello World to nobody, because logs are disabled!\n");
log_enable();
- LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET))
- LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n")
+ LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET));
+ LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n");
log_set_target(stderr);
- LOG("04 Hello World to stderr!\n")
- LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n")
+ LOG("04 Hello World to stderr!\n");
+ LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n");
log_set_target(LOG_DEFAULT_FILE_NAME);
- LOG("06 Hello World to default log file!\n")
+ LOG("06 Hello World to default log file!\n");
log_set_target(stdout);
- LOG("07 Hello World to stdout!\n")
+ LOG("07 Hello World to stdout!\n");
log_set_target(LOG_DEFAULT_FILE_NAME);
- LOG("08 Hello World to default log file again!\n")
+ LOG("08 Hello World to default log file again!\n");
log_disable();
- LOG("09 Hello World _1_ into the void!\n")
+ LOG("09 Hello World _1_ into the void!\n");
log_enable();
- LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n")
+ LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n");
log_disable();
log_set_target("llama.anotherlog.log");
- LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n")
+ LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n");
log_enable();
- LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n")
+ LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n");
log_set_target("llama.yetanotherlog.log");
- LOG("13 Hello World this time in yet new file?\n")
+ LOG("13 Hello World this time in yet new file?\n");
log_set_target(log_filename_generator("llama_autonamed", "log"));
- LOG("14 Hello World in log with generated filename!\n")
+ LOG("14 Hello World in log with generated filename!\n");
#ifdef _MSC_VER
- LOG_TEE("15 Hello msvc TEE without arguments\n")
- LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test")
- LOG_TEELN("17 Hello msvc TEELN without arguments\n")
- LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test")
- LOG("19 Hello msvc LOG without arguments\n")
- LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test")
- LOGLN("21 Hello msvc LOGLN without arguments\n")
- LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test")
+ LOG_TEE("15 Hello msvc TEE without arguments\n");
+ LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test");
+ LOG_TEELN("17 Hello msvc TEELN without arguments\n");
+ LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test");
+ LOG("19 Hello msvc LOG without arguments\n");
+ LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test");
+ LOGLN("21 Hello msvc LOGLN without arguments\n");
+ LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test");
#endif
}
@@ -542,7 +542,7 @@ inline void log_dump_cmdline_impl(int argc, char **argv)
buf << " " << argv[i];
}
}
- LOGLN("Cmd:%s", buf.str().c_str())
+ LOGLN("Cmd:%s", buf.str().c_str());
}
#define log_tostr(var) log_var_to_string_impl(var).c_str()
@@ -620,10 +620,10 @@ inline std::string log_var_to_string_impl(const std::vector<int> & var)
#define LOGLN(...) // dummy stub
#undef LOG_TEE
-#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__); // convert to normal fprintf
+#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
#undef LOG_TEELN
-#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__); // convert to normal fprintf
+#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
#undef LOG_DISABLE
#define LOG_DISABLE() // dummy stub
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index fb1a15c4..8155101d 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -1,9 +1,12 @@
#include "ggml.h"
#include "train.h"
+
#include <vector>
#include <cassert>
-#include <random>
+#include <cstdlib>
#include <cstring>
+#include <random>
+#include <vector>
#if defined(_MSC_VER)
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -64,7 +67,7 @@ static struct ggml_tensor * randomize_tensor(
break;
default:
assert(false);
- };
+ }
return tensor;
}
@@ -389,7 +392,7 @@ static void randomize_model_lora(
free_random_normal_distribution(rnd);
}
-static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
+static void init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
const auto & hparams = model->hparams;
const uint32_t n_ctx = hparams.n_ctx;
@@ -415,14 +418,12 @@ static bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * mod
if (!cache->ctx) {
fprintf(stderr, "%s: failed to allocate memory for kv cache\n", __func__);
- return false;
+ exit(1);
}
}
cache->k = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
cache->v = ggml_new_tensor_1d(cache->ctx, GGML_TYPE_F32, n_elements);
-
- return true;
}
static bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 93bb0c8b..a04115c9 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -655,9 +655,9 @@ struct printer {
virtual ~printer() {}
FILE * fout;
- virtual void print_header(const cmd_params & params) { (void) params; };
+ virtual void print_header(const cmd_params & params) { (void) params; }
virtual void print_test(const test & t) = 0;
- virtual void print_footer() { };
+ virtual void print_footer() { }
};
struct csv_printer : public printer {
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index fd506773..3a4ed3f7 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -852,7 +852,7 @@ int main(int argc, char ** argv) {
llama_backend_free();
#ifndef LOG_DISABLE_LOGS
- LOG_TEE("Log end\n")
+ LOG_TEE("Log end\n");
#endif // LOG_DISABLE_LOGS
return 0;
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 1c1d957e..c7dd0d89 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -72,6 +72,7 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp
// usage:
// ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
//
+[[noreturn]]
static void usage(const char * executable) {
printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index a9cf8a38..5043f32d 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -483,7 +483,7 @@ static struct ggml_tensor * llama_build_train_graphs(
}
#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
-{ \
+do { \
const std::string skey(key); \
const int kid = gguf_find_key(ctx, skey.c_str()); \
if (kid >= 0) { \
@@ -495,7 +495,7 @@ static struct ggml_tensor * llama_build_train_graphs(
} else if (req) { \
die_fmt("key not found in model: %s", skey.c_str()); \
} \
-}
+} while (0)
static void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_ggml_ctx, struct my_llama_model * model) {
// NOTE: gguf_context must be initialized with f_ggml_ctx and no_alloc=false, otherwise tensor data can not be read
@@ -786,7 +786,7 @@ struct train_params {
float rope_freq_scale;
};
-struct train_params get_default_train_params() {
+static struct train_params get_default_train_params() {
struct train_params params;
params.common = get_default_train_params_common();
params.fn_vocab_model = "ggml-vic7b-uncensored-q4_0.bin";
diff --git a/ggml.c b/ggml.c
index 078b2c42..820fe2e7 100644
--- a/ggml.c
+++ b/ggml.c
@@ -245,18 +245,18 @@ inline static void * ggml_aligned_malloc(size_t size) {
//
#define GGML_TENSOR_UNARY_OP_LOCALS \
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
#define GGML_TENSOR_BINARY_OP_LOCALS \
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
- GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); \
- GGML_TENSOR_LOCALS(size_t, nb1, src1, nb); \
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
#if defined(GGML_USE_ACCELERATE)
#include <Accelerate/Accelerate.h>
@@ -1866,7 +1866,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
#define GGML_F16x8_ADD vaddq_f16
#define GGML_F16x8_MUL vmulq_f16
#define GGML_F16x8_REDUCE(res, x) \
- { \
+ do { \
int offset = GGML_F16_ARR >> 1; \
for (int i = 0; i < offset; ++i) { \
x[i] = vaddq_f16(x[i], x[offset+i]); \
@@ -1882,7 +1882,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
const float32x4_t t0 = vcvt_f32_f16(vget_low_f16 (x[0])); \
const float32x4_t t1 = vcvt_f32_f16(vget_high_f16(x[0])); \
res = (ggml_float) vaddvq_f32(vaddq_f32(t0, t1)); \
- }
+ } while (0)
#define GGML_F16_VEC GGML_F16x8
#define GGML_F16_VEC_ZERO GGML_F16x8_ZERO
@@ -1943,7 +1943,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
#define GGML_F32x8_ADD _mm256_add_ps
#define GGML_F32x8_MUL _mm256_mul_ps
#define GGML_F32x8_REDUCE(res, x) \
-{ \
+do { \
int offset = GGML_F32_ARR >> 1; \
for (int i = 0; i < offset; ++i) { \
x[i] = _mm256_add_ps(x[i], x[offset+i]); \
@@ -1960,7 +1960,7 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
_mm256_extractf128_ps(x[0], 1)); \
const __m128 t1 = _mm_hadd_ps(t0, t0); \
res = _mm_cvtss_f32(_mm_hadd_ps(t1, t1)); \
-}
+} while (0)
// TODO: is this optimal ?
#define GGML_F32_VEC GGML_F32x8
@@ -5154,31 +5154,31 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) {
{
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
return ((int8_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_I16:
{
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
return ((int16_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_I32:
{
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
return ((int32_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_F16:
{
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
- } break;
+ }
case GGML_TYPE_F32:
{
GGML_ASSERT(tensor->nb[0] == sizeof(float));
return ((float *)(tensor->data))[i];
- } break;
+ }
default:
{
GGML_ASSERT(false);
- } break;
+ }
}
return 0.0f;
@@ -5228,29 +5228,17 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
switch (tensor->type) {
case GGML_TYPE_I8:
- {
- return ((int8_t *) data)[0];
- } break;
+ return ((int8_t *) data)[0];
case GGML_TYPE_I16:
- {
- return ((int16_t *) data)[0];
- } break;
+ return ((int16_t *) data)[0];
case GGML_TYPE_I32:
- {
- return ((int32_t *) data)[0];
- } break;
+ return ((int32_t *) data)[0];
case GGML_TYPE_F16:
- {
- return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
- } break;
+ return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
case GGML_TYPE_F32:
- {
- return ((float *) data)[0];
- } break;
+ return ((float *) data)[0];
default:
- {
- GGML_ASSERT(false);
- } break;
+ GGML_ASSERT(false);
}
return 0.0f;
@@ -5297,31 +5285,31 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) {
{
GGML_ASSERT(tensor->nb[0] == sizeof(int8_t));
return ((int8_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_I16:
{
GGML_ASSERT(tensor->nb[0] == sizeof(int16_t));
return ((int16_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_I32:
{
GGML_ASSERT(tensor->nb[0] == sizeof(int32_t));
return ((int32_t *)(tensor->data))[i];
- } break;
+ }
case GGML_TYPE_F16:
{
GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t));
return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]);
- } break;
+ }
case GGML_TYPE_F32:
{
GGML_ASSERT(tensor->nb[0] == sizeof(float));
return ((float *)(tensor->data))[i];
- } break;
+ }
default:
{
GGML_ASSERT(false);
- } break;
+ }
}
return 0.0f;
@@ -5371,29 +5359,17 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2,
void * data = (char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2] + i3*tensor->nb[3];
switch (tensor->type) {
case GGML_TYPE_I8:
- {
- return ((int8_t *) data)[0];
- } break;
+ return ((int8_t *) data)[0];
case GGML_TYPE_I16:
- {
- return ((int16_t *) data)[0];
- } break;
+ return ((int16_t *) data)[0];
case GGML_TYPE_I32:
- {
- return ((int32_t *) data)[0];
- } break;
+ return ((int32_t *) data)[0];
case GGML_TYPE_F16:
- {
- return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
- } break;
+ return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]);
case GGML_TYPE_F32:
- {
- return ((float *) data)[0];
- } break;
+ return ((float *) data)[0];
default:
- {
- GGML_ASSERT(false);
- } break;
+ GGML_ASSERT(false);
}
return 0.0f;
@@ -8542,7 +8518,7 @@ static void ggml_compute_forward_dup_f16(
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
@@ -8813,7 +8789,7 @@ static void ggml_compute_forward_dup_f32(
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
@@ -9094,7 +9070,7 @@ static void ggml_compute_forward_add_f32(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -9167,7 +9143,7 @@ static void ggml_compute_forward_add_f16_f32(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -9221,7 +9197,7 @@ static void ggml_compute_forward_add_f16_f16(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
@@ -9272,7 +9248,7 @@ static void ggml_compute_forward_add_q_f32(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -9398,7 +9374,7 @@ static void ggml_compute_forward_add1_f32(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -9453,7 +9429,7 @@ static void ggml_compute_forward_add1_f16_f32(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -9503,7 +9479,7 @@ static void ggml_compute_forward_add1_f16_f16(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
@@ -9553,7 +9529,7 @@ static void ggml_compute_forward_add1_q_f32(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const enum ggml_type type = src0->type;
ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
@@ -9681,8 +9657,8 @@ static void ggml_compute_forward_acc_f32(
const int nr = ggml_nrows(src1);
const int nc = src1->ne[0];
- GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
- GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
// src0 and dst as viewed during acc
const size_t nb0 = ggml_element_size(src0);
@@ -9771,7 +9747,7 @@ static void ggml_compute_forward_sub_f32(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -9861,7 +9837,7 @@ static void ggml_compute_forward_mul_f32(
const int64_t nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -9952,7 +9928,7 @@ static void ggml_compute_forward_div_f32(
const int nr = ggml_nrows(src0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -10161,8 +10137,8 @@ static void ggml_compute_forward_sum_f32(
assert(ggml_is_scalar(dst));
assert(src0->nb[0] == sizeof(float));
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb)
ggml_float sum = 0;
ggml_float row_sum = 0;
@@ -10193,8 +10169,8 @@ static void ggml_compute_forward_sum_f16(
assert(src0->nb[0] == sizeof(ggml_fp16_t));
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
- GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb)
float sum = 0;
float row_sum = 0;
@@ -10247,7 +10223,7 @@ static void ggml_compute_forward_sum_rows_f32(
GGML_ASSERT(src0->nb[0] == sizeof(float));
GGML_ASSERT(dst->nb[0] == sizeof(float));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(ne0 == 1);
GGML_ASSERT(ne1 == ne01);
@@ -10297,7 +10273,7 @@ static void ggml_compute_forward_mean_f32(
assert(src0->nb[0] == sizeof(float));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
assert(ne0 == 1);
assert(ne1 == ne01);
@@ -10397,7 +10373,7 @@ static void ggml_compute_forward_repeat_f32(
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne0/ne00);
@@ -10508,7 +10484,7 @@ static void ggml_compute_forward_repeat_back_f32(
return;
}
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne00/ne0);
@@ -10586,7 +10562,7 @@ static void ggml_compute_forward_concat_f32(
const int ith = params->ith;
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
// TODO: support for transposed / permuted tensors
GGML_ASSERT(nb0 == sizeof(float));
@@ -11188,7 +11164,7 @@ static void ggml_compute_forward_norm_f32(
const int ith = params->ith;
const int nth = params->nth;
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
float eps;
memcpy(&eps, dst->op_params, sizeof(float));
@@ -11257,7 +11233,7 @@ static void ggml_compute_forward_rms_norm_f32(
const int ith = params->ith;
const int nth = params->nth;
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
float eps;
memcpy(&eps, dst->op_params, sizeof(float));
@@ -11322,7 +11298,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
const int ith = params->ith;
const int nth = params->nth;
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
float eps;
memcpy(&eps, dst->op_params, sizeof(float));
@@ -11497,7 +11473,7 @@ static void ggml_compute_forward_group_norm_f32(
const int ith = params->ith;
const int nth = params->nth;
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const float eps = 1e-6f; // TODO: make this a parameter
@@ -11608,7 +11584,7 @@ static void ggml_compute_forward_mul_mat(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -11826,7 +11802,7 @@ static void ggml_compute_forward_out_prod_f32(
// int64_t t0 = ggml_perf_time_us();
// UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -12200,8 +12176,8 @@ static void ggml_compute_forward_set_f32(
const int nr = ggml_nrows(src1);
const int nc = src1->ne[0];
- GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
- GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne)
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
// src0 and dst as viewed during set
const size_t nb0 = ggml_element_size(src0);
@@ -12588,7 +12564,7 @@ static void ggml_compute_forward_diag_f32(
// TODO: handle transposed/permuted matrices
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
GGML_ASSERT(ne00 == ne0);
GGML_ASSERT(ne00 == ne1);
@@ -13163,7 +13139,7 @@ static void ggml_compute_forward_rope_f32(
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13295,7 +13271,7 @@ static void ggml_compute_forward_rope_f16(
memcpy(&freq_base, (int32_t *) dst->op_params + 4, sizeof(float));
memcpy(&freq_scale, (int32_t *) dst->op_params + 5, sizeof(float));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13458,7 +13434,7 @@ static void ggml_compute_forward_rope_back_f32(
memcpy(&xpos_base, (int32_t *) dst->op_params + 6, sizeof(float));
memcpy(&xpos_down, (int32_t *) dst->op_params + 7, sizeof(bool));
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13558,7 +13534,7 @@ static void ggml_compute_forward_rope_back_f16(
const int n_dims = ((int32_t *) dst->op_params)[1];
const int mode = ((int32_t *) dst->op_params)[2];
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13672,7 +13648,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f16_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -13763,7 +13739,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -13875,7 +13851,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f16_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -13966,7 +13942,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -14084,7 +14060,7 @@ static void ggml_compute_forward_conv_1d(
ggml_compute_forward_conv_1d_s2_ph(params, src0, src1, dst);
} else {
GGML_ASSERT(false); // only stride 1 and 2 supported
- };
+ }
}
// ggml_compute_forward_conv_2d
@@ -14101,7 +14077,7 @@ static void ggml_compute_forward_conv_2d_f16_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -14221,7 +14197,7 @@ static void ggml_compute_forward_conv_transpose_2d(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS
const int ith = params->ith;
const int nth = params->nth;
@@ -14480,7 +14456,7 @@ static void ggml_compute_forward_upscale_f32(
const int ith = params->ith;
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const int scale_factor = dst->op_params[0];
@@ -14532,14 +14508,14 @@ static void ggml_compute_forward_flash_attn_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
- GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
- GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
- GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
- GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
- GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
const int ith = params->ith;
const int nth = params->nth;
@@ -14722,14 +14698,14 @@ static void ggml_compute_forward_flash_attn_f16(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
- GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
- GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
- GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
- GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
- GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
const int ith = params->ith;
const int nth = params->nth;
@@ -14974,18 +14950,18 @@ static void ggml_compute_forward_flash_ff_f16(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_LOCALS(int64_t, nea, a, ne);
- GGML_TENSOR_LOCALS(size_t, nba, a, nb);
- GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne);
- GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb);
- GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne);
- GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb);
- GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne);
- GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb);
- GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne);
- GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, nea, a, ne)
+ GGML_TENSOR_LOCALS(size_t, nba, a, nb)
+ GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne)
+ GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb)
+ GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne)
+ GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb)
+ GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne)
+ GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb)
+ GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne)
+ GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
const int ith = params->ith;
const int nth = params->nth;
@@ -15133,16 +15109,16 @@ static void ggml_compute_forward_flash_attn_back_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
- GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
- GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
- GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
- GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
- GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
- GGML_TENSOR_LOCALS(int64_t, ned, d, ne);
- GGML_TENSOR_LOCALS(size_t, nbd, d, nb);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
- GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne)
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb)
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne)
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb)
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne)
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb)
+ GGML_TENSOR_LOCALS(int64_t, ned, d, ne)
+ GGML_TENSOR_LOCALS(size_t, nbd, d, nb)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
const int ith = params->ith;
const int nth = params->nth;
@@ -15505,8 +15481,8 @@ static void ggml_compute_forward_win_part_f32(
return;
}
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
const int32_t nep0 = ((const int32_t *)(dst->op_params))[0];
const int32_t nep1 = ((const int32_t *)(dst->op_params))[1];
@@ -15567,8 +15543,8 @@ static void ggml_compute_forward_win_unpart_f32(
return;
}
- GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
- GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne)
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne)
const int32_t w = ((const int32_t *)(dst->op_params))[0];
@@ -15685,7 +15661,7 @@ static void ggml_compute_forward_get_rel_pos_f16(
// ref: https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/modeling/image_encoder.py#L292-L322
- GGML_TENSOR_UNARY_OP_LOCALS;
+ GGML_TENSOR_UNARY_OP_LOCALS
const int64_t w = ne1;
@@ -19637,7 +19613,7 @@ static enum ggml_opt_result linesearch_backtracking(
(*step) *= width;
}
- return GGML_LINESEARCH_FAIL;
+ GGML_UNREACHABLE();
}
static enum ggml_opt_result ggml_opt_lbfgs(
@@ -19904,7 +19880,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
step[0] = 1.0;
}
- return GGML_OPT_DID_NOT_CONVERGE;
+ GGML_UNREACHABLE();
}
struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
@@ -20638,10 +20614,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
- };
+ }
} break;
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
- };
+ }
if (!ok) {
break;
@@ -21369,10 +21345,10 @@ static void gguf_write_to_buf(const struct gguf_context * ctx, struct gguf_buf *
} break;
case GGUF_TYPE_ARRAY:
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
- };
+ }
} break;
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
- };
+ }
}
// write tensor infos
diff --git a/ggml.h b/ggml.h
index d61c28b2..460857fa 100644
--- a/ggml.h
+++ b/ggml.h
@@ -248,6 +248,14 @@
} \
} while (0)
+#ifndef NDEBUG
+#define GGML_UNREACHABLE() GGML_ASSERT(!"statement should not be reached")
+#elif defined(__GNUC__)
+#define GGML_UNREACHABLE() __builtin_unreachable()
+#else
+#define GGML_UNREACHABLE() ((void) 0)
+#endif
+
// used to copy the number of elements and stride in bytes of tensors into local variables.
// main purpose is to reduce code duplication and improve readability.
//
diff --git a/llama.cpp b/llama.cpp
index 685712d1..666acc21 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -449,7 +449,7 @@ struct LLM_TN {
//
#define GGUF_GET_KEY(ctx, dst, func, type, req, key) \
-{ \
+do { \
const std::string skey(key); \
const int kid = gguf_find_key(ctx, skey.c_str()); \
if (kid >= 0) { \
@@ -461,7 +461,7 @@ struct LLM_TN {
} else if (req) { \
throw std::runtime_error(format("key not found in model: %s", skey.c_str())); \
} \
-}
+} while (0)
//
// ggml helpers
@@ -1913,7 +1913,7 @@ static void llm_load_hparams(
}
} break;
default: (void)0;
- };
+ }
model.ftype = ml.ftype;
}
@@ -2438,7 +2438,7 @@ static void llm_load_tensors(
} break;
default:
throw std::runtime_error("unknown architecture");
- };
+ }
}
ml.done_getting_tensors();
@@ -3981,7 +3981,7 @@ static struct ggml_cgraph * llama_build_graph(
} break;
default:
GGML_ASSERT(false);
- };
+ }
return result;
}
@@ -4626,7 +4626,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
llm_tokenizer_bpe tokenizer(vocab);
tokenizer.tokenize(raw_text, output);
} break;
- };
+ }
return output;
}
@@ -7520,7 +7520,7 @@ int llama_token_to_piece(const struct llama_model * model, llama_token token, ch
buf[2] = '\x85';
return 3;
} else if (llama_is_control_token(model->vocab, token)) {
- ;
+ // do nothing
} else if (llama_is_byte_token(model->vocab, token)) {
if (length < 1) {
return -1;
diff --git a/pocs/vdot/q8dot.cpp b/pocs/vdot/q8dot.cpp
index 4e0e0235..111770d5 100644
--- a/pocs/vdot/q8dot.cpp
+++ b/pocs/vdot/q8dot.cpp
@@ -43,7 +43,7 @@ static_assert(QK4_1 == QK8_0, "QK4_1 and QK8_0 must be the same");
static_assert(QK4_0 == QK8_0, "QK4_0 and QK8_0 must be the same");
template <typename T>
-void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
+static void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
for (auto& b : blocks) {
b.d = 1;
for (int i=0; i<QK4_1/2; ++i) {
@@ -54,7 +54,7 @@ void fillQ4blocks(std::vector<T>& blocks, std::mt19937& rndm) {
}
}
-void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
+static void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
for (auto& b : blocks) {
b.d = 1;
int sum = 0;
@@ -66,7 +66,7 @@ void fillQ80blocks(std::vector<block_q8_0>& blocks, std::mt19937& rndm) {
}
}
-float simpleDot(const block_q4_0& x, const block_q8_0& y) {
+static float simpleDot(const block_q4_0& x, const block_q8_0& y) {
int s1 = 0; //, s2 = 0;
for (int i=0; i<QK4_1/2; i+=2) {
int v1 = x.qs[i+0] & 0xf;
@@ -81,7 +81,7 @@ float simpleDot(const block_q4_0& x, const block_q8_0& y) {
//return y.d * x.d * (s1 - 8 * s2);
}
-float simpleDot(const block_q4_1& x, const block_q8_0& y) {
+static float simpleDot(const block_q4_1& x, const block_q8_0& y) {
int s1 = 0; //, s2 = 0;
for (int i=0; i<QK4_1/2; i+=2) {
int v1 = x.qs[i+0] & 0xf;
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp
index 4f49dc55..c3cd73bc 100644
--- a/tests/test-grad0.cpp
+++ b/tests/test-grad0.cpp
@@ -107,7 +107,7 @@ static struct ggml_tensor * get_random_tensor_f32(
break;
default:
assert(false);
- };
+ }
return result;
}
@@ -155,7 +155,7 @@ static struct ggml_tensor * get_random_tensor_f16(
break;
default:
assert(false);
- };
+ }
return result;
}
@@ -203,7 +203,7 @@ static struct ggml_tensor * get_random_tensor_i32(
break;
default:
assert(false);
- };
+ }
return result;
}
diff --git a/tests/test-opt.cpp b/tests/test-opt.cpp
index ce497685..fb4e0be9 100644
--- a/tests/test-opt.cpp
+++ b/tests/test-opt.cpp
@@ -101,7 +101,7 @@ static struct ggml_tensor * get_random_tensor(
break;
default:
assert(false);
- };
+ }
return result;
}
@@ -124,7 +124,7 @@ int main(void) {
struct ggml_context * ctx = ggml_init(params);
int64_t ne1[4] = {4, 128, 1, 1};
- int64_t ne2[4] = {4, 256, 1, 1};;
+ int64_t ne2[4] = {4, 256, 1, 1};
int64_t ne3[4] = {128, 256, 1, 1};
struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1);