summaryrefslogtreecommitdiff
path: root/libs/libmdbx/src
diff options
context:
space:
mode:
Diffstat (limited to 'libs/libmdbx/src')
-rw-r--r--libs/libmdbx/src/CMakeLists.txt19
-rw-r--r--libs/libmdbx/src/ChangeLog.md46
-rw-r--r--libs/libmdbx/src/GNUmakefile2
-rw-r--r--libs/libmdbx/src/README.md10
-rw-r--r--libs/libmdbx/src/VERSION2
-rw-r--r--libs/libmdbx/src/cmake/utils.cmake37
-rw-r--r--libs/libmdbx/src/mdbx.c1959
-rw-r--r--libs/libmdbx/src/mdbx.c++249
-rw-r--r--libs/libmdbx/src/mdbx.h300
-rw-r--r--libs/libmdbx/src/mdbx.h++120
-rw-r--r--libs/libmdbx/src/mdbx_chk.c97
-rw-r--r--libs/libmdbx/src/mdbx_copy.c97
-rw-r--r--libs/libmdbx/src/mdbx_dump.c111
-rw-r--r--libs/libmdbx/src/mdbx_load.c193
-rw-r--r--libs/libmdbx/src/mdbx_stat.c97
15 files changed, 2118 insertions, 1221 deletions
diff --git a/libs/libmdbx/src/CMakeLists.txt b/libs/libmdbx/src/CMakeLists.txt
index 20a50a4537..d41686bf82 100644
--- a/libs/libmdbx/src/CMakeLists.txt
+++ b/libs/libmdbx/src/CMakeLists.txt
@@ -340,10 +340,12 @@ if(NOT DEFINED MDBX_CXX_STANDARD)
set(MDBX_CXX_STANDARD 98)
endif()
endif()
-if(NOT HAS_C11 LESS 0)
- set(MDBX_C_STANDARD 11)
-else()
+# MSVC >= 19.28 (Microsoft Visual Studio 16.8) is mad!
+# It unable process Windows SDK headers in the C11 mode!
+if(HAS_C11 LESS 0 OR (MSVC AND MSVC_VERSION GREATER 1927))
set(MDBX_C_STANDARD 99)
+else()
+ set(MDBX_C_STANDARD 11)
endif()
if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND EXISTS "${MDBX_SOURCE_DIR}/ntdll.def")
@@ -756,14 +758,21 @@ if(NOT CMAKE_CONFIGURATION_TYPES)
endif()
endif()
+# choice target to fetch definitions and options
+if(MDBX_BUILD_SHARED_LIBRARY)
+ set(target4fetch mdbx)
+else()
+ set(target4fetch mdbx-static)
+endif()
+
# get definitions
-get_target_property(defs_list mdbx-static COMPILE_DEFINITIONS)
+get_target_property(defs_list ${target4fetch} COMPILE_DEFINITIONS)
if(defs_list)
list(APPEND MDBX_BUILD_FLAGS ${defs_list})
endif()
# get target compile options
-get_target_property(options_list mdbx-static COMPILE_OPTIONS)
+get_target_property(options_list ${target4fetch} COMPILE_OPTIONS)
if(options_list)
list(APPEND MDBX_BUILD_FLAGS ${options_list})
endif()
diff --git a/libs/libmdbx/src/ChangeLog.md b/libs/libmdbx/src/ChangeLog.md
index 34be056b8d..e7cf79534f 100644
--- a/libs/libmdbx/src/ChangeLog.md
+++ b/libs/libmdbx/src/ChangeLog.md
@@ -1,14 +1,38 @@
ChangeLog
---------
-## v0.9.2 (in development)
-
-TODO:
+## v0.9.3 (in development)
+ - Engage new terminology (https://github.com/erthink/libmdbx/issues/137).
- Rework/speedup the implementation of the dirty page list (lazy compactification, lazy sorting via merge).
- - Finalize C++ API (few typos and trivia bugs are likely for now).
+ - Resolve few TODOs (https://github.com/erthink/libmdbx/issues/123, https://github.com/erthink/libmdbx/issues/124,
+ https://github.com/erthink/libmdbx/issues/127, https://github.com/erthink/libmdbx/issues/128,
+ https://github.com/erthink/libmdbx/issues/132, https://github.com/erthink/libmdbx/issues/115).
+ - Finalize C++ API (few typos and trivia bugs are still likely for now).
- Packages for ROSA Linux, ALT Linux, Fedora/RHEL, Debian/Ubuntu.
+## v0.9.2 scheduled at 2020-11-27
+
+Acknowledgements:
+
+ - Jens Alfke (Mobile Architect at [Couchbase](https://www.couchbase.com/)) for [NimDBX](https://github.com/snej/nimdbx).
+ - Clément Renault (CTO at [MeiliSearch](https://www.meilisearch.com/)) for [mdbx-rs](https://github.com/Kerollmops/mdbx-rs).
+ - Alex Sharov (Go-Lang Teach Lead at [TurboGeth/Ethereum](https://ethereum.org/)) for an extreme test cases and bug reporting.
+ - George Hazan (CTO at [Miranda NG](https://www.miranda-ng.org/)) for bug reporting.
+ - [Positive Technologies](https://www.ptsecurity.com/) for funding and [The Standoff](https://standoff365.com/).
+
+Added features:
+
+ - Provided package for [buildroot](https://buildroot.org/).
+ - Binding for Nim is [available](https://github.com/snej/nimdbx) now by Jens Alfke.
+ - Added `mdbx_env_delete()` for deletion an environment files in a proper and multiprocess-safe way.
+ - Added `mdbx_txn_commit_ex()` with collecting latency information.
+ - Fast completion pure nested transactions.
+ - Added `LIBMDBX_INLINE_API` macro and inline versions of some API functions.
+ - Added `mdbx_cursor_copy()` function.
+ - Extended tests for checking cursor tracking.
+ - Added `MDBX_SET_LOWERBOUND` operation for `mdbx_cursor_get()`.
+
Fixes:
- Fixed missing installation of `mdbx.h++`.
@@ -21,6 +45,20 @@ Fixes:
- Fixed opening DB on a network shares (in the exclusive mode).
- Fixed copy&paste typos.
- Fixed minor false-positive GCC warning.
+ - Added workaround for broken `DEFINE_ENUM_FLAG_OPERATORS` from Windows SDK.
+ - Fixed cursor state after multimap/dupsort repeated deletes (https://github.com/erthink/libmdbx/issues/121).
+ - Added `SIGPIPE` suppression for internal thread during `mdbx_env_copy()`.
+ - Fixed extra-rare `MDBX_KEY_EXIST` error during `mdbx_commit()` (https://github.com/erthink/libmdbx/issues/131).
+ - Fixed spilled pages checking (https://github.com/erthink/libmdbx/issues/126).
+ - Fixed `mdbx_load` for 'plain text' and without `-s name` cases (https://github.com/erthink/libmdbx/issues/136).
+ - Fixed save/restore/commit of cursors for nested transactions.
+ - Fixed cursors state in rare/special cases (move next beyond end-of-data, after deletion and so on).
+ - Added workaround for MSVC 19.28 (Visual Studio 16.8) (but may still hang during compilation).
+ - Fixed paranoidal Clang C++ UB for bitwise operations with flags defined by enums.
+ - Fixed large pages checking (for compatibility and to avoid false-positive errors from `mdbx_chk`).
+ - Added workaround for Wine (https://github.com/miranda-ng/miranda-ng/issues/1209).
+ - Fixed `ERROR_NOT_SUPPORTED` while opening DB by UNC pathnames (https://github.com/miranda-ng/miranda-ng/issues/2627).
+
## v0.9.1 2020-09-30
diff --git a/libs/libmdbx/src/GNUmakefile b/libs/libmdbx/src/GNUmakefile
index 82d168b2b8..b14000f5f3 100644
--- a/libs/libmdbx/src/GNUmakefile
+++ b/libs/libmdbx/src/GNUmakefile
@@ -84,7 +84,7 @@ libmdbx.$(SO_SUFFIX): mdbx-dylib.o mdbx++-dylib.o
################################################################################
-# Amalgamated source code, i.e. distributed after `make dists`
+# Amalgamated source code, i.e. distributed after `make dist`
MAN_SRCDIR := man1/
config.h: mdbx.c $(lastword $(MAKEFILE_LIST))
diff --git a/libs/libmdbx/src/README.md b/libs/libmdbx/src/README.md
index 90f2924608..3f643d2706 100644
--- a/libs/libmdbx/src/README.md
+++ b/libs/libmdbx/src/README.md
@@ -1,12 +1,15 @@
<!-- Required extensions: pymdownx.betterem, pymdownx.tilde, pymdownx.emoji, pymdownx.tasklist, pymdownx.superfences -->
-libmdbx
-========
-
> Please refer to the online [documentation](https://erthink.github.io/libmdbx/)
> with [`C` API description](https://erthink.github.io/libmdbx/group__c__api.html)
> and pay attention to the preliminary [`C++` API](https://github.com/erthink/libmdbx/blob/devel/mdbx.h%2B%2B).
+>
> Questions, feedback and suggestions are welcome to the [Telegram' group](https://t.me/libmdbx).
+>
+> For NEWS take a look to the [ChangeLog](./ChangeLog.md).
+
+libmdbx
+========
<!-- section-begin overview -->
_libmdbx_ is an extremely fast, compact, powerful, embedded,
@@ -480,6 +483,7 @@ Bindings
| Runtime | GitHub | Author |
| ------- | ------ | ------ |
+| [Nim](https://en.wikipedia.org/wiki/Nim_(programming_language)) | [NimDBX](https://github.com/snej/nimdbx) | [Jens Alfke](https://github.com/snej)
| Rust | [mdbx-rs](https://github.com/Kerollmops/mdbx-rs) | [Clément Renault](https://github.com/Kerollmops) |
| Java | [mdbxjni](https://github.com/castortech/mdbxjni) | [Castor Technologies](https://castortech.com/) |
| .NET | [mdbx.NET](https://github.com/wangjia184/mdbx.NET) | [Jerry Wang](https://github.com/wangjia184) |
diff --git a/libs/libmdbx/src/VERSION b/libs/libmdbx/src/VERSION
index 4cef1f9b06..594150e32c 100644
--- a/libs/libmdbx/src/VERSION
+++ b/libs/libmdbx/src/VERSION
@@ -1 +1 @@
-0.9.1.18
+0.9.2.0
diff --git a/libs/libmdbx/src/cmake/utils.cmake b/libs/libmdbx/src/cmake/utils.cmake
index dc6a240e1b..4a48a15a63 100644
--- a/libs/libmdbx/src/cmake/utils.cmake
+++ b/libs/libmdbx/src/cmake/utils.cmake
@@ -17,8 +17,8 @@ cmake_minimum_required(VERSION 3.8.2)
cmake_policy(PUSH)
cmake_policy(VERSION 3.8.2)
-macro(add_compile_flags langs)
- foreach(_lang ${langs})
+macro(add_compile_flags languages)
+ foreach(_lang ${languages})
string(REPLACE ";" " " _flags "${ARGN}")
if(CMAKE_CXX_COMPILER_LOADED AND _lang STREQUAL "CXX")
set("${_lang}_FLAGS" "${${_lang}_FLAGS} ${_flags}")
@@ -113,23 +113,34 @@ macro(fetch_version name source_root_directory parent_scope)
message(FATAL_ERROR "Please install latest version of git ('show --no-patch --format=%H HEAD' failed)")
endif()
- execute_process(COMMAND ${GIT} tag --sort=-version:refname
- OUTPUT_VARIABLE tag_list
+ execute_process(COMMAND ${GIT} describe --tags --abbrev=0 "--match=v[0-9]*"
+ OUTPUT_VARIABLE last_release_tag
OUTPUT_STRIP_TRAILING_WHITESPACE
WORKING_DIRECTORY ${source_root_directory}
RESULT_VARIABLE rc)
if(rc)
- message(FATAL_ERROR "Please install latest version of git ('tag --sort=-version:refname' failed)")
+ message(FATAL_ERROR "Please install latest version of git ('describe --tags --abbrev=0 --match=v[0-9]*' failed)")
endif()
- string(REGEX REPLACE "\n" ";" tag_list "${tag_list}")
- set(last_release_tag "")
- set(git_revlist_arg "HEAD")
- foreach(tag IN LISTS tag_list)
- if(NOT last_release_tag)
- string(REGEX MATCH "^v[0-9]+(\.[0-9]+)+" last_release_tag "${tag}")
- set(git_revlist_arg "${tag}..HEAD")
+ if (last_release_tag)
+ set(git_revlist_arg "${last_release_tag}..HEAD")
+ else()
+ execute_process(COMMAND ${GIT} tag --sort=-version:refname
+ OUTPUT_VARIABLE tag_list
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ WORKING_DIRECTORY ${source_root_directory}
+ RESULT_VARIABLE rc)
+ if(rc)
+ message(FATAL_ERROR "Please install latest version of git ('tag --sort=-version:refname' failed)")
endif()
- endforeach(tag)
+ string(REGEX REPLACE "\n" ";" tag_list "${tag_list}")
+ set(git_revlist_arg "HEAD")
+ foreach(tag IN LISTS tag_list)
+ if(NOT last_release_tag)
+ string(REGEX MATCH "^v[0-9]+(\.[0-9]+)+" last_release_tag "${tag}")
+ set(git_revlist_arg "${tag}..HEAD")
+ endif()
+ endforeach(tag)
+ endif()
execute_process(COMMAND ${GIT} rev-list --count "${git_revlist_arg}"
OUTPUT_VARIABLE ${name}_GIT_REVISION
OUTPUT_STRIP_TRAILING_WHITESPACE
diff --git a/libs/libmdbx/src/mdbx.c b/libs/libmdbx/src/mdbx.c
index 2fa4734952..3bcb83b536 100644
--- a/libs/libmdbx/src/mdbx.c
+++ b/libs/libmdbx/src/mdbx.c
@@ -12,11 +12,16 @@
* <http://www.OpenLDAP.org/license.html>. */
#define MDBX_ALLOY 1
-#define MDBX_BUILD_SOURCERY 3b5677a6062b714f1e138b0066c5590ee3c9ebf3bf8cfa3bb9503515ea0d1f02_v0_9_1_18_g1d31ebdc1c
+#define MDBX_BUILD_SOURCERY 47492323531afee427a3de6ddaeae26eed45bfd1b52d92fd121a5a13a9747dbb_v0_9_2_0_g092ab09
#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif
+#define LIBMDBX_INTERNALS
+#ifdef MDBX_TOOLS
+#define MDBX_DEPRECATED
+#endif /* MDBX_TOOLS */
+
/* *INDENT-OFF* */
/* clang-format off */
@@ -97,11 +102,6 @@
#pragma warning(disable : 4505) /* unreferenced local function has been removed */
#endif /* _MSC_VER (warnings) */
-#if defined(MDBX_TOOLS)
-#undef MDBX_DEPRECATED
-#define MDBX_DEPRECATED
-#endif /* MDBX_TOOLS */
-
#include "mdbx.h"
/*
* Copyright 2015-2020 Leonid Yuriev <leo@yuriev.ru>
@@ -817,7 +817,7 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
#ifndef __ia32__
-/* LY: define neutral __ia32__ for x86 and x86-64 archs */
+/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \
@@ -993,6 +993,35 @@ typedef union MDBX_srwlock {
#ifdef __cplusplus
extern void mdbx_osal_jitter(bool tiny);
#else
+
+/*----------------------------------------------------------------------------*/
+/* Atomics */
+
+#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
+#include <cstdatomic>
+#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
+ !defined(__STDC_NO_ATOMICS__) && \
+ (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
+ !(defined(__GNUC__) || defined(__clang__)))
+#include <stdatomic.h>
+#elif defined(__GNUC__) || defined(__clang__)
+/* LY: nothing required */
+#elif defined(_MSC_VER)
+#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
+#pragma warning(disable : 4133) /* 'function': incompatible types - from \
+ 'size_t' to 'LONGLONG' */
+#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
+ 'std::size_t', possible loss of data */
+#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
+ 'long', possible loss of data */
+#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#else
+#error FIXME atomic-ops
+#endif
+
/*----------------------------------------------------------------------------*/
/* Memory/Compiler barriers, cache coherence */
@@ -1034,8 +1063,8 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
}
static __maybe_unused __inline void mdbx_memory_barrier(void) {
-#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
- __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+#if __has_extension(c_atomic) && !defined(__STDC_NO_ATOMICS__)
+ atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__ATOMIC_SEQ_CST)
__atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__clang__) || defined(__GNUC__)
@@ -1088,8 +1117,7 @@ MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t mdbx_linux_kernel_version;
-MDBX_INTERNAL_VAR bool
- mdbx_RunningOnWSL /* Windows Subsystem for Linux is mad and trouble-full */;
+MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
#ifndef mdbx_strdup
@@ -1160,7 +1188,8 @@ enum mdbx_openfile_purpose {
MDBX_OPEN_DXB_LAZY = 1,
MDBX_OPEN_DXB_DSYNC = 2,
MDBX_OPEN_LCK = 3,
- MDBX_OPEN_COPY = 4
+ MDBX_OPEN_COPY = 4,
+ MDBX_OPEN_DELETE = 5
};
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
@@ -1169,7 +1198,9 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
+MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait);
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
@@ -1429,32 +1460,6 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* Windows */
-/*----------------------------------------------------------------------------*/
-/* Atomics */
-
-#if !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
- !defined(__STDC_NO_ATOMICS__) && \
- (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
- !(defined(__GNUC__) || defined(__clang__)))
-#include <stdatomic.h>
-#elif defined(__GNUC__) || defined(__clang__)
-/* LY: nothing required */
-#elif defined(_MSC_VER)
-#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
-#pragma warning(disable : 4133) /* 'function': incompatible types - from \
- 'size_t' to 'LONGLONG' */
-#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
- 'std::size_t', possible loss of data */
-#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
- 'long', possible loss of data */
-#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
-#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
-#elif defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-#else
-#error FIXME atomic-ops
-#endif
-
#endif /* !__cplusplus */
/*----------------------------------------------------------------------------*/
@@ -1890,7 +1895,7 @@ typedef struct MDBX_db {
pgno_t md_overflow_pages; /* number of overflow pages */
uint64_t md_seq; /* table sequence counter */
uint64_t md_entries; /* number of data items */
- uint64_t md_mod_txnid; /* txnid of last commited modification */
+ uint64_t md_mod_txnid; /* txnid of last committed modification */
} MDBX_db;
/* database size-related parameters */
@@ -1974,7 +1979,7 @@ typedef struct MDBX_meta {
typedef struct MDBX_page {
union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
- uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
+ uint64_t mp_txnid; /* txnid that committed this page */
};
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */
@@ -2222,7 +2227,7 @@ typedef struct MDBX_lockinfo {
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
-#define MAX_MAPSIZE32 UINT32_C(0x7ff80000)
+#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
#define MAX_MAPSIZE64 (MAX_PAGENO * (uint64_t)MAX_PAGESIZE)
@@ -2381,8 +2386,6 @@ struct MDBX_txn {
MDBX_db *mt_dbs;
/* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs;
- /* In write txns, array of cursors for each DB */
- MDBX_cursor **mt_cursors;
/* Transaction DBI Flags */
#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
@@ -2409,6 +2412,8 @@ struct MDBX_txn {
MDBX_reader *reader;
} to;
struct {
+ /* In write txns, array of cursors for each DB */
+ MDBX_cursor **cursors;
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
txnid_t last_reclaimed; /* ID of last used record */
pgno_t loose_refund_wl /* FIXME: describe */;
@@ -2546,7 +2551,7 @@ struct MDBX_env {
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
- unsigned me_psize; /* DB page size, inited from me_os_psize */
+ unsigned me_psize; /* DB page size, initialized from me_os_psize */
uint8_t me_psize2log; /* log2 of DB page size */
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
@@ -2556,7 +2561,7 @@ struct MDBX_env {
MDBX_dbi me_maxdbs; /* size of the DB table */
uint32_t me_pid; /* process ID of this env */
mdbx_thread_key_t me_txkey; /* thread-key for readers */
- char *me_path; /* path to the DB files */
+ char *me_pathname; /* path to the DB files */
void *me_pbuf; /* scratch area for DUPSORT put() */
MDBX_txn *me_txn; /* current write transaction */
MDBX_txn *me_txn0; /* prealloc'd write transaction */
@@ -2832,7 +2837,7 @@ static __maybe_unused __inline void mdbx_jitter4testing(bool tiny) {
((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
/* Internal error codes, not exposed outside libmdbx */
-#define MDBX_NO_ROOT (MDBX_LAST_LMDB_ERRCODE + 10)
+#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc) \
@@ -3064,7 +3069,7 @@ static __maybe_unused void static_checks(void) {
/*------------------------------------------------------------------------------
- * Internal inlines */
+ * Internal inline functions */
MDBX_NOTHROW_CONST_FUNCTION static unsigned log2n(size_t value) {
assert(value > 0 && value < INT32_MAX && is_powerof2(value));
@@ -3806,12 +3811,24 @@ static __always_inline void atomic_yield(void) {
#if MDBX_64BIT_CAS
static __always_inline bool atomic_cas64(volatile uint64_t *p, uint64_t c,
uint64_t v) {
-#if defined(ATOMIC_VAR_INIT) || defined(ATOMIC_LLONG_LOCK_FREE)
+#if !defined(__STDC_NO_ATOMICS__) && \
+ (defined(ATOMIC_VAR_INIT) || defined(ATOMIC_LLONG_LOCK_FREE) || \
+ __has_extension(c_atomic))
STATIC_ASSERT(sizeof(long long) >= sizeof(uint64_t));
-#ifndef __COVERITY__
- STATIC_ASSERT(atomic_is_lock_free(p));
-#endif /* Workaround for Coverity */
- return atomic_compare_exchange_strong((_Atomic uint64_t *)p, &c, v);
+#ifdef ATOMIC_LLONG_LOCK_FREE
+ STATIC_ASSERT(ATOMIC_LLONG_LOCK_FREE > 0);
+#if ATOMIC_LLONG_LOCK_FREE < 2
+ assert(atomic_is_lock_free(p));
+#endif
+#else
+ assert(atomic_is_lock_free(p));
+#endif
+#ifdef __clang__
+ STATIC_ASSERT(sizeof(_Atomic uint64_t) == sizeof(uint64_t));
+ return atomic_compare_exchange_strong((_Atomic volatile uint64_t *)p, &c, v);
+#else
+ return atomic_compare_exchange_strong(p, &c, v);
+#endif
#elif defined(__GNUC__) || defined(__clang__)
return __sync_bool_compare_and_swap(p, c, v);
#elif defined(_MSC_VER)
@@ -3827,12 +3844,24 @@ static __always_inline bool atomic_cas64(volatile uint64_t *p, uint64_t c,
static __always_inline bool atomic_cas32(volatile uint32_t *p, uint32_t c,
uint32_t v) {
-#if defined(ATOMIC_VAR_INIT) || defined(ATOMIC_INT_LOCK_FREE)
+#if !defined(__STDC_NO_ATOMICS__) && \
+ (defined(ATOMIC_VAR_INIT) || defined(ATOMIC_INT_LOCK_FREE) || \
+ __has_extension(c_atomic))
STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t));
-#ifndef __COVERITY__
- STATIC_ASSERT(atomic_is_lock_free(p));
-#endif /* Workaround for Coverity */
- return atomic_compare_exchange_strong((_Atomic uint32_t *)p, &c, v);
+#ifdef ATOMIC_INT_LOCK_FREE
+ STATIC_ASSERT(ATOMIC_INT_LOCK_FREE > 0);
+#if ATOMIC_INT_LOCK_FREE < 2
+ assert(atomic_is_lock_free(p));
+#endif
+#else
+ assert(atomic_is_lock_free(p));
+#endif
+#ifdef __clang__
+ STATIC_ASSERT(sizeof(_Atomic uint32_t) == sizeof(uint32_t));
+ return atomic_compare_exchange_strong((_Atomic volatile uint32_t *)p, &c, v);
+#else
+ return atomic_compare_exchange_strong(p, &c, v);
+#endif
#elif defined(__GNUC__) || defined(__clang__)
return __sync_bool_compare_and_swap(p, c, v);
#elif defined(_MSC_VER)
@@ -3846,12 +3875,24 @@ static __always_inline bool atomic_cas32(volatile uint32_t *p, uint32_t c,
}
static __always_inline uint32_t atomic_add32(volatile uint32_t *p, uint32_t v) {
-#if defined(ATOMIC_VAR_INIT) || defined(ATOMIC_INT_LOCK_FREE)
+#if !defined(__STDC_NO_ATOMICS__) && \
+ (defined(ATOMIC_VAR_INIT) || defined(ATOMIC_INT_LOCK_FREE) || \
+ __has_extension(c_atomic))
STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t));
-#ifndef __COVERITY__
- STATIC_ASSERT(atomic_is_lock_free(p));
-#endif /* Workaround for Coverity */
- return atomic_fetch_add((_Atomic uint32_t *)p, v);
+#ifdef ATOMIC_INT_LOCK_FREE
+ STATIC_ASSERT(ATOMIC_INT_LOCK_FREE > 0);
+#if ATOMIC_INT_LOCK_FREE < 2
+ assert(atomic_is_lock_free(p));
+#endif
+#else
+ assert(atomic_is_lock_free(p));
+#endif
+#ifdef __clang__
+ STATIC_ASSERT(sizeof(_Atomic uint32_t) == sizeof(uint32_t));
+ return atomic_fetch_add((_Atomic volatile uint32_t *)p, v);
+#else
+ return atomic_fetch_add(p, v);
+#endif
#elif defined(__GNUC__) || defined(__clang__)
return __sync_fetch_and_add(p, v);
#elif defined(_MSC_VER)
@@ -6128,7 +6169,7 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode);
static int __must_check_result mdbx_page_get(MDBX_cursor *mc, pgno_t pgno,
MDBX_page **mp, int *lvl,
- const txnid_t pp_txnid);
+ txnid_t pp_txnid);
static int __must_check_result mdbx_page_search_root(MDBX_cursor *mc,
const MDBX_val *key,
int flags);
@@ -6204,8 +6245,9 @@ static int __must_check_result mdbx_cursor_del0(MDBX_cursor *mc);
static int __must_check_result mdbx_del0(MDBX_txn *txn, MDBX_dbi dbi,
const MDBX_val *key,
const MDBX_val *data, unsigned flags);
-static int __must_check_result mdbx_cursor_sibling(MDBX_cursor *mc,
- int move_right);
+#define SIBLING_LEFT 0
+#define SIBLING_RIGHT 2
+static int __must_check_result mdbx_cursor_sibling(MDBX_cursor *mc, int dir);
static int __must_check_result mdbx_cursor_next(MDBX_cursor *mc, MDBX_val *key,
MDBX_val *data,
MDBX_cursor_op op);
@@ -6229,7 +6271,7 @@ static int __must_check_result mdbx_xcursor_init1(MDBX_cursor *mc,
static int __must_check_result mdbx_xcursor_init2(MDBX_cursor *mc,
MDBX_xcursor *src_mx,
bool new_dupdata);
-static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst);
+static void cursor_copy_internal(const MDBX_cursor *csrc, MDBX_cursor *cdst);
static int __must_check_result mdbx_drop0(MDBX_cursor *mc, int subs);
static int __must_check_result mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi);
@@ -6573,7 +6615,7 @@ static __maybe_unused void mdbx_page_list(MDBX_page *mp) {
/*----------------------------------------------------------------------------*/
-/* Check if there is an inited xcursor, so XCURSOR_REFRESH() is proper */
+/* Check if there is an initialized xcursor, so XCURSOR_REFRESH() is proper */
#define XCURSOR_INITED(mc) \
((mc)->mc_xcursor && ((mc)->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))
@@ -6588,16 +6630,26 @@ static __maybe_unused void mdbx_page_list(MDBX_page *mp) {
(mc)->mc_xcursor->mx_cursor.mc_pg[0] = node_data(xr_node); \
} while (0)
+static __maybe_unused bool cursor_is_tracked(const MDBX_cursor *mc) {
+ for (MDBX_cursor *scan = mc->mc_txn->tw.cursors[mc->mc_dbi]; scan;
+ scan = scan->mc_next)
+ if (mc == ((mc->mc_flags & C_SUB) ? &scan->mc_xcursor->mx_cursor : scan))
+ return true;
+ return false;
+}
+
/* Perform act while tracking temporary cursor mn */
#define WITH_CURSOR_TRACKING(mn, act) \
do { \
mdbx_cassert(&(mn), \
- mn.mc_txn->mt_cursors != NULL /* must be not rdonly txt */); \
+ mn.mc_txn->tw.cursors != NULL /* must be not rdonly txt */); \
+ mdbx_cassert(&(mn), !cursor_is_tracked(&(mn))); \
MDBX_cursor mc_dummy; \
- MDBX_cursor **tracking_head = &(mn).mc_txn->mt_cursors[mn.mc_dbi]; \
+ MDBX_cursor **tracking_head = &(mn).mc_txn->tw.cursors[mn.mc_dbi]; \
MDBX_cursor *tracked = &(mn); \
if ((mn).mc_flags & C_SUB) { \
mc_dummy.mc_flags = C_INITIALIZED; \
+ mc_dummy.mc_top = 0; \
mc_dummy.mc_xcursor = (MDBX_xcursor *)&(mn); \
tracked = &mc_dummy; \
} \
@@ -7155,7 +7207,7 @@ static int mdbx_pages_xkeep(MDBX_cursor *mc, unsigned pflags, bool all) {
MDBX_txn *txn = mc->mc_txn;
MDBX_cursor *m3, *m0 = mc;
MDBX_xcursor *mx;
- MDBX_page *dp, *mp;
+ MDBX_page *mp;
unsigned i, j;
int rc = MDBX_SUCCESS;
@@ -7180,7 +7232,7 @@ static int mdbx_pages_xkeep(MDBX_cursor *mc, unsigned pflags, bool all) {
}
}
mc = mc->mc_next;
- for (; !mc || mc == m0; mc = txn->mt_cursors[--i])
+ for (; !mc || mc == m0; mc = txn->tw.cursors[--i])
if (i == 0)
goto mark_done;
}
@@ -7193,11 +7245,8 @@ mark_done:
pgno_t pgno = txn->mt_dbs[i].md_root;
if (pgno == P_INVALID)
continue;
- int level;
- if (unlikely((rc = mdbx_page_get(m0, pgno, &dp, &level,
- txn->mt_txnid)) != MDBX_SUCCESS))
- break;
- if ((dp->mp_flags & Mask) == pflags && level <= 1)
+ MDBX_page *dp = mdbx_dpl_find(txn->tw.dirtylist, pgno);
+ if (dp && (dp->mp_flags & Mask) == pflags)
dp->mp_flags ^= P_KEEP;
}
}
@@ -7652,7 +7701,7 @@ static __always_inline __maybe_unused int ignore_enosys(int err) {
#endif /* defined(_WIN32) || defined(_WIN64) */
/* Turn on/off readahead. It's harmful when the DB is larger than RAM. */
-static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
+static __cold int mdbx_set_readahead(MDBX_env *env, const size_t offset,
const size_t length, const bool enable) {
assert(length > 0);
mdbx_notice("readahead %s %u..%u", enable ? "ON" : "OFF",
@@ -7729,18 +7778,18 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
+ const size_t prev_size = env->me_dxb_mmap.current;
+ const size_t prev_limit = env->me_dxb_mmap.limit;
+ const void *const prev_addr = env->me_map;
mdbx_verbose("resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR,
- env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
- limit_bytes);
+ prev_size, size_bytes, prev_limit, limit_bytes);
mdbx_assert(env, limit_bytes >= size_bytes);
mdbx_assert(env, bytes2pgno(env, size_bytes) >= size_pgno);
mdbx_assert(env, bytes2pgno(env, limit_bytes) >= limit_pgno);
- const size_t prev_limit = env->me_dxb_mmap.limit;
- const void *const prev_addr = env->me_map;
#if defined(_WIN32) || defined(_WIN64)
/* Acquire guard in exclusive mode for:
@@ -7785,9 +7834,11 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
goto bailout;
if (limit_bytes != env->me_dxb_mmap.limit && env->me_lck && !implicit) {
- rc = mdbx_rdt_lock(env) /* lock readers table until remap done */;
- if (unlikely(rc != MDBX_SUCCESS))
+ int err = mdbx_rdt_lock(env) /* lock readers table until remap done */;
+ if (unlikely(MDBX_IS_ERROR(err))) {
+ rc = err;
goto bailout;
+ }
/* looking for readers from this process */
MDBX_lockinfo *const lck = env->me_lck;
@@ -7807,7 +7858,6 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
#endif /* ! Windows */
- const size_t prev_size = env->me_dxb_mmap.current;
if (size_bytes < prev_size) {
mdbx_notice("resize-MADV_%s %u..%u",
(env->me_flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED",
@@ -7846,7 +7896,8 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
rc = mdbx_mresize(env->me_flags, &env->me_dxb_mmap, size_bytes, limit_bytes,
mapping_can_be_moved);
if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_NORDAHEAD) == 0) {
- const int readahead = mdbx_is_readahead_reasonable(size_bytes, 0);
+ const int readahead =
+ mdbx_is_readahead_reasonable(size_bytes, -(intptr_t)prev_size);
if (readahead == MDBX_RESULT_FALSE)
rc = mdbx_set_readahead(
env, 0, (size_bytes > prev_size) ? size_bytes : prev_size, false);
@@ -7889,14 +7940,12 @@ bailout:
mdbx_error("failed resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
- env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
- limit_bytes, rc);
+ prev_size, size_bytes, prev_limit, limit_bytes, rc);
} else {
mdbx_warning("unable resize datafile/mapping: "
"present %" PRIuPTR " -> %" PRIuPTR ", "
"limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
- env->me_dxb_mmap.current, size_bytes, env->me_dxb_mmap.limit,
- limit_bytes, rc);
+ prev_size, size_bytes, prev_limit, limit_bytes, rc);
}
if (!env->me_dxb_mmap.address) {
env->me_flags |= MDBX_FATAL_ERROR;
@@ -8054,7 +8103,7 @@ __hot static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
if (unlikely(mc->mc_flags & C_RECLAIMING)) {
/* If mc is updating the GC, then the retired-list cannot play
* catch-up with itself by growing while trying to save it. */
- flags &= ~(MDBX_ALLOC_GC | MDBX_COALESCE | MDBX_LIFORECLAIM);
+ flags &= ~MDBX_ALLOC_GC;
} else if (unlikely(txn->mt_dbs[FREE_DBI].md_entries == 0)) {
/* avoid (recursive) search inside empty tree and while tree is updating,
* https://github.com/erthink/libmdbx/issues/31 */
@@ -8096,12 +8145,6 @@ skip_cache:
const unsigned wanna_range = num - 1;
while (true) { /* hsr-kick retry loop */
- /* If our dirty list is already full, we can't do anything */
- if (unlikely(txn->tw.dirtyroom == 0)) {
- rc = MDBX_TXN_FULL;
- goto fail;
- }
-
MDBX_cursor_couple recur;
for (MDBX_cursor_op op = MDBX_FIRST;;
op = (flags & MDBX_LIFORECLAIM) ? MDBX_PREV : MDBX_NEXT) {
@@ -8142,6 +8185,11 @@ skip_cache:
}
if (op == MDBX_FIRST) { /* 1st iteration, setup cursor, etc */
+ if (unlikely(txn->tw.dirtyroom < txn->mt_dbs[FREE_DBI].md_depth) &&
+ !(txn->mt_dbistate[FREE_DBI] & DBI_DIRTY)) {
+ /* If our dirty list is already full, we can't touch GC */
+ flags &= ~MDBX_ALLOC_GC;
+ }
if (unlikely(!(flags & MDBX_ALLOC_GC)))
break /* reclaiming is prohibited for now */;
@@ -8242,7 +8290,7 @@ skip_cache:
}
}
- /* Append PNL from GC record to me_reclaimed_pglist */
+ /* Append PNL from GC record to tw.reclaimed_pglist */
mdbx_cassert(mc, (mc->mc_flags & C_GCFREEZE) == 0);
pgno_t *gc_pnl = (pgno_t *)data.iov_base;
mdbx_tassert(txn, data.iov_len >= MDBX_PNL_SIZEOF(gc_pnl));
@@ -8252,6 +8300,24 @@ skip_cache:
goto fail;
}
const unsigned gc_len = MDBX_PNL_SIZE(gc_pnl);
+ /* TODO: provide a user-configurable threshold */
+ const unsigned threshold_2_stop_gc_reclaiming = MDBX_PNL_MAX / 4;
+ if (unlikely(gc_len + MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) >
+ threshold_2_stop_gc_reclaiming) &&
+ (pgno_add(txn->mt_next_pgno, num) <= txn->mt_geo.upper ||
+ gc_len + MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) >=
+ MDBX_PNL_MAX / 16 * 15)) {
+ /* Stop reclaiming to avoid overflow the page list.
+ * This is a rare case while search for a continuously multi-page region
+ * in a large database. https://github.com/erthink/libmdbx/issues/123 */
+ flags &= ~MDBX_ALLOC_GC;
+ if (unlikely((flags & MDBX_ALLOC_ALL) == 0)) {
+ /* Oh, we can't do anything */
+ rc = MDBX_TXN_FULL;
+ goto fail;
+ }
+ break;
+ }
rc = mdbx_pnl_need(&txn->tw.reclaimed_pglist, gc_len);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
@@ -8474,7 +8540,7 @@ done:
mdbx_cassert(mc, (mc->mc_flags & C_GCFREEZE) == 0);
mdbx_tassert(txn, pgno < txn->mt_next_pgno);
mdbx_tassert(txn, pgno == re_list[range_begin]);
- /* Cutoff allocated pages from me_reclaimed_pglist */
+ /* Cutoff allocated pages from tw.reclaimed_pglist */
#if MDBX_PNL_ASCENDING
for (unsigned i = range_begin + num; i <= re_len;)
re_list[range_begin++] = re_list[i++];
@@ -8666,7 +8732,7 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) {
done:
/* Adjust cursors pointing to mp */
mc->mc_pg[mc->mc_top] = np;
- m2 = txn->mt_cursors[mc->mc_dbi];
+ m2 = txn->tw.cursors[mc->mc_dbi];
if (mc->mc_flags & C_SUB) {
for (; m2; m2 = m2->mc_next) {
m3 = &m2->mc_xcursor->mx_cursor;
@@ -8818,43 +8884,42 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) {
return mdbx_env_sync_internal(env, force, nonblock);
}
-__cold int mdbx_env_sync(MDBX_env *env) {
- return mdbx_env_sync_ex(env, true, false);
-}
+__cold int mdbx_env_sync(MDBX_env *env) { return __inline_mdbx_env_sync(env); }
__cold int mdbx_env_sync_poll(MDBX_env *env) {
- return mdbx_env_sync_ex(env, false, true);
+ return __inline_mdbx_env_sync_poll(env);
}
/* Back up parent txn's cursors, then grab the originals for tracking */
-static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) {
- MDBX_cursor *mc, *bk;
- MDBX_xcursor *mx;
-
- for (int i = src->mt_numdbs; --i >= 0;) {
- dst->mt_cursors[i] = NULL;
- if ((mc = src->mt_cursors[i]) != NULL) {
- size_t size = sizeof(MDBX_cursor);
- if (mc->mc_xcursor)
- size += sizeof(MDBX_xcursor);
- for (; mc; mc = bk->mc_next) {
+static int mdbx_cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
+ for (int i = parent->mt_numdbs; --i >= 0;) {
+ nested->tw.cursors[i] = NULL;
+ MDBX_cursor *mc = parent->tw.cursors[i];
+ if (mc != NULL) {
+ size_t size = mc->mc_xcursor ? sizeof(MDBX_cursor) + sizeof(MDBX_xcursor)
+ : sizeof(MDBX_cursor);
+ for (MDBX_cursor *bk; mc; mc = bk->mc_next) {
+ bk = mc;
+ if (mc->mc_signature != MDBX_MC_LIVE)
+ continue;
bk = mdbx_malloc(size);
if (unlikely(!bk))
return MDBX_ENOMEM;
*bk = *mc;
mc->mc_backup = bk;
- mc->mc_db = &dst->mt_dbs[i];
/* Kill pointers into src to reduce abuse: The
* user may not use mc until dst ends. But we need a valid
* txn pointer here for cursor fixups to keep working. */
- mc->mc_txn = dst;
- mc->mc_dbistate = &dst->mt_dbistate[i];
- if ((mx = mc->mc_xcursor) != NULL) {
+ mc->mc_txn = nested;
+ mc->mc_db = &nested->mt_dbs[i];
+ mc->mc_dbistate = &nested->mt_dbistate[i];
+ MDBX_xcursor *mx = mc->mc_xcursor;
+ if (mx != NULL) {
*(MDBX_xcursor *)(bk + 1) = *mx;
- mx->mx_cursor.mc_txn = dst;
+ mx->mx_cursor.mc_txn = nested;
}
- mc->mc_next = dst->mt_cursors[i];
- dst->mt_cursors[i] = mc;
+ mc->mc_next = nested->tw.cursors[i];
+ nested->tw.cursors[i] = mc;
}
}
}
@@ -8867,47 +8932,57 @@ static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) {
* [in] merge true to keep changes to parent cursors, false to revert.
*
* Returns 0 on success, non-zero on failure. */
-static void mdbx_cursors_eot(MDBX_txn *txn, unsigned merge) {
- MDBX_cursor **cursors = txn->mt_cursors, *mc, *next, *bk;
- MDBX_xcursor *mx;
- int i;
-
- for (i = txn->mt_numdbs; --i >= 0;) {
- for (mc = cursors[i]; mc; mc = next) {
- unsigned stage = mc->mc_signature;
- mdbx_ensure(txn->mt_env,
- stage == MDBX_MC_LIVE || stage == MDBX_MC_WAIT4EOT);
+static void mdbx_cursors_eot(MDBX_txn *txn, const bool merge) {
+ mdbx_tassert(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
+ for (int i = txn->mt_numdbs; --i >= 0;) {
+ MDBX_cursor *next, *mc = txn->tw.cursors[i];
+ if (!mc)
+ continue;
+ txn->tw.cursors[i] = NULL;
+ do {
+ const unsigned stage = mc->mc_signature;
+ MDBX_cursor *bk = mc->mc_backup;
next = mc->mc_next;
- mdbx_tassert(txn, !next || next->mc_signature == MDBX_MC_LIVE ||
- next->mc_signature == MDBX_MC_WAIT4EOT);
- if ((bk = mc->mc_backup) != NULL) {
- if (merge) {
- /* Commit changes to parent txn */
+ mdbx_ensure(txn->mt_env,
+ stage == MDBX_MC_LIVE || (stage == MDBX_MC_WAIT4EOT && bk));
+ mdbx_cassert(mc, mc->mc_dbi == (unsigned)i);
+ if (bk) {
+ MDBX_xcursor *mx = mc->mc_xcursor;
+ mdbx_cassert(mc, mx == bk->mc_xcursor);
+ mdbx_tassert(txn, txn->mt_parent != NULL);
+ mdbx_ensure(txn->mt_env, bk->mc_signature == MDBX_MC_LIVE);
+ if (stage == MDBX_MC_WAIT4EOT /* Cursor was closed by user */)
+ mc->mc_signature = stage /* Promote closed state to parent txn */;
+ else if (merge) {
+ /* Preserve changes from nested to parent txn */
mc->mc_next = bk->mc_next;
mc->mc_backup = bk->mc_backup;
mc->mc_txn = bk->mc_txn;
+ *bk->mc_db = *mc->mc_db;
mc->mc_db = bk->mc_db;
+ *bk->mc_dbistate = *mc->mc_dbistate;
mc->mc_dbistate = bk->mc_dbistate;
- if ((mx = mc->mc_xcursor) != NULL)
+ if (mx) {
+ if (mx != bk->mc_xcursor) {
+ *bk->mc_xcursor = *mx;
+ mx = bk->mc_xcursor;
+ }
mx->mx_cursor.mc_txn = bk->mc_txn;
+ }
} else {
- /* Abort nested txn */
+ /* Restore from backup, i.e. rollback/abort nested txn */
*mc = *bk;
- if ((mx = mc->mc_xcursor) != NULL)
+ if (mx)
*mx = *(MDBX_xcursor *)(bk + 1);
}
bk->mc_signature = 0;
mdbx_free(bk);
- }
- if (stage == MDBX_MC_WAIT4EOT) {
- mc->mc_signature = 0;
- mdbx_free(mc);
} else {
- mc->mc_signature = MDBX_MC_READY4CLOSE;
+ mdbx_ensure(txn->mt_env, stage == MDBX_MC_LIVE);
+ mc->mc_signature = MDBX_MC_READY4CLOSE /* Cursor may be reused */;
mc->mc_flags = 0 /* reset C_UNTRACK */;
}
- }
- cursors[i] = NULL;
+ } while ((mc = next) != NULL);
}
}
@@ -9471,7 +9546,7 @@ int mdbx_txn_renew(MDBX_txn *txn) {
int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
MDBX_txn **ret) {
- return mdbx_txn_begin_ex(env, parent, flags, ret, nullptr);
+ return __inline_mdbx_txn_begin(env, parent, flags, ret);
}
int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) {
@@ -9552,7 +9627,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
if (parent) {
mdbx_tassert(txn, mdbx_dirtylist_check(parent));
- txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
+ txn->tw.cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
txn->mt_dbiseqs = parent->mt_dbiseqs;
txn->tw.dirtylist = mdbx_malloc(sizeof(MDBX_DP) * (MDBX_DPL_TXNFULL + 1));
txn->tw.reclaimed_pglist =
@@ -9596,7 +9671,8 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db));
/* Copy parent's mt_dbistate, but clear DB_NEW */
for (unsigned i = 0; i < txn->mt_numdbs; i++)
- txn->mt_dbistate[i] = parent->mt_dbistate[i] & ~(DBI_FRESH | DBI_CREAT);
+ txn->mt_dbistate[i] =
+ parent->mt_dbistate[i] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY);
mdbx_tassert(parent,
parent->mt_parent ||
parent->tw.dirtyroom + parent->tw.dirtylist->length ==
@@ -9773,6 +9849,7 @@ int mdbx_txn_flags(const MDBX_txn *txn) {
/* Export or close DBI handles opened in this txn. */
static void mdbx_dbis_update(MDBX_txn *txn, int keep) {
+ mdbx_tassert(txn, !txn->mt_parent && txn == txn->mt_env->me_txn0);
MDBX_dbi n = txn->mt_numdbs;
if (n) {
bool locked = false;
@@ -9874,10 +9951,8 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
if (txn == env->me_txn0)
mdbx_txn_valgrind(env, nullptr);
#endif
- /* Export or close DBI handles created in this txn */
- mdbx_dbis_update(txn, mode & MDBX_END_UPDATE);
if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */
- mdbx_cursors_eot(txn, 0);
+ mdbx_cursors_eot(txn, false);
if (!(env->me_flags & MDBX_WRITEMAP))
mdbx_dlist_free(txn);
@@ -9886,17 +9961,20 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
env->me_txn = txn->mt_parent;
if (txn == env->me_txn0) {
mdbx_assert(env, txn->mt_parent == NULL);
+ /* Export or close DBI handles created in this txn */
+ mdbx_dbis_update(txn, mode & MDBX_END_UPDATE);
mdbx_pnl_shrink(&txn->tw.retired_pages);
mdbx_pnl_shrink(&txn->tw.reclaimed_pglist);
/* The writer mutex was locked in mdbx_txn_begin. */
mdbx_txn_unlock(env);
} else {
mdbx_assert(env, txn->mt_parent != NULL);
+ MDBX_txn *const parent = txn->mt_parent;
+ mdbx_assert(env, parent->mt_signature == MDBX_MT_SIGNATURE);
+ mdbx_assert(env, parent->mt_child == txn &&
+ (parent->mt_flags & MDBX_TXN_HAS_CHILD) != 0);
mdbx_assert(env, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno));
- MDBX_txn *const parent = txn->mt_parent;
- env->me_txn->mt_child = NULL;
- env->me_txn->mt_flags &= ~MDBX_TXN_HAS_CHILD;
mdbx_pnl_free(txn->tw.reclaimed_pglist);
mdbx_pnl_free(txn->tw.spill_pages);
@@ -9917,6 +9995,8 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
}
mdbx_free(txn->tw.dirtylist);
+ parent->mt_child = NULL;
+ parent->mt_flags &= ~MDBX_TXN_HAS_CHILD;
if (parent->mt_geo.upper != txn->mt_geo.upper ||
parent->mt_geo.now != txn->mt_geo.now) {
@@ -10088,7 +10168,7 @@ static __cold int mdbx_audit_ex(MDBX_txn *txn, unsigned retired_stored,
db->md_branch_pages + db->md_leaf_pages + db->md_overflow_pages;
}
}
- rc = mdbx_cursor_sibling(&cx.outer, 1);
+ rc = mdbx_cursor_sibling(&cx.outer, SIBLING_RIGHT);
}
mdbx_tassert(txn, rc == MDBX_NOTFOUND);
}
@@ -10200,8 +10280,8 @@ static int mdbx_update_gc(MDBX_txn *txn) {
goto bailout_notracking;
couple.outer.mc_flags |= C_RECLAIMING;
- couple.outer.mc_next = txn->mt_cursors[FREE_DBI];
- txn->mt_cursors[FREE_DBI] = &couple.outer;
+ couple.outer.mc_next = txn->tw.cursors[FREE_DBI];
+ txn->tw.cursors[FREE_DBI] = &couple.outer;
retry:
++loop;
@@ -10233,8 +10313,10 @@ retry_noaccount:
mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno));
- if (txn->tw.lifo_reclaimed) {
- if (cleaned_gc_slot < MDBX_PNL_SIZE(txn->tw.lifo_reclaimed)) {
+ if (lifo) {
+ if (cleaned_gc_slot < (txn->tw.lifo_reclaimed
+ ? MDBX_PNL_SIZE(txn->tw.lifo_reclaimed)
+ : 0)) {
settled = 0;
cleaned_gc_slot = 0;
reused_gc_slot = 0;
@@ -10265,7 +10347,7 @@ retry_noaccount:
}
} else {
/* If using records from GC which we have not yet deleted,
- * now delete them and any we reserved for me_reclaimed_pglist. */
+ * now delete them and any we reserved for tw.reclaimed_pglist. */
while (cleaned_gc_id <= txn->tw.last_reclaimed) {
gc_rid = cleaned_gc_id;
settled = 0;
@@ -10325,13 +10407,13 @@ retry_noaccount:
/* handle loose pages - put ones into the reclaimed- or retired-list */
if (txn->tw.loose_pages) {
- /* Return loose page numbers to me_reclaimed_pglist,
+ /* Return loose page numbers to tw.reclaimed_pglist,
* though usually none are left at this point.
* The pages themselves remain in dirtylist. */
if (unlikely(!txn->tw.lifo_reclaimed && txn->tw.last_reclaimed < 1)) {
if (txn->tw.loose_count > 0) {
/* Put loose page numbers in tw.retired_pages,
- * since unable to return them to me_reclaimed_pglist. */
+ * since unable to return them to tw.reclaimed_pglist. */
if (unlikely((rc = mdbx_pnl_need(&txn->tw.retired_pages,
txn->tw.loose_count)) != 0))
goto bailout;
@@ -10479,9 +10561,12 @@ retry_noaccount:
env->me_maxgc_ov1page) {
/* LY: need just a txn-id for save page list. */
- couple.outer.mc_flags &= ~C_RECLAIMING;
bool need_cleanup = false;
+ txnid_t snap_oldest;
+ retry_rid:
+ couple.outer.mc_flags &= ~C_RECLAIMING;
do {
+ snap_oldest = mdbx_find_oldest(txn);
rc = mdbx_page_alloc(&couple.outer, 0, NULL, MDBX_ALLOC_GC);
if (likely(rc == MDBX_SUCCESS)) {
mdbx_trace("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode,
@@ -10509,7 +10594,13 @@ retry_noaccount:
gc_rid = MDBX_PNL_LAST(txn->tw.lifo_reclaimed);
} else {
mdbx_tassert(txn, txn->tw.last_reclaimed == 0);
- txn->tw.last_reclaimed = gc_rid = mdbx_find_oldest(txn) - 1;
+ if (unlikely(mdbx_find_oldest(txn) != snap_oldest))
+ /* should retry mdbx_page_alloc(MDBX_ALLOC_GC)
+ * if the oldest reader changes since the last attempt */
+ goto retry_rid;
+ /* no reclaimable GC entries,
+ * therefore no entries with ID < mdbx_find_oldest(txn) */
+ txn->tw.last_reclaimed = gc_rid = snap_oldest - 1;
mdbx_trace("%s: none recycled yet, set rid to @%" PRIaTXN,
dbg_prefix_mode, gc_rid);
}
@@ -10842,7 +10933,7 @@ retry_noaccount:
cleaned_gc_slot == MDBX_PNL_SIZE(txn->tw.lifo_reclaimed));
bailout:
- txn->mt_cursors[FREE_DBI] = couple.outer.mc_next;
+ txn->tw.cursors[FREE_DBI] = couple.outer.mc_next;
bailout_notracking:
MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) = 0;
@@ -10855,24 +10946,23 @@ static int mdbx_flush_iov(MDBX_txn *const txn, struct iovec *iov,
size_t iov_bytes) {
MDBX_env *const env = txn->mt_env;
mdbx_assert(env, iov_items > 0);
+ int rc;
if (likely(iov_items == 1)) {
mdbx_assert(env, iov->iov_len == iov_bytes);
- int rc = mdbx_pwrite(env->me_lazy_fd, iov->iov_base, iov_bytes, iov_off);
- mdbx_dpage_free(env, (MDBX_page *)iov->iov_base,
- bytes2pgno(env, iov_bytes));
- return rc;
+ rc = mdbx_pwrite(env->me_lazy_fd, iov->iov_base, iov_bytes, iov_off);
} else {
- int rc = mdbx_pwritev(env->me_lazy_fd, iov, iov_items, iov_off, iov_bytes);
- if (unlikely(rc != MDBX_SUCCESS)) {
- mdbx_error("Write error: %s", mdbx_strerror(rc));
- txn->mt_flags |= MDBX_TXN_ERROR;
- }
+ rc = mdbx_pwritev(env->me_lazy_fd, iov, iov_items, iov_off, iov_bytes);
+ }
- for (unsigned i = 0; i < iov_items; i++)
- mdbx_dpage_free(env, (MDBX_page *)iov[i].iov_base,
- bytes2pgno(env, iov[i].iov_len));
- return rc;
+ if (unlikely(rc != MDBX_SUCCESS)) {
+ mdbx_error("Write error: %s", mdbx_strerror(rc));
+ txn->mt_flags |= MDBX_TXN_ERROR;
}
+
+ for (unsigned i = 0; i < iov_items; i++)
+ mdbx_dpage_free(env, (MDBX_page *)iov[i].iov_base,
+ bytes2pgno(env, iov[i].iov_len));
+ return rc;
}
/* Flush (some) dirty pages to the map, after clearing their dirty flag.
@@ -11011,12 +11101,18 @@ static __always_inline bool mdbx_txn_dbi_exists(MDBX_txn *txn, MDBX_dbi dbi,
return mdbx_txn_import_dbi(txn, dbi);
}
-int mdbx_txn_commit(MDBX_txn *txn) {
+int mdbx_txn_commit(MDBX_txn *txn) { return __inline_mdbx_txn_commit(txn); }
+
+int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
STATIC_ASSERT(MDBX_TXN_FINISHED ==
MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR);
+ const uint64_t ts_0 = latency ? mdbx_osal_monotime() : 0;
+ uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0;
+ uint32_t audit_duration = 0;
+
int rc = check_txn(txn, MDBX_TXN_FINISHED);
if (unlikely(rc != MDBX_SUCCESS))
- return rc;
+ goto provide_latency;
if (unlikely(txn->mt_flags & MDBX_TXN_ERROR)) {
rc = MDBX_RESULT_TRUE;
@@ -11027,7 +11123,8 @@ int mdbx_txn_commit(MDBX_txn *txn) {
#if MDBX_ENV_CHECKPID
if (unlikely(env->me_pid != mdbx_getpid())) {
env->me_flags |= MDBX_FATAL_ERROR;
- return MDBX_PANIC;
+ rc = MDBX_PANIC;
+ goto provide_latency;
}
#endif /* MDBX_ENV_CHECKPID */
@@ -11038,7 +11135,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
goto done;
if (txn->mt_child) {
- rc = mdbx_txn_commit(txn->mt_child);
+ rc = mdbx_txn_commit_ex(txn->mt_child, NULL);
mdbx_tassert(txn, txn->mt_child == NULL);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
@@ -11051,8 +11148,32 @@ int mdbx_txn_commit(MDBX_txn *txn) {
}
if (txn->mt_parent) {
+ mdbx_assert(env, txn != env->me_txn0);
MDBX_txn *const parent = txn->mt_parent;
- mdbx_tassert(txn, mdbx_dirtylist_check(txn));
+ mdbx_assert(env, parent->mt_signature == MDBX_MT_SIGNATURE);
+ mdbx_assert(env, parent->mt_child == txn &&
+ (parent->mt_flags & MDBX_TXN_HAS_CHILD) != 0);
+ mdbx_assert(env, mdbx_dirtylist_check(txn));
+
+ if (txn->tw.dirtylist->length == 0 &&
+ (txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0 &&
+ parent->mt_numdbs == txn->mt_numdbs) {
+ for (int i = txn->mt_numdbs; --i >= 0;) {
+ mdbx_tassert(txn, (txn->mt_dbistate[i] & DBI_DIRTY) == 0);
+ if ((txn->mt_dbistate[i] & DBI_STALE) &&
+ !(parent->mt_dbistate[i] & DBI_STALE))
+ mdbx_tassert(txn, memcmp(&parent->mt_dbs[i], &txn->mt_dbs[i],
+ sizeof(MDBX_db)) == 0);
+ }
+
+ mdbx_tassert(txn, memcmp(&parent->mt_geo, &txn->mt_geo,
+ sizeof(parent->mt_geo)) == 0);
+ mdbx_tassert(txn, memcmp(&parent->mt_canary, &txn->mt_canary,
+ sizeof(parent->mt_canary)) == 0);
+
+ end_mode = MDBX_END_ABORT | MDBX_END_SLOT | MDBX_END_FREE;
+ goto done;
+ }
/* Preserve space for spill list to avoid parent's state corruption
* if allocation fails. */
@@ -11081,7 +11202,8 @@ int mdbx_txn_commit(MDBX_txn *txn) {
parent->mt_flags |= txn->mt_flags & MDBX_TXN_DIRTY;
/* Merge our cursors into parent's and close them */
- mdbx_cursors_eot(txn, 1);
+ mdbx_cursors_eot(txn, true);
+ end_mode |= MDBX_END_EOTDONE;
/* Update parent's DB table. */
memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db));
@@ -11093,6 +11215,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
parent->mt_dbistate[i] = txn->mt_dbistate[i] | (parent->mt_dbistate[i] &
(DBI_CREAT | DBI_FRESH));
}
+ ts_1 = latency ? mdbx_osal_monotime() : 0;
/* Remove refunded pages from parent's dirty & spill lists */
MDBX_DPL dst = mdbx_dpl_sort(parent->tw.dirtylist);
@@ -11264,6 +11387,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
parent->mt_flags |= MDBX_TXN_SPILLS;
}
+ ts_2 = latency ? mdbx_osal_monotime() : 0;
/* Append our loose page list to parent's */
if (txn->tw.loose_pages) {
MDBX_page **lp = &parent->tw.loose_pages;
@@ -11285,8 +11409,6 @@ int mdbx_txn_commit(MDBX_txn *txn) {
env->me_txn = parent;
parent->mt_child = NULL;
- txn->mt_signature = 0;
- mdbx_free(txn);
mdbx_tassert(parent, mdbx_dirtylist_check(parent));
/* Scan parent's loose page for suitable for refund */
@@ -11296,13 +11418,18 @@ int mdbx_txn_commit(MDBX_txn *txn) {
break;
}
}
+
+ ts_4 = ts_3 = latency ? mdbx_osal_monotime() : 0;
+ txn->mt_signature = 0;
+ mdbx_free(txn);
mdbx_tassert(parent, mdbx_dirtylist_check(parent));
- return MDBX_SUCCESS;
+ rc = MDBX_SUCCESS;
+ goto provide_latency;
}
mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
MDBX_DPL_TXNFULL);
- mdbx_cursors_eot(txn, 0);
+ mdbx_cursors_eot(txn, false);
end_mode |= MDBX_END_EOTDONE;
if (txn->tw.dirtylist->length == 0 &&
@@ -11346,17 +11473,23 @@ int mdbx_txn_commit(MDBX_txn *txn) {
}
}
+ ts_1 = latency ? mdbx_osal_monotime() : 0;
rc = mdbx_update_gc(txn);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
+ ts_2 = latency ? mdbx_osal_monotime() : 0;
if (mdbx_audit_enabled()) {
rc = mdbx_audit_ex(txn, MDBX_PNL_SIZE(txn->tw.retired_pages), true);
+ const uint64_t audit_end = mdbx_osal_monotime();
+ audit_duration = mdbx_osal_monotime_to_16dot16(audit_end - ts_2);
+ ts_2 = audit_end;
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
}
rc = mdbx_page_flush(txn, 0);
+ ts_3 = latency ? mdbx_osal_monotime() : 0;
if (likely(rc == MDBX_SUCCESS)) {
if (txn->mt_dbs[MAIN_DBI].md_flags & DBI_DIRTY)
txn->mt_dbs[MAIN_DBI].md_mod_txnid = pp_txnid2chk(txn);
@@ -11378,6 +11511,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
rc = mdbx_sync_locked(
env, env->me_flags | txn->mt_flags | MDBX_SHRINK_ALLOWED, &meta);
}
+ ts_4 = latency ? mdbx_osal_monotime() : 0;
if (unlikely(rc != MDBX_SUCCESS)) {
env->me_flags |= MDBX_FATAL_ERROR;
goto fail;
@@ -11386,19 +11520,34 @@ int mdbx_txn_commit(MDBX_txn *txn) {
end_mode = MDBX_END_COMMITTED | MDBX_END_UPDATE | MDBX_END_EOTDONE;
done:
- return mdbx_txn_end(txn, end_mode);
+ rc = mdbx_txn_end(txn, end_mode);
+
+provide_latency:
+ if (latency) {
+ latency->audit = audit_duration;
+ latency->preparation =
+ ts_1 ? mdbx_osal_monotime_to_16dot16(ts_1 - ts_0) : 0;
+ latency->gc =
+ (ts_1 && ts_2) ? mdbx_osal_monotime_to_16dot16(ts_2 - ts_1) : 0;
+ latency->write =
+ (ts_2 && ts_3) ? mdbx_osal_monotime_to_16dot16(ts_3 - ts_2) : 0;
+ latency->sync =
+ (ts_3 && ts_4) ? mdbx_osal_monotime_to_16dot16(ts_4 - ts_3) : 0;
+ const uint64_t ts_5 = mdbx_osal_monotime();
+ latency->ending = ts_4 ? mdbx_osal_monotime_to_16dot16(ts_5 - ts_4) : 0;
+ latency->whole = mdbx_osal_monotime_to_16dot16(ts_5 - ts_0);
+ }
+ return rc;
fail:
mdbx_txn_abort(txn);
- return rc;
+ goto provide_latency;
}
-static int __cold mdbx_validate_meta(MDBX_env *env, MDBX_meta *const meta,
- uint64_t *filesize,
- const MDBX_page *const page,
- const unsigned meta_number,
- MDBX_meta *dest,
- const unsigned guess_pagesize) {
+static __cold int
+mdbx_validate_meta(MDBX_env *env, MDBX_meta *const meta, uint64_t *filesize,
+ const MDBX_page *const page, const unsigned meta_number,
+ MDBX_meta *dest, const unsigned guess_pagesize) {
if (meta->mm_magic_and_version != MDBX_DATA_MAGIC &&
meta->mm_magic_and_version != MDBX_DATA_MAGIC_DEVEL) {
mdbx_error("meta[%u] has invalid magic/version %" PRIx64, meta_number,
@@ -11590,7 +11739,7 @@ static int __cold mdbx_validate_meta(MDBX_env *env, MDBX_meta *const meta,
/* Read the environment parameters of a DB environment
* before mapping it into memory. */
-static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *dest,
+static __cold int mdbx_read_header(MDBX_env *env, MDBX_meta *dest,
uint64_t *filesize,
const int lck_exclusive) {
int rc = mdbx_filesize(env->me_lazy_fd, filesize);
@@ -12080,7 +12229,7 @@ static void __cold mdbx_setup_pagesize(MDBX_env *env, const size_t pagesize) {
mdbx_assert(env, bytes2pgno(env, pagesize + pagesize) == 2);
}
-int __cold mdbx_env_create(MDBX_env **penv) {
+__cold int mdbx_env_create(MDBX_env **penv) {
MDBX_env *env = mdbx_calloc(1, sizeof(MDBX_env));
if (unlikely(!env))
return MDBX_ENOMEM;
@@ -12422,9 +12571,11 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
rc = MDBX_EPERM;
goto bailout;
}
- rc = mdbx_rdt_lock(env);
- if (unlikely(rc != MDBX_SUCCESS))
+ int err = mdbx_rdt_lock(env);
+ if (unlikely(MDBX_IS_ERROR(err))) {
+ rc = err;
goto bailout;
+ }
/* Check if there are any reading threads that do not use the SRWL */
const size_t CurrentTid = GetCurrentThreadId();
@@ -12481,11 +12632,11 @@ bailout:
return rc;
}
-int __cold mdbx_env_set_mapsize(MDBX_env *env, size_t size) {
- return mdbx_env_set_geometry(env, size, size, size, -1, -1, -1);
+__cold int mdbx_env_set_mapsize(MDBX_env *env, size_t size) {
+ return __inline_mdbx_env_set_mapsize(env, size);
}
-int __cold mdbx_env_set_maxdbs(MDBX_env *env, MDBX_dbi dbs) {
+__cold int mdbx_env_set_maxdbs(MDBX_env *env, MDBX_dbi dbs) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -12500,7 +12651,7 @@ int __cold mdbx_env_set_maxdbs(MDBX_env *env, MDBX_dbi dbs) {
return MDBX_SUCCESS;
}
-int __cold mdbx_env_get_maxdbs(MDBX_env *env, MDBX_dbi *dbs) {
+__cold int mdbx_env_get_maxdbs(MDBX_env *env, MDBX_dbi *dbs) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -12512,7 +12663,7 @@ int __cold mdbx_env_get_maxdbs(MDBX_env *env, MDBX_dbi *dbs) {
return MDBX_SUCCESS;
}
-int __cold mdbx_env_set_maxreaders(MDBX_env *env, unsigned readers) {
+__cold int mdbx_env_set_maxreaders(MDBX_env *env, unsigned readers) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -12527,7 +12678,7 @@ int __cold mdbx_env_set_maxreaders(MDBX_env *env, unsigned readers) {
return MDBX_SUCCESS;
}
-int __cold mdbx_env_get_maxreaders(const MDBX_env *env, unsigned *readers) {
+__cold int mdbx_env_get_maxreaders(const MDBX_env *env, unsigned *readers) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -12540,7 +12691,7 @@ int __cold mdbx_env_get_maxreaders(const MDBX_env *env, unsigned *readers) {
}
/* Further setup required for opening an MDBX environment */
-static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
+static __cold int mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
uint64_t filesize_before;
MDBX_meta meta;
int rc = MDBX_RESULT_FALSE;
@@ -12711,6 +12862,10 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
mdbx_verbose("current boot-id %" PRIx64 "-%" PRIx64 " (%savailable)",
bootid.x, bootid.y, (bootid.x | bootid.y) ? "" : "not-");
+ /* calculate readahead hint before mmap with zero redundant pages */
+ const bool readahead =
+ (env->me_flags & MDBX_NORDAHEAD) == 0 &&
+ mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE;
err = mdbx_mmap(env->me_flags, &env->me_dxb_mmap, env->me_dbgeo.now,
env->me_dbgeo.upper, lck_rc ? MMAP_OPTION_TRUNCATE : 0);
if (unlikely(err != MDBX_SUCCESS))
@@ -12974,9 +13129,6 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
#endif /* MADV_DONTNEED */
}
- const bool readahead = (env->me_flags & MDBX_NORDAHEAD) == 0 &&
- mdbx_is_readahead_reasonable(env->me_dxb_mmap.current,
- 0) == MDBX_RESULT_TRUE;
err = mdbx_set_readahead(env, 0, used_bytes, readahead);
if (err != MDBX_SUCCESS && lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE)
return err;
@@ -12987,7 +13139,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
/******************************************************************************/
/* Open and/or initialize the lock region for the environment. */
-static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
+static __cold int mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
mdbx_mode_t mode) {
mdbx_assert(env, env->me_lazy_fd != INVALID_HANDLE_VALUE);
mdbx_assert(env, env->me_lfd == INVALID_HANDLE_VALUE);
@@ -13001,7 +13153,9 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
/* ensure the file system is read-only */
err = mdbx_check_fs_rdonly(env->me_lazy_fd, lck_pathname, err);
- if (err != MDBX_SUCCESS)
+ if (err != MDBX_SUCCESS &&
+ /* ignore ERROR_NOT_SUPPORTED for exclusive mode */
+ !(err == MDBX_ENOSYS && (env->me_flags & MDBX_EXCLUSIVE)))
return err;
/* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */
@@ -13394,22 +13548,21 @@ __cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname,
0);
}
-__cold int mdbx_env_open(MDBX_env *env, const char *pathname,
- MDBX_env_flags_t flags, mdbx_mode_t mode) {
- int rc = check_env(env);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
-
+typedef struct {
+ void *buffer_for_free;
+ char *lck, *dxb;
+ size_t ent_len;
+} MDBX_handle_env_pathname;
+
+__cold static int mdbx_handle_env_pathname(MDBX_handle_env_pathname *ctx,
+ const char *pathname,
+ MDBX_env_flags_t *flags,
+ const mdbx_mode_t mode) {
+ int rc;
+ memset(ctx, 0, sizeof(*ctx));
if (unlikely(!pathname))
return MDBX_EINVAL;
- if (flags & ~ENV_USABLE_FLAGS)
- return MDBX_EINVAL;
-
- if (env->me_lazy_fd != INVALID_HANDLE_VALUE ||
- (env->me_flags & MDBX_ENV_ACTIVE) != 0)
- return MDBX_EPERM;
-
#if defined(_WIN32) || defined(_WIN64)
const size_t wlen = mbstowcs(nullptr, pathname, INT_MAX);
if (wlen < 1 || wlen > /* MAX_PATH */ INT16_MAX)
@@ -13417,33 +13570,28 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
wchar_t *const pathnameW = _alloca((wlen + 1) * sizeof(wchar_t));
if (wlen != mbstowcs(pathnameW, pathname, wlen + 1))
return ERROR_INVALID_NAME;
-#endif /* Windows */
-
- /* pickup previously mdbx_env_set_flags(),
- * but avoid MDBX_UTTERLY_NOSYNC by disjunction */
- flags = merge_sync_flags(flags, env->me_flags);
-#if defined(_WIN32) || defined(_WIN64)
const DWORD dwAttrib = GetFileAttributesW(pathnameW);
if (dwAttrib == INVALID_FILE_ATTRIBUTES) {
rc = GetLastError();
if (rc != MDBX_ENOFILE)
return rc;
- if (mode == 0 || (flags & MDBX_RDONLY) != 0)
+ if (mode == 0 || (*flags & MDBX_RDONLY) != 0)
/* can't open existing */
return rc;
/* auto-create directory if requested */
- if ((flags & MDBX_NOSUBDIR) == 0 && !CreateDirectoryW(pathnameW, nullptr)) {
+ if ((*flags & MDBX_NOSUBDIR) == 0 &&
+ !CreateDirectoryW(pathnameW, nullptr)) {
rc = GetLastError();
if (rc != ERROR_ALREADY_EXISTS)
return rc;
}
} else {
/* ignore passed MDBX_NOSUBDIR flag and set it automatically */
- flags |= MDBX_NOSUBDIR;
+ *flags |= MDBX_NOSUBDIR;
if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY)
- flags -= MDBX_NOSUBDIR;
+ *flags -= MDBX_NOSUBDIR;
}
#else
struct stat st;
@@ -13451,7 +13599,7 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
rc = errno;
if (rc != MDBX_ENOFILE)
return rc;
- if (mode == 0 || (flags & MDBX_RDONLY) != 0)
+ if (mode == 0 || (*flags & MDBX_RDONLY) != 0)
/* can't open existing */
return rc;
@@ -13462,41 +13610,151 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
/* always add read/write/search for owner */ S_IRWXU |
((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) |
((mode & S_IROTH) ? /* +search if readable by others */ S_IXOTH : 0);
- if ((flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) {
+ if ((*flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) {
rc = errno;
if (rc != EEXIST)
return rc;
}
} else {
/* ignore passed MDBX_NOSUBDIR flag and set it automatically */
- flags |= MDBX_NOSUBDIR;
+ *flags |= MDBX_NOSUBDIR;
if (S_ISDIR(st.st_mode))
- flags -= MDBX_NOSUBDIR;
+ *flags -= MDBX_NOSUBDIR;
}
#endif
- size_t len_full, len = strlen(pathname);
- if (flags & MDBX_NOSUBDIR) {
- len_full = len + sizeof(MDBX_LOCK_SUFFIX) + len + 1;
- } else {
- len_full = len + sizeof(MDBX_LOCKNAME) + len + sizeof(MDBX_DATANAME);
+ static const char dxb_name[] = MDBX_DATANAME;
+ static const size_t dxb_name_len = sizeof(dxb_name) - 1;
+ static const char lck_name[] = MDBX_LOCKNAME;
+ static const char lock_suffix[] = MDBX_LOCK_SUFFIX;
+
+ ctx->ent_len = strlen(pathname);
+ if ((*flags & MDBX_NOSUBDIR) && ctx->ent_len >= dxb_name_len &&
+ !memcmp(dxb_name, pathname + ctx->ent_len - dxb_name_len, dxb_name_len)) {
+ *flags -= MDBX_NOSUBDIR;
+ ctx->ent_len -= dxb_name_len;
}
- char *lck_pathname = mdbx_malloc(len_full);
- if (!lck_pathname)
+
+ const size_t bytes_needed =
+ ctx->ent_len * 2 + ((*flags & MDBX_NOSUBDIR)
+ ? sizeof(lock_suffix) + 1
+ : sizeof(lck_name) + sizeof(dxb_name));
+ ctx->buffer_for_free = mdbx_malloc(bytes_needed);
+ if (!ctx->buffer_for_free)
return MDBX_ENOMEM;
- char *dxb_pathname;
- if (flags & MDBX_NOSUBDIR) {
- dxb_pathname = lck_pathname + len + sizeof(MDBX_LOCK_SUFFIX);
- sprintf(lck_pathname, "%s" MDBX_LOCK_SUFFIX, pathname);
- strcpy(dxb_pathname, pathname);
+ ctx->lck = ctx->buffer_for_free;
+ if (*flags & MDBX_NOSUBDIR) {
+ ctx->dxb = ctx->lck + ctx->ent_len + sizeof(lock_suffix);
+ sprintf(ctx->lck, "%s%s", pathname, lock_suffix);
+ strcpy(ctx->dxb, pathname);
} else {
- dxb_pathname = lck_pathname + len + sizeof(MDBX_LOCKNAME);
- sprintf(lck_pathname, "%s" MDBX_LOCKNAME, pathname);
- sprintf(dxb_pathname, "%s" MDBX_DATANAME, pathname);
+ ctx->dxb = ctx->lck + ctx->ent_len + sizeof(lck_name);
+ sprintf(ctx->lck, "%.*s%s", (int)ctx->ent_len, pathname, lck_name);
+ sprintf(ctx->dxb, "%.*s%s", (int)ctx->ent_len, pathname, dxb_name);
}
- rc = MDBX_SUCCESS;
+ return MDBX_SUCCESS;
+}
+
+__cold int mdbx_env_delete(const char *pathname, MDBX_env_delete_mode_t mode) {
+ switch (mode) {
+ default:
+ return MDBX_EINVAL;
+ case MDBX_ENV_JUST_DELETE:
+ case MDBX_ENV_ENSURE_UNUSED:
+ case MDBX_ENV_WAIT_FOR_UNUSED:
+ break;
+ }
+
+ MDBX_env dummy_env;
+ memset(&dummy_env, 0, sizeof(dummy_env));
+ dummy_env.me_flags =
+ (mode == MDBX_ENV_ENSURE_UNUSED) ? MDBX_EXCLUSIVE : MDBX_ENV_DEFAULTS;
+ dummy_env.me_psize = dummy_env.me_os_psize = (unsigned)mdbx_syspagesize();
+ dummy_env.me_pathname = (char *)pathname;
+
+ MDBX_handle_env_pathname env_pathname;
+ STATIC_ASSERT(sizeof(dummy_env.me_flags) == sizeof(MDBX_env_flags_t));
+ int rc = MDBX_RESULT_TRUE,
+ err = mdbx_handle_env_pathname(
+ &env_pathname, pathname, (MDBX_env_flags_t *)&dummy_env.me_flags, 0);
+ if (likely(err == MDBX_SUCCESS)) {
+ mdbx_filehandle_t clk_handle = INVALID_HANDLE_VALUE,
+ dxb_handle = INVALID_HANDLE_VALUE;
+ if (mode > MDBX_ENV_JUST_DELETE) {
+ err = mdbx_openfile(MDBX_OPEN_DELETE, &dummy_env, env_pathname.dxb,
+ &dxb_handle, 0);
+ err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err;
+ if (err == MDBX_SUCCESS) {
+ err = mdbx_openfile(MDBX_OPEN_DELETE, &dummy_env, env_pathname.lck,
+ &clk_handle, 0);
+ err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err;
+ }
+ if (err == MDBX_SUCCESS && clk_handle != INVALID_HANDLE_VALUE)
+ err = mdbx_lockfile(clk_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED);
+ if (err == MDBX_SUCCESS && dxb_handle != INVALID_HANDLE_VALUE)
+ err = mdbx_lockfile(dxb_handle, mode == MDBX_ENV_WAIT_FOR_UNUSED);
+ }
+
+ if (err == MDBX_SUCCESS) {
+ err = mdbx_removefile(env_pathname.dxb);
+ if (err == MDBX_SUCCESS)
+ rc = MDBX_SUCCESS;
+ else if (err == MDBX_ENOFILE)
+ err = MDBX_SUCCESS;
+ }
+
+ if (err == MDBX_SUCCESS) {
+ err = mdbx_removefile(env_pathname.lck);
+ if (err == MDBX_SUCCESS)
+ rc = MDBX_SUCCESS;
+ else if (err == MDBX_ENOFILE)
+ err = MDBX_SUCCESS;
+ }
+
+ if (err == MDBX_SUCCESS && !(dummy_env.me_flags & MDBX_NOSUBDIR)) {
+ err = mdbx_removedirectory(pathname);
+ if (err == MDBX_SUCCESS)
+ rc = MDBX_SUCCESS;
+ else if (err == MDBX_ENOFILE)
+ err = MDBX_SUCCESS;
+ }
+
+ if (dxb_handle != INVALID_HANDLE_VALUE)
+ mdbx_closefile(dxb_handle);
+ if (clk_handle != INVALID_HANDLE_VALUE)
+ mdbx_closefile(clk_handle);
+ } else if (err == MDBX_ENOFILE)
+ err = MDBX_SUCCESS;
+
+ mdbx_free(env_pathname.buffer_for_free);
+ return (err == MDBX_SUCCESS) ? rc : err;
+}
+
+__cold int mdbx_env_open(MDBX_env *env, const char *pathname,
+ MDBX_env_flags_t flags, mdbx_mode_t mode) {
+ int rc = check_env(env);
+ if (unlikely(rc != MDBX_SUCCESS))
+ return rc;
+
+ if (flags & ~ENV_USABLE_FLAGS)
+ return MDBX_EINVAL;
+
+ if (env->me_lazy_fd != INVALID_HANDLE_VALUE ||
+ (env->me_flags & MDBX_ENV_ACTIVE) != 0)
+ return MDBX_EPERM;
+
+ /* pickup previously mdbx_env_set_flags(),
+ * but avoid MDBX_UTTERLY_NOSYNC by disjunction */
+ const uint32_t saved_me_flags = env->me_flags;
+ flags = merge_sync_flags(flags, env->me_flags);
+
+ MDBX_handle_env_pathname env_pathname;
+ rc = mdbx_handle_env_pathname(&env_pathname, pathname, &flags, mode);
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto bailout;
+
if (flags & MDBX_RDONLY) {
/* LY: silently ignore irrelevant flags when
* we're only getting read access */
@@ -13524,38 +13782,39 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
rc = MDBX_ENOMEM;
}
- const uint32_t saved_me_flags = env->me_flags;
env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE;
- if (rc)
+ if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
- env->me_path = mdbx_strdup(pathname);
+ env->me_pathname = mdbx_calloc(env_pathname.ent_len + 1, 1);
env->me_dbxs = mdbx_calloc(env->me_maxdbs, sizeof(MDBX_dbx));
env->me_dbflags = mdbx_calloc(env->me_maxdbs, sizeof(env->me_dbflags[0]));
env->me_dbiseqs = mdbx_calloc(env->me_maxdbs, sizeof(env->me_dbiseqs[0]));
- if (!(env->me_dbxs && env->me_path && env->me_dbflags && env->me_dbiseqs)) {
+ if (!(env->me_dbxs && env->me_pathname && env->me_dbflags &&
+ env->me_dbiseqs)) {
rc = MDBX_ENOMEM;
goto bailout;
}
+ memcpy(env->me_pathname, env_pathname.dxb, env_pathname.ent_len);
env->me_dbxs[FREE_DBI].md_cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */
env->me_dbxs[FREE_DBI].md_dcmp = cmp_lenfast;
rc = mdbx_openfile(F_ISSET(flags, MDBX_RDONLY) ? MDBX_OPEN_DXB_READ
: MDBX_OPEN_DXB_LAZY,
- env, dxb_pathname, &env->me_lazy_fd, mode);
+ env, env_pathname.dxb, &env->me_lazy_fd, mode);
if (rc != MDBX_SUCCESS)
goto bailout;
mdbx_assert(env, env->me_dsync_fd == INVALID_HANDLE_VALUE);
if ((flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC)) == 0) {
- rc = mdbx_openfile(MDBX_OPEN_DXB_DSYNC, env, dxb_pathname,
+ rc = mdbx_openfile(MDBX_OPEN_DXB_DSYNC, env, env_pathname.dxb,
&env->me_dsync_fd, 0);
mdbx_ensure(env, (rc != MDBX_SUCCESS) ==
(env->me_dsync_fd == INVALID_HANDLE_VALUE));
}
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
- env->me_sysv_ipc.key = ftok(dxb_pathname, 42);
+ env->me_sysv_ipc.key = ftok(env_pathname.dxb, 42);
if (env->me_sysv_ipc.key == -1) {
rc = errno;
goto bailout;
@@ -13565,6 +13824,7 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
#if !(defined(_WIN32) || defined(_WIN64))
if (mode == 0) {
/* pickup mode for lck-file */
+ struct stat st;
if (fstat(env->me_lazy_fd, &st)) {
rc = errno;
goto bailout;
@@ -13577,12 +13837,19 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
((mode & S_IRGRP) ? /* +write if readable by group */ S_IWGRP : 0) |
((mode & S_IROTH) ? /* +write if readable by others */ S_IWOTH : 0);
#endif /* !Windows */
- const int lck_rc = mdbx_setup_lck(env, lck_pathname, mode);
+ const int lck_rc = mdbx_setup_lck(env, env_pathname.lck, mode);
if (MDBX_IS_ERROR(lck_rc)) {
rc = lck_rc;
goto bailout;
}
+ /* Set the position in files outside of the data to avoid corruption
+ * due to erroneous use of file descriptors in the application code. */
+ mdbx_fseek(env->me_lfd, UINT64_C(1) << 63);
+ mdbx_fseek(env->me_lazy_fd, UINT64_C(1) << 63);
+ if (env->me_dsync_fd != INVALID_HANDLE_VALUE)
+ mdbx_fseek(env->me_dsync_fd, UINT64_C(1) << 63);
+
const MDBX_env_flags_t rigorous_flags =
MDBX_SAFE_NOSYNC | MDBX_DEPRECATED_MAPASYNC;
const MDBX_env_flags_t mode_flags = rigorous_flags | MDBX_NOMETASYNC |
@@ -13667,8 +13934,8 @@ __cold int mdbx_env_open(MDBX_env *env, const char *pathname,
MDBX_txn *txn = mdbx_calloc(1, size);
if (txn) {
txn->mt_dbs = (MDBX_db *)((char *)txn + tsize);
- txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
- txn->mt_dbiseqs = (unsigned *)(txn->mt_cursors + env->me_maxdbs);
+ txn->tw.cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
+ txn->mt_dbiseqs = (unsigned *)(txn->tw.cursors + env->me_maxdbs);
txn->mt_dbistate = (uint8_t *)(txn->mt_dbiseqs + env->me_maxdbs);
txn->mt_env = env;
txn->mt_dbxs = env->me_dbxs;
@@ -13712,12 +13979,12 @@ bailout:
mdbx_txn_valgrind(env, nullptr);
#endif
}
- mdbx_free(lck_pathname);
+ mdbx_free(env_pathname.buffer_for_free);
return rc;
}
/* Destroy resources from mdbx_env_open(), clear our readers & DBIs */
-static int __cold mdbx_env_close0(MDBX_env *env) {
+static __cold int mdbx_env_close0(MDBX_env *env) {
env->me_stuck_meta = -1;
if (!(env->me_flags & MDBX_ENV_ACTIVE)) {
mdbx_ensure(env, env->me_lcklist_next == nullptr);
@@ -13773,7 +14040,7 @@ static int __cold mdbx_env_close0(MDBX_env *env) {
mdbx_memalign_free(env->me_pbuf);
mdbx_free(env->me_dbiseqs);
mdbx_free(env->me_dbflags);
- mdbx_free(env->me_path);
+ mdbx_free(env->me_pathname);
mdbx_free(env->me_dirtylist);
if (env->me_txn0) {
mdbx_txl_free(env->me_txn0->tw.lifo_reclaimed);
@@ -13786,7 +14053,7 @@ static int __cold mdbx_env_close0(MDBX_env *env) {
return rc;
}
-int __cold mdbx_env_close_ex(MDBX_env *env, bool dont_sync) {
+__cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) {
MDBX_page *dp;
int rc = MDBX_SUCCESS;
@@ -13866,7 +14133,7 @@ int __cold mdbx_env_close_ex(MDBX_env *env, bool dont_sync) {
}
__cold int mdbx_env_close(MDBX_env *env) {
- return mdbx_env_close_ex(env, false);
+ return __inline_mdbx_env_close(env);
}
/* Compare two items pointing at aligned unsigned int's. */
@@ -13903,7 +14170,7 @@ static int __hot cmp_int_align2(const MDBX_val *a, const MDBX_val *b) {
}
}
-/* Compare two items pointing at unsigneds of unknown alignment.
+/* Compare two items pointing at unsigned values with unknown alignment.
*
* This is also set as MDBX_INTEGERDUP|MDBX_DUPFIXED's MDBX_dbx.md_dcmp. */
static int __hot cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b) {
@@ -14059,7 +14326,7 @@ static MDBX_node *__hot mdbx_node_search(MDBX_cursor *mc, const MDBX_val *key,
static void mdbx_cursor_adjust(MDBX_cursor *mc, func) {
MDBX_cursor *m2;
- for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) {
+ for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) {
if (m2->mc_pg[m2->mc_top] == mc->mc_pg[mc->mc_top]) {
func(mc, m2);
}
@@ -14068,13 +14335,11 @@ static void mdbx_cursor_adjust(MDBX_cursor *mc, func) {
#endif
/* Pop a page off the top of the cursor's stack. */
-static void mdbx_cursor_pop(MDBX_cursor *mc) {
+static __inline void mdbx_cursor_pop(MDBX_cursor *mc) {
if (mc->mc_snum) {
mdbx_debug("popped page %" PRIaPGNO " off db %d cursor %p",
mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *)mc);
-
- mc->mc_snum--;
- if (mc->mc_snum) {
+ if (--mc->mc_snum) {
mc->mc_top--;
} else {
mc->mc_flags &= ~C_INITIALIZED;
@@ -14084,7 +14349,7 @@ static void mdbx_cursor_pop(MDBX_cursor *mc) {
/* Push a page onto the top of the cursor's stack.
* Set MDBX_TXN_ERROR on failure. */
-static int mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp) {
+static __inline int mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp) {
mdbx_debug("pushing page %" PRIaPGNO " on db %d cursor %p", mp->mp_pgno,
DDBI(mc), (void *)mc);
@@ -14113,7 +14378,7 @@ static int mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp) {
*
* Returns 0 on success, non-zero on failure. */
__hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
- int *lvl, const txnid_t pp_txnid) {
+ int *lvl, txnid_t pp_txnid) {
MDBX_txn *txn = mc->mc_txn;
if (unlikely(pgno >= txn->mt_next_pgno)) {
mdbx_error("page #%" PRIaPGNO " beyond next-pgno", pgno);
@@ -14138,8 +14403,11 @@ __hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
* because the dirty list got full. Bring this page
* back in from the map (but don't unspill it here,
* leave that unless page_touch happens again). */
- if (txn->tw.spill_pages && mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1))
+ if (txn->tw.spill_pages &&
+ mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1)) {
+ pp_txnid = txn->mt_txnid;
goto spilled;
+ }
p = mdbx_dpl_find(txn->tw.dirtylist, pgno);
if (p)
goto dirty;
@@ -14495,9 +14763,6 @@ int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) {
int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
MDBX_val *data) {
- DKBUF;
- mdbx_debug("===> get db %u key [%s]", dbi, DKEY(key));
-
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -14516,21 +14781,7 @@ int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- MDBX_val save_data = *data;
- int exact = 0;
- rc = mdbx_cursor_set(&cx.outer, key, data, MDBX_SET_RANGE, &exact);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
-
- if (exact && (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) != 0) {
- *data = save_data;
- exact = 0;
- rc = mdbx_cursor_set(&cx.outer, key, data, MDBX_GET_BOTH_RANGE, &exact);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
- }
-
- return exact ? MDBX_SUCCESS : MDBX_RESULT_TRUE;
+ return mdbx_cursor_get(&cx.outer, key, data, MDBX_SET_LOWERBOUND);
}
int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
@@ -14584,15 +14835,15 @@ int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
* Replaces the page at the top of the cursor's stack with the specified
* sibling, if one exists.
*
- * [in] mc The cursor for this operation.
- * [in] move_right Non-zero if the right sibling is requested,
- * otherwise the left sibling.
+ * [in] mc The cursor for this operation.
+ * [in] dir SIBLING_LEFT or SIBLING_RIGHT.
*
* Returns 0 on success, non-zero on failure. */
-static int mdbx_cursor_sibling(MDBX_cursor *mc, int move_right) {
+static int mdbx_cursor_sibling(MDBX_cursor *mc, int dir) {
int rc;
- MDBX_node *indx;
+ MDBX_node *node;
MDBX_page *mp;
+ assert(dir == SIBLING_LEFT || dir == SIBLING_RIGHT);
if (unlikely(mc->mc_snum < 2))
return MDBX_NOTFOUND; /* root has no siblings */
@@ -14601,29 +14852,28 @@ static int mdbx_cursor_sibling(MDBX_cursor *mc, int move_right) {
mdbx_debug("parent page is page %" PRIaPGNO ", index %u",
mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]);
- if (move_right
+ if ((dir == SIBLING_RIGHT)
? (mc->mc_ki[mc->mc_top] + 1u >= page_numkeys(mc->mc_pg[mc->mc_top]))
: (mc->mc_ki[mc->mc_top] == 0)) {
- mdbx_debug("no more keys left, moving to %s sibling",
- move_right ? "right" : "left");
- if (unlikely((rc = mdbx_cursor_sibling(mc, move_right)) != MDBX_SUCCESS)) {
+ mdbx_debug("no more keys aside, moving to next %s sibling",
+ dir ? "right" : "left");
+ if (unlikely((rc = mdbx_cursor_sibling(mc, dir)) != MDBX_SUCCESS)) {
/* undo cursor_pop before returning */
mc->mc_top++;
mc->mc_snum++;
return rc;
}
} else {
- if (move_right)
- mc->mc_ki[mc->mc_top]++;
- else
- mc->mc_ki[mc->mc_top]--;
- mdbx_debug("just moving to %s index key %u", move_right ? "right" : "left",
+ assert((dir - 1) == -1 || (dir - 1) == 1);
+ mc->mc_ki[mc->mc_top] += dir - 1;
+ mdbx_debug("just moving to %s index key %u",
+ (dir == SIBLING_RIGHT) ? "right" : "left",
mc->mc_ki[mc->mc_top]);
}
mdbx_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top]));
- indx = page_node(mp = mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
- if (unlikely((rc = mdbx_page_get(mc, node_pgno(indx), &mp, NULL,
+ node = page_node(mp = mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
+ if (unlikely((rc = mdbx_page_get(mc, node_pgno(node), &mp, NULL,
pp_txnid4chk(mp, mc->mc_txn))) != 0)) {
/* mc will be inconsistent if caller does mc_snum++ as above */
mc->mc_flags &= ~(C_INITIALIZED | C_EOF);
@@ -14633,9 +14883,9 @@ static int mdbx_cursor_sibling(MDBX_cursor *mc, int move_right) {
rc = mdbx_cursor_push(mc, mp);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (!move_right)
- mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mp) - 1;
+ mc->mc_ki[mc->mc_top] =
+ (indx_t)((dir == SIBLING_LEFT) ? page_numkeys(mp) - 1 : 0);
return MDBX_SUCCESS;
}
@@ -14685,17 +14935,21 @@ static int mdbx_cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
goto skip;
}
- if (mc->mc_ki[mc->mc_top] + 1u >= page_numkeys(mp)) {
+ int ki = mc->mc_ki[mc->mc_top];
+ mc->mc_ki[mc->mc_top] = (indx_t)++ki;
+ const int numkeys = page_numkeys(mp);
+ if (unlikely(ki >= numkeys)) {
mdbx_debug("%s", "=====> move to next sibling page");
- if (unlikely((rc = mdbx_cursor_sibling(mc, 1)) != MDBX_SUCCESS)) {
+ mc->mc_ki[mc->mc_top] = numkeys - 1;
+ if (unlikely((rc = mdbx_cursor_sibling(mc, SIBLING_RIGHT)) !=
+ MDBX_SUCCESS)) {
mc->mc_flags |= C_EOF;
return rc;
}
mp = mc->mc_pg[mc->mc_top];
mdbx_debug("next page is %" PRIaPGNO ", key index %u", mp->mp_pgno,
mc->mc_ki[mc->mc_top]);
- } else
- mc->mc_ki[mc->mc_top]++;
+ }
skip:
mdbx_debug("==> cursor points to page %" PRIaPGNO
@@ -14718,18 +14972,14 @@ skip:
rc = mdbx_xcursor_init1(mc, node, mp);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- }
- if (data) {
+ rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
+ if (unlikely(rc != MDBX_SUCCESS))
+ return rc;
+ } else if (likely(data)) {
if (unlikely((rc = mdbx_node_read(mc, node, data,
pp_txnid4chk(mp, mc->mc_txn))) !=
MDBX_SUCCESS))
return rc;
-
- if (F_ISSET(node_flags(node), F_DUPDATA)) {
- rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
- }
}
get_key_optional(node, key);
@@ -14781,22 +15031,24 @@ static int mdbx_cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
mc->mc_flags &= ~(C_EOF | C_DEL);
- if (mc->mc_ki[mc->mc_top] == 0) {
+ int ki = mc->mc_ki[mc->mc_top];
+ mc->mc_ki[mc->mc_top] = (indx_t)--ki;
+ if (unlikely(ki < 0)) {
+ mc->mc_ki[mc->mc_top] = 0;
mdbx_debug("%s", "=====> move to prev sibling page");
- if ((rc = mdbx_cursor_sibling(mc, 0)) != MDBX_SUCCESS) {
+ if ((rc = mdbx_cursor_sibling(mc, SIBLING_LEFT)) != MDBX_SUCCESS)
return rc;
- }
mp = mc->mc_pg[mc->mc_top];
- mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mp) - 1;
mdbx_debug("prev page is %" PRIaPGNO ", key index %u", mp->mp_pgno,
mc->mc_ki[mc->mc_top]);
- } else
- mc->mc_ki[mc->mc_top]--;
-
+ }
mdbx_debug("==> cursor points to page %" PRIaPGNO
" with %u keys, key index %u",
mp->mp_pgno, page_numkeys(mp), mc->mc_ki[mc->mc_top]);
+ if (unlikely(!IS_LEAF(mp)))
+ return MDBX_CORRUPTED;
+
if (IS_LEAF2(mp)) {
if (likely(key)) {
key->iov_len = mc->mc_db->md_xsize;
@@ -14805,25 +15057,20 @@ static int mdbx_cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
return MDBX_SUCCESS;
}
- mdbx_cassert(mc, IS_LEAF(mp));
node = page_node(mp, mc->mc_ki[mc->mc_top]);
if (F_ISSET(node_flags(node), F_DUPDATA)) {
rc = mdbx_xcursor_init1(mc, node, mp);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- }
- if (data) {
+ rc = mdbx_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
+ if (unlikely(rc != MDBX_SUCCESS))
+ return rc;
+ } else if (likely(data)) {
if (unlikely((rc = mdbx_node_read(mc, node, data,
pp_txnid4chk(mp, mc->mc_txn))) !=
MDBX_SUCCESS))
return rc;
-
- if (F_ISSET(node_flags(node), F_DUPDATA)) {
- rc = mdbx_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
- }
}
get_key_optional(node, key);
@@ -14878,8 +15125,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
mp = mc->mc_pg[mc->mc_top];
- if (!page_numkeys(mp)) {
+ if (unlikely(!page_numkeys(mp))) {
mc->mc_ki[mc->mc_top] = 0;
+ mc->mc_flags |= C_EOF;
return MDBX_NOTFOUND;
}
if (IS_LEAF2(mp)) {
@@ -14895,6 +15143,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
* was the one we wanted. */
mc->mc_ki[mc->mc_top] = 0;
*exactp = 1;
+ mdbx_cassert(mc, mc->mc_ki[mc->mc_top] <
+ page_numkeys(mc->mc_pg[mc->mc_top]) ||
+ (mc->mc_flags & C_EOF));
goto set1;
}
if (rc > 0) {
@@ -14913,6 +15164,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
mdbx_cassert(mc, nkeys >= 1 && nkeys <= UINT16_MAX + 1);
mc->mc_ki[mc->mc_top] = (indx_t)(nkeys - 1);
*exactp = 1;
+ mdbx_cassert(mc, mc->mc_ki[mc->mc_top] <
+ page_numkeys(mc->mc_pg[mc->mc_top]) ||
+ (mc->mc_flags & C_EOF));
goto set1;
}
if (rc < 0) {
@@ -14929,6 +15183,9 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
if (rc == 0) {
/* current node was the one we wanted */
*exactp = 1;
+ mdbx_cassert(mc, mc->mc_ki[mc->mc_top] <
+ page_numkeys(mc->mc_pg[mc->mc_top]) ||
+ (mc->mc_flags & C_EOF));
goto set1;
}
}
@@ -14946,17 +15203,22 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
/* There are no other pages */
mdbx_cassert(mc, nkeys <= UINT16_MAX);
mc->mc_ki[mc->mc_top] = (uint16_t)nkeys;
+ mc->mc_flags |= C_EOF;
return MDBX_NOTFOUND;
}
}
if (!mc->mc_top) {
/* There are no other pages */
mc->mc_ki[mc->mc_top] = 0;
- if (op == MDBX_SET_RANGE && exactp == &stub_exactp) {
+ if (op == MDBX_SET_RANGE) {
rc = 0;
goto set1;
- } else
+ } else {
+ mdbx_cassert(mc, mc->mc_ki[mc->mc_top] <
+ page_numkeys(mc->mc_pg[mc->mc_top]) ||
+ (mc->mc_flags & C_EOF));
return MDBX_NOTFOUND;
+ }
}
} else {
mc->mc_pg[0] = 0;
@@ -14971,21 +15233,28 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
set2:
node = mdbx_node_search(mc, &aligned_key, exactp);
- if (exactp != &stub_exactp && !*exactp) {
+ if (!*exactp && op != MDBX_SET_RANGE) {
/* MDBX_SET specified and not an exact match. */
+ if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top])))
+ mc->mc_flags |= C_EOF;
return MDBX_NOTFOUND;
}
if (node == NULL) {
mdbx_debug("%s", "===> inexact leaf not found, goto sibling");
- if (unlikely((rc = mdbx_cursor_sibling(mc, 1)) != MDBX_SUCCESS)) {
+ if (unlikely((rc = mdbx_cursor_sibling(mc, SIBLING_RIGHT)) !=
+ MDBX_SUCCESS)) {
mc->mc_flags |= C_EOF;
return rc; /* no entries matched */
}
mp = mc->mc_pg[mc->mc_top];
mdbx_cassert(mc, IS_LEAF(mp));
- node = page_node(mp, 0);
+ if (!IS_LEAF2(mp))
+ node = page_node(mp, 0);
}
+ mdbx_cassert(mc,
+ mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) ||
+ (mc->mc_flags & C_EOF));
set1:
mc->mc_flags |= C_INITIALIZED;
@@ -15003,19 +15272,16 @@ set1:
rc = mdbx_xcursor_init1(mc, node, mp);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- }
- if (likely(data)) {
- if (F_ISSET(node_flags(node), F_DUPDATA)) {
- if (op == MDBX_SET || op == MDBX_SET_KEY || op == MDBX_SET_RANGE) {
- rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
- } else {
- int ex2 = 0, *ex2p = (op == MDBX_GET_BOTH) ? &ex2 : NULL;
- rc = mdbx_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL,
- MDBX_SET_RANGE, ex2p);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
- }
- } else if (op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE) {
+ if (op == MDBX_SET || op == MDBX_SET_KEY || op == MDBX_SET_RANGE) {
+ rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
+ } else {
+ rc = mdbx_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL,
+ MDBX_SET_RANGE, NULL);
+ }
+ if (unlikely(rc != MDBX_SUCCESS))
+ return rc;
+ } else if (likely(data)) {
+ if (op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE) {
if (unlikely(data->iov_len < mc->mc_dbx->md_vlen_min ||
data->iov_len > mc->mc_dbx->md_vlen_max)) {
mdbx_cassert(mc, !"Invalid data-size");
@@ -15050,20 +15316,20 @@ set1:
return rc;
rc = mc->mc_dbx->md_dcmp(&aligned_data, &olddata);
if (rc) {
+ mdbx_cassert(mc, mc->mc_ki[mc->mc_top] <
+ page_numkeys(mc->mc_pg[mc->mc_top]) ||
+ (mc->mc_flags & C_EOF));
if (op != MDBX_GET_BOTH_RANGE || rc > 0)
return MDBX_NOTFOUND;
+ *exactp = 0;
rc = 0;
}
*data = olddata;
- } else {
- if (mc->mc_xcursor)
- mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF);
- if (unlikely((rc = mdbx_node_read(
- mc, node, data,
- pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
- MDBX_SUCCESS))
- return rc;
- }
+ } else if (unlikely((rc = mdbx_node_read(mc, node, data,
+ pp_txnid4chk(mc->mc_pg[mc->mc_top],
+ mc->mc_txn))) !=
+ MDBX_SUCCESS))
+ return rc;
}
/* The key already matches in all other cases */
@@ -15095,28 +15361,29 @@ static int mdbx_cursor_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) {
mc->mc_ki[mc->mc_top] = 0;
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
- key->iov_len = mc->mc_db->md_xsize;
- key->iov_base = page_leaf2key(mc->mc_pg[mc->mc_top], 0, key->iov_len);
+ if (likely(key)) {
+ key->iov_len = mc->mc_db->md_xsize;
+ key->iov_base = page_leaf2key(mc->mc_pg[mc->mc_top], 0, key->iov_len);
+ }
return MDBX_SUCCESS;
}
MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], 0);
- if (likely(data)) {
- if (F_ISSET(node_flags(node), F_DUPDATA)) {
- rc = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
- rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
- if (unlikely(rc))
- return rc;
- } else {
- if (unlikely((rc = mdbx_node_read(
- mc, node, data,
- pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
- MDBX_SUCCESS))
- return rc;
- }
+ if (F_ISSET(node_flags(node), F_DUPDATA)) {
+ rc = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
+ if (unlikely(rc != MDBX_SUCCESS))
+ return rc;
+ rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
+ if (unlikely(rc))
+ return rc;
+ } else if (likely(data)) {
+ if (unlikely((rc = mdbx_node_read(
+ mc, node, data,
+ pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
+ MDBX_SUCCESS))
+ return rc;
}
+
get_key_optional(node, key);
return MDBX_SUCCESS;
}
@@ -15128,12 +15395,10 @@ static int mdbx_cursor_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) {
if (mc->mc_xcursor)
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF);
- if (likely((mc->mc_flags & (C_EOF | C_DEL)) != C_EOF)) {
- if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
- rc = mdbx_page_search(mc, NULL, MDBX_PS_LAST);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
- }
+ if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
+ rc = mdbx_page_search(mc, NULL, MDBX_PS_LAST);
+ if (unlikely(rc != MDBX_SUCCESS))
+ return rc;
}
if (unlikely(!IS_LEAF(mc->mc_pg[mc->mc_top])))
@@ -15143,28 +15408,28 @@ static int mdbx_cursor_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) {
mc->mc_flags |= C_INITIALIZED | C_EOF;
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
- key->iov_len = mc->mc_db->md_xsize;
- key->iov_base = page_leaf2key(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top],
- key->iov_len);
+ if (likely(key)) {
+ key->iov_len = mc->mc_db->md_xsize;
+ key->iov_base = page_leaf2key(mc->mc_pg[mc->mc_top],
+ mc->mc_ki[mc->mc_top], key->iov_len);
+ }
return MDBX_SUCCESS;
}
MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
- if (likely(data)) {
- if (F_ISSET(node_flags(node), F_DUPDATA)) {
- rc = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
- if (unlikely(rc != MDBX_SUCCESS))
- return rc;
- rc = mdbx_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
- if (unlikely(rc))
- return rc;
- } else {
- if (unlikely((rc = mdbx_node_read(
- mc, node, data,
- pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
- MDBX_SUCCESS))
- return rc;
- }
+ if (F_ISSET(node_flags(node), F_DUPDATA)) {
+ rc = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
+ if (unlikely(rc != MDBX_SUCCESS))
+ return rc;
+ rc = mdbx_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
+ if (unlikely(rc))
+ return rc;
+ } else if (likely(data)) {
+ if (unlikely((rc = mdbx_node_read(
+ mc, node, data,
+ pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
+ MDBX_SUCCESS))
+ return rc;
}
get_key_optional(node, key);
@@ -15177,7 +15442,8 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
+ return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
@@ -15188,12 +15454,13 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
switch (op) {
case MDBX_GET_CURRENT: {
if (unlikely(!(mc->mc_flags & C_INITIALIZED)))
- return MDBX_EINVAL;
+ return MDBX_ENODATA;
MDBX_page *mp = mc->mc_pg[mc->mc_top];
const unsigned nkeys = page_numkeys(mp);
if (mc->mc_ki[mc->mc_top] >= nkeys) {
mdbx_cassert(mc, nkeys <= UINT16_MAX);
mc->mc_ki[mc->mc_top] = (uint16_t)nkeys;
+ mc->mc_flags |= C_EOF;
return MDBX_NOTFOUND;
}
mdbx_cassert(mc, nkeys > 0);
@@ -15214,14 +15481,17 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
if (unlikely(rc))
return rc;
+ } else {
+ rc = mdbx_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL,
+ MDBX_GET_CURRENT);
+ if (unlikely(rc))
+ return rc;
}
- rc = mdbx_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL,
- MDBX_GET_CURRENT);
} else {
rc = mdbx_node_read(mc, node, data, pp_txnid4chk(mp, mc->mc_txn));
+ if (unlikely(rc))
+ return rc;
}
- if (unlikely(rc))
- return rc;
}
}
break;
@@ -15239,8 +15509,13 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
case MDBX_SET_RANGE:
if (unlikely(key == NULL))
return MDBX_EINVAL;
- rc = mdbx_cursor_set(mc, key, data, op,
- op == MDBX_SET_RANGE ? NULL : &exact);
+ rc = mdbx_cursor_set(mc, key, data, op, &exact);
+ if (mc->mc_flags & C_INITIALIZED) {
+ mdbx_cassert(mc, mc->mc_snum > 0 && mc->mc_top < mc->mc_snum);
+ mdbx_cassert(mc, mc->mc_ki[mc->mc_top] <
+ page_numkeys(mc->mc_pg[mc->mc_top]) ||
+ (mc->mc_flags & C_EOF));
+ }
break;
case MDBX_GET_MULTIPLE:
if (unlikely(data == NULL || !(mc->mc_flags & C_INITIALIZED)))
@@ -15248,8 +15523,8 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
if (unlikely(!(mc->mc_db->md_flags & MDBX_DUPFIXED)))
return MDBX_INCOMPATIBLE;
rc = MDBX_SUCCESS;
- if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) ||
- (mc->mc_xcursor->mx_cursor.mc_flags & C_EOF))
+ if ((mc->mc_xcursor->mx_cursor.mc_flags & (C_INITIALIZED | C_EOF)) !=
+ C_INITIALIZED)
break;
goto fetchm;
case MDBX_NEXT_MULTIPLE:
@@ -15283,7 +15558,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
if (rc == MDBX_SUCCESS) {
MDBX_cursor *mx = &mc->mc_xcursor->mx_cursor;
if (mx->mc_flags & C_INITIALIZED) {
- rc = mdbx_cursor_sibling(mx, 0);
+ rc = mdbx_cursor_sibling(mx, SIBLING_LEFT);
if (rc == MDBX_SUCCESS)
goto fetchm;
} else {
@@ -15313,6 +15588,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
return MDBX_INCOMPATIBLE;
if (mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top])) {
mc->mc_ki[mc->mc_top] = (indx_t)page_numkeys(mc->mc_pg[mc->mc_top]);
+ mc->mc_flags |= C_EOF;
return MDBX_NOTFOUND;
}
{
@@ -15334,6 +15610,33 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
case MDBX_LAST_DUP:
mfunc = mdbx_cursor_last;
goto mmove;
+ case MDBX_SET_LOWERBOUND: {
+ if (unlikely(key == NULL || data == NULL))
+ return MDBX_EINVAL;
+ MDBX_val save_data = *data;
+ rc = mdbx_cursor_set(mc, key, data, MDBX_SET_RANGE, &exact);
+ if (rc == MDBX_SUCCESS && exact && mc->mc_xcursor) {
+ mc->mc_flags &= ~C_DEL;
+ if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
+ *data = save_data;
+ exact = 0;
+ rc = mdbx_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL,
+ MDBX_SET_RANGE, &exact);
+ if (rc == MDBX_NOTFOUND) {
+ mdbx_cassert(mc, !exact);
+ rc = mdbx_cursor_next(mc, key, data, MDBX_NEXT_NODUP);
+ }
+ } else {
+ int cmp = mc->mc_dbx->md_dcmp(&save_data, data);
+ exact = (cmp == 0);
+ if (cmp > 0)
+ rc = mdbx_cursor_next(mc, key, data, MDBX_NEXT_NODUP);
+ }
+ }
+ if (rc == MDBX_SUCCESS && !exact)
+ rc = MDBX_RESULT_TRUE;
+ break;
+ }
default:
mdbx_debug("unhandled/unimplemented cursor operation %u", op);
return MDBX_EINVAL;
@@ -15363,6 +15666,7 @@ static int mdbx_cursor_touch(MDBX_cursor *mc) {
if (unlikely(rc))
return rc;
*mc->mc_dbistate |= DBI_DIRTY;
+ mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY;
}
mc->mc_top = 0;
if (mc->mc_snum) {
@@ -15387,12 +15691,14 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
+ return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
+ mdbx_cassert(mc, cursor_is_tracked(mc));
env = mc->mc_txn->mt_env;
/* Check this first so counter will always be zero on any early failures. */
@@ -15570,7 +15876,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
mc->mc_ki[mc->mc_top]++; /* step forward for appending */
rc = MDBX_NOTFOUND;
} else {
- if (unlikely(rc != 0 || !(flags & MDBX_APPENDDUP)))
+ if (unlikely(rc != MDBX_SUCCESS || !(flags & MDBX_APPENDDUP)))
/* new-key < last-key
* or new-key == last-key without MDBX_APPENDDUP */
return MDBX_EKEYMISMATCH;
@@ -15667,6 +15973,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
data->iov_len);
}
*mc->mc_dbistate |= DBI_DIRTY;
+ mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY;
if ((mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_DUPFIXED)) == MDBX_DUPFIXED)
np->mp_flags |= P_LEAF2;
mc->mc_flags |= C_INITIALIZED;
@@ -16050,18 +16357,17 @@ new_sub:;
rc = mdbx_node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, rdata, nflags);
if (likely(rc == 0)) {
/* Adjust other cursors pointing to mp */
- MDBX_cursor *m2, *m3;
- MDBX_dbi dbi = mc->mc_dbi;
- unsigned i = mc->mc_top;
- MDBX_page *mp = mc->mc_pg[i];
-
- for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
- m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
+ const MDBX_dbi dbi = mc->mc_dbi;
+ const unsigned i = mc->mc_top;
+ MDBX_page *const mp = mc->mc_pg[i];
+ for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2;
+ m2 = m2->mc_next) {
+ MDBX_cursor *m3 =
+ (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[i] != mp)
continue;
- if (m3->mc_ki[i] >= mc->mc_ki[i] && insert_key) {
- m3->mc_ki[i]++;
- }
+ if (m3->mc_ki[i] >= mc->mc_ki[i])
+ m3->mc_ki[i] += insert_key;
if (XCURSOR_INITED(m3))
XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]);
}
@@ -16111,7 +16417,7 @@ new_sub:;
MDBX_page *mp = mc->mc_pg[i];
const int nkeys = page_numkeys(mp);
- for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) {
+ for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) {
if (m2 == mc || m2->mc_snum < mc->mc_snum)
continue;
if (!(m2->mc_flags & C_INITIALIZED))
@@ -16184,7 +16490,8 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
+ return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
int rc = check_txn_rw(mc->mc_txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
@@ -16205,6 +16512,8 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
return rc;
MDBX_page *mp = mc->mc_pg[mc->mc_top];
+ if (unlikely(!IS_LEAF(mp)))
+ return MDBX_CORRUPTED;
if (IS_LEAF2(mp))
goto del_key;
@@ -16215,9 +16524,8 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
mc->mc_db->md_entries -= mc->mc_xcursor->mx_db.md_entries - 1;
mc->mc_xcursor->mx_cursor.mc_flags &= ~C_INITIALIZED;
} else {
- if (!F_ISSET(node_flags(node), F_SUBDATA)) {
+ if (!F_ISSET(node_flags(node), F_SUBDATA))
mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
- }
rc = mdbx_cursor_del(&mc->mc_xcursor->mx_cursor, MDBX_NOSPILL);
if (unlikely(rc))
return rc;
@@ -16235,7 +16543,7 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
node = page_node(mp, mc->mc_ki[mc->mc_top]);
mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
/* fix other sub-DB cursors pointed at fake pages on this page */
- for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) {
+ for (m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) {
if (m2 == mc || m2->mc_snum < mc->mc_snum)
continue;
if (!(m2->mc_flags & C_INITIALIZED))
@@ -16847,7 +17155,7 @@ int mdbx_cursor_set_userctx(MDBX_cursor *mc, void *ctx) {
if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE &&
mc->mc_signature != MDBX_MC_LIVE))
- return MDBX_EINVAL;
+ return MDBX_EBADSIGN;
MDBX_cursor_couple *couple = container_of(mc, MDBX_cursor_couple, outer);
couple->mc_userctx = ctx;
@@ -16870,26 +17178,9 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) {
if (unlikely(!mc))
return MDBX_EINVAL;
- if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE)) {
- if (unlikely(mc->mc_signature != MDBX_MC_LIVE || mc->mc_backup))
- return MDBX_EINVAL;
- if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE))
- return MDBX_PROBLEM;
- if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) {
- MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi];
- while (*prev && *prev != mc)
- prev = &(*prev)->mc_next;
- if (*prev == mc)
- *prev = mc->mc_next;
- }
- mc->mc_signature = MDBX_MC_READY4CLOSE;
- mc->mc_flags = 0;
- mc->mc_dbi = UINT_MAX;
- }
-
- assert(!mc->mc_backup && !mc->mc_flags);
- if (unlikely(mc->mc_backup || mc->mc_flags))
- return MDBX_PROBLEM;
+ if (unlikely(mc->mc_signature != MDBX_MC_READY4CLOSE &&
+ mc->mc_signature != MDBX_MC_LIVE))
+ return MDBX_EBADSIGN;
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
@@ -16901,13 +17192,52 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) {
if (unlikely(dbi == FREE_DBI && !F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY)))
return MDBX_EACCESS;
+ if (unlikely(mc->mc_backup)) /* Cursor from parent transaction */ {
+ mdbx_cassert(mc, mc->mc_signature == MDBX_MC_LIVE);
+ if (unlikely(mc->mc_dbi != dbi ||
+ /* paranoia */ mc->mc_signature != MDBX_MC_LIVE ||
+ mc->mc_txn != txn))
+ return MDBX_EINVAL;
+
+ assert(mc->mc_db == &txn->mt_dbs[dbi]);
+ assert(mc->mc_dbx == &txn->mt_dbxs[dbi]);
+ assert(mc->mc_dbi == dbi);
+ assert(mc->mc_dbistate == &txn->mt_dbistate[dbi]);
+ return likely(mc->mc_dbi == dbi &&
+ /* paranoia */ mc->mc_signature == MDBX_MC_LIVE &&
+ mc->mc_txn == txn)
+ ? MDBX_SUCCESS
+ : MDBX_EINVAL /* Disallow change DBI in nested transactions */;
+ }
+
+ if (mc->mc_signature == MDBX_MC_LIVE) {
+ if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE))
+ return MDBX_PROBLEM;
+ if (mc->mc_flags & C_UNTRACK) {
+ mdbx_cassert(mc, !(mc->mc_txn->mt_flags & MDBX_TXN_RDONLY));
+ MDBX_cursor **prev = &mc->mc_txn->tw.cursors[mc->mc_dbi];
+ while (*prev && *prev != mc)
+ prev = &(*prev)->mc_next;
+ mdbx_cassert(mc, *prev == mc);
+ *prev = mc->mc_next;
+ }
+ mc->mc_signature = MDBX_MC_READY4CLOSE;
+ mc->mc_flags = 0;
+ mc->mc_dbi = UINT_MAX;
+ mc->mc_next = NULL;
+ mc->mc_db = NULL;
+ mc->mc_dbx = NULL;
+ mc->mc_dbistate = NULL;
+ }
+ mdbx_cassert(mc, !(mc->mc_flags & C_UNTRACK));
+
rc = mdbx_cursor_init(mc, txn, dbi);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (txn->mt_cursors) {
- mc->mc_next = txn->mt_cursors[dbi];
- txn->mt_cursors[dbi] = mc;
+ if (!(txn->mt_flags & MDBX_TXN_RDONLY)) {
+ mc->mc_next = txn->tw.cursors[dbi];
+ txn->tw.cursors[dbi] = mc;
mc->mc_flags |= C_UNTRACK;
}
@@ -16937,43 +17267,39 @@ int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) {
return likely(mc) ? mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL;
}
-/* Return the count of duplicate data items for the current key */
-int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) {
- if (unlikely(mc == NULL))
+int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) {
+ if (unlikely(!src))
return MDBX_EINVAL;
+ if (unlikely(src->mc_signature != MDBX_MC_LIVE))
+ return (src->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
- if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
-
- int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED);
+ int rc = mdbx_cursor_bind(src->mc_txn, dest, src->mc_dbi);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (unlikely(countp == NULL || !(mc->mc_flags & C_INITIALIZED)))
- return MDBX_EINVAL;
-
- if (!mc->mc_snum) {
- *countp = 0;
- return MDBX_NOTFOUND;
+ assert(dest->mc_db == src->mc_db);
+ assert(dest->mc_dbi == src->mc_dbi);
+ assert(dest->mc_dbx == src->mc_dbx);
+ assert(dest->mc_dbistate == src->mc_dbistate);
+again:
+ assert(dest->mc_txn == src->mc_txn);
+ dest->mc_flags ^= (dest->mc_flags ^ src->mc_flags) & ~C_UNTRACK;
+ dest->mc_top = src->mc_top;
+ dest->mc_snum = src->mc_snum;
+ for (unsigned i = 0; i < src->mc_snum; ++i) {
+ dest->mc_ki[i] = src->mc_ki[i];
+ dest->mc_pg[i] = src->mc_pg[i];
}
- MDBX_page *mp = mc->mc_pg[mc->mc_top];
- if ((mc->mc_flags & C_EOF) && mc->mc_ki[mc->mc_top] >= page_numkeys(mp)) {
- *countp = 0;
- return MDBX_NOTFOUND;
+ if (src->mc_xcursor) {
+ dest->mc_xcursor->mx_db = src->mc_xcursor->mx_db;
+ dest->mc_xcursor->mx_dbx = src->mc_xcursor->mx_dbx;
+ src = &src->mc_xcursor->mx_cursor;
+ dest = &dest->mc_xcursor->mx_cursor;
+ goto again;
}
- *countp = 1;
- if (mc->mc_xcursor != NULL) {
- MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]);
- if (F_ISSET(node_flags(node), F_DUPDATA)) {
- mdbx_cassert(mc, mc->mc_xcursor && (mc->mc_xcursor->mx_cursor.mc_flags &
- C_INITIALIZED));
- *countp = unlikely(mc->mc_xcursor->mx_db.md_entries > PTRDIFF_MAX)
- ? PTRDIFF_MAX
- : (size_t)mc->mc_xcursor->mx_db.md_entries;
- }
- }
return MDBX_SUCCESS;
}
@@ -16985,17 +17311,19 @@ void mdbx_cursor_close(MDBX_cursor *mc) {
/* Remove from txn, if tracked.
* A read-only txn (!C_UNTRACK) may have been freed already,
* so do not peek inside it. Only write txns track cursors. */
- if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) {
- MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi];
+ if (mc->mc_flags & C_UNTRACK) {
+ mdbx_cassert(mc, !(mc->mc_txn->mt_flags & MDBX_TXN_RDONLY));
+ MDBX_cursor **prev = &mc->mc_txn->tw.cursors[mc->mc_dbi];
while (*prev && *prev != mc)
prev = &(*prev)->mc_next;
- if (*prev == mc)
- *prev = mc->mc_next;
+ mdbx_cassert(mc, *prev == mc);
+ *prev = mc->mc_next;
}
mc->mc_signature = 0;
+ mc->mc_next = mc;
mdbx_free(mc);
} else {
- /* cursor closed before nested txn ends */
+ /* Cursor closed before nested txn ends */
mdbx_cassert(mc, mc->mc_signature == MDBX_MC_LIVE);
mc->mc_signature = MDBX_MC_WAIT4EOT;
}
@@ -17019,6 +17347,47 @@ MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *mc) {
return mc->mc_dbi;
}
+/* Return the count of duplicate data items for the current key */
+int mdbx_cursor_count(const MDBX_cursor *mc, size_t *countp) {
+ if (unlikely(mc == NULL))
+ return MDBX_EINVAL;
+
+ if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
+ return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
+
+ int rc = check_txn(mc->mc_txn, MDBX_TXN_BLOCKED);
+ if (unlikely(rc != MDBX_SUCCESS))
+ return rc;
+
+ if (unlikely(countp == NULL || !(mc->mc_flags & C_INITIALIZED)))
+ return MDBX_EINVAL;
+
+ if (!mc->mc_snum) {
+ *countp = 0;
+ return MDBX_NOTFOUND;
+ }
+
+ MDBX_page *mp = mc->mc_pg[mc->mc_top];
+ if ((mc->mc_flags & C_EOF) && mc->mc_ki[mc->mc_top] >= page_numkeys(mp)) {
+ *countp = 0;
+ return MDBX_NOTFOUND;
+ }
+
+ *countp = 1;
+ if (mc->mc_xcursor != NULL) {
+ MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]);
+ if (F_ISSET(node_flags(node), F_DUPDATA)) {
+ mdbx_cassert(mc, mc->mc_xcursor && (mc->mc_xcursor->mx_cursor.mc_flags &
+ C_INITIALIZED));
+ *countp = unlikely(mc->mc_xcursor->mx_db.md_entries > PTRDIFF_MAX)
+ ? PTRDIFF_MAX
+ : (size_t)mc->mc_xcursor->mx_db.md_entries;
+ }
+ }
+ return MDBX_SUCCESS;
+}
+
/* Replace the key for a branch node with a new key.
* Set MDBX_TXN_ERROR on failure.
* [in] mc Cursor pointing to the node to operate on.
@@ -17033,6 +17402,7 @@ static int mdbx_update_key(MDBX_cursor *mc, const MDBX_val *key) {
int ptr, i, nkeys, indx;
DKBUF;
+ mdbx_cassert(mc, cursor_is_tracked(mc));
indx = mc->mc_ki[mc->mc_top];
mp = mc->mc_pg[mc->mc_top];
node = page_node(mp, indx);
@@ -17042,8 +17412,8 @@ static int mdbx_update_key(MDBX_cursor *mc, const MDBX_val *key) {
char kbuf2[DKBUF_MAXKEYSIZE * 2 + 1];
k2.iov_base = node_key(node);
k2.iov_len = node_ks(node);
- mdbx_debug("update key %u (ofs %u) [%s] to [%s] on page %" PRIaPGNO, indx,
- ptr, mdbx_dump_val(&k2, kbuf2, sizeof(kbuf2)), DKEY(key),
+ mdbx_debug("update key %u (offset %u) [%s] to [%s] on page %" PRIaPGNO,
+ indx, ptr, mdbx_dump_val(&k2, kbuf2, sizeof(kbuf2)), DKEY(key),
mp->mp_pgno);
}
@@ -17151,7 +17521,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) {
const unsigned snum = cdst->mc_snum;
mdbx_cassert(csrc, snum > 0);
MDBX_cursor mn;
- mdbx_cursor_copy(cdst, &mn);
+ cursor_copy_internal(cdst, &mn);
mn.mc_xcursor = NULL;
/* must find the lowest key below dst */
rc = mdbx_page_search_lowest(&mn);
@@ -17190,7 +17560,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) {
psrc = csrc->mc_pg[csrc->mc_top];
pdst = cdst->mc_pg[cdst->mc_top];
- rc = mdbx_update_key(&mn, &key);
+ WITH_CURSOR_TRACKING(mn, rc = mdbx_update_key(&mn, &key));
if (unlikely(rc))
return rc;
} else {
@@ -17274,7 +17644,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) {
mdbx_cassert(csrc, csrc->mc_top == cdst->mc_top);
if (fromleft) {
/* If we're adding on the left, bump others up */
- for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
+ for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) {
m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
if (!(m3->mc_flags & C_INITIALIZED) || m3->mc_top < csrc->mc_top)
continue;
@@ -17294,7 +17664,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) {
}
} else {
/* Adding on the right, bump others down */
- for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
+ for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) {
m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
if (m3 == csrc)
continue;
@@ -17333,7 +17703,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) {
mdbx_debug("update separator for source page %" PRIaPGNO " to [%s]",
psrc->mp_pgno, DKEY(&key));
MDBX_cursor mn;
- mdbx_cursor_copy(csrc, &mn);
+ cursor_copy_internal(csrc, &mn);
mn.mc_xcursor = NULL;
mdbx_cassert(csrc, mn.mc_snum > 0);
mn.mc_snum--;
@@ -17368,7 +17738,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) {
mdbx_debug("update separator for destination page %" PRIaPGNO " to [%s]",
pdst->mp_pgno, DKEY(&key));
MDBX_cursor mn;
- mdbx_cursor_copy(cdst, &mn);
+ cursor_copy_internal(cdst, &mn);
mn.mc_xcursor = NULL;
mdbx_cassert(cdst, mn.mc_snum > 0);
mn.mc_snum--;
@@ -17405,6 +17775,8 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) {
int rc;
mdbx_cassert(csrc, csrc != cdst);
+ mdbx_cassert(csrc, cursor_is_tracked(csrc));
+ mdbx_cassert(cdst, cursor_is_tracked(cdst));
const MDBX_page *const psrc = csrc->mc_pg[csrc->mc_top];
MDBX_page *pdst = cdst->mc_pg[cdst->mc_top];
mdbx_debug("merging page %" PRIaPGNO " into %" PRIaPGNO, psrc->mp_pgno,
@@ -17449,7 +17821,7 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) {
key.iov_base = node_key(srcnode);
if (pagetype & P_BRANCH) {
MDBX_cursor mn;
- mdbx_cursor_copy(csrc, &mn);
+ cursor_copy_internal(csrc, &mn);
mn.mc_xcursor = NULL;
/* must find the lowest key below src */
rc = mdbx_page_search_lowest(&mn);
@@ -17534,7 +17906,7 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) {
const MDBX_dbi dbi = csrc->mc_dbi;
const unsigned top = csrc->mc_top;
- for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
+ for (m2 = csrc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) {
m3 = (csrc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
if (m3 == csrc || top >= m3->mc_snum)
continue;
@@ -17645,7 +18017,7 @@ bailout:
/* Copy the contents of a cursor.
* [in] csrc The cursor to copy from.
* [out] cdst The cursor to copy to. */
-static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) {
+static void cursor_copy_internal(const MDBX_cursor *csrc, MDBX_cursor *cdst) {
mdbx_cassert(csrc,
csrc->mc_txn->mt_txnid >= *csrc->mc_txn->mt_env->me_oldest);
cdst->mc_txn = csrc->mc_txn;
@@ -17666,6 +18038,7 @@ static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) {
* [in] mc Cursor pointing to the page where rebalancing should begin.
* Returns 0 on success, non-zero on failure. */
static int mdbx_rebalance(MDBX_cursor *mc) {
+ mdbx_cassert(mc, cursor_is_tracked(mc));
mdbx_cassert(mc, mc->mc_snum > 0);
mdbx_cassert(mc, mc->mc_snum < mc->mc_db->md_depth ||
IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1]));
@@ -17723,8 +18096,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
mc->mc_db->md_overflow_pages == 0 &&
mc->mc_db->md_leaf_pages == 1);
/* Adjust cursors pointing to mp */
- const MDBX_dbi dbi = mc->mc_dbi;
- for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2;
+ for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2;
m2 = m2->mc_next) {
MDBX_cursor *m3 =
(mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
@@ -17758,10 +18130,10 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
}
/* Adjust other cursors pointing to mp */
- MDBX_cursor *m2, *m3;
- MDBX_dbi dbi = mc->mc_dbi;
- for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
- m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
+ for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[mc->mc_dbi]; m2;
+ m2 = m2->mc_next) {
+ MDBX_cursor *m3 =
+ (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
if (m3 == mc || !(m3->mc_flags & C_INITIALIZED))
continue;
if (m3->mc_pg[0] == mp) {
@@ -17802,7 +18174,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
/* Find neighbors. */
MDBX_cursor mn;
- mdbx_cursor_copy(mc, &mn);
+ cursor_copy_internal(mc, &mn);
mn.mc_xcursor = NULL;
MDBX_page *left = nullptr, *right = nullptr;
@@ -17839,7 +18211,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
/* We want mdbx_rebalance to find mn when doing fixups */
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
if (likely(rc != MDBX_RESULT_TRUE)) {
- mdbx_cursor_copy(&mn, mc);
+ cursor_copy_internal(&mn, mc);
mc->mc_ki[mc->mc_top] = new_ki;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
return rc;
@@ -17852,7 +18224,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
mn.mc_ki[mn.mc_top] = 0;
mc->mc_ki[mc->mc_top] = nkeys;
- rc = mdbx_page_merge(&mn, mc);
+ WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc));
if (likely(rc != MDBX_RESULT_TRUE)) {
mc->mc_ki[mc->mc_top] = ki_top;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
@@ -17867,7 +18239,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
mn.mc_ki[mn.mc_top - 1] = ki_pre_top - 1;
mn.mc_ki[mn.mc_top] = (indx_t)(page_numkeys(left) - 1);
mc->mc_ki[mc->mc_top] = 0;
- rc = mdbx_node_move(&mn, mc, true);
+ WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, true));
if (likely(rc != MDBX_RESULT_TRUE)) {
mc->mc_ki[mc->mc_top] = ki_top + 1;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
@@ -17880,7 +18252,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
mn.mc_ki[mn.mc_top] = 0;
mc->mc_ki[mc->mc_top] = nkeys;
- rc = mdbx_node_move(&mn, mc, false);
+ WITH_CURSOR_TRACKING(mn, rc = mdbx_node_move(&mn, mc, false));
if (likely(rc != MDBX_RESULT_TRUE)) {
mc->mc_ki[mc->mc_top] = ki_top;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
@@ -17908,19 +18280,20 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
/* We want mdbx_rebalance to find mn when doing fixups */
WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(mc, &mn));
if (likely(rc != MDBX_RESULT_TRUE)) {
- mdbx_cursor_copy(&mn, mc);
+ cursor_copy_internal(&mn, mc);
mc->mc_ki[mc->mc_top] = new_ki;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
return rc;
}
- } else if (likely(right)) {
+ }
+ if (likely(right)) {
/* try merge with right */
mdbx_cassert(mc, page_numkeys(right) >= minkeys);
mn.mc_pg[mn.mc_top] = right;
mn.mc_ki[mn.mc_top - 1] = ki_pre_top + 1;
mn.mc_ki[mn.mc_top] = 0;
mc->mc_ki[mc->mc_top] = nkeys;
- rc = mdbx_page_merge(&mn, mc);
+ WITH_CURSOR_TRACKING(mn, rc = mdbx_page_merge(&mn, mc));
if (likely(rc != MDBX_RESULT_TRUE)) {
mc->mc_ki[mc->mc_top] = ki_top;
mdbx_cassert(mc, rc || page_numkeys(mc->mc_pg[mc->mc_top]) >= minkeys);
@@ -18058,7 +18431,7 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
lp->mp_pgno);
continue;
}
- if (unlikely(number_of_ovpages(env, dsize) != lp->mp_pages))
+ if (unlikely(number_of_ovpages(env, dsize) > lp->mp_pages))
rc =
bad_page(mp, "big-node size (%zu) mismatch n-pages size (%u)\n",
dsize, lp->mp_pages);
@@ -18278,135 +18651,115 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) {
MDBX_page *mp;
indx_t ki;
unsigned nkeys;
- MDBX_cursor *m2, *m3;
MDBX_dbi dbi = mc->mc_dbi;
+ mdbx_cassert(mc, cursor_is_tracked(mc));
mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
ki = mc->mc_ki[mc->mc_top];
mp = mc->mc_pg[mc->mc_top];
mdbx_node_del(mc, mc->mc_db->md_xsize);
mc->mc_db->md_entries--;
- {
- /* Adjust other cursors pointing to mp */
- for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
- m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
- if (m3 == mc || !(m2->mc_flags & m3->mc_flags & C_INITIALIZED))
- continue;
- if (m3->mc_snum < mc->mc_snum)
- continue;
- if (m3->mc_pg[mc->mc_top] == mp) {
- if (m3->mc_ki[mc->mc_top] == ki) {
- m3->mc_flags |= C_DEL;
- if (mc->mc_db->md_flags & MDBX_DUPSORT) {
- /* Sub-cursor referred into dataset which is gone */
- m3->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF);
- }
- continue;
- } else if (m3->mc_ki[mc->mc_top] > ki) {
- m3->mc_ki[mc->mc_top]--;
+
+ /* Adjust other cursors pointing to mp */
+ for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) {
+ MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
+ if (m3 == mc || !(m2->mc_flags & m3->mc_flags & C_INITIALIZED))
+ continue;
+ if (m3->mc_snum < mc->mc_snum)
+ continue;
+ if (m3->mc_pg[mc->mc_top] == mp) {
+ if (m3->mc_ki[mc->mc_top] == ki) {
+ m3->mc_flags |= C_DEL;
+ if (mc->mc_db->md_flags & MDBX_DUPSORT) {
+ /* Sub-cursor referred into dataset which is gone */
+ m3->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF);
}
- if (XCURSOR_INITED(m3))
- XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]);
+ continue;
+ } else if (m3->mc_ki[mc->mc_top] > ki) {
+ m3->mc_ki[mc->mc_top]--;
}
+ if (XCURSOR_INITED(m3))
+ XCURSOR_REFRESH(m3, m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]);
}
}
+
rc = mdbx_rebalance(mc);
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto bailout;
- if (likely(rc == MDBX_SUCCESS)) {
+ if (unlikely(!mc->mc_snum)) {
/* DB is totally empty now, just bail out.
* Other cursors adjustments were already done
* by mdbx_rebalance and aren't needed here. */
- if (!mc->mc_snum) {
- mdbx_cassert(mc, mc->mc_db->md_entries == 0 && mc->mc_db->md_depth == 0 &&
- mc->mc_db->md_root == P_INVALID);
- mc->mc_flags |= C_DEL | C_EOF;
- return rc;
- }
-
- ki = mc->mc_ki[mc->mc_top];
- mp = mc->mc_pg[mc->mc_top];
- mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
- nkeys = page_numkeys(mp);
- mdbx_cassert(mc, (mc->mc_db->md_entries > 0 && nkeys > 0) ||
- ((mc->mc_flags & C_SUB) &&
- mc->mc_db->md_entries == 0 && nkeys == 0));
+ mdbx_cassert(mc, mc->mc_db->md_entries == 0 && mc->mc_db->md_depth == 0 &&
+ mc->mc_db->md_root == P_INVALID);
+ mc->mc_flags |= C_EOF;
+ return MDBX_SUCCESS;
+ }
- /* Adjust THIS and other cursors pointing to mp */
- for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
- m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
- if (m3 == mc || !(m2->mc_flags & m3->mc_flags & C_INITIALIZED))
- continue;
- if (m3->mc_snum < mc->mc_snum)
- continue;
- if (m3->mc_pg[mc->mc_top] == mp) {
- /* if m3 points past last node in page, find next sibling */
- if (m3->mc_ki[mc->mc_top] >= nkeys) {
- rc = mdbx_cursor_sibling(m3, true);
- if (rc == MDBX_NOTFOUND) {
- m3->mc_flags |= C_EOF;
- rc = MDBX_SUCCESS;
- continue;
- } else if (unlikely(rc != MDBX_SUCCESS))
- break;
+ ki = mc->mc_ki[mc->mc_top];
+ mp = mc->mc_pg[mc->mc_top];
+ mdbx_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
+ nkeys = page_numkeys(mp);
+ mdbx_cassert(mc, (mc->mc_db->md_entries > 0 && nkeys > 0) ||
+ ((mc->mc_flags & C_SUB) && mc->mc_db->md_entries == 0 &&
+ nkeys == 0));
+
+ /* Adjust this and other cursors pointing to mp */
+ for (MDBX_cursor *m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) {
+ MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
+ if (!(m2->mc_flags & m3->mc_flags & C_INITIALIZED))
+ continue;
+ if (m3->mc_snum < mc->mc_snum)
+ continue;
+ if (m3->mc_pg[mc->mc_top] == mp) {
+ /* if m3 points past last node in page, find next sibling */
+ if (m3->mc_ki[mc->mc_top] >= nkeys) {
+ rc = mdbx_cursor_sibling(m3, SIBLING_RIGHT);
+ if (rc == MDBX_NOTFOUND) {
+ m3->mc_flags |= C_EOF;
+ rc = MDBX_SUCCESS;
+ continue;
}
- if (m3->mc_ki[mc->mc_top] >= ki || m3->mc_pg[mc->mc_top] != mp) {
- if ((mc->mc_db->md_flags & MDBX_DUPSORT) != 0 &&
- (m3->mc_flags & C_EOF) == 0) {
- MDBX_node *node =
- page_node(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]);
- /* If this node has dupdata, it may need to be reinited
- * because its data has moved.
- * If the xcursor was not initd it must be reinited.
- * Else if node points to a subDB, nothing is needed. */
- if (node_flags(node) & F_DUPDATA) {
- if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
- if (!(node_flags(node) & F_SUBDATA))
- m3->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
- } else {
- rc = mdbx_xcursor_init1(m3, node, m3->mc_pg[m3->mc_top]);
- if (unlikely(rc != MDBX_SUCCESS))
- break;
- m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
- }
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto bailout;
+ }
+ if (m3->mc_ki[mc->mc_top] >= ki ||
+ /* moved to right sibling */ m3->mc_pg[mc->mc_top] != mp) {
+ if (m3->mc_xcursor && !(m3->mc_flags & C_EOF)) {
+ MDBX_node *node =
+ page_node(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]);
+ /* If this node has dupdata, it may need to be reinited
+ * because its data has moved.
+ * If the xcursor was not inited it must be reinited.
+ * Else if node points to a subDB, nothing is needed. */
+ if (node_flags(node) & F_DUPDATA) {
+ if (m3->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
+ if (!(node_flags(node) & F_SUBDATA))
+ m3->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
+ } else {
+ rc = mdbx_xcursor_init1(m3, node, m3->mc_pg[m3->mc_top]);
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto bailout;
+ rc = mdbx_cursor_first(&m3->mc_xcursor->mx_cursor, NULL, NULL);
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto bailout;
}
}
+ m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
}
+ m3->mc_flags |= C_DEL;
}
}
-
- if (mc->mc_ki[mc->mc_top] >= nkeys) {
- rc = mdbx_cursor_sibling(mc, true);
- if (rc == MDBX_NOTFOUND) {
- mc->mc_flags |= C_EOF;
- rc = MDBX_SUCCESS;
- }
- }
- if ((mc->mc_db->md_flags & MDBX_DUPSORT) != 0 &&
- (mc->mc_flags & C_EOF) == 0) {
- MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
- /* If this node has dupdata, it may need to be reinited
- * because its data has moved.
- * If the xcursor was not initd it must be reinited.
- * Else if node points to a subDB, nothing is needed. */
- if (node_flags(node) & F_DUPDATA) {
- if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) {
- if (!(node_flags(node) & F_SUBDATA))
- mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
- } else {
- rc = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
- if (likely(rc != MDBX_SUCCESS))
- mc->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
- }
- }
- }
- mc->mc_flags |= C_DEL;
}
- if (unlikely(rc))
- mc->mc_txn->mt_flags |= MDBX_TXN_ERROR;
- else if (mdbx_audit_enabled())
+ mdbx_cassert(mc, rc == MDBX_SUCCESS);
+ if (mdbx_audit_enabled())
rc = mdbx_cursor_check(mc, 0);
+ return rc;
+bailout:
+ mc->mc_txn->mt_flags |= MDBX_TXN_ERROR;
return rc;
}
@@ -18461,10 +18814,10 @@ static int mdbx_del0(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key,
* is larger than the current one, the parent page may
* run out of space, triggering a split. We need this
* cursor to be consistent until the end of the rebalance. */
- cx.outer.mc_next = txn->mt_cursors[dbi];
- txn->mt_cursors[dbi] = &cx.outer;
+ cx.outer.mc_next = txn->tw.cursors[dbi];
+ txn->tw.cursors[dbi] = &cx.outer;
rc = mdbx_cursor_del(&cx.outer, flags);
- txn->mt_cursors[dbi] = cx.outer.mc_next;
+ txn->tw.cursors[dbi] = cx.outer.mc_next;
}
return rc;
}
@@ -18551,7 +18904,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey,
mdbx_debug("parent branch page is %" PRIaPGNO, mc->mc_pg[ptop]->mp_pgno);
}
- mdbx_cursor_copy(mc, &mn);
+ cursor_copy_internal(mc, &mn);
mn.mc_xcursor = NULL;
mn.mc_pg[mn.mc_top] = rp;
mn.mc_ki[mn.mc_top] = 0;
@@ -18744,7 +19097,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey,
} else {
/* find right page's left sibling */
mc->mc_ki[ptop] = mn.mc_ki[ptop];
- rc = mdbx_cursor_sibling(mc, false);
+ rc = mdbx_cursor_sibling(mc, SIBLING_LEFT);
}
}
} else {
@@ -18900,7 +19253,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey,
MDBX_dbi dbi = mc->mc_dbi;
nkeys = page_numkeys(mp);
- for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) {
+ for (m2 = mc->mc_txn->tw.cursors[dbi]; m2; m2 = m2->mc_next) {
m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
if (m3 == mc)
continue;
@@ -18976,8 +19329,8 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data,
rc = mdbx_cursor_init(&cx.outer, txn, dbi);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- cx.outer.mc_next = txn->mt_cursors[dbi];
- txn->mt_cursors[dbi] = &cx.outer;
+ cx.outer.mc_next = txn->tw.cursors[dbi];
+ txn->tw.cursors[dbi] = &cx.outer;
/* LY: support for update (explicit overwrite) */
if (flags & MDBX_CURRENT) {
@@ -18998,7 +19351,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data,
if (likely(rc == MDBX_SUCCESS))
rc = mdbx_cursor_put(&cx.outer, key, data, flags);
- txn->mt_cursors[dbi] = cx.outer.mc_next;
+ txn->tw.cursors[dbi] = cx.outer.mc_next;
return rc;
}
@@ -19008,7 +19361,6 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data,
#ifndef MDBX_WBUF
#define MDBX_WBUF ((size_t)1024 * 1024)
#endif
-#define MDBX_EOF 0x10 /* mdbx_env_copythr() is done reading */
/* State needed for a double-buffering compacting copy. */
typedef struct mdbx_copy {
@@ -19020,53 +19372,67 @@ typedef struct mdbx_copy {
size_t mc_wlen[2];
size_t mc_olen[2];
mdbx_filehandle_t mc_fd;
- volatile int mc_error;
- pgno_t mc_next_pgno;
- short mc_toggle; /* Buffer number in provider */
- short mc_new; /* (0-2 buffers to write) | (MDBX_EOF at end) */
/* Error code. Never cleared if set. Both threads can set nonzero
* to fail the copy. Not mutex-protected, MDBX expects atomic int. */
+ volatile int mc_error;
+ pgno_t mc_next_pgno;
+ volatile unsigned mc_head;
+ volatile unsigned mc_tail;
} mdbx_copy;
/* Dedicated writer thread for compacting copy. */
static THREAD_RESULT __cold THREAD_CALL mdbx_env_copythr(void *arg) {
mdbx_copy *my = arg;
- uint8_t *ptr;
- int toggle = 0;
+
+#if defined(EPIPE) && !(defined(_WIN32) || defined(_WIN64))
+ sigset_t sigset;
+ sigemptyset(&sigset);
+ sigaddset(&sigset, SIGPIPE);
+ my->mc_error = pthread_sigmask(SIG_BLOCK, &sigset, NULL);
+#endif /* EPIPE */
mdbx_condpair_lock(&my->mc_condpair);
while (!my->mc_error) {
- while (!my->mc_new && !my->mc_error) {
+ while (my->mc_tail == my->mc_head && !my->mc_error) {
int err = mdbx_condpair_wait(&my->mc_condpair, true);
if (err != MDBX_SUCCESS) {
my->mc_error = err;
goto bailout;
}
}
- if (my->mc_new == 0 + MDBX_EOF) /* 0 buffers, just EOF */
- break;
+ const unsigned toggle = my->mc_tail & 1;
size_t wsize = my->mc_wlen[toggle];
- ptr = my->mc_wbuf[toggle];
+ if (wsize == 0) {
+ my->mc_tail += 1;
+ break /* EOF */;
+ }
+ my->mc_wlen[toggle] = 0;
+ uint8_t *ptr = my->mc_wbuf[toggle];
again:
- if (wsize > 0 && !my->mc_error) {
+ if (!my->mc_error) {
int err = mdbx_write(my->mc_fd, ptr, wsize);
if (err != MDBX_SUCCESS) {
+#if defined(EPIPE) && !(defined(_WIN32) || defined(_WIN64))
+ if (err == EPIPE) {
+ /* Collect the pending SIGPIPE,
+ * otherwise at least OS X gives it to the process on thread-exit. */
+ int unused;
+ sigwait(&sigset, &unused);
+ }
+#endif /* EPIPE */
my->mc_error = err;
goto bailout;
}
}
/* If there's an overflow page tail, write it too */
- if (my->mc_olen[toggle]) {
- wsize = my->mc_olen[toggle];
- ptr = my->mc_over[toggle];
+ wsize = my->mc_olen[toggle];
+ if (wsize) {
my->mc_olen[toggle] = 0;
+ ptr = my->mc_over[toggle];
goto again;
}
- my->mc_wlen[toggle] = 0;
- toggle ^= 1;
- /* Return the empty buffer to provider */
- my->mc_new--;
+ my->mc_tail += 1;
mdbx_condpair_signal(&my->mc_condpair, false);
}
bailout:
@@ -19074,24 +19440,19 @@ bailout:
return (THREAD_RESULT)0;
}
-/* Give buffer and/or MDBX_EOF to writer thread, await unused buffer.
- *
- * [in] my control structure.
- * [in] adjust (1 to hand off 1 buffer) | (MDBX_EOF when ending). */
-static int __cold mdbx_env_cthr_toggle(mdbx_copy *my, int adjust) {
+/* Give buffer and/or MDBX_EOF to writer thread, await unused buffer. */
+static __cold int mdbx_env_cthr_toggle(mdbx_copy *my) {
mdbx_condpair_lock(&my->mc_condpair);
- my->mc_new += (short)adjust;
+ mdbx_assert(my->mc_env, my->mc_head - my->mc_tail < 2 || my->mc_error);
+ my->mc_head += 1;
mdbx_condpair_signal(&my->mc_condpair, true);
- while (!my->mc_error && (my->mc_new & 2) /* both buffers in use */) {
+ while (!my->mc_error &&
+ my->mc_head - my->mc_tail == 2 /* both buffers in use */) {
int err = mdbx_condpair_wait(&my->mc_condpair, false);
if (err != MDBX_SUCCESS)
my->mc_error = err;
}
mdbx_condpair_unlock(&my->mc_condpair);
-
- my->mc_toggle ^= (adjust & 1);
- /* Both threads reset mc_wlen, to be safe from threading errors */
- my->mc_wlen[my->mc_toggle] = 0;
return my->mc_error;
}
@@ -19099,11 +19460,11 @@ static int __cold mdbx_env_cthr_toggle(mdbx_copy *my, int adjust) {
* [in] my control structure.
* [in,out] pg database root.
* [in] flags includes F_DUPDATA if it is a sorted-duplicate sub-DB. */
-static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
+static __cold int mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
MDBX_cursor_couple couple;
MDBX_page *mo, *mp, *leaf;
char *buf, *ptr;
- int rc, toggle;
+ int rc;
unsigned i;
/* Empty DB, nothing to do */
@@ -19139,11 +19500,9 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
/* This is writable space for a leaf page. Usually not needed. */
leaf = (MDBX_page *)ptr;
- toggle = my->mc_toggle;
while (couple.outer.mc_snum > 0) {
- unsigned n;
mp = couple.outer.mc_pg[couple.outer.mc_top];
- n = page_numkeys(mp);
+ unsigned n = page_numkeys(mp);
if (IS_LEAF(mp)) {
if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) {
@@ -19166,11 +19525,12 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
pp_txnid4chk(mp, my->mc_txn));
if (unlikely(rc != MDBX_SUCCESS))
goto done;
- if (my->mc_wlen[toggle] >= MDBX_WBUF) {
- rc = mdbx_env_cthr_toggle(my, 1);
+ unsigned toggle = my->mc_head & 1;
+ if (my->mc_wlen[toggle] + my->mc_env->me_psize > MDBX_WBUF) {
+ rc = mdbx_env_cthr_toggle(my);
if (unlikely(rc != MDBX_SUCCESS))
goto done;
- toggle = my->mc_toggle;
+ toggle = my->mc_head & 1;
}
mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
memcpy(mo, omp, my->mc_env->me_psize);
@@ -19180,10 +19540,10 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
if (omp->mp_pages > 1) {
my->mc_olen[toggle] = pgno2bytes(my->mc_env, omp->mp_pages - 1);
my->mc_over[toggle] = (uint8_t *)omp + my->mc_env->me_psize;
- rc = mdbx_env_cthr_toggle(my, 1);
+ rc = mdbx_env_cthr_toggle(my);
if (unlikely(rc != MDBX_SUCCESS))
goto done;
- toggle = my->mc_toggle;
+ toggle = my->mc_head & 1;
}
} else if (node_flags(node) & F_SUBDATA) {
if (node_ds(node) != sizeof(MDBX_db)) {
@@ -19201,11 +19561,9 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
MDBX_db db;
memcpy(&db, node_data(node), sizeof(MDBX_db));
- my->mc_toggle = (short)toggle;
rc = mdbx_env_cwalk(my, &db.md_root, node_flags(node) & F_DUPDATA);
if (rc)
goto done;
- toggle = my->mc_toggle;
memcpy(node_data(node), &db, sizeof(MDBX_db));
}
}
@@ -19234,11 +19592,12 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
continue;
}
}
- if (my->mc_wlen[toggle] >= MDBX_WBUF) {
- rc = mdbx_env_cthr_toggle(my, 1);
+ unsigned toggle = my->mc_head & 1;
+ if (my->mc_wlen[toggle] + my->mc_wlen[toggle] > MDBX_WBUF) {
+ rc = mdbx_env_cthr_toggle(my);
if (unlikely(rc != MDBX_SUCCESS))
goto done;
- toggle = my->mc_toggle;
+ toggle = my->mc_head & 1;
}
mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
mdbx_page_copy(mo, mp, my->mc_env->me_psize);
@@ -19298,7 +19657,7 @@ static __cold void make_sizeable(MDBX_meta *meta) {
}
/* Copy environment with compaction. */
-static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
+static __cold int mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
mdbx_filehandle_t fd, uint8_t *buffer,
const bool dest_is_pipe, const int flags) {
const size_t meta_bytes = pgno2bytes(env, NUM_METAS);
@@ -19375,8 +19734,12 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
}
if (rc == MDBX_SUCCESS)
rc = mdbx_env_cwalk(&ctx, &root, 0);
- mdbx_env_cthr_toggle(&ctx, 1 | MDBX_EOF);
+ mdbx_env_cthr_toggle(&ctx);
+ mdbx_env_cthr_toggle(&ctx);
thread_err = mdbx_thread_join(thread);
+ mdbx_assert(env, (ctx.mc_tail == ctx.mc_head &&
+ ctx.mc_wlen[ctx.mc_head & 1] == 0) ||
+ ctx.mc_error);
mdbx_condpair_destroy(&ctx.mc_condpair);
}
if (unlikely(thread_err != MDBX_SUCCESS))
@@ -19435,7 +19798,7 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn,
}
/* Copy environment as-is. */
-static int __cold mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn,
+static __cold int mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn,
mdbx_filehandle_t fd, uint8_t *buffer,
const bool dest_is_pipe, const int flags) {
/* We must start the actual read txn after blocking writers */
@@ -19542,7 +19905,7 @@ static int __cold mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn,
return rc;
}
-int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd,
+__cold int mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd,
unsigned flags) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
@@ -19607,7 +19970,7 @@ int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd,
return rc;
}
-int __cold mdbx_env_copy(MDBX_env *env, const char *dest_path,
+__cold int mdbx_env_copy(MDBX_env *env, const char *dest_path,
MDBX_copy_flags_t flags) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
@@ -19669,7 +20032,7 @@ int __cold mdbx_env_copy(MDBX_env *env, const char *dest_path,
/******************************************************************************/
-int __cold mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags,
+__cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags,
bool onoff) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
@@ -19697,7 +20060,7 @@ int __cold mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags,
return MDBX_SUCCESS;
}
-int __cold mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) {
+__cold int mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -19709,7 +20072,7 @@ int __cold mdbx_env_get_flags(const MDBX_env *env, unsigned *arg) {
return MDBX_SUCCESS;
}
-int __cold mdbx_env_set_userctx(MDBX_env *env, void *ctx) {
+__cold int mdbx_env_set_userctx(MDBX_env *env, void *ctx) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -19722,7 +20085,7 @@ void *__cold mdbx_env_get_userctx(const MDBX_env *env) {
return env ? env->me_userctx : NULL;
}
-int __cold mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func) {
+__cold int mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -19736,7 +20099,7 @@ int __cold mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func) {
#endif
}
-int __cold mdbx_env_get_path(const MDBX_env *env, const char **arg) {
+__cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -19744,11 +20107,11 @@ int __cold mdbx_env_get_path(const MDBX_env *env, const char **arg) {
if (unlikely(!arg))
return MDBX_EINVAL;
- *arg = env->me_path;
+ *arg = env->me_pathname;
return MDBX_SUCCESS;
}
-int __cold mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *arg) {
+__cold int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *arg) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -19778,11 +20141,11 @@ static void mdbx_stat0(const MDBX_env *env, const MDBX_db *db, MDBX_stat *dest,
dest->ms_mod_txnid = db->md_mod_txnid;
}
-int __cold mdbx_env_stat(MDBX_env *env, MDBX_stat *dest, size_t bytes) {
- return mdbx_env_stat_ex(env, NULL, dest, bytes);
+__cold int mdbx_env_stat(const MDBX_env *env, MDBX_stat *stat, size_t bytes) {
+ return __inline_mdbx_env_stat(env, stat, bytes);
}
-int __cold mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn,
+__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn,
MDBX_stat *dest, size_t bytes) {
if (unlikely((env == NULL && txn == NULL) || dest == NULL))
return MDBX_EINVAL;
@@ -19820,7 +20183,7 @@ int __cold mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn,
}
}
-int __cold mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi,
+__cold int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi,
uint32_t *mask) {
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
@@ -19871,11 +20234,12 @@ int __cold mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi,
return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc;
}
-int __cold mdbx_env_info(MDBX_env *env, MDBX_envinfo *arg, size_t bytes) {
- return mdbx_env_info_ex(env, NULL, arg, bytes);
+__cold int mdbx_env_info(const MDBX_env *env, MDBX_envinfo *info,
+ size_t bytes) {
+ return __inline_mdbx_env_info(env, info, bytes);
}
-int __cold mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn,
+__cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn,
MDBX_envinfo *arg, size_t bytes) {
if (unlikely((env == NULL && txn == NULL) || arg == NULL))
return MDBX_EINVAL;
@@ -20249,6 +20613,7 @@ static int dbi_open(MDBX_txn *txn, const char *table_name, unsigned user_flags,
goto later_bailout;
dbiflags |= DBI_DIRTY | DBI_CREAT;
+ txn->mt_flags |= MDBX_TXN_DIRTY;
}
/* Got info, register DBI in this txn */
@@ -20266,6 +20631,8 @@ static int dbi_open(MDBX_txn *txn, const char *table_name, unsigned user_flags,
txn->mt_dbistate[slot] = (uint8_t)dbiflags;
txn->mt_dbxs[slot].md_name.iov_base = namedup;
txn->mt_dbxs[slot].md_name.iov_len = len;
+ if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0)
+ txn->tw.cursors[slot] = NULL;
txn->mt_numdbs += (slot == txn->mt_numdbs);
if ((dbiflags & DBI_CREAT) == 0) {
env->me_dbflags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID;
@@ -20294,7 +20661,7 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name,
return dbi_open(txn, table_name, table_flags, dbi, keycmp, datacmp);
}
-int __cold mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest,
+__cold int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest,
size_t bytes) {
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
@@ -20376,8 +20743,7 @@ int mdbx_dbi_flags_ex(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags,
}
int mdbx_dbi_flags(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags) {
- unsigned state;
- return mdbx_dbi_flags_ex(txn, dbi, flags, &state);
+ return __inline_mdbx_dbi_flags(txn, dbi, flags);
}
/* Add all the DB's pages to the free list.
@@ -20404,7 +20770,7 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) {
if (unlikely(rc))
goto done;
- mdbx_cursor_copy(mc, &mx);
+ cursor_copy_internal(mc, &mx);
while (mc->mc_snum > 0) {
MDBX_page *mp = mc->mc_pg[mc->mc_top];
unsigned n = page_numkeys(mp);
@@ -20446,7 +20812,7 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) {
break;
mdbx_cassert(mc, i <= UINT16_MAX);
mc->mc_ki[mc->mc_top] = (indx_t)i;
- rc = mdbx_cursor_sibling(mc, 1);
+ rc = mdbx_cursor_sibling(mc, SIBLING_RIGHT);
if (rc) {
if (unlikely(rc != MDBX_NOTFOUND))
goto done;
@@ -20501,7 +20867,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) {
rc = mdbx_drop0(mc, mc->mc_db->md_flags & MDBX_DUPSORT);
/* Invalidate the dropped DB's cursors */
- for (MDBX_cursor *m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)
+ for (MDBX_cursor *m2 = txn->tw.cursors[dbi]; m2; m2 = m2->mc_next)
m2->mc_flags &= ~(C_INITIALIZED | C_EOF);
if (unlikely(rc))
goto bailout;
@@ -20566,7 +20932,7 @@ int mdbx_set_dupsort(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cmp_func *cmp) {
return MDBX_SUCCESS;
}
-int __cold mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func,
+__cold int mdbx_reader_list(const MDBX_env *env, MDBX_reader_list_func *func,
void *ctx) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
@@ -20666,7 +21032,7 @@ static bool __cold mdbx_pid_insert(uint32_t *ids, uint32_t pid) {
return true;
}
-int __cold mdbx_reader_check(MDBX_env *env, int *dead) {
+__cold int mdbx_reader_check(MDBX_env *env, int *dead) {
if (dead)
*dead = 0;
return mdbx_cleanup_dead_readers(env, false, dead);
@@ -20676,9 +21042,8 @@ int __cold mdbx_reader_check(MDBX_env *env, int *dead) {
* MDBX_RESULT_TRUE - done and mutex recovered
* MDBX_SUCCESS - done
* Otherwise errcode. */
-MDBX_INTERNAL_FUNC int __cold mdbx_cleanup_dead_readers(MDBX_env *env,
- int rdt_locked,
- int *dead) {
+MDBX_INTERNAL_FUNC __cold int
+mdbx_cleanup_dead_readers(MDBX_env *env, int rdt_locked, int *dead) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -20775,7 +21140,7 @@ MDBX_INTERNAL_FUNC int __cold mdbx_cleanup_dead_readers(MDBX_env *env,
return rc;
}
-int __cold mdbx_setup_debug(int loglevel, int flags, MDBX_debug_func *logger) {
+__cold int mdbx_setup_debug(int loglevel, int flags, MDBX_debug_func *logger) {
const int rc = mdbx_runtime_flags | (mdbx_loglevel << 16);
if (loglevel != MDBX_LOG_DONTCHANGE)
@@ -20893,7 +21258,7 @@ static txnid_t __cold mdbx_kick_longlived_readers(MDBX_env *env,
return mdbx_find_oldest(env->me_txn);
}
-int __cold mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold) {
+__cold int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -20913,7 +21278,7 @@ int __cold mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold) {
return MDBX_SUCCESS;
}
-int __cold mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) {
+__cold int mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -20933,7 +21298,7 @@ int __cold mdbx_env_set_syncperiod(MDBX_env *env, unsigned seconds_16dot16) {
return MDBX_SUCCESS;
}
-int __cold mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) {
+__cold int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr) {
int rc = check_env(env);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -20990,7 +21355,7 @@ typedef struct mdbx_walk_ctx {
bool mw_dont_check_keys_ordering;
} mdbx_walk_ctx_t;
-static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db,
+static __cold int mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db,
const char *name, int deep);
static MDBX_page_type_t walk_page_type(const MDBX_page *mp) {
@@ -21011,7 +21376,7 @@ static MDBX_page_type_t walk_page_type(const MDBX_page *mp) {
}
/* Depth-first tree traversal. */
-static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno,
+static __cold int mdbx_walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno,
const char *name, int deep,
txnid_t parent_txnid) {
assert(pgno != P_INVALID);
@@ -21249,7 +21614,7 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno,
return MDBX_SUCCESS;
}
-static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db,
+static __cold int mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db,
const char *name, int deep) {
if (unlikely(db->md_root == P_INVALID))
return MDBX_SUCCESS; /* empty db */
@@ -21272,7 +21637,7 @@ static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db,
return rc;
}
-int __cold mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
+__cold int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
void *user, bool dont_check_keys_ordering) {
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
@@ -21333,10 +21698,11 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) {
return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
+ return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
if (!(mc->mc_flags & C_INITIALIZED))
- return MDBX_RESULT_FALSE;
+ return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE;
for (unsigned i = 0; i < mc->mc_snum; ++i) {
if (mc->mc_ki[i])
@@ -21351,10 +21717,11 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) {
return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
+ return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
if (!(mc->mc_flags & C_INITIALIZED))
- return MDBX_RESULT_FALSE;
+ return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE;
for (unsigned i = 0; i < mc->mc_snum; ++i) {
unsigned nkeys = page_numkeys(mc->mc_pg[i]);
@@ -21370,7 +21737,8 @@ int mdbx_cursor_eof(const MDBX_cursor *mc) {
return MDBX_EINVAL;
if (unlikely(mc->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
+ return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
if ((mc->mc_flags & C_INITIALIZED) == 0)
return MDBX_RESULT_TRUE;
@@ -21401,9 +21769,13 @@ __hot static int cursor_diff(const MDBX_cursor *const __restrict x,
r->level = 0;
r->root_nkeys = 0;
- if (unlikely(y->mc_signature != MDBX_MC_LIVE ||
- x->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
+ if (unlikely(x->mc_signature != MDBX_MC_LIVE))
+ return (x->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
+
+ if (unlikely(y->mc_signature != MDBX_MC_LIVE))
+ return (y->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
int rc = check_txn(x->mc_txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
@@ -21569,7 +21941,8 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data,
return MDBX_EINVAL;
if (unlikely(cursor->mc_signature != MDBX_MC_LIVE))
- return MDBX_EBADSIGN;
+ return (cursor->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL
+ : MDBX_EBADSIGN;
int rc = check_txn(cursor->mc_txn, MDBX_TXN_BLOCKED);
if (unlikely(rc != MDBX_SUCCESS))
@@ -21579,7 +21952,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data,
return MDBX_ENODATA;
MDBX_cursor_couple next;
- mdbx_cursor_copy(cursor, &next.outer);
+ cursor_copy_internal(cursor, &next.outer);
next.outer.mc_xcursor = NULL;
if (cursor->mc_db->md_flags & MDBX_DUPSORT) {
next.outer.mc_xcursor = &next.inner;
@@ -21587,7 +21960,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data,
if (unlikely(rc != MDBX_SUCCESS))
return rc;
MDBX_xcursor *mx = &container_of(cursor, MDBX_cursor_couple, outer)->inner;
- mdbx_cursor_copy(&mx->mx_cursor, &next.inner.mx_cursor);
+ cursor_copy_internal(&mx->mx_cursor, &next.inner.mx_cursor);
}
MDBX_val stub = {0, 0};
@@ -21824,8 +22197,8 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key,
rc = mdbx_cursor_init(&cx.outer, txn, dbi);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- cx.outer.mc_next = txn->mt_cursors[dbi];
- txn->mt_cursors[dbi] = &cx.outer;
+ cx.outer.mc_next = txn->tw.cursors[dbi];
+ txn->tw.cursors[dbi] = &cx.outer;
MDBX_val present_key = *key;
if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) {
@@ -21901,7 +22274,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key,
rc = mdbx_cursor_del(&cx.outer, flags & MDBX_ALLDUPS);
bailout:
- txn->mt_cursors[dbi] = cx.outer.mc_next;
+ txn->tw.cursors[dbi] = cx.outer.mc_next;
return rc;
}
@@ -22040,6 +22413,14 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result,
/*----------------------------------------------------------------------------*/
+__cold MDBX_NOTHROW_CONST_FUNCTION intptr_t mdbx_limits_pgsize_min(void) {
+ return __inline_mdbx_limits_pgsize_min();
+}
+
+__cold MDBX_NOTHROW_CONST_FUNCTION intptr_t mdbx_limits_pgsize_max(void) {
+ return __inline_mdbx_limits_pgsize_max();
+}
+
__cold intptr_t mdbx_limits_dbsize_min(intptr_t pagesize) {
if (pagesize < 1)
pagesize = (intptr_t)mdbx_syspagesize();
@@ -22143,6 +22524,14 @@ uint32_t mdbx_key_from_ptrfloat(const float *const ieee754_32bit) {
return float2key(ieee754_32bit);
}
+MDBX_NOTHROW_CONST_FUNCTION uint64_t mdbx_key_from_int64(const int64_t i64) {
+ return __inline_mdbx_key_from_int64(i64);
+}
+
+MDBX_NOTHROW_CONST_FUNCTION uint32_t mdbx_key_from_int32(const int32_t i32) {
+ return __inline_mdbx_key_from_int32(i32);
+}
+
#define IEEE754_DOUBLE_MANTISSA_SIZE 52
#define IEEE754_DOUBLE_EXPONENTA_BIAS 0x3FF
#define IEEE754_DOUBLE_EXPONENTA_MAX 0x7FF
@@ -22409,10 +22798,10 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
rc = mdbx_cursor_set(&cx.outer, key, &old_data, MDBX_SET, NULL);
if (unlikely(rc != MDBX_SUCCESS)) {
if (rc == MDBX_NOTFOUND && data) {
- cx.outer.mc_next = txn->mt_cursors[dbi];
- txn->mt_cursors[dbi] = &cx.outer;
+ cx.outer.mc_next = txn->tw.cursors[dbi];
+ txn->tw.cursors[dbi] = &cx.outer;
rc = mdbx_cursor_put_attr(&cx.outer, key, data, attr, 0);
- txn->mt_cursors[dbi] = cx.outer.mc_next;
+ txn->tw.cursors[dbi] = cx.outer.mc_next;
}
return rc;
}
@@ -22427,11 +22816,11 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
old_data.iov_len) == 0)))
return MDBX_SUCCESS;
- cx.outer.mc_next = txn->mt_cursors[dbi];
- txn->mt_cursors[dbi] = &cx.outer;
+ cx.outer.mc_next = txn->tw.cursors[dbi];
+ txn->tw.cursors[dbi] = &cx.outer;
rc = mdbx_cursor_put_attr(&cx.outer, key, data ? data : &old_data, attr,
MDBX_CURRENT);
- txn->mt_cursors[dbi] = cx.outer.mc_next;
+ txn->tw.cursors[dbi] = cx.outer.mc_next;
return rc;
}
#endif /* MDBX_NEXENTA_ATTRS */
@@ -22805,6 +23194,9 @@ typedef struct _FILE_PROVIDER_EXTERNAL_INFO_V1 {
#ifndef STATUS_INVALID_DEVICE_REQUEST
#define STATUS_INVALID_DEVICE_REQUEST ((NTSTATUS)0xC0000010L)
#endif
+#ifndef STATUS_NOT_SUPPORTED
+#define STATUS_NOT_SUPPORTED ((NTSTATUS)0xC00000BBL)
+#endif
#ifndef FILE_DEVICE_FILE_SYSTEM
#define FILE_DEVICE_FILE_SYSTEM 0x00000009
@@ -23206,6 +23598,20 @@ MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname) {
#endif
}
+MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname) {
+#if defined(_WIN32) || defined(_WIN64)
+ const size_t wlen = mbstowcs(nullptr, pathname, INT_MAX);
+ if (wlen < 1 || wlen > /* MAX_PATH */ INT16_MAX)
+ return ERROR_INVALID_NAME;
+ wchar_t *const pathnameW = _alloca((wlen + 1) * sizeof(wchar_t));
+ if (wlen != mbstowcs(pathnameW, pathname, wlen + 1))
+ return ERROR_INVALID_NAME;
+ return RemoveDirectoryW(pathnameW) ? MDBX_SUCCESS : GetLastError();
+#else
+ return rmdir(pathname) ? errno : MDBX_SUCCESS;
+#endif
+}
+
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
const MDBX_env *env, const char *pathname,
mdbx_filehandle_t *fd,
@@ -23256,6 +23662,12 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
FlagsAndAttributes |=
(env->me_psize < env->me_os_psize) ? 0 : FILE_FLAG_NO_BUFFERING;
break;
+ case MDBX_OPEN_DELETE:
+ CreationDisposition = OPEN_EXISTING;
+ ShareMode |= FILE_SHARE_DELETE;
+ DesiredAccess =
+ FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES | DELETE | SYNCHRONIZE;
+ break;
}
*fd = CreateFileW(pathnameW, DesiredAccess, ShareMode, NULL,
@@ -23304,6 +23716,9 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
flags |= O_FSYNC;
#endif
break;
+ case MDBX_OPEN_DELETE:
+ flags = O_RDWR;
+ break;
}
const bool direct_nocache_for_copy =
@@ -23729,7 +24144,8 @@ static int mdbx_check_fs_local(mdbx_filehandle_t handle, int flags) {
if (!(flags & MDBX_EXCLUSIVE))
return ERROR_REMOTE_STORAGE_MEDIA_ERROR;
} else if (rc != STATUS_OBJECT_NOT_EXTERNALLY_BACKED &&
- rc != STATUS_INVALID_DEVICE_REQUEST)
+ rc != STATUS_INVALID_DEVICE_REQUEST &&
+ rc != STATUS_NOT_SUPPORTED)
return ntstatus2errcode(rc);
}
@@ -24113,10 +24529,9 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
LARGE_INTEGER SectionSize;
int err, rc = MDBX_SUCCESS;
- if (!(flags & MDBX_RDONLY) && limit == map->limit && size > map->current) {
+ if (!(flags & MDBX_RDONLY) && limit == map->limit && size > map->current &&
+ /* workaround for Wine */ mdbx_NtExtendSection) {
/* growth rw-section */
- if (!mdbx_NtExtendSection)
- return MDBX_UNABLE_EXTEND_MAPSIZE /* workaround for Wine */;
SectionSize.QuadPart = size;
status = mdbx_NtExtendSection(map->section, &SectionSize);
if (!NT_SUCCESS(status))
@@ -24173,6 +24588,7 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
return err;
}
+retry_file_and_section:
/* resizing of the file may take a while,
* therefore we reserve address space to avoid occupy it by other threads */
ReservedAddress = map->address;
@@ -24188,7 +24604,6 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
map->address = NULL;
}
-retry_file_and_section:
err = mdbx_filesize(map->fd, &map->filesize);
if (err != MDBX_SUCCESS)
goto bailout;
@@ -24255,7 +24670,7 @@ retry_mapview:;
* but will return MDBX_UNABLE_EXTEND_MAPSIZE on success */
rc = MDBX_UNABLE_EXTEND_MAPSIZE;
size = map->current;
- limit = map->limit;
+ ReservedSize = limit = map->limit;
goto retry_file_and_section;
}
@@ -24997,10 +25412,10 @@ __dll_export
const struct MDBX_version_info mdbx_version = {
0,
9,
- 1,
- 18,
- {"2020-10-08T01:50:18+03:00", "5cbfdfdc65b62937f7fc927e55cec6304bf04f7c", "1d31ebdc1c930ceb1b223691ac27e507d86cf8e2",
- "v0.9.1-18-g1d31ebdc1c"},
+ 2,
+ 0,
+ {"2020-11-27T10:09:03+03:00", "481b41a5b377ff247625bd2cec60c0ca84f2f2f5", "092ab094c4f8e7c704030568db62bad5d106755f",
+ "v0.9.2-0-g092ab09"},
sourcery};
__dll_export
@@ -25221,6 +25636,15 @@ MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
mdbx_srwlock_ReleaseShared(&env->me_remap_guard);
}
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait) {
+ return flock(fd,
+ wait ? LCK_EXCLUSIVE | LCK_WAITFOR
+ : LCK_EXCLUSIVE | LCK_DONTWAIT,
+ 0, LCK_MAXLEN)
+ ? MDBX_SUCCESS
+ : GetLastError();
+}
+
static int suspend_and_append(mdbx_handle_array_t **array,
const DWORD ThreadId) {
const unsigned limit = (*array)->limit;
@@ -25866,13 +26290,17 @@ static void mdbx_winnt_import(void) {
#ifndef MDBX_ALLOY
uint32_t mdbx_linux_kernel_version;
-bool mdbx_RunningOnWSL;
+bool mdbx_RunningOnWSL1;
#endif /* MDBX_ALLOY */
-static __cold bool probe_for_WSL(const char *tag) {
- /* "Official" way of detecting WSL but not WSL2
- * https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
- return strstr(tag, "Microsoft") || strstr(tag, "WSL");
+static __cold uint8_t probe_for_WSL(const char *tag) {
+ const char *const WSL = strstr(tag, "WSL");
+ if (WSL && WSL[3] >= '2' && WSL[3] <= '9')
+ return WSL[3] - '0';
+ const char *const wsl = strstr(tag, "wsl");
+ if (wsl && wsl[3] >= '2' && wsl[3] <= '9')
+ return wsl[3] - '0';
+ return (WSL || wsl || strcasestr(tag, "Microsoft")) ? 1 : 0;
}
#endif /* Linux */
@@ -25882,9 +26310,16 @@ mdbx_global_constructor(void) {
#if defined(__linux__) || defined(__gnu_linux__)
struct utsname buffer;
if (uname(&buffer) == 0) {
- mdbx_RunningOnWSL = probe_for_WSL(buffer.version) ||
- probe_for_WSL(buffer.sysname) ||
- probe_for_WSL(buffer.release);
+ /* "Official" way of detecting WSL1 but not WSL2
+ * https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364
+ *
+ * WARNING: False negative detection of WSL1 will result in DATA LOSS!
+ * So, the REQUIREMENTS for this code:
+ * 1. MUST detect WSL1 without false-negatives.
+ * 2. DESIRABLE detect WSL2 but without the risk of violating the first. */
+ mdbx_RunningOnWSL1 = probe_for_WSL(buffer.version) == 1 ||
+ probe_for_WSL(buffer.sysname) == 1 ||
+ probe_for_WSL(buffer.release) == 1;
int i = 0;
char *p = buffer.release;
while (*p && i < 4) {
@@ -26030,6 +26465,14 @@ static int lck_op(mdbx_filehandle_t fd, int cmd, int lck, off_t offset,
}
}
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait) {
+#if MDBX_USE_OFDLOCKS
+ if (unlikely(op_setlk == 0))
+ choice_fcntl();
+#endif /* MDBX_USE_OFDLOCKS */
+ return lck_op(fd, wait ? op_setlkw : op_setlk, F_WRLCK, 0, OFF_T_MAX);
+}
+
MDBX_INTERNAL_FUNC int mdbx_rpid_set(MDBX_env *env) {
assert(env->me_lfd != INVALID_HANDLE_VALUE);
assert(env->me_pid > 0);
@@ -26146,10 +26589,10 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
int rc = MDBX_SUCCESS;
#if defined(__linux__) || defined(__gnu_linux__)
- if (unlikely(mdbx_RunningOnWSL)) {
+ if (unlikely(mdbx_RunningOnWSL1)) {
rc = ENOLCK /* No record locks available */;
mdbx_error("%s, err %u",
- "WSL (Windows Subsystem for Linux) is mad and trouble-full, "
+ "WSL1 (Windows Subsystem for Linux) is mad and trouble-full, "
"injecting failure to avoid data loss",
rc);
return rc;
diff --git a/libs/libmdbx/src/mdbx.c++ b/libs/libmdbx/src/mdbx.c++
index ef4cb789fb..e3c3552b39 100644
--- a/libs/libmdbx/src/mdbx.c++
+++ b/libs/libmdbx/src/mdbx.c++
@@ -12,11 +12,16 @@
* <http://www.OpenLDAP.org/license.html>. */
#define MDBX_ALLOY 1
-#define MDBX_BUILD_SOURCERY 3b5677a6062b714f1e138b0066c5590ee3c9ebf3bf8cfa3bb9503515ea0d1f02_v0_9_1_18_g1d31ebdc1c
+#define MDBX_BUILD_SOURCERY 47492323531afee427a3de6ddaeae26eed45bfd1b52d92fd121a5a13a9747dbb_v0_9_2_0_g092ab09
#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif
+#define LIBMDBX_INTERNALS
+#ifdef MDBX_TOOLS
+#define MDBX_DEPRECATED
+#endif /* MDBX_TOOLS */
+
/* *INDENT-OFF* */
/* clang-format off */
@@ -97,11 +102,6 @@
#pragma warning(disable : 4505) /* unreferenced local function has been removed */
#endif /* _MSC_VER (warnings) */
-#if defined(MDBX_TOOLS)
-#undef MDBX_DEPRECATED
-#define MDBX_DEPRECATED
-#endif /* MDBX_TOOLS */
-
#include "mdbx.h++"
/*
* Copyright 2015-2020 Leonid Yuriev <leo@yuriev.ru>
@@ -817,7 +817,7 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
#ifndef __ia32__
-/* LY: define neutral __ia32__ for x86 and x86-64 archs */
+/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \
@@ -993,6 +993,35 @@ typedef union MDBX_srwlock {
#ifdef __cplusplus
extern void mdbx_osal_jitter(bool tiny);
#else
+
+/*----------------------------------------------------------------------------*/
+/* Atomics */
+
+#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
+#include <cstdatomic>
+#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
+ !defined(__STDC_NO_ATOMICS__) && \
+ (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
+ !(defined(__GNUC__) || defined(__clang__)))
+#include <stdatomic.h>
+#elif defined(__GNUC__) || defined(__clang__)
+/* LY: nothing required */
+#elif defined(_MSC_VER)
+#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
+#pragma warning(disable : 4133) /* 'function': incompatible types - from \
+ 'size_t' to 'LONGLONG' */
+#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
+ 'std::size_t', possible loss of data */
+#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
+ 'long', possible loss of data */
+#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#else
+#error FIXME atomic-ops
+#endif
+
/*----------------------------------------------------------------------------*/
/* Memory/Compiler barriers, cache coherence */
@@ -1034,8 +1063,8 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
}
static __maybe_unused __inline void mdbx_memory_barrier(void) {
-#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
- __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+#if __has_extension(c_atomic) && !defined(__STDC_NO_ATOMICS__)
+ atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__ATOMIC_SEQ_CST)
__atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__clang__) || defined(__GNUC__)
@@ -1088,8 +1117,7 @@ MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t mdbx_linux_kernel_version;
-MDBX_INTERNAL_VAR bool
- mdbx_RunningOnWSL /* Windows Subsystem for Linux is mad and trouble-full */;
+MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
#ifndef mdbx_strdup
@@ -1160,7 +1188,8 @@ enum mdbx_openfile_purpose {
MDBX_OPEN_DXB_LAZY = 1,
MDBX_OPEN_DXB_DSYNC = 2,
MDBX_OPEN_LCK = 3,
- MDBX_OPEN_COPY = 4
+ MDBX_OPEN_COPY = 4,
+ MDBX_OPEN_DELETE = 5
};
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
@@ -1169,7 +1198,9 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
+MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait);
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
@@ -1429,32 +1460,6 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* Windows */
-/*----------------------------------------------------------------------------*/
-/* Atomics */
-
-#if !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
- !defined(__STDC_NO_ATOMICS__) && \
- (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
- !(defined(__GNUC__) || defined(__clang__)))
-#include <stdatomic.h>
-#elif defined(__GNUC__) || defined(__clang__)
-/* LY: nothing required */
-#elif defined(_MSC_VER)
-#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
-#pragma warning(disable : 4133) /* 'function': incompatible types - from \
- 'size_t' to 'LONGLONG' */
-#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
- 'std::size_t', possible loss of data */
-#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
- 'long', possible loss of data */
-#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
-#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
-#elif defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-#else
-#error FIXME atomic-ops
-#endif
-
#endif /* !__cplusplus */
/*----------------------------------------------------------------------------*/
@@ -1890,7 +1895,7 @@ typedef struct MDBX_db {
pgno_t md_overflow_pages; /* number of overflow pages */
uint64_t md_seq; /* table sequence counter */
uint64_t md_entries; /* number of data items */
- uint64_t md_mod_txnid; /* txnid of last commited modification */
+ uint64_t md_mod_txnid; /* txnid of last committed modification */
} MDBX_db;
/* database size-related parameters */
@@ -1974,7 +1979,7 @@ typedef struct MDBX_meta {
typedef struct MDBX_page {
union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
- uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
+ uint64_t mp_txnid; /* txnid that committed this page */
};
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */
@@ -2222,7 +2227,7 @@ typedef struct MDBX_lockinfo {
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
-#define MAX_MAPSIZE32 UINT32_C(0x7ff80000)
+#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
#define MAX_MAPSIZE64 (MAX_PAGENO * (uint64_t)MAX_PAGESIZE)
@@ -2381,8 +2386,6 @@ struct MDBX_txn {
MDBX_db *mt_dbs;
/* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs;
- /* In write txns, array of cursors for each DB */
- MDBX_cursor **mt_cursors;
/* Transaction DBI Flags */
#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
@@ -2409,6 +2412,8 @@ struct MDBX_txn {
MDBX_reader *reader;
} to;
struct {
+ /* In write txns, array of cursors for each DB */
+ MDBX_cursor **cursors;
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
txnid_t last_reclaimed; /* ID of last used record */
pgno_t loose_refund_wl /* FIXME: describe */;
@@ -2546,7 +2551,7 @@ struct MDBX_env {
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
- unsigned me_psize; /* DB page size, inited from me_os_psize */
+ unsigned me_psize; /* DB page size, initialized from me_os_psize */
uint8_t me_psize2log; /* log2 of DB page size */
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
@@ -2556,7 +2561,7 @@ struct MDBX_env {
MDBX_dbi me_maxdbs; /* size of the DB table */
uint32_t me_pid; /* process ID of this env */
mdbx_thread_key_t me_txkey; /* thread-key for readers */
- char *me_path; /* path to the DB files */
+ char *me_pathname; /* path to the DB files */
void *me_pbuf; /* scratch area for DUPSORT put() */
MDBX_txn *me_txn; /* current write transaction */
MDBX_txn *me_txn0; /* prealloc'd write transaction */
@@ -2832,7 +2837,7 @@ static __maybe_unused __inline void mdbx_jitter4testing(bool tiny) {
((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
/* Internal error codes, not exposed outside libmdbx */
-#define MDBX_NO_ROOT (MDBX_LAST_LMDB_ERRCODE + 10)
+#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc) \
@@ -3441,15 +3446,15 @@ __cold void error::throw_exception() const {
bool slice::is_printable(bool disable_utf8) const noexcept {
enum : byte {
- LS = 5, // shift for UTF8 sequence length
- P_ = 1 << (LS - 1), // printable ASCII flag
+ LS = 4, // shift for UTF8 sequence length
+ P_ = 1 << LS, // printable ASCII flag
N_ = 0, // non-printable ASCII
second_range_mask = P_ - 1, // mask for range flag
- r80_BF = P_ | 0, // flag for UTF8 2nd byte range
- rA0_BF = P_ | 1, // flag for UTF8 2nd byte range
- r80_9F = P_ | 2, // flag for UTF8 2nd byte range
- r90_BF = P_ | 3, // flag for UTF8 2nd byte range
- r80_8F = P_ | 4, // flag for UTF8 2nd byte range
+ r80_BF = 0, // flag for UTF8 2nd byte range
+ rA0_BF = 1, // flag for UTF8 2nd byte range
+ r80_9F = 2, // flag for UTF8 2nd byte range
+ r90_BF = 3, // flag for UTF8 2nd byte range
+ r80_8F = 4, // flag for UTF8 2nd byte range
// valid utf-8 byte sequences
// http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
@@ -4117,9 +4122,35 @@ bool env::is_pristine() const {
bool env::is_empty() const { return get_stat().ms_branch_pages == 0; }
-env &env::copy(const path &destination, bool compactify,
+#ifdef MDBX_STD_FILESYSTEM_PATH
+env &env::copy(const ::std::filesystem::path &destination, bool compactify,
+ bool force_dynamic_size) {
+ const path_to_pchar<::std::filesystem::path> utf8(destination);
+ error::success_or_throw(
+ ::mdbx_env_copy(handle_, utf8,
+ (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) |
+ (force_dynamic_size ? MDBX_CP_FORCE_DYNAMIC_SIZE
+ : MDBX_CP_DEFAULTS)));
+ return *this;
+}
+#endif /* MDBX_STD_FILESYSTEM_PATH */
+
+#if defined(_WIN32) || defined(_WIN64)
+env &env::copy(const ::std::wstring &destination, bool compactify,
+ bool force_dynamic_size) {
+ const path_to_pchar<::std::wstring> utf8(destination);
+ error::success_or_throw(
+ ::mdbx_env_copy(handle_, utf8,
+ (compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) |
+ (force_dynamic_size ? MDBX_CP_FORCE_DYNAMIC_SIZE
+ : MDBX_CP_DEFAULTS)));
+ return *this;
+}
+#endif /* Windows */
+
+env &env::copy(const ::std::string &destination, bool compactify,
bool force_dynamic_size) {
- const path_to_pchar<path> utf8(destination);
+ const path_to_pchar<::std::string> utf8(destination);
error::success_or_throw(
::mdbx_env_copy(handle_, utf8,
(compactify ? MDBX_CP_COMPACT : MDBX_CP_DEFAULTS) |
@@ -4143,6 +4174,29 @@ path env::get_path() const {
return pchar_to_path<path>(c_str);
}
+#ifdef MDBX_STD_FILESYSTEM_PATH
+bool env::remove(const ::std::filesystem::path &pathname,
+ const remove_mode mode) {
+ const path_to_pchar<::std::filesystem::path> utf8(pathname);
+ return error::boolean_or_throw(
+ ::mdbx_env_delete(utf8, MDBX_env_delete_mode_t(mode)));
+}
+#endif /* MDBX_STD_FILESYSTEM_PATH */
+
+#if defined(_WIN32) || defined(_WIN64)
+bool env::remove(const ::std::wstring &pathname, const remove_mode mode) {
+ const path_to_pchar<::std::wstring> utf8(pathname);
+ return error::boolean_or_throw(
+ ::mdbx_env_delete(utf8, MDBX_env_delete_mode_t(mode)));
+}
+#endif /* Windows */
+
+bool env::remove(const ::std::string &pathname, const remove_mode mode) {
+ const path_to_pchar<::std::string> utf8(pathname);
+ return error::boolean_or_throw(
+ ::mdbx_env_delete(utf8, MDBX_env_delete_mode_t(mode)));
+}
+
//------------------------------------------------------------------------------
static inline MDBX_env *create_env() {
@@ -4179,11 +4233,43 @@ __cold void env_managed::setup(unsigned max_maps, unsigned max_readers) {
error::success_or_throw(::mdbx_env_set_maxdbs(handle_, max_maps));
}
-__cold env_managed::env_managed(const path &pathname,
+#ifdef MDBX_STD_FILESYSTEM_PATH
+__cold env_managed::env_managed(const ::std::filesystem::path &pathname,
+ const operate_parameters &op, bool accede)
+ : env_managed(create_env()) {
+ setup(op.max_maps, op.max_readers);
+ const path_to_pchar<::std::filesystem::path> utf8(pathname);
+ error::success_or_throw(
+ ::mdbx_env_open(handle_, utf8, op.make_flags(accede), 0));
+
+ if (op.options.nested_write_transactions &&
+ !get_options().nested_write_transactions)
+ error::throw_exception(MDBX_INCOMPATIBLE);
+}
+
+__cold env_managed::env_managed(const ::std::filesystem::path &pathname,
+ const env_managed::create_parameters &cp,
+ const env::operate_parameters &op, bool accede)
+ : env_managed(create_env()) {
+ setup(op.max_maps, op.max_readers);
+ const path_to_pchar<::std::filesystem::path> utf8(pathname);
+ set_geometry(cp.geometry);
+ error::success_or_throw(
+ ::mdbx_env_open(handle_, utf8, op.make_flags(accede, cp.use_subdirectory),
+ cp.file_mode_bits));
+
+ if (op.options.nested_write_transactions &&
+ !get_options().nested_write_transactions)
+ error::throw_exception(MDBX_INCOMPATIBLE);
+}
+#endif /* MDBX_STD_FILESYSTEM_PATH */
+
+#if defined(_WIN32) || defined(_WIN64)
+__cold env_managed::env_managed(const ::std::wstring &pathname,
const operate_parameters &op, bool accede)
: env_managed(create_env()) {
setup(op.max_maps, op.max_readers);
- const path_to_pchar<path> utf8(pathname);
+ const path_to_pchar<::std::wstring> utf8(pathname);
error::success_or_throw(
::mdbx_env_open(handle_, utf8, op.make_flags(accede), 0));
@@ -4192,12 +4278,42 @@ __cold env_managed::env_managed(const path &pathname,
error::throw_exception(MDBX_INCOMPATIBLE);
}
-__cold env_managed::env_managed(const path &pathname,
+__cold env_managed::env_managed(const ::std::wstring &pathname,
const env_managed::create_parameters &cp,
const env::operate_parameters &op, bool accede)
: env_managed(create_env()) {
setup(op.max_maps, op.max_readers);
- const path_to_pchar<path> utf8(pathname);
+ const path_to_pchar<::std::wstring> utf8(pathname);
+ set_geometry(cp.geometry);
+ error::success_or_throw(
+ ::mdbx_env_open(handle_, utf8, op.make_flags(accede, cp.use_subdirectory),
+ cp.file_mode_bits));
+
+ if (op.options.nested_write_transactions &&
+ !get_options().nested_write_transactions)
+ error::throw_exception(MDBX_INCOMPATIBLE);
+}
+#endif /* Windows */
+
+__cold env_managed::env_managed(const ::std::string &pathname,
+ const operate_parameters &op, bool accede)
+ : env_managed(create_env()) {
+ setup(op.max_maps, op.max_readers);
+ const path_to_pchar<::std::string> utf8(pathname);
+ error::success_or_throw(
+ ::mdbx_env_open(handle_, utf8, op.make_flags(accede), 0));
+
+ if (op.options.nested_write_transactions &&
+ !get_options().nested_write_transactions)
+ error::throw_exception(MDBX_INCOMPATIBLE);
+}
+
+__cold env_managed::env_managed(const ::std::string &pathname,
+ const env_managed::create_parameters &cp,
+ const env::operate_parameters &op, bool accede)
+ : env_managed(create_env()) {
+ setup(op.max_maps, op.max_readers);
+ const path_to_pchar<::std::string> utf8(pathname);
set_geometry(cp.geometry);
error::success_or_throw(
::mdbx_env_open(handle_, utf8, op.make_flags(accede, cp.use_subdirectory),
@@ -4300,9 +4416,13 @@ __cold ::std::ostream &operator<<(::std::ostream &out, const slice &it) {
out << "EMPTY->" << it.data();
else {
const slice root(it.head(std::min(it.length(), size_t(64))));
- out << it.length() << "->"
- << (root.is_printable() ? root.string() : root.base58_encode())
- << ((root == it) ? "" : "...");
+ out << it.length() << ".";
+ if (root.is_printable())
+ (out << "\"").write(root.char_ptr(), root.length()) << "\"";
+ else
+ out << root.base58_encode();
+ if (root.length() < it.length())
+ out << "...";
}
return out << "}";
}
@@ -4311,6 +4431,11 @@ __cold ::std::ostream &operator<<(::std::ostream &out, const pair &it) {
return out << "{" << it.key << " => " << it.value << "}";
}
+__cold ::std::ostream &operator<<(::std::ostream &out, const pair_result &it) {
+ return out << "{" << (it.done ? "done: " : "non-done: ") << it.key << " => "
+ << it.value << "}";
+}
+
__cold ::std::ostream &operator<<(::std::ostream &out,
const ::mdbx::env::geometry::size &it) {
switch (it.bytes) {
diff --git a/libs/libmdbx/src/mdbx.h b/libs/libmdbx/src/mdbx.h
index 4cf29e4502..966cf2e701 100644
--- a/libs/libmdbx/src/mdbx.h
+++ b/libs/libmdbx/src/mdbx.h
@@ -350,6 +350,17 @@ typedef mode_t mdbx_mode_t;
#endif
#endif /* __dll_import */
+/** \brief Auxiliary macro for robustly define the both inline version of API
+ * function and non-inline fallback dll-exported version for applications linked
+ * with old version of libmdbx, with a strictly ODR-common implementation. */
+#if !defined(LIBMDBX_INTERNALS)
+#define LIBMDBX_INLINE_API(TYPE, NAME, ARGS) static __inline TYPE NAME ARGS
+#else
+#define LIBMDBX_INLINE_API(TYPE, NAME, ARGS) \
+ /* proto of exported which uses common impl */ LIBMDBX_API TYPE NAME ARGS; \
+ /* definition of common impl */ static __inline TYPE __inline_##NAME ARGS
+#endif /* LIBMDBX_INLINE_API */
+
/*----------------------------------------------------------------------------*/
#ifndef __cplusplus
@@ -440,8 +451,27 @@ typedef mode_t mdbx_mode_t;
#endif
#endif /* MDBX_PRINTF_ARGS */
+/* Oh, below are some songs and dances since:
+ * - C++ requires explicit definition of the necessary operators.
+ * - the proper implementation of DEFINE_ENUM_FLAG_OPERATORS for C++ required
+ * the constexpr feature which is broken in most old compilers;
+ * - DEFINE_ENUM_FLAG_OPERATORS may be defined broken as in the Windows SDK. */
#ifndef DEFINE_ENUM_FLAG_OPERATORS
-#if defined(__cplusplus)
+
+#ifdef __cplusplus
+#if !defined(__cpp_constexpr) || __cpp_constexpr < 200704L || \
+ (defined(__LCC__) && __LCC__ < 124) || \
+ (defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ < 407) && \
+ !defined(__clang__) && !defined(__LCC__)) || \
+ (defined(_MSC_VER) && _MSC_VER < 1910) || \
+ (defined(__clang__) && __clang_major__ < 4)
+/* The constexpr feature is not available or (may be) broken */
+#define CONSTEXPR_ENUM_FLAGS_OPERATIONS 0
+#else
+/* C always allows these operators for enums */
+#define CONSTEXPR_ENUM_FLAGS_OPERATIONS 1
+#endif /* __cpp_constexpr */
+
/// Define operator overloads to enable bit operations on enum values that are
/// used to define flags (based on Microsoft's DEFINE_ENUM_FLAG_OPERATORS).
#define DEFINE_ENUM_FLAG_OPERATORS(ENUM) \
@@ -453,19 +483,41 @@ typedef mode_t mdbx_mode_t;
MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, ENUM b) { \
return ENUM(std::size_t(a) & std::size_t(b)); \
} \
+ MDBX_CXX01_CONSTEXPR ENUM operator&(ENUM a, size_t b) { \
+ return ENUM(std::size_t(a) & b); \
+ } \
+ MDBX_CXX01_CONSTEXPR ENUM operator&(size_t a, ENUM b) { \
+ return ENUM(a & std::size_t(b)); \
+ } \
MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, ENUM b) { return a = a & b; } \
- MDBX_CXX01_CONSTEXPR ENUM operator~(ENUM a) { \
- return ENUM(~std::size_t(a)); \
+ MDBX_CXX14_CONSTEXPR ENUM &operator&=(ENUM &a, size_t b) { \
+ return a = a & b; \
+ } \
+ MDBX_CXX01_CONSTEXPR std::size_t operator~(ENUM a) { \
+ return ~std::size_t(a); \
} \
MDBX_CXX01_CONSTEXPR ENUM operator^(ENUM a, ENUM b) { \
return ENUM(std::size_t(a) ^ std::size_t(b)); \
} \
MDBX_CXX14_CONSTEXPR ENUM &operator^=(ENUM &a, ENUM b) { return a = a ^ b; } \
}
-#else /* __cplusplus */
-#define DEFINE_ENUM_FLAG_OPERATORS(ENUM) /* nope, C allows these operators */
-#endif /* !__cplusplus */
-#endif /* DEFINE_ENUM_FLAG_OPERATORS */
+#else /* __cplusplus */
+/* nope for C since it always allows these operators for enums */
+#define DEFINE_ENUM_FLAG_OPERATORS(ENUM)
+#define CONSTEXPR_ENUM_FLAGS_OPERATIONS 1
+#endif /* !__cplusplus */
+
+#elif !defined(CONSTEXPR_ENUM_FLAGS_OPERATIONS)
+
+#ifdef __cplusplus
+/* DEFINE_ENUM_FLAG_OPERATORS may be defined broken as in the Windows SDK */
+#define CONSTEXPR_ENUM_FLAGS_OPERATIONS 0
+#else
+/* C always allows these operators for enums */
+#define CONSTEXPR_ENUM_FLAGS_OPERATIONS 1
+#endif
+
+#endif /* DEFINE_ENUM_FLAG_OPERATORS */
/** @} end of Common Macros */
@@ -1120,7 +1172,7 @@ enum MDBX_env_flags_t {
/** Don't sync anything but keep previous steady commits.
*
- * Like \ref MDBX_UTTERLY_NOSYNC the `MDBX_SAFE_NOSYNC` flag similarly disable
+ * Like \ref MDBX_UTTERLY_NOSYNC the `MDBX_SAFE_NOSYNC` flag disable similarly
* flush system buffers to disk when committing a transaction. But there is a
* huge difference in how are recycled the MVCC snapshots corresponding to
* previous "steady" transactions (see below).
@@ -1202,7 +1254,7 @@ enum MDBX_env_flags_t {
* - a system crash immediately after commit the write transaction
* high likely lead to database corruption.
* - successful completion of mdbx_env_sync(force = true) after one or
- * more commited transactions guarantees consistency and durability.
+ * more committed transactions guarantees consistency and durability.
* - BUT by committing two or more transactions you back database into
* a weak state, in which a system crash may lead to database corruption!
* In case single transaction after mdbx_env_sync, you may lose transaction
@@ -1251,10 +1303,10 @@ enum MDBX_txn_flags_t {
* will be ready for use with \ref mdbx_txn_renew(). This flag allows to
* preallocate memory and assign a reader slot, thus avoiding these operations
* at the next start of the transaction. */
-#if defined(__cplusplus) && !defined(__cpp_constexpr) && !defined(DOXYGEN)
- MDBX_TXN_RDONLY_PREPARE = uint32_t(MDBX_RDONLY) | uint32_t(MDBX_NOMEMINIT),
+#if CONSTEXPR_ENUM_FLAGS_OPERATIONS || defined(DOXYGEN)
+ MDBX_TXN_RDONLY_PREPARE = MDBX_RDONLY | MDBX_NOMEMINIT,
#else
- MDBX_TXN_RDONLY_PREPARE = MDBX_RDONLY + MDBX_NOMEMINIT,
+ MDBX_TXN_RDONLY_PREPARE = uint32_t(MDBX_RDONLY) | uint32_t(MDBX_NOMEMINIT),
#endif
/** Do not block when starting a write transaction. */
@@ -1295,9 +1347,9 @@ enum MDBX_db_flags_t {
/** With \ref MDBX_DUPSORT; sorted dup items have fixed size */
MDBX_DUPFIXED = UINT32_C(0x10),
- /** With \ref MDBX_DUPSORT; dups are \ref MDBX_INTEGERKEY -style integers. The
- * data values must all be of the same size and must be aligned while passing
- * as arguments. */
+ /** With \ref MDBX_DUPSORT and with \ref MDBX_DUPFIXED; dups are fixed size
+ * \ref MDBX_INTEGERKEY -style integers. The data values must all be of the
+ * same size and must be aligned while passing as arguments. */
MDBX_INTEGERDUP = UINT32_C(0x20),
/** With \ref MDBX_DUPSORT; use reverse string comparison */
@@ -1462,7 +1514,20 @@ enum MDBX_cursor_op {
/** \ref MDBX_DUPFIXED -only: Position at previous page and return up to
* a page of duplicate data items. */
- MDBX_PREV_MULTIPLE
+ MDBX_PREV_MULTIPLE,
+
+ /** Position at first key-value pair greater than or equal to specified,
+ * return both key and data, and the return code depends on a exact match.
+ *
+ * For non DUPSORT-ed collections this work the same to \ref MDBX_SET_RANGE,
+ * but returns \ref MDBX_SUCCESS if key found exactly and
+ * \ref MDBX_RESULT_TRUE if greater key was found.
+ *
+ * For DUPSORT-ed a data value is taken into account for duplicates,
+ * i.e. for a pairs/tuples of a key and an each data value of duplicates.
+ * Returns \ref MDBX_SUCCESS if key-value pair found exactly and
+ * \ref MDBX_RESULT_TRUE if the next pair was returned. */
+ MDBX_SET_LOWERBOUND
};
#ifndef __cplusplus
/** \ingroup c_cursors */
@@ -1737,8 +1802,9 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* \param [in] env An environment handle returned
* by \ref mdbx_env_create()
*
- * \param [in] pathname The directory in which the database files reside.
- * This directory must already exist and be writable.
+ * \param [in] pathname The pathname for the database or the directory in which
+ * the database files reside. In the case of directory it
+ * must already exist and be writable.
*
* \param [in] flags Special options for this environment. This parameter
* must be set to 0 or by bitwise OR'ing together one
@@ -1800,6 +1866,49 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *pathname,
MDBX_env_flags_t flags, mdbx_mode_t mode);
+/** \brief Deletion modes for \ref mdbx_env_delete().
+ * \ingroup c_extra
+ * \see mdbx_env_delete() */
+enum MDBX_env_delete_mode_t {
+ /** \brief Just delete the environment's files and directory if any.
+ * \note On POSIX systems, processes already working with the database will
+ * continue to work without interference until it close the environment.
+ * \note On Windows, the behavior of `MDB_ENV_JUST_DELETE` is different
+ * because the system does not support deleting files that are currently
+ * memory mapped. */
+ MDBX_ENV_JUST_DELETE = 0,
+ /** \brief Make sure that the environment is not being used by other
+ * processes, or return an error otherwise. */
+ MDBX_ENV_ENSURE_UNUSED = 1,
+ /** \brief Wait until other processes closes the environment before deletion.
+ */
+ MDBX_ENV_WAIT_FOR_UNUSED = 2,
+};
+#ifndef __cplusplus
+/** \ingroup c_extra */
+typedef enum MDBX_env_delete_mode_t MDBX_env_delete_mode_t;
+#endif
+
+/** \brief Delete the environment's files in a proper and multiprocess-safe way.
+ * \ingroup c_extra
+ *
+ * \param [in] pathname The pathname for the database or the directory in which
+ * the database files reside.
+ *
+ * \param [in] mode Special deletion mode for the environment. This
+ * parameter must be set to one of the values described
+ * above in the \ref MDBX_env_delete_mode_t section.
+ *
+ * \note The \ref MDBX_ENV_JUST_DELETE don't supported on Windows since system
+ * unable to delete a memory-mapped files.
+ *
+ * \returns A non-zero error value on failure and 0 on success,
+ * some possible errors are:
+ * \retval MDBX_RESULT_TRUE No corresponding files or directories were found,
+ * so no deletion was performed. */
+LIBMDBX_API int mdbx_env_delete(const char *pathname,
+ MDBX_env_delete_mode_t mode);
+
/** \brief Copy an MDBX environment to the specified path, with options.
* \ingroup c_extra
*
@@ -1867,7 +1976,7 @@ struct MDBX_stat {
uint64_t ms_leaf_pages; /**< Number of leaf pages */
uint64_t ms_overflow_pages; /**< Number of overflow pages */
uint64_t ms_entries; /**< Number of data items */
- uint64_t ms_mod_txnid; /**< Transaction ID of commited last modification */
+ uint64_t ms_mod_txnid; /**< Transaction ID of committed last modification */
};
#ifndef __cplusplus
/** \ingroup c_statinfo */
@@ -1895,11 +2004,15 @@ typedef struct MDBX_stat MDBX_stat;
* \returns A non-zero error value on failure and 0 on success. */
LIBMDBX_API int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn,
MDBX_stat *stat, size_t bytes);
+
/** \brief Return statistics about the MDBX environment.
* \ingroup c_statinfo
* \deprecated Please use mdbx_env_stat_ex() instead. */
-MDBX_DEPRECATED LIBMDBX_API int mdbx_env_stat(MDBX_env *env, MDBX_stat *stat,
- size_t bytes);
+MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_stat,
+ (const MDBX_env *env, MDBX_stat *stat,
+ size_t bytes)) {
+ return mdbx_env_stat_ex(env, NULL, stat, bytes);
+}
/** \brief Information about the environment
* \ingroup c_statinfo
@@ -1985,8 +2098,11 @@ LIBMDBX_API int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn,
/** \brief Return information about the MDBX environment.
* \ingroup c_statinfo
* \deprecated Please use mdbx_env_info_ex() instead. */
-MDBX_DEPRECATED LIBMDBX_API int mdbx_env_info(MDBX_env *env, MDBX_envinfo *info,
- size_t bytes);
+MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_info,
+ (const MDBX_env *env, MDBX_envinfo *info,
+ size_t bytes)) {
+ return mdbx_env_info_ex(env, NULL, info, bytes);
+}
/** \brief Flush the environment data buffers to disk.
* \ingroup c_extra
@@ -2028,12 +2144,16 @@ LIBMDBX_API int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock);
/** \brief The shortcut to calling \ref mdbx_env_sync_ex() with
* the `force=true` and `nonblock=false` arguments.
* \ingroup c_extra */
-LIBMDBX_API int mdbx_env_sync(MDBX_env *env);
+LIBMDBX_INLINE_API(int, mdbx_env_sync, (MDBX_env * env)) {
+ return mdbx_env_sync_ex(env, true, false);
+}
/** \brief The shortcut to calling \ref mdbx_env_sync_ex() with
* the `force=false` and `nonblock=true` arguments.
* \ingroup c_extra */
-LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env);
+LIBMDBX_INLINE_API(int, mdbx_env_sync_poll, (MDBX_env * env)) {
+ return mdbx_env_sync_ex(env, false, true);
+}
/** \brief Sets threshold to force flush the data buffers to disk, even any of
* \ref MDBX_SAFE_NOSYNC flag in the environment.
@@ -2135,7 +2255,9 @@ LIBMDBX_API int mdbx_env_close_ex(MDBX_env *env, bool dont_sync);
/** \brief The shortcut to calling \ref mdbx_env_close_ex() with
* the `dont_sync=false` argument.
* \ingroup c_opening */
-LIBMDBX_API int mdbx_env_close(MDBX_env *env);
+LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) {
+ return mdbx_env_close_ex(env, false);
+}
/** \brief Set environment flags.
* \ingroup c_settings
@@ -2356,9 +2478,11 @@ LIBMDBX_API int mdbx_env_get_fd(const MDBX_env *env, mdbx_filehandle_t *fd);
* means "keep current or use default".
*
* \param [in] shrink_threshold The shrink threshold in bytes, must be greater
- * than zero to allow the database to shrink.
+ * than zero to allow the database to shrink and
+ * greater than growth_step to avoid shrinking
+ * right after grow.
* Negative value means "keep current
- * or use default".
+ * or use default". Default is 2*growth_step.
*
* \param [in] pagesize The database page size for new database
* creation or -1 otherwise. Must be power of 2
@@ -2389,8 +2513,10 @@ LIBMDBX_API int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower,
/** \deprecated Please use \ref mdbx_env_set_geometry() instead.
* \ingroup c_settings */
-MDBX_DEPRECATED LIBMDBX_API int mdbx_env_set_mapsize(MDBX_env *env,
- size_t size);
+MDBX_DEPRECATED LIBMDBX_INLINE_API(int, mdbx_env_set_mapsize,
+ (MDBX_env * env, size_t size)) {
+ return mdbx_env_set_geometry(env, size, size, size, -1, -1, -1);
+}
/** \brief Find out whether to use readahead or not, based on the given database
* size and the amount of available memory. \ingroup c_extra
@@ -2411,13 +2537,15 @@ LIBMDBX_API int mdbx_is_readahead_reasonable(size_t volume,
/** \brief Returns the minimal database page size in bytes.
* \ingroup c_statinfo */
-MDBX_NOTHROW_CONST_FUNCTION __inline intptr_t mdbx_limits_pgsize_min(void) {
+MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(intptr_t, mdbx_limits_pgsize_min,
+ (void)) {
return MDBX_MIN_PAGESIZE;
}
/** \brief Returns the maximal database page size in bytes.
* \ingroup c_statinfo */
-MDBX_NOTHROW_CONST_FUNCTION __inline intptr_t mdbx_limits_pgsize_max(void) {
+MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(intptr_t, mdbx_limits_pgsize_max,
+ (void)) {
return MDBX_MAX_PAGESIZE;
}
@@ -2574,12 +2702,13 @@ LIBMDBX_API int mdbx_env_set_userctx(MDBX_env *env, void *ctx);
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API void *
mdbx_env_get_userctx(const MDBX_env *env);
-/** \brief Create a transaction for use with the environment.
+/** \brief Create a transaction with a user provided context pointer
+ * for use with the environment.
* \ingroup c_transactions
*
* The transaction handle may be discarded using \ref mdbx_txn_abort()
* or \ref mdbx_txn_commit().
- * \see mdbx_txn_begin_ex()
+ * \see mdbx_txn_begin()
*
* \note A transaction and its cursors must only be used by a single thread,
* and a thread may only have a single transaction at a time. If \ref MDBX_NOTLS
@@ -2587,7 +2716,8 @@ mdbx_env_get_userctx(const MDBX_env *env);
*
* \note Cursors may not span transactions.
*
- * \param [in] env An environment handle returned by \ref mdbx_env_create()
+ * \param [in] env An environment handle returned by \ref mdbx_env_create().
+ *
* \param [in] parent If this parameter is non-NULL, the new transaction will
* be a nested transaction, with the transaction indicated
* by parent as its parent. Transactions may be nested
@@ -2595,6 +2725,7 @@ mdbx_env_get_userctx(const MDBX_env *env);
* not issue any other operations than mdbx_txn_commit and
* \ref mdbx_txn_abort() while it has active child
* transactions.
+ *
* \param [in] flags Special options for this transaction. This parameter
* must be set to 0 or by bitwise OR'ing together one
* or more of the values described here:
@@ -2611,6 +2742,10 @@ mdbx_env_get_userctx(const MDBX_env *env);
*
* \param [out] txn Address where the new MDBX_txn handle will be stored.
*
+ * \param [in] context A pointer to application context to be associated with
+ * created transaction and could be retrieved by
+ * \ref mdbx_txn_get_userctx() until transaction finished.
+ *
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_PANIC A fatal error occurred earlier and the
@@ -2625,16 +2760,16 @@ mdbx_env_get_userctx(const MDBX_env *env);
* \retval MDBX_ENOMEM Out of memory.
* \retval MDBX_BUSY The write transaction is already started by the
* current thread. */
-LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent,
- MDBX_txn_flags_t flags, MDBX_txn **txn);
+LIBMDBX_API int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent,
+ MDBX_txn_flags_t flags, MDBX_txn **txn,
+ void *context);
-/** \brief Create a transaction with a user provided context pointer
- * for use with the environment.
+/** \brief Create a transaction for use with the environment.
* \ingroup c_transactions
*
* The transaction handle may be discarded using \ref mdbx_txn_abort()
* or \ref mdbx_txn_commit().
- * \see mdbx_txn_begin()
+ * \see mdbx_txn_begin_ex()
*
* \note A transaction and its cursors must only be used by a single thread,
* and a thread may only have a single transaction at a time. If \ref MDBX_NOTLS
@@ -2668,10 +2803,6 @@ LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent,
*
* \param [out] txn Address where the new MDBX_txn handle will be stored.
*
- * \param [in] context A pointer to application context to be associated with
- * created transaction and could be retrieved by
- * \ref mdbx_txn_get_userctx() until transaction finished.
- *
* \returns A non-zero error value on failure and 0 on success,
* some possible errors are:
* \retval MDBX_PANIC A fatal error occurred earlier and the
@@ -2686,9 +2817,11 @@ LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent,
* \retval MDBX_ENOMEM Out of memory.
* \retval MDBX_BUSY The write transaction is already started by the
* current thread. */
-LIBMDBX_API int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent,
- MDBX_txn_flags_t flags, MDBX_txn **txn,
- void *context);
+LIBMDBX_INLINE_API(int, mdbx_txn_begin,
+ (MDBX_env * env, MDBX_txn *parent, MDBX_txn_flags_t flags,
+ MDBX_txn **txn)) {
+ return mdbx_txn_begin_ex(env, parent, flags, txn, NULL);
+}
/** \brief Set application information associated with the \ref MDBX_txn.
* \ingroup c_transactions
@@ -2722,9 +2855,9 @@ struct MDBX_txn_info {
uint64_t txn_id;
/** For READ-ONLY transaction: the lag from a recent MVCC-snapshot, i.e. the
- number of committed transaction since read transaction started. For WRITE
- transaction (provided if `scan_rlt=true`): the lag of the oldest reader
- from current transaction (i.e. at least 1 if any reader running). */
+ number of committed transaction since read transaction started.
+ For WRITE transaction (provided if `scan_rlt=true`): the lag of the oldest
+ reader from current transaction (i.e. at least 1 if any reader running). */
uint64_t txn_reader_lag;
/** Used space by this transaction, i.e. corresponding to the last used
@@ -2748,7 +2881,8 @@ struct MDBX_txn_info {
/** For READ-ONLY transaction: the space available for writer(s) and that
must be exhausted for reason to call the Handle-Slow-Readers callback for
- this read transaction. For WRITE transaction: the space inside transaction
+ this read transaction.
+ For WRITE transaction: the space inside transaction
that left to `MDBX_TXN_FULL` error. */
uint64_t txn_space_leftover;
@@ -2812,6 +2946,38 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_txn_flags(const MDBX_txn *txn);
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint64_t
mdbx_txn_id(const MDBX_txn *txn);
+/** \brief Latency of commit stages in 1/65536 of seconds units.
+ * \warning This structure may be changed in future releases.
+ * \see mdbx_txn_commit_ex() */
+struct MDBX_commit_latency {
+ /** \brief Duration of preparation (commit child transactions, update
+ * sub-databases records and cursors destroying). */
+ uint32_t preparation;
+ /** \brief Duration of GC/freeDB handling & updation. */
+ uint32_t gc;
+ /** \brief Duration of internal audit if enabled. */
+ uint32_t audit;
+ /** \brief Duration of writing dirty/modified data pages. */
+ uint32_t write;
+ /** \brief Duration of syncing written data to the dist/storage. */
+ uint32_t sync;
+ /** \brief Duration of transaction ending (releasing resources). */
+ uint32_t ending;
+ /** \brief The total duration of a commit. */
+ uint32_t whole;
+};
+#ifndef __cplusplus
+/** \ingroup c_statinfo */
+typedef struct MDBX_commit_latency MDBX_commit_latency;
+#endif
+
+/** \brief Commit all the operations of a transaction into the database and
+ * collect latency information.
+ * \see mdbx_txn_commit()
+ * \ingroup c_statinfo
+ * \warning This function may be changed in future releases. */
+LIBMDBX_API int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency);
+
/** \brief Commit all the operations of a transaction into the database.
* \ingroup c_transactions
*
@@ -2849,7 +3015,9 @@ mdbx_txn_id(const MDBX_txn *txn);
* \retval MDBX_ENOSPC No more disk space.
* \retval MDBX_EIO A system-level I/O error occurred.
* \retval MDBX_ENOMEM Out of memory. */
-LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn);
+LIBMDBX_INLINE_API(int, mdbx_txn_commit, (MDBX_txn * txn)) {
+ return mdbx_txn_commit_ex(txn, NULL);
+}
/** \brief Abandon all the operations of the transaction instead of saving them.
* \ingroup c_transactions
@@ -3149,13 +3317,13 @@ mdbx_key_from_float(const float ieee754_32bit);
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint32_t
mdbx_key_from_ptrfloat(const float *const ieee754_32bit);
-MDBX_NOTHROW_CONST_FUNCTION __inline uint64_t
-mdbx_key_from_int64(const int64_t i64) {
+MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(uint64_t, mdbx_key_from_int64,
+ (const int64_t i64)) {
return UINT64_C(0x8000000000000000) + i64;
}
-MDBX_NOTHROW_CONST_FUNCTION __inline uint32_t
-mdbx_key_from_int32(const int32_t i32) {
+MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_INLINE_API(uint32_t, mdbx_key_from_int32,
+ (const int32_t i32)) {
return UINT32_C(0x80000000) + i32;
}
/** @} */
@@ -3247,7 +3415,11 @@ LIBMDBX_API int mdbx_dbi_flags_ex(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags,
unsigned *state);
/** \brief The shortcut to calling \ref mdbx_dbi_flags_ex() with `state=NULL`
* for discarding it result. \ingroup c_statinfo */
-LIBMDBX_API int mdbx_dbi_flags(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags);
+LIBMDBX_INLINE_API(int, mdbx_dbi_flags,
+ (MDBX_txn * txn, MDBX_dbi dbi, unsigned *flags)) {
+ unsigned state;
+ return mdbx_dbi_flags_ex(txn, dbi, flags, &state);
+}
/** \brief Close a database handle. Normally unnecessary.
* \ingroup c_dbi
@@ -3711,6 +3883,18 @@ mdbx_cursor_txn(const MDBX_cursor *cursor);
* \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). */
LIBMDBX_API MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *cursor);
+/** \brief Copy cursor position and state.
+ * \ingroup c_cursors
+ *
+ * \param [in] src A source cursor handle returned
+ * by \ref mdbx_cursor_create() or \ref mdbx_cursor_open().
+ *
+ * \param [in,out] dest A destination cursor handle returned
+ * by \ref mdbx_cursor_create() or \ref mdbx_cursor_open().
+ *
+ * \returns A non-zero error value on failure and 0 on success. */
+LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest);
+
/** \brief Retrieve by cursor.
* \ingroup c_crud
*
@@ -4251,7 +4435,7 @@ LIBMDBX_API int mdbx_thread_unregister(const MDBX_env *env);
* \param [in] pid A pid of the reader process.
* \param [in] tid A thread_id of the reader thread.
* \param [in] laggard An oldest read transaction number on which stalled.
- * \param [in] gap A lag from the last commited txn.
+ * \param [in] gap A lag from the last committed txn.
* \param [in] space A space that actually become available for reuse after
* this reader finished. The callback function can take
* this value into account to evaluate the impact that
diff --git a/libs/libmdbx/src/mdbx.h++ b/libs/libmdbx/src/mdbx.h++
index 2050f4b925..29d3f6fdca 100644
--- a/libs/libmdbx/src/mdbx.h++
+++ b/libs/libmdbx/src/mdbx.h++
@@ -208,7 +208,8 @@ using filehandle = ::mdbx_filehandle_t;
(defined(__cpp_lib_filesystem) && __cpp_lib_filesystem >= 201703L && \
(!defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || \
__MAC_OS_X_VERSION_MIN_REQUIRED >= 101500))
-using path = std::filesystem::path;
+#define MDBX_STD_FILESYSTEM_PATH
+using path = ::std::filesystem::path;
#elif defined(_WIN32) || defined(_WIN64)
using path = ::std::wstring;
#else
@@ -396,8 +397,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val {
template <size_t SIZE>
MDBX_CXX14_CONSTEXPR slice(const char (&text)[SIZE]) noexcept
: slice(text, SIZE - 1) {
- static_assert(SIZE > 0 && text[SIZE - 1] == '\0',
- "Must be a null-terminated C-string");
+ MDBX_CONSTEXPR_ASSERT(SIZE > 0 && text[SIZE - 1] == '\0');
}
/// \brief Create a slice that refers to c_str[0,strlen(c_str)-1].
explicit MDBX_CXX17_CONSTEXPR slice(const char *c_str);
@@ -1563,14 +1563,7 @@ enum class value_mode {
///< lexicographic comparison like `std::memcmp()`.
///< In terms of keys, they are not unique, i.e. has
///< duplicates which are sorted by associated data values.
-#if !defined(__cpp_constexpr) && !defined(DOXYGEN)
- multi_reverse = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_REVERSEDUP),
- multi_samelength = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED),
- multi_ordinal = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED) |
- uint32_t(MDBX_INTEGERDUP),
- multi_reverse_samelength = uint32_t(MDBX_DUPSORT) |
- uint32_t(MDBX_REVERSEDUP) | uint32_t(MDBX_DUPFIXED)
-#else
+#if CONSTEXPR_ENUM_FLAGS_OPERATIONS || defined(DOXYGEN)
multi_reverse =
MDBX_DUPSORT |
MDBX_REVERSEDUP, ///< A more than one data value could be associated with
@@ -1617,6 +1610,13 @@ enum class value_mode {
///< In terms of keys, they are not unique, i.e. has duplicates
///< which are sorted by associated data values.
///< \note Not yet implemented and PRs are welcome.
+#else
+ multi_reverse = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_REVERSEDUP),
+ multi_samelength = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED),
+ multi_ordinal = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED) |
+ uint32_t(MDBX_INTEGERDUP),
+ multi_reverse_samelength = uint32_t(MDBX_DUPSORT) |
+ uint32_t(MDBX_REVERSEDUP) | uint32_t(MDBX_DUPFIXED)
#endif
};
@@ -1651,9 +1651,9 @@ struct LIBMDBX_API_TYPE map_handle {
/// \brief Key-value pairs put mode.
enum put_mode {
- insert = MDBX_NOOVERWRITE, ///< Insert only unique keys.
- upsert = MDBX_UPSERT, ///< Insert or update.
- update = MDBX_CURRENT, ///< Update existing, don't insert new.
+ insert_unique = MDBX_NOOVERWRITE, ///< Insert only unique keys.
+ upsert = MDBX_UPSERT, ///< Insert or update.
+ update = MDBX_CURRENT, ///< Update existing, don't insert new.
};
/// \brief Unmanaged database environment.
@@ -1916,12 +1916,50 @@ public:
/// \brief Make a copy (backup) of an existing environment to the specified
/// path.
- env &copy(const path &destination, bool compactify,
+#ifdef MDBX_STD_FILESYSTEM_PATH
+ env &copy(const ::std::filesystem::path &destination, bool compactify,
+ bool force_dynamic_size = false);
+#endif /* MDBX_STD_FILESYSTEM_PATH */
+#if defined(_WIN32) || defined(_WIN64)
+ env &copy(const ::std::wstring &destination, bool compactify,
+ bool force_dynamic_size = false);
+#endif /* Windows */
+ env &copy(const ::std::string &destination, bool compactify,
bool force_dynamic_size = false);
/// \brief Copy an environment to the specified file descriptor.
env &copy(filehandle fd, bool compactify, bool force_dynamic_size = false);
+ /// \brief Deletion modes for \ref remove().
+ enum remove_mode {
+ /// \brief Just delete the environment's files and directory if any.
+ /// \note On POSIX systems, processes already working with the database will
+ /// continue to work without interference until it close the environment.
+ /// \note On Windows, the behavior of `just_remove` is different
+ /// because the system does not support deleting files that are currently
+ /// memory mapped.
+ just_remove = MDBX_ENV_JUST_DELETE,
+ /// \brief Make sure that the environment is not being used by other
+ /// processes, or return an error otherwise.
+ ensure_unused = MDBX_ENV_ENSURE_UNUSED,
+ /// \brief Wait until other processes closes the environment before
+ /// deletion.
+ wait_for_unused = MDBX_ENV_WAIT_FOR_UNUSED
+ };
+
+ /// \brief Removes the environment's files in a proper and multiprocess-safe
+ /// way.
+#ifdef MDBX_STD_FILESYSTEM_PATH
+ static bool remove(const ::std::filesystem::path &,
+ const remove_mode mode = just_remove);
+#endif /* MDBX_STD_FILESYSTEM_PATH */
+#if defined(_WIN32) || defined(_WIN64)
+ static bool remove(const ::std::wstring &,
+ const remove_mode mode = just_remove);
+#endif /* Windows */
+ static bool remove(const ::std::string &,
+ const remove_mode mode = just_remove);
+
/// \brief Statistics for a database in the MDBX environment.
using stat = ::MDBX_stat;
@@ -2060,7 +2098,7 @@ public:
/// \ref MDBX_BAD_DBI (since the DB name is gone).
inline void close_map(const map_handle &);
- /// \brief Readed information
+ /// \brief Reader information
struct reader_info {
int slot; ///< The reader lock table slot number.
mdbx_pid_t pid; ///< The reader process ID.
@@ -2156,7 +2194,16 @@ public:
MDBX_CXX11_CONSTEXPR env_managed() noexcept = default;
/// \brief Open existing database.
- env_managed(const path &, const operate_parameters &, bool accede = true);
+#ifdef MDBX_STD_FILESYSTEM_PATH
+ env_managed(const ::std::filesystem::path &, const operate_parameters &,
+ bool accede = true);
+#endif /* MDBX_STD_FILESYSTEM_PATH */
+#if defined(_WIN32) || defined(_WIN64)
+ env_managed(const ::std::wstring &, const operate_parameters &,
+ bool accede = true);
+#endif /* Windows */
+ env_managed(const ::std::string &, const operate_parameters &,
+ bool accede = true);
/// \brief Additional parameters for creating a new database.
struct create_parameters {
@@ -2166,7 +2213,15 @@ public:
};
/// \brief Create new or open existing database.
- env_managed(const path &, const create_parameters &,
+#ifdef MDBX_STD_FILESYSTEM_PATH
+ env_managed(const ::std::filesystem::path &, const create_parameters &,
+ const operate_parameters &, bool accede = true);
+#endif /* MDBX_STD_FILESYSTEM_PATH */
+#if defined(_WIN32) || defined(_WIN64)
+ env_managed(const ::std::wstring &, const create_parameters &,
+ const operate_parameters &, bool accede = true);
+#endif /* Windows */
+ env_managed(const ::std::string &, const create_parameters &,
const operate_parameters &, bool accede = true);
/// \brief Explicitly closes the environment and release the memory map.
@@ -2659,6 +2714,7 @@ public:
LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const slice &);
LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const pair &);
+LIBMDBX_API ::std::ostream &operator<<(::std::ostream &, const pair_result &);
template <class ALLOCATOR>
inline ::std::ostream &operator<<(::std::ostream &out,
const buffer<ALLOCATOR> &it) {
@@ -3106,7 +3162,7 @@ inline ::mdbx::string<ALLOCATOR>
slice::hex_encode(bool uppercase, const ALLOCATOR &allocator) const {
::mdbx::string<ALLOCATOR> result(allocator);
if (MDBX_LIKELY(length() > 0)) {
- result.reserve(to_hex_bytes());
+ result.resize(to_hex_bytes());
result.resize(to_hex(const_cast<char *>(result.data()), result.capacity()) -
result.data(),
uppercase);
@@ -3119,7 +3175,7 @@ inline ::mdbx::string<ALLOCATOR>
slice::hex_decode(const ALLOCATOR &allocator) const {
::mdbx::string<ALLOCATOR> result(allocator);
if (MDBX_LIKELY(length() > 0)) {
- result.reserve(from_hex_bytes());
+ result.resize(from_hex_bytes());
result.resize(
from_hex(static_cast<byte *>(
static_cast<void *>(const_cast<char *>(result.data()))),
@@ -3134,7 +3190,7 @@ inline ::mdbx::string<ALLOCATOR>
slice::base58_encode(const ALLOCATOR &allocator) const {
::mdbx::string<ALLOCATOR> result(allocator);
if (MDBX_LIKELY(length() > 0)) {
- result.reserve(to_base58_bytes());
+ result.resize(to_base58_bytes());
result.resize(
to_base58(const_cast<char *>(result.data()), result.capacity()) -
result.data());
@@ -3147,7 +3203,7 @@ inline ::mdbx::string<ALLOCATOR>
slice::base58_decode(const ALLOCATOR &allocator) const {
::mdbx::string<ALLOCATOR> result(allocator);
if (MDBX_LIKELY(length() > 0)) {
- result.reserve(from_base58_bytes());
+ result.resize(from_base58_bytes());
result.resize(
from_base58(static_cast<byte *>(
static_cast<void *>(const_cast<char *>(result.data()))),
@@ -3162,7 +3218,7 @@ inline ::mdbx::string<ALLOCATOR>
slice::base64_encode(const ALLOCATOR &allocator) const {
::mdbx::string<ALLOCATOR> result(allocator);
if (MDBX_LIKELY(length() > 0)) {
- result.reserve(to_base64_bytes());
+ result.resize(to_base64_bytes());
result.resize(
to_base64(const_cast<char *>(result.data()), result.capacity()) -
result.data());
@@ -3175,7 +3231,7 @@ inline ::mdbx::string<ALLOCATOR>
slice::base64_decode(const ALLOCATOR &allocator) const {
::mdbx::string<ALLOCATOR> result(allocator);
if (MDBX_LIKELY(length() > 0)) {
- result.reserve(from_base64_bytes());
+ result.resize(from_base64_bytes());
result.resize(
from_base64(static_cast<byte *>(
static_cast<void *>(const_cast<char *>(result.data()))),
@@ -3818,14 +3874,14 @@ inline void txn::put(map_handle map, const slice &key, slice value,
inline void txn::insert(map_handle map, const slice &key, slice value) {
error::success_or_throw(
put(map, key, &value /* takes the present value in case MDBX_KEYEXIST */,
- MDBX_put_flags_t(put_mode::insert)));
+ MDBX_put_flags_t(put_mode::insert_unique)));
}
inline value_result txn::try_insert(map_handle map, const slice &key,
slice value) {
const int err =
put(map, key, &value /* takes the present value in case MDBX_KEYEXIST */,
- MDBX_put_flags_t(put_mode::insert));
+ MDBX_put_flags_t(put_mode::insert_unique));
switch (err) {
case MDBX_SUCCESS:
return value_result{slice(), true};
@@ -3841,7 +3897,7 @@ inline slice txn::insert_reserve(map_handle map, const slice &key,
slice result(nullptr, value_length);
error::success_or_throw(
put(map, key, &result /* takes the present value in case MDBX_KEYEXIST */,
- MDBX_put_flags_t(put_mode::insert) | MDBX_RESERVE));
+ MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE));
return result;
}
@@ -3850,7 +3906,7 @@ inline value_result txn::try_insert_reserve(map_handle map, const slice &key,
slice result(nullptr, value_length);
const int err =
put(map, key, &result /* takes the present value in case MDBX_KEYEXIST */,
- MDBX_put_flags_t(put_mode::insert) | MDBX_RESERVE);
+ MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE);
switch (err) {
case MDBX_SUCCESS:
return value_result{result, true};
@@ -4287,13 +4343,13 @@ inline MDBX_error_t cursor::put(const slice &key, slice *value,
inline void cursor::insert(const slice &key, slice value) {
error::success_or_throw(
put(key, &value /* takes the present value in case MDBX_KEYEXIST */,
- MDBX_put_flags_t(put_mode::insert)));
+ MDBX_put_flags_t(put_mode::insert_unique)));
}
inline value_result cursor::try_insert(const slice &key, slice value) {
const int err =
put(key, &value /* takes the present value in case MDBX_KEYEXIST */,
- MDBX_put_flags_t(put_mode::insert));
+ MDBX_put_flags_t(put_mode::insert_unique));
switch (err) {
case MDBX_SUCCESS:
return value_result{slice(), true};
@@ -4308,7 +4364,7 @@ inline slice cursor::insert_reserve(const slice &key, size_t value_length) {
slice result(nullptr, value_length);
error::success_or_throw(
put(key, &result /* takes the present value in case MDBX_KEYEXIST */,
- MDBX_put_flags_t(put_mode::insert) | MDBX_RESERVE));
+ MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE));
return result;
}
@@ -4317,7 +4373,7 @@ inline value_result cursor::try_insert_reserve(const slice &key,
slice result(nullptr, value_length);
const int err =
put(key, &result /* takes the present value in case MDBX_KEYEXIST */,
- MDBX_put_flags_t(put_mode::insert) | MDBX_RESERVE);
+ MDBX_put_flags_t(put_mode::insert_unique) | MDBX_RESERVE);
switch (err) {
case MDBX_SUCCESS:
return value_result{result, true};
diff --git a/libs/libmdbx/src/mdbx_chk.c b/libs/libmdbx/src/mdbx_chk.c
index 120e4b4048..194bde6f9a 100644
--- a/libs/libmdbx/src/mdbx_chk.c
+++ b/libs/libmdbx/src/mdbx_chk.c
@@ -34,11 +34,16 @@
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>. */
-#define MDBX_BUILD_SOURCERY 3b5677a6062b714f1e138b0066c5590ee3c9ebf3bf8cfa3bb9503515ea0d1f02_v0_9_1_18_g1d31ebdc1c
+#define MDBX_BUILD_SOURCERY 47492323531afee427a3de6ddaeae26eed45bfd1b52d92fd121a5a13a9747dbb_v0_9_2_0_g092ab09
#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif
+#define LIBMDBX_INTERNALS
+#ifdef MDBX_TOOLS
+#define MDBX_DEPRECATED
+#endif /* MDBX_TOOLS */
+
/* *INDENT-OFF* */
/* clang-format off */
@@ -119,11 +124,6 @@
#pragma warning(disable : 4505) /* unreferenced local function has been removed */
#endif /* _MSC_VER (warnings) */
-#if defined(MDBX_TOOLS)
-#undef MDBX_DEPRECATED
-#define MDBX_DEPRECATED
-#endif /* MDBX_TOOLS */
-
#include "mdbx.h"
/*
* Copyright 2015-2020 Leonid Yuriev <leo@yuriev.ru>
@@ -839,7 +839,7 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
#ifndef __ia32__
-/* LY: define neutral __ia32__ for x86 and x86-64 archs */
+/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \
@@ -1015,6 +1015,35 @@ typedef union MDBX_srwlock {
#ifdef __cplusplus
extern void mdbx_osal_jitter(bool tiny);
#else
+
+/*----------------------------------------------------------------------------*/
+/* Atomics */
+
+#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
+#include <cstdatomic>
+#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
+ !defined(__STDC_NO_ATOMICS__) && \
+ (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
+ !(defined(__GNUC__) || defined(__clang__)))
+#include <stdatomic.h>
+#elif defined(__GNUC__) || defined(__clang__)
+/* LY: nothing required */
+#elif defined(_MSC_VER)
+#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
+#pragma warning(disable : 4133) /* 'function': incompatible types - from \
+ 'size_t' to 'LONGLONG' */
+#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
+ 'std::size_t', possible loss of data */
+#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
+ 'long', possible loss of data */
+#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#else
+#error FIXME atomic-ops
+#endif
+
/*----------------------------------------------------------------------------*/
/* Memory/Compiler barriers, cache coherence */
@@ -1056,8 +1085,8 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
}
static __maybe_unused __inline void mdbx_memory_barrier(void) {
-#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
- __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+#if __has_extension(c_atomic) && !defined(__STDC_NO_ATOMICS__)
+ atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__ATOMIC_SEQ_CST)
__atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__clang__) || defined(__GNUC__)
@@ -1110,8 +1139,7 @@ MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t mdbx_linux_kernel_version;
-MDBX_INTERNAL_VAR bool
- mdbx_RunningOnWSL /* Windows Subsystem for Linux is mad and trouble-full */;
+MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
#ifndef mdbx_strdup
@@ -1182,7 +1210,8 @@ enum mdbx_openfile_purpose {
MDBX_OPEN_DXB_LAZY = 1,
MDBX_OPEN_DXB_DSYNC = 2,
MDBX_OPEN_LCK = 3,
- MDBX_OPEN_COPY = 4
+ MDBX_OPEN_COPY = 4,
+ MDBX_OPEN_DELETE = 5
};
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
@@ -1191,7 +1220,9 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
+MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait);
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
@@ -1451,32 +1482,6 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* Windows */
-/*----------------------------------------------------------------------------*/
-/* Atomics */
-
-#if !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
- !defined(__STDC_NO_ATOMICS__) && \
- (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
- !(defined(__GNUC__) || defined(__clang__)))
-#include <stdatomic.h>
-#elif defined(__GNUC__) || defined(__clang__)
-/* LY: nothing required */
-#elif defined(_MSC_VER)
-#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
-#pragma warning(disable : 4133) /* 'function': incompatible types - from \
- 'size_t' to 'LONGLONG' */
-#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
- 'std::size_t', possible loss of data */
-#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
- 'long', possible loss of data */
-#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
-#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
-#elif defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-#else
-#error FIXME atomic-ops
-#endif
-
#endif /* !__cplusplus */
/*----------------------------------------------------------------------------*/
@@ -1912,7 +1917,7 @@ typedef struct MDBX_db {
pgno_t md_overflow_pages; /* number of overflow pages */
uint64_t md_seq; /* table sequence counter */
uint64_t md_entries; /* number of data items */
- uint64_t md_mod_txnid; /* txnid of last commited modification */
+ uint64_t md_mod_txnid; /* txnid of last committed modification */
} MDBX_db;
/* database size-related parameters */
@@ -1996,7 +2001,7 @@ typedef struct MDBX_meta {
typedef struct MDBX_page {
union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
- uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
+ uint64_t mp_txnid; /* txnid that committed this page */
};
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */
@@ -2244,7 +2249,7 @@ typedef struct MDBX_lockinfo {
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
-#define MAX_MAPSIZE32 UINT32_C(0x7ff80000)
+#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
#define MAX_MAPSIZE64 (MAX_PAGENO * (uint64_t)MAX_PAGESIZE)
@@ -2403,8 +2408,6 @@ struct MDBX_txn {
MDBX_db *mt_dbs;
/* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs;
- /* In write txns, array of cursors for each DB */
- MDBX_cursor **mt_cursors;
/* Transaction DBI Flags */
#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
@@ -2431,6 +2434,8 @@ struct MDBX_txn {
MDBX_reader *reader;
} to;
struct {
+ /* In write txns, array of cursors for each DB */
+ MDBX_cursor **cursors;
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
txnid_t last_reclaimed; /* ID of last used record */
pgno_t loose_refund_wl /* FIXME: describe */;
@@ -2568,7 +2573,7 @@ struct MDBX_env {
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
- unsigned me_psize; /* DB page size, inited from me_os_psize */
+ unsigned me_psize; /* DB page size, initialized from me_os_psize */
uint8_t me_psize2log; /* log2 of DB page size */
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
@@ -2578,7 +2583,7 @@ struct MDBX_env {
MDBX_dbi me_maxdbs; /* size of the DB table */
uint32_t me_pid; /* process ID of this env */
mdbx_thread_key_t me_txkey; /* thread-key for readers */
- char *me_path; /* path to the DB files */
+ char *me_pathname; /* path to the DB files */
void *me_pbuf; /* scratch area for DUPSORT put() */
MDBX_txn *me_txn; /* current write transaction */
MDBX_txn *me_txn0; /* prealloc'd write transaction */
@@ -2854,7 +2859,7 @@ static __maybe_unused __inline void mdbx_jitter4testing(bool tiny) {
((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
/* Internal error codes, not exposed outside libmdbx */
-#define MDBX_NO_ROOT (MDBX_LAST_LMDB_ERRCODE + 10)
+#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc) \
diff --git a/libs/libmdbx/src/mdbx_copy.c b/libs/libmdbx/src/mdbx_copy.c
index 71f6a05527..eb3c49d1d9 100644
--- a/libs/libmdbx/src/mdbx_copy.c
+++ b/libs/libmdbx/src/mdbx_copy.c
@@ -34,11 +34,16 @@
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>. */
-#define MDBX_BUILD_SOURCERY 3b5677a6062b714f1e138b0066c5590ee3c9ebf3bf8cfa3bb9503515ea0d1f02_v0_9_1_18_g1d31ebdc1c
+#define MDBX_BUILD_SOURCERY 47492323531afee427a3de6ddaeae26eed45bfd1b52d92fd121a5a13a9747dbb_v0_9_2_0_g092ab09
#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif
+#define LIBMDBX_INTERNALS
+#ifdef MDBX_TOOLS
+#define MDBX_DEPRECATED
+#endif /* MDBX_TOOLS */
+
/* *INDENT-OFF* */
/* clang-format off */
@@ -119,11 +124,6 @@
#pragma warning(disable : 4505) /* unreferenced local function has been removed */
#endif /* _MSC_VER (warnings) */
-#if defined(MDBX_TOOLS)
-#undef MDBX_DEPRECATED
-#define MDBX_DEPRECATED
-#endif /* MDBX_TOOLS */
-
#include "mdbx.h"
/*
* Copyright 2015-2020 Leonid Yuriev <leo@yuriev.ru>
@@ -839,7 +839,7 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
#ifndef __ia32__
-/* LY: define neutral __ia32__ for x86 and x86-64 archs */
+/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \
@@ -1015,6 +1015,35 @@ typedef union MDBX_srwlock {
#ifdef __cplusplus
extern void mdbx_osal_jitter(bool tiny);
#else
+
+/*----------------------------------------------------------------------------*/
+/* Atomics */
+
+#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
+#include <cstdatomic>
+#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
+ !defined(__STDC_NO_ATOMICS__) && \
+ (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
+ !(defined(__GNUC__) || defined(__clang__)))
+#include <stdatomic.h>
+#elif defined(__GNUC__) || defined(__clang__)
+/* LY: nothing required */
+#elif defined(_MSC_VER)
+#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
+#pragma warning(disable : 4133) /* 'function': incompatible types - from \
+ 'size_t' to 'LONGLONG' */
+#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
+ 'std::size_t', possible loss of data */
+#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
+ 'long', possible loss of data */
+#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#else
+#error FIXME atomic-ops
+#endif
+
/*----------------------------------------------------------------------------*/
/* Memory/Compiler barriers, cache coherence */
@@ -1056,8 +1085,8 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
}
static __maybe_unused __inline void mdbx_memory_barrier(void) {
-#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
- __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+#if __has_extension(c_atomic) && !defined(__STDC_NO_ATOMICS__)
+ atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__ATOMIC_SEQ_CST)
__atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__clang__) || defined(__GNUC__)
@@ -1110,8 +1139,7 @@ MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t mdbx_linux_kernel_version;
-MDBX_INTERNAL_VAR bool
- mdbx_RunningOnWSL /* Windows Subsystem for Linux is mad and trouble-full */;
+MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
#ifndef mdbx_strdup
@@ -1182,7 +1210,8 @@ enum mdbx_openfile_purpose {
MDBX_OPEN_DXB_LAZY = 1,
MDBX_OPEN_DXB_DSYNC = 2,
MDBX_OPEN_LCK = 3,
- MDBX_OPEN_COPY = 4
+ MDBX_OPEN_COPY = 4,
+ MDBX_OPEN_DELETE = 5
};
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
@@ -1191,7 +1220,9 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
+MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait);
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
@@ -1451,32 +1482,6 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* Windows */
-/*----------------------------------------------------------------------------*/
-/* Atomics */
-
-#if !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
- !defined(__STDC_NO_ATOMICS__) && \
- (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
- !(defined(__GNUC__) || defined(__clang__)))
-#include <stdatomic.h>
-#elif defined(__GNUC__) || defined(__clang__)
-/* LY: nothing required */
-#elif defined(_MSC_VER)
-#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
-#pragma warning(disable : 4133) /* 'function': incompatible types - from \
- 'size_t' to 'LONGLONG' */
-#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
- 'std::size_t', possible loss of data */
-#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
- 'long', possible loss of data */
-#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
-#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
-#elif defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-#else
-#error FIXME atomic-ops
-#endif
-
#endif /* !__cplusplus */
/*----------------------------------------------------------------------------*/
@@ -1912,7 +1917,7 @@ typedef struct MDBX_db {
pgno_t md_overflow_pages; /* number of overflow pages */
uint64_t md_seq; /* table sequence counter */
uint64_t md_entries; /* number of data items */
- uint64_t md_mod_txnid; /* txnid of last commited modification */
+ uint64_t md_mod_txnid; /* txnid of last committed modification */
} MDBX_db;
/* database size-related parameters */
@@ -1996,7 +2001,7 @@ typedef struct MDBX_meta {
typedef struct MDBX_page {
union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
- uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
+ uint64_t mp_txnid; /* txnid that committed this page */
};
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */
@@ -2244,7 +2249,7 @@ typedef struct MDBX_lockinfo {
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
-#define MAX_MAPSIZE32 UINT32_C(0x7ff80000)
+#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
#define MAX_MAPSIZE64 (MAX_PAGENO * (uint64_t)MAX_PAGESIZE)
@@ -2403,8 +2408,6 @@ struct MDBX_txn {
MDBX_db *mt_dbs;
/* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs;
- /* In write txns, array of cursors for each DB */
- MDBX_cursor **mt_cursors;
/* Transaction DBI Flags */
#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
@@ -2431,6 +2434,8 @@ struct MDBX_txn {
MDBX_reader *reader;
} to;
struct {
+ /* In write txns, array of cursors for each DB */
+ MDBX_cursor **cursors;
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
txnid_t last_reclaimed; /* ID of last used record */
pgno_t loose_refund_wl /* FIXME: describe */;
@@ -2568,7 +2573,7 @@ struct MDBX_env {
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
- unsigned me_psize; /* DB page size, inited from me_os_psize */
+ unsigned me_psize; /* DB page size, initialized from me_os_psize */
uint8_t me_psize2log; /* log2 of DB page size */
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
@@ -2578,7 +2583,7 @@ struct MDBX_env {
MDBX_dbi me_maxdbs; /* size of the DB table */
uint32_t me_pid; /* process ID of this env */
mdbx_thread_key_t me_txkey; /* thread-key for readers */
- char *me_path; /* path to the DB files */
+ char *me_pathname; /* path to the DB files */
void *me_pbuf; /* scratch area for DUPSORT put() */
MDBX_txn *me_txn; /* current write transaction */
MDBX_txn *me_txn0; /* prealloc'd write transaction */
@@ -2854,7 +2859,7 @@ static __maybe_unused __inline void mdbx_jitter4testing(bool tiny) {
((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
/* Internal error codes, not exposed outside libmdbx */
-#define MDBX_NO_ROOT (MDBX_LAST_LMDB_ERRCODE + 10)
+#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc) \
diff --git a/libs/libmdbx/src/mdbx_dump.c b/libs/libmdbx/src/mdbx_dump.c
index f338498c7c..96cf69c47a 100644
--- a/libs/libmdbx/src/mdbx_dump.c
+++ b/libs/libmdbx/src/mdbx_dump.c
@@ -34,11 +34,16 @@
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>. */
-#define MDBX_BUILD_SOURCERY 3b5677a6062b714f1e138b0066c5590ee3c9ebf3bf8cfa3bb9503515ea0d1f02_v0_9_1_18_g1d31ebdc1c
+#define MDBX_BUILD_SOURCERY 47492323531afee427a3de6ddaeae26eed45bfd1b52d92fd121a5a13a9747dbb_v0_9_2_0_g092ab09
#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif
+#define LIBMDBX_INTERNALS
+#ifdef MDBX_TOOLS
+#define MDBX_DEPRECATED
+#endif /* MDBX_TOOLS */
+
/* *INDENT-OFF* */
/* clang-format off */
@@ -119,11 +124,6 @@
#pragma warning(disable : 4505) /* unreferenced local function has been removed */
#endif /* _MSC_VER (warnings) */
-#if defined(MDBX_TOOLS)
-#undef MDBX_DEPRECATED
-#define MDBX_DEPRECATED
-#endif /* MDBX_TOOLS */
-
#include "mdbx.h"
/*
* Copyright 2015-2020 Leonid Yuriev <leo@yuriev.ru>
@@ -839,7 +839,7 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
#ifndef __ia32__
-/* LY: define neutral __ia32__ for x86 and x86-64 archs */
+/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \
@@ -1015,6 +1015,35 @@ typedef union MDBX_srwlock {
#ifdef __cplusplus
extern void mdbx_osal_jitter(bool tiny);
#else
+
+/*----------------------------------------------------------------------------*/
+/* Atomics */
+
+#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
+#include <cstdatomic>
+#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
+ !defined(__STDC_NO_ATOMICS__) && \
+ (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
+ !(defined(__GNUC__) || defined(__clang__)))
+#include <stdatomic.h>
+#elif defined(__GNUC__) || defined(__clang__)
+/* LY: nothing required */
+#elif defined(_MSC_VER)
+#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
+#pragma warning(disable : 4133) /* 'function': incompatible types - from \
+ 'size_t' to 'LONGLONG' */
+#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
+ 'std::size_t', possible loss of data */
+#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
+ 'long', possible loss of data */
+#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#else
+#error FIXME atomic-ops
+#endif
+
/*----------------------------------------------------------------------------*/
/* Memory/Compiler barriers, cache coherence */
@@ -1056,8 +1085,8 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
}
static __maybe_unused __inline void mdbx_memory_barrier(void) {
-#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
- __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+#if __has_extension(c_atomic) && !defined(__STDC_NO_ATOMICS__)
+ atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__ATOMIC_SEQ_CST)
__atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__clang__) || defined(__GNUC__)
@@ -1110,8 +1139,7 @@ MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t mdbx_linux_kernel_version;
-MDBX_INTERNAL_VAR bool
- mdbx_RunningOnWSL /* Windows Subsystem for Linux is mad and trouble-full */;
+MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
#ifndef mdbx_strdup
@@ -1182,7 +1210,8 @@ enum mdbx_openfile_purpose {
MDBX_OPEN_DXB_LAZY = 1,
MDBX_OPEN_DXB_DSYNC = 2,
MDBX_OPEN_LCK = 3,
- MDBX_OPEN_COPY = 4
+ MDBX_OPEN_COPY = 4,
+ MDBX_OPEN_DELETE = 5
};
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
@@ -1191,7 +1220,9 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
+MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait);
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
@@ -1451,32 +1482,6 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* Windows */
-/*----------------------------------------------------------------------------*/
-/* Atomics */
-
-#if !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
- !defined(__STDC_NO_ATOMICS__) && \
- (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
- !(defined(__GNUC__) || defined(__clang__)))
-#include <stdatomic.h>
-#elif defined(__GNUC__) || defined(__clang__)
-/* LY: nothing required */
-#elif defined(_MSC_VER)
-#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
-#pragma warning(disable : 4133) /* 'function': incompatible types - from \
- 'size_t' to 'LONGLONG' */
-#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
- 'std::size_t', possible loss of data */
-#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
- 'long', possible loss of data */
-#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
-#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
-#elif defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-#else
-#error FIXME atomic-ops
-#endif
-
#endif /* !__cplusplus */
/*----------------------------------------------------------------------------*/
@@ -1912,7 +1917,7 @@ typedef struct MDBX_db {
pgno_t md_overflow_pages; /* number of overflow pages */
uint64_t md_seq; /* table sequence counter */
uint64_t md_entries; /* number of data items */
- uint64_t md_mod_txnid; /* txnid of last commited modification */
+ uint64_t md_mod_txnid; /* txnid of last committed modification */
} MDBX_db;
/* database size-related parameters */
@@ -1996,7 +2001,7 @@ typedef struct MDBX_meta {
typedef struct MDBX_page {
union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
- uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
+ uint64_t mp_txnid; /* txnid that committed this page */
};
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */
@@ -2244,7 +2249,7 @@ typedef struct MDBX_lockinfo {
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
-#define MAX_MAPSIZE32 UINT32_C(0x7ff80000)
+#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
#define MAX_MAPSIZE64 (MAX_PAGENO * (uint64_t)MAX_PAGESIZE)
@@ -2403,8 +2408,6 @@ struct MDBX_txn {
MDBX_db *mt_dbs;
/* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs;
- /* In write txns, array of cursors for each DB */
- MDBX_cursor **mt_cursors;
/* Transaction DBI Flags */
#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
@@ -2431,6 +2434,8 @@ struct MDBX_txn {
MDBX_reader *reader;
} to;
struct {
+ /* In write txns, array of cursors for each DB */
+ MDBX_cursor **cursors;
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
txnid_t last_reclaimed; /* ID of last used record */
pgno_t loose_refund_wl /* FIXME: describe */;
@@ -2568,7 +2573,7 @@ struct MDBX_env {
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
- unsigned me_psize; /* DB page size, inited from me_os_psize */
+ unsigned me_psize; /* DB page size, initialized from me_os_psize */
uint8_t me_psize2log; /* log2 of DB page size */
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
@@ -2578,7 +2583,7 @@ struct MDBX_env {
MDBX_dbi me_maxdbs; /* size of the DB table */
uint32_t me_pid; /* process ID of this env */
mdbx_thread_key_t me_txkey; /* thread-key for readers */
- char *me_path; /* path to the DB files */
+ char *me_pathname; /* path to the DB files */
void *me_pbuf; /* scratch area for DUPSORT put() */
MDBX_txn *me_txn; /* current write transaction */
MDBX_txn *me_txn0; /* prealloc'd write transaction */
@@ -2854,7 +2859,7 @@ static __maybe_unused __inline void mdbx_jitter4testing(bool tiny) {
((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
/* Internal error codes, not exposed outside libmdbx */
-#define MDBX_NO_ROOT (MDBX_LAST_LMDB_ERRCODE + 10)
+#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc) \
@@ -3320,6 +3325,8 @@ static int dump_sdb(MDBX_txn *txn, MDBX_dbi dbi, char *name) {
rc = MDBX_SUCCESS;
if (unlikely(rc != MDBX_SUCCESS))
error("mdbx_cursor_get", rc);
+
+ mdbx_cursor_close(cursor);
return rc;
}
@@ -3354,7 +3361,7 @@ int main(int argc, char *argv[]) {
MDBX_dbi dbi;
prog = argv[0];
char *envname;
- char *subname = nullptr;
+ char *subname = nullptr, *buf4free = nullptr;
unsigned envflags = 0;
bool alldbs = false, list = false;
@@ -3500,7 +3507,13 @@ int main(int argc, char *argv[]) {
if (memchr(key.iov_base, '\0', key.iov_len))
continue;
- subname = mdbx_malloc(key.iov_len + 1);
+ subname = mdbx_realloc(buf4free, key.iov_len + 1);
+ if (!subname) {
+ rc = MDBX_ENOMEM;
+ break;
+ }
+
+ buf4free = subname;
memcpy(subname, key.iov_base, key.iov_len);
subname[key.iov_len] = '\0';
@@ -3553,7 +3566,6 @@ int main(int argc, char *argv[]) {
break;
}
}
- mdbx_free(subname);
}
mdbx_cursor_close(cursor);
cursor = nullptr;
@@ -3587,6 +3599,7 @@ txn_abort:
mdbx_txn_abort(txn);
env_close:
mdbx_env_close(env);
+ free(buf4free);
return rc ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/libs/libmdbx/src/mdbx_load.c b/libs/libmdbx/src/mdbx_load.c
index 6832e0f73a..3a49a60ec7 100644
--- a/libs/libmdbx/src/mdbx_load.c
+++ b/libs/libmdbx/src/mdbx_load.c
@@ -34,11 +34,16 @@
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>. */
-#define MDBX_BUILD_SOURCERY 3b5677a6062b714f1e138b0066c5590ee3c9ebf3bf8cfa3bb9503515ea0d1f02_v0_9_1_18_g1d31ebdc1c
+#define MDBX_BUILD_SOURCERY 47492323531afee427a3de6ddaeae26eed45bfd1b52d92fd121a5a13a9747dbb_v0_9_2_0_g092ab09
#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif
+#define LIBMDBX_INTERNALS
+#ifdef MDBX_TOOLS
+#define MDBX_DEPRECATED
+#endif /* MDBX_TOOLS */
+
/* *INDENT-OFF* */
/* clang-format off */
@@ -119,11 +124,6 @@
#pragma warning(disable : 4505) /* unreferenced local function has been removed */
#endif /* _MSC_VER (warnings) */
-#if defined(MDBX_TOOLS)
-#undef MDBX_DEPRECATED
-#define MDBX_DEPRECATED
-#endif /* MDBX_TOOLS */
-
#include "mdbx.h"
/*
* Copyright 2015-2020 Leonid Yuriev <leo@yuriev.ru>
@@ -839,7 +839,7 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
#ifndef __ia32__
-/* LY: define neutral __ia32__ for x86 and x86-64 archs */
+/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \
@@ -1015,6 +1015,35 @@ typedef union MDBX_srwlock {
#ifdef __cplusplus
extern void mdbx_osal_jitter(bool tiny);
#else
+
+/*----------------------------------------------------------------------------*/
+/* Atomics */
+
+#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
+#include <cstdatomic>
+#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
+ !defined(__STDC_NO_ATOMICS__) && \
+ (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
+ !(defined(__GNUC__) || defined(__clang__)))
+#include <stdatomic.h>
+#elif defined(__GNUC__) || defined(__clang__)
+/* LY: nothing required */
+#elif defined(_MSC_VER)
+#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
+#pragma warning(disable : 4133) /* 'function': incompatible types - from \
+ 'size_t' to 'LONGLONG' */
+#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
+ 'std::size_t', possible loss of data */
+#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
+ 'long', possible loss of data */
+#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#else
+#error FIXME atomic-ops
+#endif
+
/*----------------------------------------------------------------------------*/
/* Memory/Compiler barriers, cache coherence */
@@ -1056,8 +1085,8 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
}
static __maybe_unused __inline void mdbx_memory_barrier(void) {
-#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
- __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+#if __has_extension(c_atomic) && !defined(__STDC_NO_ATOMICS__)
+ atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__ATOMIC_SEQ_CST)
__atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__clang__) || defined(__GNUC__)
@@ -1110,8 +1139,7 @@ MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t mdbx_linux_kernel_version;
-MDBX_INTERNAL_VAR bool
- mdbx_RunningOnWSL /* Windows Subsystem for Linux is mad and trouble-full */;
+MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
#ifndef mdbx_strdup
@@ -1182,7 +1210,8 @@ enum mdbx_openfile_purpose {
MDBX_OPEN_DXB_LAZY = 1,
MDBX_OPEN_DXB_DSYNC = 2,
MDBX_OPEN_LCK = 3,
- MDBX_OPEN_COPY = 4
+ MDBX_OPEN_COPY = 4,
+ MDBX_OPEN_DELETE = 5
};
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
@@ -1191,7 +1220,9 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
+MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait);
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
@@ -1451,32 +1482,6 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* Windows */
-/*----------------------------------------------------------------------------*/
-/* Atomics */
-
-#if !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
- !defined(__STDC_NO_ATOMICS__) && \
- (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
- !(defined(__GNUC__) || defined(__clang__)))
-#include <stdatomic.h>
-#elif defined(__GNUC__) || defined(__clang__)
-/* LY: nothing required */
-#elif defined(_MSC_VER)
-#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
-#pragma warning(disable : 4133) /* 'function': incompatible types - from \
- 'size_t' to 'LONGLONG' */
-#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
- 'std::size_t', possible loss of data */
-#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
- 'long', possible loss of data */
-#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
-#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
-#elif defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-#else
-#error FIXME atomic-ops
-#endif
-
#endif /* !__cplusplus */
/*----------------------------------------------------------------------------*/
@@ -1912,7 +1917,7 @@ typedef struct MDBX_db {
pgno_t md_overflow_pages; /* number of overflow pages */
uint64_t md_seq; /* table sequence counter */
uint64_t md_entries; /* number of data items */
- uint64_t md_mod_txnid; /* txnid of last commited modification */
+ uint64_t md_mod_txnid; /* txnid of last committed modification */
} MDBX_db;
/* database size-related parameters */
@@ -1996,7 +2001,7 @@ typedef struct MDBX_meta {
typedef struct MDBX_page {
union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
- uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
+ uint64_t mp_txnid; /* txnid that committed this page */
};
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */
@@ -2244,7 +2249,7 @@ typedef struct MDBX_lockinfo {
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
-#define MAX_MAPSIZE32 UINT32_C(0x7ff80000)
+#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
#define MAX_MAPSIZE64 (MAX_PAGENO * (uint64_t)MAX_PAGESIZE)
@@ -2403,8 +2408,6 @@ struct MDBX_txn {
MDBX_db *mt_dbs;
/* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs;
- /* In write txns, array of cursors for each DB */
- MDBX_cursor **mt_cursors;
/* Transaction DBI Flags */
#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
@@ -2431,6 +2434,8 @@ struct MDBX_txn {
MDBX_reader *reader;
} to;
struct {
+ /* In write txns, array of cursors for each DB */
+ MDBX_cursor **cursors;
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
txnid_t last_reclaimed; /* ID of last used record */
pgno_t loose_refund_wl /* FIXME: describe */;
@@ -2568,7 +2573,7 @@ struct MDBX_env {
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
- unsigned me_psize; /* DB page size, inited from me_os_psize */
+ unsigned me_psize; /* DB page size, initialized from me_os_psize */
uint8_t me_psize2log; /* log2 of DB page size */
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
@@ -2578,7 +2583,7 @@ struct MDBX_env {
MDBX_dbi me_maxdbs; /* size of the DB table */
uint32_t me_pid; /* process ID of this env */
mdbx_thread_key_t me_txkey; /* thread-key for readers */
- char *me_path; /* path to the DB files */
+ char *me_pathname; /* path to the DB files */
void *me_pbuf; /* scratch area for DUPSORT put() */
MDBX_txn *me_txn; /* current write transaction */
MDBX_txn *me_txn0; /* prealloc'd write transaction */
@@ -2854,7 +2859,7 @@ static __maybe_unused __inline void mdbx_jitter4testing(bool tiny) {
((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
/* Internal error codes, not exposed outside libmdbx */
-#define MDBX_NO_ROOT (MDBX_LAST_LMDB_ERRCODE + 10)
+#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc) \
@@ -3229,7 +3234,6 @@ static MDBX_envinfo envinfo;
static int mode = GLOBAL;
static MDBX_val kbuf, dbuf;
-static MDBX_val k0buf;
#define STRLENOF(s) (sizeof(s) - 1)
@@ -3561,7 +3565,7 @@ static int readline(MDBX_val *out, MDBX_val *buf) {
static void usage(void) {
fprintf(stderr,
"usage: %s [-V] [-q] [-a] [-f file] [-s name] [-N] [-T] [-r] [-n]"
- "dbpath\n"
+ " dbpath\n"
" -V\t\tprint version and exit\n"
" -q\t\tbe quiet\n"
" -a\t\tappend records in input order (required for custom "
@@ -3592,11 +3596,9 @@ int main(int argc, char *argv[]) {
MDBX_cursor *mc = nullptr;
MDBX_dbi dbi;
char *envname = nullptr;
- int envflags = MDBX_UTTERLY_NOSYNC, putflags = 0;
- bool append = false;
+ int envflags = MDBX_UTTERLY_NOSYNC, putflags = MDBX_UPSERT;
bool quiet = false;
bool rescue = false;
- MDBX_val prevk;
prog = argv[0];
if (argc < 2)
@@ -3619,7 +3621,7 @@ int main(int argc, char *argv[]) {
mdbx_build.options);
return EXIT_SUCCESS;
case 'a':
- append = true;
+ putflags |= MDBX_APPEND;
break;
case 'f':
if (freopen(optarg, "r", stdin) == nullptr) {
@@ -3635,7 +3637,7 @@ int main(int argc, char *argv[]) {
subname = mdbx_strdup(optarg);
break;
case 'N':
- putflags = MDBX_NOOVERWRITE | MDBX_NODUPDATA;
+ putflags |= MDBX_NOOVERWRITE | MDBX_NODUPDATA;
break;
case 'T':
mode |= NOHDR | PRINT;
@@ -3676,6 +3678,11 @@ int main(int argc, char *argv[]) {
dbuf.iov_len = 4096;
dbuf.iov_base = mdbx_malloc(dbuf.iov_len);
+ if (!dbuf.iov_base) {
+ rc = MDBX_ENOMEM;
+ error("value-buffer", rc);
+ goto env_close;
+ }
/* read first header for mapsize= */
if (!(mode & NOHDR)) {
@@ -3703,7 +3710,7 @@ int main(int argc, char *argv[]) {
}
}
- if (envinfo.mi_mapsize) {
+ if (envinfo.mi_geo.current | envinfo.mi_mapsize) {
if (envinfo.mi_geo.current) {
rc = mdbx_env_set_geometry(
env, (intptr_t)envinfo.mi_geo.lower, (intptr_t)envinfo.mi_geo.current,
@@ -3736,17 +3743,19 @@ int main(int argc, char *argv[]) {
goto env_close;
}
- kbuf.iov_len = mdbx_env_get_maxvalsize_ex(env, MDBX_DUPSORT);
- if (kbuf.iov_len >= INTPTR_MAX / 4) {
+ kbuf.iov_len = mdbx_env_get_maxvalsize_ex(env, 0) + 1;
+ if (kbuf.iov_len >= INTPTR_MAX / 2) {
fprintf(stderr, "mdbx_env_get_maxkeysize() failed, returns %zu\n",
kbuf.iov_len);
goto env_close;
}
- kbuf.iov_len = (kbuf.iov_len + 1) * 2;
- kbuf.iov_base = malloc(kbuf.iov_len * 2);
- k0buf.iov_len = kbuf.iov_len;
- k0buf.iov_base = (char *)kbuf.iov_base + kbuf.iov_len;
- prevk.iov_base = k0buf.iov_base;
+
+ kbuf.iov_base = malloc(kbuf.iov_len);
+ if (!kbuf.iov_base) {
+ rc = MDBX_ENOMEM;
+ error("key-buffer", rc);
+ goto env_close;
+ }
while (rc == MDBX_SUCCESS) {
if (user_break) {
@@ -3772,9 +3781,10 @@ int main(int argc, char *argv[]) {
}
const char *const dbi_name = subname ? subname : "@MAIN";
- rc = mdbx_dbi_open_ex(txn, subname, dbi_flags | MDBX_CREATE, &dbi,
- append ? equal_or_greater : nullptr,
- append ? equal_or_greater : nullptr);
+ rc =
+ mdbx_dbi_open_ex(txn, subname, dbi_flags | MDBX_CREATE, &dbi,
+ (putflags & MDBX_APPEND) ? equal_or_greater : nullptr,
+ (putflags & MDBX_APPEND) ? equal_or_greater : nullptr);
if (unlikely(rc != MDBX_SUCCESS)) {
error("mdbx_dbi_open_ex", rc);
goto txn_abort;
@@ -3789,7 +3799,7 @@ int main(int argc, char *argv[]) {
if (present_sequence > sequence) {
fprintf(stderr,
"present sequence for '%s' value (%" PRIu64
- ") is greated than loaded (%" PRIu64 ")\n",
+ ") is greater than loaded (%" PRIu64 ")\n",
dbi_name, present_sequence, sequence);
rc = MDBX_RESULT_TRUE;
goto txn_abort;
@@ -3802,19 +3812,17 @@ int main(int argc, char *argv[]) {
}
}
+ if (putflags & MDBX_APPEND)
+ putflags = (dbi_flags & MDBX_DUPSORT) ? putflags | MDBX_APPENDDUP
+ : putflags & ~MDBX_APPENDDUP;
+
rc = mdbx_cursor_open(txn, dbi, &mc);
if (unlikely(rc != MDBX_SUCCESS)) {
error("mdbx_cursor_open", rc);
goto txn_abort;
}
- /* if (append) {
- mc->mc_flags |= C_SKIPORD;
- if (mc->mc_xcursor)
- mc->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD;
- } */
int batch = 0;
- prevk.iov_len = 0;
while (rc == MDBX_SUCCESS) {
MDBX_val key, data;
rc = readline(&key, &kbuf);
@@ -3829,18 +3837,7 @@ int main(int argc, char *argv[]) {
goto txn_abort;
}
- int appflag = 0;
- if (append) {
- appflag = MDBX_APPEND;
- if (dbi_flags & MDBX_DUPSORT) {
- if (prevk.iov_len == key.iov_len &&
- memcmp(prevk.iov_base, key.iov_base, key.iov_len) == 0)
- appflag = MDBX_APPEND | MDBX_APPENDDUP;
- else
- memcpy(prevk.iov_base, key.iov_base, prevk.iov_len = key.iov_len);
- }
- }
- rc = mdbx_cursor_put(mc, &key, &data, putflags | appflag);
+ rc = mdbx_cursor_put(mc, &key, &data, putflags);
if (rc == MDBX_KEYEXIST && putflags)
continue;
if (rc == MDBX_BAD_VALSIZE && rescue) {
@@ -3861,9 +3858,7 @@ int main(int argc, char *argv[]) {
goto txn_abort;
}
- if (batch == 10000 || txn_info.txn_space_dirty > MEGABYTE * 16) {
- mdbx_cursor_close(mc);
- mc = nullptr;
+ if (batch == 10000 || txn_info.txn_space_dirty > MEGABYTE * 256) {
rc = mdbx_txn_commit(txn);
if (unlikely(rc != MDBX_SUCCESS)) {
error("mdbx_txn_commit", rc);
@@ -3876,16 +3871,11 @@ int main(int argc, char *argv[]) {
error("mdbx_txn_begin", rc);
goto env_close;
}
- rc = mdbx_cursor_open(txn, dbi, &mc);
+ rc = mdbx_cursor_bind(txn, mc, dbi);
if (unlikely(rc != MDBX_SUCCESS)) {
- error("mdbx_cursor_open", rc);
+ error("mdbx_cursor_bind", rc);
goto txn_abort;
}
- /* if (append) {
- mc->mc_flags |= C_SKIPORD;
- if (mc->mc_xcursor)
- mc->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD;
- } */
}
}
@@ -3897,15 +3887,22 @@ int main(int argc, char *argv[]) {
error("mdbx_txn_commit", rc);
goto env_close;
}
- rc = mdbx_dbi_close(env, dbi);
- if (unlikely(rc != MDBX_SUCCESS)) {
- error("mdbx_dbi_close", rc);
- goto env_close;
+ if (subname) {
+ assert(dbi != MAIN_DBI);
+ rc = mdbx_dbi_close(env, dbi);
+ if (unlikely(rc != MDBX_SUCCESS)) {
+ error("mdbx_dbi_close", rc);
+ goto env_close;
+ }
+ } else {
+ assert(dbi == MAIN_DBI);
}
/* try read next header */
if (!(mode & NOHDR))
rc = readhdr();
+ else if (ferror(stdin) || feof(stdin))
+ break;
}
switch (rc) {
@@ -3926,6 +3923,8 @@ txn_abort:
mdbx_txn_abort(txn);
env_close:
mdbx_env_close(env);
+ free(kbuf.iov_base);
+ free(dbuf.iov_base);
return rc ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/libs/libmdbx/src/mdbx_stat.c b/libs/libmdbx/src/mdbx_stat.c
index 37ebc55a00..ce47bd75fd 100644
--- a/libs/libmdbx/src/mdbx_stat.c
+++ b/libs/libmdbx/src/mdbx_stat.c
@@ -34,11 +34,16 @@
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>. */
-#define MDBX_BUILD_SOURCERY 3b5677a6062b714f1e138b0066c5590ee3c9ebf3bf8cfa3bb9503515ea0d1f02_v0_9_1_18_g1d31ebdc1c
+#define MDBX_BUILD_SOURCERY 47492323531afee427a3de6ddaeae26eed45bfd1b52d92fd121a5a13a9747dbb_v0_9_2_0_g092ab09
#ifdef MDBX_CONFIG_H
#include MDBX_CONFIG_H
#endif
+#define LIBMDBX_INTERNALS
+#ifdef MDBX_TOOLS
+#define MDBX_DEPRECATED
+#endif /* MDBX_TOOLS */
+
/* *INDENT-OFF* */
/* clang-format off */
@@ -119,11 +124,6 @@
#pragma warning(disable : 4505) /* unreferenced local function has been removed */
#endif /* _MSC_VER (warnings) */
-#if defined(MDBX_TOOLS)
-#undef MDBX_DEPRECATED
-#define MDBX_DEPRECATED
-#endif /* MDBX_TOOLS */
-
#include "mdbx.h"
/*
* Copyright 2015-2020 Leonid Yuriev <leo@yuriev.ru>
@@ -839,7 +839,7 @@ typedef pthread_mutex_t mdbx_fastmutex_t;
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
#ifndef __ia32__
-/* LY: define neutral __ia32__ for x86 and x86-64 archs */
+/* LY: define neutral __ia32__ for x86 and x86-64 */
#define __ia32__ 1
#endif /* __ia32__ */
#if !defined(__amd64__) && (defined(__x86_64) || defined(__x86_64__) || \
@@ -1015,6 +1015,35 @@ typedef union MDBX_srwlock {
#ifdef __cplusplus
extern void mdbx_osal_jitter(bool tiny);
#else
+
+/*----------------------------------------------------------------------------*/
+/* Atomics */
+
+#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
+#include <cstdatomic>
+#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
+ !defined(__STDC_NO_ATOMICS__) && \
+ (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
+ !(defined(__GNUC__) || defined(__clang__)))
+#include <stdatomic.h>
+#elif defined(__GNUC__) || defined(__clang__)
+/* LY: nothing required */
+#elif defined(_MSC_VER)
+#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
+#pragma warning(disable : 4133) /* 'function': incompatible types - from \
+ 'size_t' to 'LONGLONG' */
+#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
+ 'std::size_t', possible loss of data */
+#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
+ 'long', possible loss of data */
+#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
+#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
+#elif defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#else
+#error FIXME atomic-ops
+#endif
+
/*----------------------------------------------------------------------------*/
/* Memory/Compiler barriers, cache coherence */
@@ -1056,8 +1085,8 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
}
static __maybe_unused __inline void mdbx_memory_barrier(void) {
-#if __has_extension(c_atomic) || __has_extension(cxx_atomic)
- __c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
+#if __has_extension(c_atomic) && !defined(__STDC_NO_ATOMICS__)
+ atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__ATOMIC_SEQ_CST)
__atomic_thread_fence(__ATOMIC_SEQ_CST);
#elif defined(__clang__) || defined(__GNUC__)
@@ -1110,8 +1139,7 @@ MDBX_INTERNAL_FUNC int mdbx_vasprintf(char **strp, const char *fmt, va_list ap);
#if defined(__linux__) || defined(__gnu_linux__)
MDBX_INTERNAL_VAR uint32_t mdbx_linux_kernel_version;
-MDBX_INTERNAL_VAR bool
- mdbx_RunningOnWSL /* Windows Subsystem for Linux is mad and trouble-full */;
+MDBX_INTERNAL_VAR bool mdbx_RunningOnWSL1 /* Windows Subsystem 1 for Linux */;
#endif /* Linux */
#ifndef mdbx_strdup
@@ -1182,7 +1210,8 @@ enum mdbx_openfile_purpose {
MDBX_OPEN_DXB_LAZY = 1,
MDBX_OPEN_DXB_DSYNC = 2,
MDBX_OPEN_LCK = 3,
- MDBX_OPEN_COPY = 4
+ MDBX_OPEN_COPY = 4,
+ MDBX_OPEN_DELETE = 5
};
MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
@@ -1191,7 +1220,9 @@ MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose,
mdbx_mode_t unix_mode_bits);
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
+MDBX_INTERNAL_FUNC int mdbx_removedirectory(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
+MDBX_INTERNAL_FUNC int mdbx_lockfile(mdbx_filehandle_t fd, bool wait);
#define MMAP_OPTION_TRUNCATE 1
#define MMAP_OPTION_SEMAPHORE 2
@@ -1451,32 +1482,6 @@ MDBX_INTERNAL_VAR MDBX_RegGetValueA mdbx_RegGetValueA;
#endif /* Windows */
-/*----------------------------------------------------------------------------*/
-/* Atomics */
-
-#if !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
- !defined(__STDC_NO_ATOMICS__) && \
- (__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
- !(defined(__GNUC__) || defined(__clang__)))
-#include <stdatomic.h>
-#elif defined(__GNUC__) || defined(__clang__)
-/* LY: nothing required */
-#elif defined(_MSC_VER)
-#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
-#pragma warning(disable : 4133) /* 'function': incompatible types - from \
- 'size_t' to 'LONGLONG' */
-#pragma warning(disable : 4244) /* 'return': conversion from 'LONGLONG' to \
- 'std::size_t', possible loss of data */
-#pragma warning(disable : 4267) /* 'function': conversion from 'size_t' to \
- 'long', possible loss of data */
-#pragma intrinsic(_InterlockedExchangeAdd, _InterlockedCompareExchange)
-#pragma intrinsic(_InterlockedExchangeAdd64, _InterlockedCompareExchange64)
-#elif defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-#else
-#error FIXME atomic-ops
-#endif
-
#endif /* !__cplusplus */
/*----------------------------------------------------------------------------*/
@@ -1912,7 +1917,7 @@ typedef struct MDBX_db {
pgno_t md_overflow_pages; /* number of overflow pages */
uint64_t md_seq; /* table sequence counter */
uint64_t md_entries; /* number of data items */
- uint64_t md_mod_txnid; /* txnid of last commited modification */
+ uint64_t md_mod_txnid; /* txnid of last committed modification */
} MDBX_db;
/* database size-related parameters */
@@ -1996,7 +2001,7 @@ typedef struct MDBX_meta {
typedef struct MDBX_page {
union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
- uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
+ uint64_t mp_txnid; /* txnid that committed this page */
};
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */
@@ -2244,7 +2249,7 @@ typedef struct MDBX_lockinfo {
#if defined(_WIN32) || defined(_WIN64)
#define MAX_MAPSIZE32 UINT32_C(0x38000000)
#else
-#define MAX_MAPSIZE32 UINT32_C(0x7ff80000)
+#define MAX_MAPSIZE32 UINT32_C(0x7f000000)
#endif
#define MAX_MAPSIZE64 (MAX_PAGENO * (uint64_t)MAX_PAGESIZE)
@@ -2403,8 +2408,6 @@ struct MDBX_txn {
MDBX_db *mt_dbs;
/* Array of sequence numbers for each DB handle */
unsigned *mt_dbiseqs;
- /* In write txns, array of cursors for each DB */
- MDBX_cursor **mt_cursors;
/* Transaction DBI Flags */
#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */
@@ -2431,6 +2434,8 @@ struct MDBX_txn {
MDBX_reader *reader;
} to;
struct {
+ /* In write txns, array of cursors for each DB */
+ MDBX_cursor **cursors;
pgno_t *reclaimed_pglist; /* Reclaimed GC pages */
txnid_t last_reclaimed; /* ID of last used record */
pgno_t loose_refund_wl /* FIXME: describe */;
@@ -2568,7 +2573,7 @@ struct MDBX_env {
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
- unsigned me_psize; /* DB page size, inited from me_os_psize */
+ unsigned me_psize; /* DB page size, initialized from me_os_psize */
uint8_t me_psize2log; /* log2 of DB page size */
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
@@ -2578,7 +2583,7 @@ struct MDBX_env {
MDBX_dbi me_maxdbs; /* size of the DB table */
uint32_t me_pid; /* process ID of this env */
mdbx_thread_key_t me_txkey; /* thread-key for readers */
- char *me_path; /* path to the DB files */
+ char *me_pathname; /* path to the DB files */
void *me_pbuf; /* scratch area for DUPSORT put() */
MDBX_txn *me_txn; /* current write transaction */
MDBX_txn *me_txn0; /* prealloc'd write transaction */
@@ -2854,7 +2859,7 @@ static __maybe_unused __inline void mdbx_jitter4testing(bool tiny) {
((rc) != MDBX_RESULT_TRUE && (rc) != MDBX_RESULT_FALSE)
/* Internal error codes, not exposed outside libmdbx */
-#define MDBX_NO_ROOT (MDBX_LAST_LMDB_ERRCODE + 10)
+#define MDBX_NO_ROOT (MDBX_LAST_ADDED_ERRCODE + 10)
/* Debugging output value of a cursor DBI: Negative in a sub-cursor. */
#define DDBI(mc) \