diff options
author | George Hazan <ghazan@miranda.im> | 2018-07-04 12:43:33 +0300 |
---|---|---|
committer | George Hazan <ghazan@miranda.im> | 2018-07-04 12:43:33 +0300 |
commit | c7864c27c2f621ad5010be227c86f8ff6513ecb8 (patch) | |
tree | d2d7d658c02530e2e0853972bc93f6a3759a82e0 /plugins/Dbx_mdbx | |
parent | 22c315a94aa18035e22a487f7ee3255e544fc8b1 (diff) |
merge with libmdbx master
Diffstat (limited to 'plugins/Dbx_mdbx')
-rw-r--r-- | plugins/Dbx_mdbx/dbx_mdbx.vcxproj | 1 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/dbintf.cpp | 2 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/.circleci/config.yml | 20 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/README.md | 2 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/appveyor.yml | 2 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/circle.yml | 14 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/mdbx.h | 12 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/bits.h | 50 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/lck-posix.c | 31 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c | 13 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c | 328 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/osal.c | 48 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/osal.h | 3 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/tools/mdbx_chk.c | 29 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/version.c | 6 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/test/base.h | 4 |
16 files changed, 338 insertions, 227 deletions
diff --git a/plugins/Dbx_mdbx/dbx_mdbx.vcxproj b/plugins/Dbx_mdbx/dbx_mdbx.vcxproj index 0ef93cb590..4cc04efd5c 100644 --- a/plugins/Dbx_mdbx/dbx_mdbx.vcxproj +++ b/plugins/Dbx_mdbx/dbx_mdbx.vcxproj @@ -21,6 +21,7 @@ <PropertyGroup Label="Globals">
<ProjectName>Dbx_mdbx</ProjectName>
<ProjectGuid>{E0ACDEA0-0AC9-4431-8CA3-6B0CCACB2E18}</ProjectGuid>
+ <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
</PropertyGroup>
<ImportGroup Label="PropertySheets">
<Import Project="$(ProjectDir)..\..\build\vc.common\plugin.props" />
diff --git a/plugins/Dbx_mdbx/src/dbintf.cpp b/plugins/Dbx_mdbx/src/dbintf.cpp index cb663276c6..86d909a3bd 100644 --- a/plugins/Dbx_mdbx/src/dbintf.cpp +++ b/plugins/Dbx_mdbx/src/dbintf.cpp @@ -208,7 +208,7 @@ BOOL CDbxMDBX::Backup(const wchar_t *pwszPath) return 1;
}
- int res = mdbx_env_copy2fd(m_env, pFile, 0);
+ int res = mdbx_env_copy2fd(m_env, pFile, MDBX_CP_COMPACT);
CloseHandle(pFile);
if (res == MDBX_SUCCESS)
return 0;
diff --git a/plugins/Dbx_mdbx/src/libmdbx/.circleci/config.yml b/plugins/Dbx_mdbx/src/libmdbx/.circleci/config.yml new file mode 100644 index 0000000000..91e11a4bfe --- /dev/null +++ b/plugins/Dbx_mdbx/src/libmdbx/.circleci/config.yml @@ -0,0 +1,20 @@ +version: 2 +jobs: + build: + docker: + - image: circleci/buildpack-deps:artful + environment: + - TESTDB: /tmp/test.db + - TESTLOG: /tmp/test.log + steps: + - checkout + - run: make all + - run: ulimit -c unlimited && make check + - run: + command: | + mkdir -p /tmp/artifacts + mv -t /tmp/artifacts $TESTLOG $TESTDB core.* + when: on_fail + - store_artifacts: + path: /tmp/artifacts + destination: test-artifacts diff --git a/plugins/Dbx_mdbx/src/libmdbx/README.md b/plugins/Dbx_mdbx/src/libmdbx/README.md index 1a046717ee..92b6542fa7 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/README.md +++ b/plugins/Dbx_mdbx/src/libmdbx/README.md @@ -20,6 +20,8 @@ libmdbx 6. [Asynchronous lazy data flushing](https://sites.fas.harvard.edu/~cs265/papers/kathuria-2008.pdf) to disk(s); 7. etc... +Don't miss [Java Native Interface](https://github.com/castortech/mdbxjni) by [Castor Technologies](https://castortech.com/). + ----- Nowadays MDBX intended for Linux, and support Windows (since diff --git a/plugins/Dbx_mdbx/src/libmdbx/appveyor.yml b/plugins/Dbx_mdbx/src/libmdbx/appveyor.yml index ce817b237f..d002453fdd 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/appveyor.yml +++ b/plugins/Dbx_mdbx/src/libmdbx/appveyor.yml @@ -1,4 +1,4 @@ -version: 0.1.5.{build} +version: 0.2.0.{build} environment: matrix: diff --git a/plugins/Dbx_mdbx/src/libmdbx/circle.yml b/plugins/Dbx_mdbx/src/libmdbx/circle.yml deleted file mode 100644 index 77da30e932..0000000000 --- a/plugins/Dbx_mdbx/src/libmdbx/circle.yml +++ /dev/null @@ -1,14 +0,0 @@ -machine: - timezone: - Europe/Moscow - -database: - override: - -compile: - override: - - make all - -test: - override: - - make check || mv test.log ${CIRCLE_ARTIFACTS}/ diff --git a/plugins/Dbx_mdbx/src/libmdbx/mdbx.h b/plugins/Dbx_mdbx/src/libmdbx/mdbx.h index 2f28623424..35faed8488 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/mdbx.h +++ b/plugins/Dbx_mdbx/src/libmdbx/mdbx.h @@ -105,6 +105,7 @@ typedef DWORD mdbx_tid_t; #define MDBX_EIO ERROR_WRITE_FAULT #define MDBX_EPERM ERROR_INVALID_FUNCTION #define MDBX_EINTR ERROR_CANCELLED +#define MDBX_ENOFILE ERROR_FILE_NOT_FOUND #else @@ -125,6 +126,8 @@ typedef pthread_t mdbx_tid_t; #define MDBX_EIO EIO #define MDBX_EPERM EPERM #define MDBX_EINTR EINTR +#define MDBX_ENOFILE ENOENT + #endif #ifdef _MSC_VER @@ -170,7 +173,7 @@ typedef pthread_t mdbx_tid_t; /*--------------------------------------------------------------------------*/ #define MDBX_VERSION_MAJOR 0 -#define MDBX_VERSION_MINOR 1 +#define MDBX_VERSION_MINOR 2 #if defined(LIBMDBX_EXPORTS) #define LIBMDBX_API __dll_export @@ -700,7 +703,8 @@ LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, * NOTE: Currently it fails if the environment has suffered a page leak. * * Returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *path, unsigned flags); +LIBMDBX_API int mdbx_env_copy(MDBX_env *env, const char *dest_path, + unsigned flags); /* Copy an MDBX environment to the specified file descriptor, * with options. @@ -1657,7 +1661,9 @@ typedef int MDBX_pgvisitor_func(uint64_t pgno, unsigned pgnumber, void *ctx, LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, void *ctx); -typedef struct mdbx_canary { uint64_t x, y, z, v; } mdbx_canary; +typedef struct mdbx_canary { + uint64_t x, y, z, v; +} mdbx_canary; LIBMDBX_API int mdbx_canary_put(MDBX_txn *txn, const mdbx_canary *canary); LIBMDBX_API int mdbx_canary_get(MDBX_txn *txn, mdbx_canary *canary); diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/bits.h b/plugins/Dbx_mdbx/src/libmdbx/src/bits.h index f535d749de..955a583264 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/bits.h +++ b/plugins/Dbx_mdbx/src/libmdbx/src/bits.h @@ -1,4 +1,4 @@ -/* +/* * Copyright 2015-2018 Leonid Yuriev <leo@yuriev.ru> * and other libmdbx authors: please see AUTHORS file. * All rights reserved. @@ -370,19 +370,19 @@ typedef struct MDBX_page { #define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_data)) /* The maximum size of a database page. -* -* It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper. -* -* MDBX will use database pages < OS pages if needed. -* That causes more I/O in write transactions: The OS must -* know (read) the whole page before writing a partial page. -* -* Note that we don't currently support Huge pages. On Linux, -* regular data files cannot use Huge pages, and in general -* Huge pages aren't actually pageable. We rely on the OS -* demand-pager to read our data and page it out when memory -* pressure from other processes is high. So until OSs have -* actual paging support for Huge pages, they're not viable. */ + * + * It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper. + * + * MDBX will use database pages < OS pages if needed. + * That causes more I/O in write transactions: The OS must + * know (read) the whole page before writing a partial page. + * + * Note that we don't currently support Huge pages. On Linux, + * regular data files cannot use Huge pages, and in general + * Huge pages aren't actually pageable. We rely on the OS + * demand-pager to read our data and page it out when memory + * pressure from other processes is high. So until OSs have + * actual paging support for Huge pages, they're not viable. */ #define MAX_PAGESIZE 0x10000u #define MIN_PAGESIZE 512u @@ -411,7 +411,7 @@ typedef struct MDBX_lockinfo { volatile uint32_t mti_envmode; #ifdef MDBX_OSAL_LOCK - /* Mutex protecting write access to this table. */ + /* Mutex protecting write-txn. */ union { MDBX_OSAL_LOCK mti_wmutex; uint8_t pad_mti_wmutex[MDBX_OSAL_LOCK_SIZE % sizeof(size_t)]; @@ -736,14 +736,17 @@ struct MDBX_env { /* Max MDBX_lockinfo.mti_numreaders of interest to mdbx_env_close() */ unsigned me_close_readers; mdbx_fastmutex_t me_dbi_lock; - MDBX_dbi me_numdbs; /* number of DBs opened */ - MDBX_dbi me_maxdbs; /* size of the DB table */ - mdbx_pid_t me_pid; /* process ID of this env */ - mdbx_thread_key_t me_txkey; /* thread-key for readers */ - char *me_path; /* path to the DB files */ - void *me_pbuf; /* scratch area for DUPSORT put() */ - MDBX_txn *me_txn; /* current write transaction */ - MDBX_txn *me_txn0; /* prealloc'd write transaction */ + MDBX_dbi me_numdbs; /* number of DBs opened */ + MDBX_dbi me_maxdbs; /* size of the DB table */ + mdbx_pid_t me_pid; /* process ID of this env */ + mdbx_thread_key_t me_txkey; /* thread-key for readers */ + char *me_path; /* path to the DB files */ + void *me_pbuf; /* scratch area for DUPSORT put() */ + MDBX_txn *me_txn; /* current write transaction */ + MDBX_txn *me_txn0; /* prealloc'd write transaction */ +#ifdef MDBX_OSAL_LOCK + MDBX_OSAL_LOCK *me_wmutex; /* write-txn mutex */ +#endif MDBX_dbx *me_dbxs; /* array of static DB info */ uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */ unsigned *me_dbiseqs; /* array of dbi sequence numbers */ @@ -788,6 +791,7 @@ struct MDBX_env { /* Workaround for LockFileEx and WriteFile multithread bug */ CRITICAL_SECTION me_windowsbug_lock; #else + mdbx_fastmutex_t me_lckless_wmutex; mdbx_fastmutex_t me_remap_guard; #endif }; diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/lck-posix.c b/plugins/Dbx_mdbx/src/libmdbx/src/lck-posix.c index 532505e807..869b98c054 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/lck-posix.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/lck-posix.c @@ -86,16 +86,19 @@ static __inline int mdbx_lck_shared(int lfd) { } int mdbx_lck_downgrade(MDBX_env *env, bool complete) { + assert(env->me_lfd != INVALID_HANDLE_VALUE); return complete ? mdbx_lck_shared(env->me_lfd) : MDBX_SUCCESS; } int mdbx_lck_upgrade(MDBX_env *env) { return mdbx_lck_exclusive(env->me_lfd); } int mdbx_rpid_set(MDBX_env *env) { + assert(env->me_lfd != INVALID_HANDLE_VALUE); return mdbx_lck_op(env->me_lfd, F_SETLK, F_WRLCK, env->me_pid, 1); } int mdbx_rpid_clear(MDBX_env *env) { + assert(env->me_lfd != INVALID_HANDLE_VALUE); return mdbx_lck_op(env->me_lfd, F_SETLKW, F_UNLCK, env->me_pid, 1); } @@ -106,6 +109,7 @@ int mdbx_rpid_clear(MDBX_env *env) { * MDBX_RESULT_FALSE, if pid is dead (lock acquired) * or otherwise the errcode. */ int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) { + assert(env->me_lfd != INVALID_HANDLE_VALUE); int rc = mdbx_lck_op(env->me_lfd, F_GETLK, F_WRLCK, pid, 1); if (rc == 0) return MDBX_RESULT_FALSE; @@ -118,7 +122,7 @@ int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) { static int mdbx_mutex_failed(MDBX_env *env, pthread_mutex_t *mutex, int rc); -int mdbx_lck_init(MDBX_env *env) { +int __cold mdbx_lck_init(MDBX_env *env) { pthread_mutexattr_t ma; int rc = pthread_mutexattr_init(&ma); if (rc) @@ -156,7 +160,7 @@ bailout: return rc; } -void mdbx_lck_destroy(MDBX_env *env) { +void __cold mdbx_lck_destroy(MDBX_env *env) { if (env->me_lfd != INVALID_HANDLE_VALUE) { /* try get exclusive access */ if (env->me_lck && mdbx_lck_exclusive(env->me_lfd) == 0) { @@ -166,7 +170,8 @@ void mdbx_lck_destroy(MDBX_env *env) { rc = pthread_mutex_destroy(&env->me_lck->mti_wmutex); assert(rc == 0); (void)rc; - /* lock would be released (by kernel) while the me_lfd will be closed */ + /* file locks would be released (by kernel) + * while the me_lfd will be closed */ } } } @@ -209,21 +214,21 @@ void mdbx_rdt_unlock(MDBX_env *env) { int mdbx_txn_lock(MDBX_env *env, bool dontwait) { mdbx_trace(">>"); - int rc = dontwait ? mdbx_robust_trylock(env, &env->me_lck->mti_wmutex) - : mdbx_robust_lock(env, &env->me_lck->mti_wmutex); + int rc = dontwait ? mdbx_robust_trylock(env, env->me_wmutex) + : mdbx_robust_lock(env, env->me_wmutex); mdbx_trace("<< rc %d", rc); return MDBX_IS_ERROR(rc) ? rc : MDBX_SUCCESS; } void mdbx_txn_unlock(MDBX_env *env) { mdbx_trace(">>"); - int rc = mdbx_robust_unlock(env, &env->me_lck->mti_wmutex); + int rc = mdbx_robust_unlock(env, env->me_wmutex); mdbx_trace("<< rc %d", rc); if (unlikely(MDBX_IS_ERROR(rc))) mdbx_panic("%s() failed: errcode %d\n", mdbx_func_, rc); } -static int internal_seize_lck(int lfd) { +static int __cold internal_seize_lck(int lfd) { assert(lfd != INVALID_HANDLE_VALUE); /* try exclusive access */ @@ -249,17 +254,19 @@ static int internal_seize_lck(int lfd) { return rc; } -int mdbx_lck_seize(MDBX_env *env) { +int __cold mdbx_lck_seize(MDBX_env *env) { assert(env->me_fd != INVALID_HANDLE_VALUE); if (env->me_lfd == INVALID_HANDLE_VALUE) { - /* LY: without-lck mode (e.g. on read-only filesystem) */ - int rc = mdbx_lck_op(env->me_fd, F_SETLK, F_RDLCK, 0, LCK_WHOLE); + /* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */ + int rc = mdbx_lck_op(env->me_fd, F_SETLK, + (env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, + LCK_WHOLE); if (rc != 0) { mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "without-lck", rc); return rc; } - return MDBX_RESULT_FALSE; + return MDBX_RESULT_TRUE; } if ((env->me_flags & MDBX_RDONLY) == 0) { @@ -285,7 +292,7 @@ static int __cold mdbx_mutex_failed(MDBX_env *env, pthread_mutex_t *mutex, if (rc == EOWNERDEAD) { /* We own the mutex. Clean up after dead previous owner. */ - int rlocked = (mutex == &env->me_lck->mti_rmutex); + int rlocked = (env->me_lck && mutex == &env->me_lck->mti_rmutex); rc = MDBX_SUCCESS; if (!rlocked) { if (unlikely(env->me_txn)) { diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c b/plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c index 6e53212b20..ab3fae57f8 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c @@ -1,4 +1,4 @@ -/* +/* * Copyright 2015-2018 Leonid Yuriev <leo@yuriev.ru> * and other libmdbx authors: please see AUTHORS file. * All rights reserved. @@ -133,8 +133,9 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } if ((env->me_flags & MDBX_EXCLUSIVE) || - flock(env->me_fd, dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) - : (LCK_EXCLUSIVE | LCK_WAITFOR), + flock(env->me_fd, + dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) + : (LCK_EXCLUSIVE | LCK_WAITFOR), LCK_BODY)) return MDBX_SUCCESS; int rc = GetLastError(); @@ -143,8 +144,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } void mdbx_txn_unlock(MDBX_env *env) { - int rc = (env->me_flags & MDBX_EXCLUSIVE) ? TRUE - : funlock(env->me_fd, LCK_BODY); + int rc = + (env->me_flags & MDBX_EXCLUSIVE) ? TRUE : funlock(env->me_fd, LCK_BODY); LeaveCriticalSection(&env->me_windowsbug_lock); if (!rc) mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError()); @@ -362,7 +363,7 @@ static int internal_seize_lck(HANDLE lfd) { "?-E(middle) >> S-E(locked)", rc); /* 8) now on S-E (locked) or still on ?-E (middle), - * transite to S-? (used) or ?-? (free) */ + * transite to S-? (used) or ?-? (free) */ if (!funlock(lfd, LCK_UPPER)) mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_, "X-E(locked/middle) >> X-?(used/free)", GetLastError()); diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c b/plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c index ce3d7a52ce..e8bdc0085e 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c @@ -1,4 +1,4 @@ -/* +/* * Copyright 2015-2018 Leonid Yuriev <leo@yuriev.ru> * and other libmdbx authors: please see AUTHORS file. * All rights reserved. @@ -42,8 +42,8 @@ int __hot mdbx_e2k_memcmp_bug_workaround(const void *s1, const void *s2, size_t n) { if (unlikely(n > 42 - /* LY: align followed access if reasonable possible */ && - (((uintptr_t)s1) & 7) != 0 && + /* LY: align followed access if reasonable possible */ + && (((uintptr_t)s1) & 7) != 0 && (((uintptr_t)s1) & 7) == (((uintptr_t)s2) & 7))) { if (((uintptr_t)s1) & 1) { const int diff = *(uint8_t *)s1 - *(uint8_t *)s2; @@ -1142,6 +1142,13 @@ const char *__cold mdbx_strerror(int errnum) { if (size) msg = buffer; #else + if (errnum < 0) { + static char buffer[32]; + int rc = snprintf(buffer, sizeof(buffer) - 1, "unknown error %d", errnum); + assert(rc > 0); + (void)rc; + return buffer; + } msg = strerror(errnum); #endif } @@ -1924,13 +1931,16 @@ static const char *mdbx_durable_str(const MDBX_meta *const meta) { static txnid_t mdbx_find_oldest(MDBX_txn *txn) { mdbx_tassert(txn, (txn->mt_flags & MDBX_RDONLY) == 0); const MDBX_env *env = txn->mt_env; - MDBX_lockinfo *const lck = env->me_lck; - const txnid_t edge = mdbx_reclaiming_detent(env); mdbx_tassert(txn, edge <= txn->mt_txnid - 1); + + MDBX_lockinfo *const lck = env->me_lck; + if (unlikely(env->me_lck == NULL /* exclusive mode */)) + return edge; + const txnid_t last_oldest = lck->mti_oldest; mdbx_tassert(txn, edge >= last_oldest); - if (last_oldest == edge) + if (likely(last_oldest == edge)) return edge; const uint32_t nothing_changed = MDBX_STRING_TETRAD("None"); @@ -2041,6 +2051,11 @@ static int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno, bailout: if (rc == MDBX_SUCCESS) { +#if defined(_WIN32) || defined(_WIN64) + assert(size_bytes == env->me_dxb_mmap.current); + assert(size_bytes <= env->me_dxb_mmap.filesize); + assert(limit_bytes == env->me_dxb_mmap.length); +#endif env->me_dbgeo.now = size_bytes; env->me_dbgeo.upper = limit_bytes; if (env->me_txn) { @@ -2722,9 +2737,6 @@ static int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) { if (unlikely(flags & (MDBX_RDONLY | MDBX_FATAL_ERROR))) return MDBX_EACCESS; - if (unlikely(!env->me_lck)) - return MDBX_PANIC; - const bool outside_txn = (!env->me_txn0 || env->me_txn0->mt_owner != mdbx_thread_self()); @@ -4262,7 +4274,9 @@ int mdbx_txn_commit(MDBX_txn *txn) { } if (unlikely(rc != MDBX_SUCCESS)) goto fail; - env->me_lck->mti_readers_refresh_flag = false; + + if (likely(env->me_lck)) + env->me_lck->mti_readers_refresh_flag = false; end_mode = MDBX_END_COMMITTED | MDBX_END_UPDATE | MDBX_END_EOTDONE; done: @@ -4589,11 +4603,10 @@ static MDBX_page *__cold mdbx_init_metas(const MDBX_env *env, void *buffer) { MDBX_page *page1 = mdbx_meta_model(env, page0, 0); MDBX_page *page2 = mdbx_meta_model(env, page1, 1); mdbx_meta_model(env, page2, 2); - page2->mp_meta.mm_datasync_sign = MDBX_DATASIGN_WEAK; mdbx_assert(env, !mdbx_meta_eq(env, &page0->mp_meta, &page1->mp_meta)); mdbx_assert(env, !mdbx_meta_eq(env, &page1->mp_meta, &page2->mp_meta)); mdbx_assert(env, !mdbx_meta_eq(env, &page2->mp_meta, &page0->mp_meta)); - return page1; + return page2; } static int mdbx_sync_locked(MDBX_env *env, unsigned flags, @@ -4729,9 +4742,8 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, mdbx_assert(env, !mdbx_meta_eq(env, pending, meta2)); mdbx_assert(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0); - mdbx_ensure(env, - target == head || - mdbx_meta_txnid_stable(env, target) < pending->mm_txnid_a); + mdbx_ensure(env, target == head || mdbx_meta_txnid_stable(env, target) < + pending->mm_txnid_a); if (env->me_flags & MDBX_WRITEMAP) { mdbx_jitter4testing(true); if (likely(target != head)) { @@ -4917,6 +4929,12 @@ int __cold mdbx_env_create(MDBX_env **penv) { mdbx_fastmutex_destroy(&env->me_dbi_lock); goto bailout; } + rc = mdbx_fastmutex_init(&env->me_lckless_wmutex); + if (unlikely(rc != MDBX_SUCCESS)) { + mdbx_fastmutex_destroy(&env->me_remap_guard); + mdbx_fastmutex_destroy(&env->me_dbi_lock); + goto bailout; + } #endif /* Windows */ VALGRIND_CREATE_MEMPOOL(env, 0, 0); @@ -5580,18 +5598,30 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_lfd == INVALID_HANDLE_VALUE); - uint32_t bExclusive = (env->me_flags & MDBX_EXCLUSIVE); - int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd, - (bExclusive) ? true : false); + const int open_flags = + (env->me_flags & MDBX_EXCLUSIVE) ? O_RDWR : O_RDWR | O_CREAT; + int err = mdbx_openfile(lck_pathname, open_flags, mode, &env->me_lfd, + (env->me_flags & MDBX_EXCLUSIVE) ? true : false); if (err != MDBX_SUCCESS) { - if (err != MDBX_EROFS || (env->me_flags & MDBX_RDONLY) == 0) + if (!(err == MDBX_ENOFILE && (env->me_flags & MDBX_EXCLUSIVE)) && + !(err == MDBX_EROFS && (env->me_flags & MDBX_RDONLY))) return err; - /* LY: without-lck mode (e.g. on read-only filesystem) */ + + /* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */ env->me_lfd = INVALID_HANDLE_VALUE; + const int rc = mdbx_lck_seize(env); + if (MDBX_IS_ERROR(rc)) + return rc; + env->me_oldest = &env->me_oldest_stub; env->me_maxreaders = UINT_MAX; - mdbx_debug("lck-setup: %s ", "lockless mode (readonly)"); - return MDBX_SUCCESS; +#ifdef MDBX_OSAL_LOCK + env->me_wmutex = &env->me_lckless_wmutex; +#endif + mdbx_debug("lck-setup:%s%s%s", " lck-less", + (env->me_flags & MDBX_RDONLY) ? " readonly" : "", + (rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + return rc; } /* Try to get exclusive lock. If we succeed, then @@ -5600,8 +5630,9 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, if (MDBX_IS_ERROR(rc)) return rc; - mdbx_debug("lck-setup: %s ", - (rc == MDBX_RESULT_TRUE) ? "exclusive" : "shared"); + mdbx_debug("lck-setup:%s%s%s", " with-lck", + (env->me_flags & MDBX_RDONLY) ? " readonly" : "", + (rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); uint64_t size; err = mdbx_filesize(env->me_lfd, &size); @@ -5626,7 +5657,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, size = wanna; } } else { - if (bExclusive) + if (env->me_flags & MDBX_EXCLUSIVE) return MDBX_BUSY; if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) || size < env->me_os_psize) { @@ -5643,8 +5674,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, } env->me_maxreaders = (unsigned)maxreaders; - err = mdbx_mmap(MDBX_WRITEMAP | bExclusive, &env->me_lck_mmap, (size_t)size, - (size_t)size); + err = mdbx_mmap(MDBX_WRITEMAP, &env->me_lck_mmap, (size_t)size, (size_t)size); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -5693,6 +5723,9 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, mdbx_assert(env, !MDBX_IS_ERROR(rc)); env->me_oldest = &env->me_lck->mti_oldest; +#ifdef MDBX_OSAL_LOCK + env->me_wmutex = &env->me_lck->mti_wmutex; +#endif return rc; } @@ -5798,52 +5831,54 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, } mdbx_debug("opened dbenv %p", (void *)env); - const unsigned mode_flags = - MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC; - if (lck_rc == MDBX_RESULT_TRUE) { - env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY); - if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { - /* LY: downgrade lock only if exclusive access not requested. - * in case exclusive==1, just leave value as is. */ - rc = mdbx_lck_downgrade(env, true); - mdbx_debug("lck-downgrade-full: rc %i ", rc); - } else { - rc = mdbx_lck_downgrade(env, false); - mdbx_debug("lck-downgrade-partial: rc %i ", rc); - } - if (rc != MDBX_SUCCESS) - goto bailout; - } else { - if ((env->me_flags & MDBX_RDONLY) == 0) { - while (env->me_lck->mti_envmode == MDBX_RDONLY) { - if (mdbx_atomic_compare_and_swap32(&env->me_lck->mti_envmode, - MDBX_RDONLY, - env->me_flags & mode_flags)) - break; - /* TODO: yield/relax cpu */ + if (env->me_lck) { + const unsigned mode_flags = + MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC; + if (lck_rc == MDBX_RESULT_TRUE) { + env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY); + if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { + /* LY: downgrade lock only if exclusive access not requested. + * in case exclusive==1, just leave value as is. */ + rc = mdbx_lck_downgrade(env, true); + mdbx_debug("lck-downgrade-full: rc %i ", rc); + } else { + rc = mdbx_lck_downgrade(env, false); + mdbx_debug("lck-downgrade-partial: rc %i ", rc); } - if ((env->me_lck->mti_envmode ^ env->me_flags) & mode_flags) { - mdbx_error("current mode/flags incompatible with requested"); - rc = MDBX_INCOMPATIBLE; + if (rc != MDBX_SUCCESS) goto bailout; + } else { + if ((env->me_flags & MDBX_RDONLY) == 0) { + while (env->me_lck->mti_envmode == MDBX_RDONLY) { + if (mdbx_atomic_compare_and_swap32(&env->me_lck->mti_envmode, + MDBX_RDONLY, + env->me_flags & mode_flags)) + break; + /* TODO: yield/relax cpu */ + } + if ((env->me_lck->mti_envmode ^ env->me_flags) & mode_flags) { + mdbx_error("current mode/flags incompatible with requested"); + rc = MDBX_INCOMPATIBLE; + goto bailout; + } } } - } - if (env->me_lck && (env->me_flags & MDBX_NOTLS) == 0) { - rc = mdbx_rthc_alloc(&env->me_txkey, &env->me_lck->mti_readers[0], - &env->me_lck->mti_readers[env->me_maxreaders]); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - env->me_flags |= MDBX_ENV_TXKEY; + if ((env->me_flags & MDBX_NOTLS) == 0) { + rc = mdbx_rthc_alloc(&env->me_txkey, &env->me_lck->mti_readers[0], + &env->me_lck->mti_readers[env->me_maxreaders]); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + env->me_flags |= MDBX_ENV_TXKEY; + } } if ((flags & MDBX_RDONLY) == 0) { MDBX_txn *txn; int tsize = sizeof(MDBX_txn), - size = tsize + - env->me_maxdbs * (sizeof(MDBX_db) + sizeof(MDBX_cursor *) + - sizeof(unsigned) + 1); + size = + tsize + env->me_maxdbs * (sizeof(MDBX_db) + sizeof(MDBX_cursor *) + + sizeof(unsigned) + 1); if ((env->me_pbuf = calloc(1, env->me_psize)) && (txn = calloc(1, size))) { txn->mt_dbs = (MDBX_db *)((char *)txn + tsize); txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); @@ -5946,7 +5981,7 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) { if (unlikely(env->me_signature != MDBX_ME_SIGNATURE)) return MDBX_EBADSIGN; - if (env->me_lck && (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0) { + if ((env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0) { if (env->me_txn0 && env->me_txn0->mt_owner && env->me_txn0->mt_owner != mdbx_thread_self()) return MDBX_BUSY; @@ -5982,6 +6017,8 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) { DeleteCriticalSection(&env->me_windowsbug_lock); #else mdbx_ensure(env, + mdbx_fastmutex_destroy(&env->me_lckless_wmutex) == MDBX_SUCCESS); + mdbx_ensure(env, mdbx_fastmutex_destroy(&env->me_remap_guard) == MDBX_SUCCESS); #endif /* Windows */ @@ -6087,7 +6124,7 @@ static int __hot mdbx_cmp_int_ua(const MDBX_val *a, const MDBX_val *b) { } while (pa != a->iov_base); return diff; } -#else /* __BYTE_ORDER__ */ +#else /* __BYTE_ORDER__ */ return memcmp(a->iov_base, b->iov_base, a->iov_len); #endif /* __BYTE_ORDER__ */ #endif /* UNALIGNED_OK */ @@ -6317,7 +6354,7 @@ static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret, mapped: p = pgno2page(env, pgno); -/* TODO: check p->mp_validator here */ + /* TODO: check p->mp_validator here */ done: *ret = p; @@ -7718,6 +7755,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, offset = env->me_psize - (unsigned)olddata.iov_len; flags |= F_DUPDATA | F_SUBDATA; dummy.md_root = mp->mp_pgno; + dummy.md_seq = dummy.md_merkle = 0; sub_root = mp; } if (mp != fp) { @@ -10040,9 +10078,8 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) return MDBX_EINVAL; - if (unlikely(flags & - ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE | - MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) + if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE | + MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) return MDBX_EINVAL; if (unlikely(txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) @@ -10086,8 +10123,8 @@ typedef struct mdbx_copy { MDBX_env *mc_env; MDBX_txn *mc_txn; mdbx_condmutex_t mc_condmutex; - char *mc_wbuf[2]; - char *mc_over[2]; + uint8_t *mc_wbuf[2]; + uint8_t *mc_over[2]; size_t mc_wlen[2]; size_t mc_olen[2]; mdbx_filehandle_t mc_fd; @@ -10102,7 +10139,7 @@ typedef struct mdbx_copy { /* Dedicated writer thread for compacting copy. */ static THREAD_RESULT __cold THREAD_CALL mdbx_env_copythr(void *arg) { mdbx_copy *my = arg; - char *ptr; + uint8_t *ptr; int toggle = 0; int rc; @@ -10263,7 +10300,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { my->mc_wlen[toggle] += my->mc_env->me_psize; if (omp->mp_pages > 1) { my->mc_olen[toggle] = pgno2bytes(my->mc_env, omp->mp_pages - 1); - my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize; + my->mc_over[toggle] = (uint8_t *)omp + my->mc_env->me_psize; rc = mdbx_env_cthr_toggle(my, 1); if (unlikely(rc != MDBX_SUCCESS)) goto done; @@ -10343,23 +10380,26 @@ done: static int __cold mdbx_env_compact(MDBX_env *env, mdbx_filehandle_t fd) { MDBX_txn *txn = NULL; mdbx_thread_t thr; - mdbx_copy my; - memset(&my, 0, sizeof(my)); + mdbx_copy ctx; + memset(&ctx, 0, sizeof(ctx)); - int rc = mdbx_condmutex_init(&my.mc_condmutex); + int rc = mdbx_condmutex_init(&ctx.mc_condmutex); if (unlikely(rc != MDBX_SUCCESS)) return rc; - rc = mdbx_memalign_alloc(env->me_os_psize, MDBX_WBUF * 2, - (void **)&my.mc_wbuf[0]); + + const size_t buffer_size = pgno2bytes(env, NUM_METAS) + MDBX_WBUF * 2; + uint8_t *buffer = NULL; + rc = mdbx_memalign_alloc(env->me_os_psize, buffer_size, (void **)&buffer); if (unlikely(rc != MDBX_SUCCESS)) goto done; - memset(my.mc_wbuf[0], 0, MDBX_WBUF * 2); - my.mc_wbuf[1] = my.mc_wbuf[0] + MDBX_WBUF; - my.mc_next_pgno = NUM_METAS; - my.mc_env = env; - my.mc_fd = fd; - rc = mdbx_thread_create(&thr, mdbx_env_copythr, &my); + ctx.mc_wbuf[0] = buffer + pgno2bytes(env, NUM_METAS); + memset(ctx.mc_wbuf[0], 0, MDBX_WBUF * 2); + ctx.mc_wbuf[1] = ctx.mc_wbuf[0] + MDBX_WBUF; + ctx.mc_next_pgno = NUM_METAS; + ctx.mc_env = env; + ctx.mc_fd = fd; + rc = mdbx_thread_create(&thr, mdbx_env_copythr, &ctx); if (unlikely(rc != MDBX_SUCCESS)) goto done; @@ -10367,7 +10407,7 @@ static int __cold mdbx_env_compact(MDBX_env *env, mdbx_filehandle_t fd) { if (unlikely(rc != MDBX_SUCCESS)) goto finish; - MDBX_page *meta = mdbx_init_metas(env, my.mc_wbuf[0]); + MDBX_page *const meta = mdbx_init_metas(env, buffer); /* Set metapage 1 with current main DB */ pgno_t new_root, root = txn->mt_dbs[MAIN_DBI].md_root; @@ -10409,25 +10449,38 @@ static int __cold mdbx_env_compact(MDBX_env *env, mdbx_filehandle_t fd) { /* update signature */ meta->mp_meta.mm_datasync_sign = mdbx_meta_sign(&meta->mp_meta); - my.mc_wlen[0] = pgno2bytes(env, NUM_METAS); - my.mc_txn = txn; - rc = mdbx_env_cwalk(&my, &root, 0); + ctx.mc_wlen[0] = pgno2bytes(env, NUM_METAS); + ctx.mc_txn = txn; + rc = mdbx_env_cwalk(&ctx, &root, 0); if (rc == MDBX_SUCCESS && root != new_root) { - mdbx_error("unexpected root %" PRIaPGNO " (%" PRIaPGNO ")", root, new_root); - rc = MDBX_PROBLEM; /* page leak or corrupt DB */ + if (root > new_root) { + mdbx_error("post-compactification root %" PRIaPGNO + " GT expected %" PRIaPGNO " (source DB corrupted)", + root, new_root); + rc = MDBX_CORRUPTED; /* page leak or corrupt DB */ + } else { + mdbx_error("post-compactification root %" PRIaPGNO + " LT expected %" PRIaPGNO " (page leak(s) in source DB)", + root, new_root); + /* fixup and rewrite metas */ + meta->mp_meta.mm_dbs[MAIN_DBI].md_root = root; + meta->mp_meta.mm_geo.next = meta->mp_meta.mm_geo.now = root + 1; + meta->mp_meta.mm_datasync_sign = mdbx_meta_sign(&meta->mp_meta); + rc = mdbx_pwrite(fd, buffer, pgno2bytes(env, NUM_METAS), 0); + } } finish: if (rc != MDBX_SUCCESS) - my.mc_error = rc; - mdbx_env_cthr_toggle(&my, 1 | MDBX_EOF); + ctx.mc_error = rc; + mdbx_env_cthr_toggle(&ctx, 1 | MDBX_EOF); rc = mdbx_thread_join(thr); mdbx_txn_abort(txn); done: - mdbx_memalign_free(my.mc_wbuf[0]); - mdbx_condmutex_destroy(&my.mc_condmutex); - return rc ? rc : my.mc_error; + mdbx_memalign_free(buffer); + mdbx_condmutex_destroy(&ctx.mc_condmutex); + return rc ? rc : ctx.mc_error; } /* Copy environment as-is. */ @@ -10482,25 +10535,25 @@ int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, return mdbx_env_copy_asis(env, fd); } -int __cold mdbx_env_copy(MDBX_env *env, const char *path, unsigned flags) { - char *lck_pathname; +int __cold mdbx_env_copy(MDBX_env *env, const char *dest_path, unsigned flags) { + char *dxb_pathname; mdbx_filehandle_t newfd = INVALID_HANDLE_VALUE; if (env->me_flags & MDBX_NOSUBDIR) { - lck_pathname = (char *)path; + dxb_pathname = (char *)dest_path; } else { - size_t len = strlen(path); + size_t len = strlen(dest_path); len += sizeof(MDBX_DATANAME); - lck_pathname = malloc(len); - if (!lck_pathname) + dxb_pathname = malloc(len); + if (!dxb_pathname) return MDBX_ENOMEM; - sprintf(lck_pathname, "%s" MDBX_DATANAME, path); + sprintf(dxb_pathname, "%s" MDBX_DATANAME, dest_path); } /* The destination path must exist, but the destination file must not. * We don't want the OS to cache the writes, since the source data is * already in the OS cache. */ - int rc = mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666, + int rc = mdbx_openfile(dxb_pathname, O_WRONLY | O_CREAT | O_EXCL, 0640, &newfd, true); if (rc == MDBX_SUCCESS) { if (env->me_psize >= env->me_os_psize) { @@ -10515,15 +10568,17 @@ int __cold mdbx_env_copy(MDBX_env *env, const char *path, unsigned flags) { rc = mdbx_env_copy2fd(env, newfd, flags); } - if (!(env->me_flags & MDBX_NOSUBDIR)) - free(lck_pathname); - if (newfd != INVALID_HANDLE_VALUE) { int err = mdbx_closefile(newfd); if (rc == MDBX_SUCCESS && err != rc) rc = err; + if (rc != MDBX_SUCCESS) + (void)mdbx_removefile(dxb_pathname); } + if (dxb_pathname != dest_path) + free(dxb_pathname); + return rc; } @@ -10682,9 +10737,9 @@ int __cold mdbx_env_info(MDBX_env *env, MDBX_envinfo *arg, size_t bytes) { } static MDBX_cmp_func *mdbx_default_keycmp(unsigned flags) { - return (flags & MDBX_REVERSEKEY) ? mdbx_cmp_memnr : (flags & MDBX_INTEGERKEY) - ? mdbx_cmp_int_a2 - : mdbx_cmp_memn; + return (flags & MDBX_REVERSEKEY) + ? mdbx_cmp_memnr + : (flags & MDBX_INTEGERKEY) ? mdbx_cmp_int_a2 : mdbx_cmp_memn; } static MDBX_cmp_func *mdbx_default_datacmp(unsigned flags) { @@ -11220,28 +11275,31 @@ int __cold mdbx_reader_list(MDBX_env *env, MDBX_msg_func *func, void *ctx) { return MDBX_EBADSIGN; const MDBX_lockinfo *const lck = env->me_lck; - const unsigned snap_nreaders = lck->mti_numreaders; - for (unsigned i = 0; i < snap_nreaders; i++) { - if (lck->mti_readers[i].mr_pid) { - const txnid_t txnid = lck->mti_readers[i].mr_txnid; - if (txnid == ~(txnid_t)0) - snprintf(buf, sizeof(buf), "%10" PRIuPTR " %" PRIxPTR " -\n", - (uintptr_t)lck->mti_readers[i].mr_pid, - (uintptr_t)lck->mti_readers[i].mr_tid); - else - snprintf(buf, sizeof(buf), "%10" PRIuPTR " %" PRIxPTR " %" PRIaTXN "\n", - (uintptr_t)lck->mti_readers[i].mr_pid, - (uintptr_t)lck->mti_readers[i].mr_tid, txnid); - - if (first) { - first = 0; - rc = func(" pid thread txnid\n", ctx); + if (likely(lck)) { + const unsigned snap_nreaders = lck->mti_numreaders; + for (unsigned i = 0; i < snap_nreaders; i++) { + if (lck->mti_readers[i].mr_pid) { + const txnid_t txnid = lck->mti_readers[i].mr_txnid; + if (txnid == ~(txnid_t)0) + snprintf(buf, sizeof(buf), "%10" PRIuPTR " %" PRIxPTR " -\n", + (uintptr_t)lck->mti_readers[i].mr_pid, + (uintptr_t)lck->mti_readers[i].mr_tid); + else + snprintf(buf, sizeof(buf), + "%10" PRIuPTR " %" PRIxPTR " %" PRIaTXN "\n", + (uintptr_t)lck->mti_readers[i].mr_pid, + (uintptr_t)lck->mti_readers[i].mr_tid, txnid); + + if (first) { + first = 0; + rc = func(" pid thread txnid\n", ctx); + if (rc < 0) + break; + } + rc = func(buf, ctx); if (rc < 0) break; } - rc = func(buf, ctx); - if (rc < 0) - break; } } if (first) @@ -11306,6 +11364,13 @@ int __cold mdbx_reader_check0(MDBX_env *env, int rdt_locked, int *dead) { } MDBX_lockinfo *const lck = env->me_lck; + if (unlikely(lck == NULL)) { + /* exclusive mode */ + if (dead) + *dead = 0; + return MDBX_SUCCESS; + } + const unsigned snap_nreaders = lck->mti_numreaders; mdbx_pid_t *pids = alloca((snap_nreaders + 1) * sizeof(mdbx_pid_t)); pids[0] = 0; @@ -11420,7 +11485,7 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) { mdbx_assert(env, oldest < env->me_txn0->mt_txnid); mdbx_assert(env, oldest >= laggard); mdbx_assert(env, oldest >= env->me_oldest[0]); - if (oldest == laggard) + if (oldest == laggard || unlikely(env->me_lck == NULL /* exclusive mode */)) return oldest; if (MDBX_IS_ERROR(mdbx_reader_check0(env, false, NULL))) @@ -11874,9 +11939,8 @@ int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *new_data, if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) return MDBX_EINVAL; - if (unlikely(flags & - ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE | - MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) + if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE | + MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) return MDBX_EINVAL; if (unlikely(txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/osal.c b/plugins/Dbx_mdbx/src/libmdbx/src/osal.c index 56df979872..c2fced0b4a 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/osal.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/osal.c @@ -1,4 +1,4 @@ -/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ +/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ /* * Copyright 2015-2018 Leonid Yuriev <leo@yuriev.ru> @@ -191,7 +191,7 @@ __cold void mdbx_panic(const char *fmt, ...) { abort(); } - /*----------------------------------------------------------------------------*/ +/*----------------------------------------------------------------------------*/ #ifndef mdbx_asprintf int mdbx_asprintf(char **strp, const char *fmt, ...) { @@ -408,6 +408,13 @@ int mdbx_fastmutex_release(mdbx_fastmutex_t *fastmutex) { /*----------------------------------------------------------------------------*/ +int mdbx_removefile(const char *pathname) { +#if defined(_WIN32) || defined(_WIN64) + return DeleteFileA(pathname) ? MDBX_SUCCESS : GetLastError(); +#else + return unlink(pathname) ? errno : MDBX_SUCCESS; +#endif +} int mdbx_openfile(const char *pathname, int flags, mode_t mode, mdbx_filehandle_t *fd, bool exclusive) { *fd = INVALID_HANDLE_VALUE; @@ -431,8 +438,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode, break; case O_RDWR: DesiredAccess = GENERIC_READ | GENERIC_WRITE; - ShareMode = - exclusive ? FILE_SHARE_READ : (FILE_SHARE_READ | FILE_SHARE_WRITE); + ShareMode = exclusive ? 0 : (FILE_SHARE_READ | FILE_SHARE_WRITE); break; } @@ -856,13 +862,11 @@ int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { map->section = NULL; map->address = nullptr; - if (!(flags & MDBX_EXCLUSIVE)) { - NTSTATUS rc = mdbx_check4nonlocal(map->fd, flags); - if (rc != MDBX_SUCCESS) - return rc; - } + NTSTATUS rc = mdbx_check4nonlocal(map->fd, flags); + if (rc != MDBX_SUCCESS) + return rc; - NTSTATUS rc = mdbx_filesize(map->fd, &map->filesize); + rc = mdbx_filesize(map->fd, &map->filesize); if (rc != MDBX_SUCCESS) return rc; if ((flags & MDBX_RDONLY) == 0 && map->filesize != size) { @@ -878,13 +882,13 @@ int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { rc = NtCreateSection( &map->section, /* DesiredAccess */ - (flags & MDBX_WRITEMAP) + (flags & MDBX_WRITEMAP) ? SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | SECTION_MAP_WRITE : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, /* ObjectAttributes */ NULL, /* MaximumSize (InitialSize) */ &SectionSize, /* SectionPageProtection */ - (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, + (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, /* AllocationAttributes */ SEC_RESERVE, map->fd); if (!NT_SUCCESS(rc)) return ntstatus2errcode(rc); @@ -898,7 +902,7 @@ int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { /* InheritDisposition */ ViewUnmap, /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, /* Win32Protect */ - (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); + (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); if (!NT_SUCCESS(rc)) { NtClose(map->section); map->section = 0; @@ -933,11 +937,6 @@ int mdbx_munmap(mdbx_mmap_t *map) { if (!NT_SUCCESS(rc)) ntstatus2errcode(rc); - if (map->filesize != map->current && - mdbx_filesize(map->fd, &map->filesize) == MDBX_SUCCESS && - map->filesize != map->current) - (void)mdbx_ftruncate(map->fd, map->current); - map->length = 0; map->current = 0; map->address = nullptr; @@ -963,8 +962,11 @@ int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { /* growth rw-section */ SectionSize.QuadPart = size; status = NtExtendSection(map->section, &SectionSize); - if (NT_SUCCESS(status)) - map->filesize = map->current = size; + if (NT_SUCCESS(status)) { + map->current = size; + if (map->filesize < size) + map->filesize = size; + } return ntstatus2errcode(status); } @@ -1040,14 +1042,14 @@ retry_file_and_section: status = NtCreateSection( &map->section, /* DesiredAccess */ - (flags & MDBX_WRITEMAP) + (flags & MDBX_WRITEMAP) ? SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | SECTION_MAP_WRITE : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, /* ObjectAttributes */ NULL, /* MaximumSize (InitialSize) */ &SectionSize, /* SectionPageProtection */ - (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, + (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, /* AllocationAttributes */ SEC_RESERVE, map->fd); if (!NT_SUCCESS(status)) @@ -1072,7 +1074,7 @@ retry_mapview:; /* InheritDisposition */ ViewUnmap, /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, /* Win32Protect */ - (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); + (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); if (!NT_SUCCESS(status)) { if (status == /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 && diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/osal.h b/plugins/Dbx_mdbx/src/libmdbx/src/osal.h index 41e53222d7..daa79064f9 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/osal.h +++ b/plugins/Dbx_mdbx/src/libmdbx/src/osal.h @@ -1,4 +1,4 @@ -/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ +/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ /* * Copyright 2015-2018 Leonid Yuriev <leo@yuriev.ru> @@ -479,6 +479,7 @@ int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length); int mdbx_openfile(const char *pathname, int flags, mode_t mode, mdbx_filehandle_t *fd, bool exclusive); int mdbx_closefile(mdbx_filehandle_t fd); +int mdbx_removefile(const char *pathname); typedef struct mdbx_mmap_param { union { diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/tools/mdbx_chk.c b/plugins/Dbx_mdbx/src/libmdbx/src/tools/mdbx_chk.c index 0fd23ae69f..51096c4053 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/tools/mdbx_chk.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/tools/mdbx_chk.c @@ -73,8 +73,7 @@ struct { } walk; uint64_t total_unused_bytes; -int exclusive = 2; -int envflags = MDBX_RDONLY; +int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE; MDBX_env *env; MDBX_txn *txn; @@ -706,7 +705,7 @@ void verbose_meta(int num, txnid_t txnid, uint64_t sign) { print(", stay"); if (txnid > envinfo.mi_recent_txnid && - (exclusive || (envflags & MDBX_RDONLY) == 0)) + (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")", txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid); print("\n"); @@ -805,7 +804,7 @@ int main(int argc, char *argv[]) { envflags &= ~MDBX_RDONLY; break; case 'c': - exclusive = 0; + envflags &= ~MDBX_EXCLUSIVE; break; case 'd': dont_traversal = 1; @@ -853,7 +852,19 @@ int main(int argc, char *argv[]) { goto bailout; } - rc = mdbx_env_open_ex(env, envname, envflags, 0664, &exclusive); + rc = mdbx_env_open(env, envname, envflags, 0664); + if ((envflags & MDBX_EXCLUSIVE) && + (rc == MDBX_BUSY || +#if defined(_WIN32) || defined(_WIN64) + rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION +#else + rc == EBUSY +#endif + )) { + envflags &= ~MDBX_EXCLUSIVE; + rc = mdbx_env_open(env, envname, envflags, 0664); + } + if (rc) { error("mdbx_env_open failed, error %d %s\n", rc, mdbx_strerror(rc)); if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY)) @@ -861,7 +872,8 @@ int main(int argc, char *argv[]) { goto bailout; } if (verbose) - print(" - %s mode\n", exclusive ? "monopolistic" : "cooperative"); + print(" - %s mode\n", + (envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative"); if ((envflags & MDBX_RDONLY) == 0) { rc = mdbx_txn_lock(env, false); @@ -946,7 +958,7 @@ int main(int argc, char *argv[]) { ++problems_meta; } - if (exclusive > 1) { + if (envflags & MDBX_EXCLUSIVE) { if (verbose) print(" - performs full check recent-txn-id with meta-pages\n"); problems_meta += check_meta_head(true); @@ -1079,7 +1091,8 @@ int main(int argc, char *argv[]) { } if (problems_maindb == 0 && problems_freedb == 0) { - if (!dont_traversal && (exclusive || (envflags & MDBX_RDONLY) == 0)) { + if (!dont_traversal && + (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { if (walk.pgcount != lastpgno - freedb_pages) { error("used pages mismatch (%" PRIu64 " != %" PRIu64 ")\n", walk.pgcount, lastpgno - freedb_pages); diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/version.c b/plugins/Dbx_mdbx/src/libmdbx/src/version.c index aeb15bed4b..dfb4a5724c 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/version.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/version.c @@ -14,12 +14,12 @@ #include "./bits.h" -#if MDBX_VERSION_MAJOR != 0 || MDBX_VERSION_MINOR != 1 +#if MDBX_VERSION_MAJOR != 0 || MDBX_VERSION_MINOR != 2 #error "API version mismatch!" #endif -#define MDBX_VERSION_RELEASE 5 -#define MDBX_VERSION_REVISION 1 +#define MDBX_VERSION_RELEASE 0 +#define MDBX_VERSION_REVISION 2 /*LIBMDBX_EXPORTS*/ const mdbx_version_info mdbx_version = { MDBX_VERSION_MAJOR, diff --git a/plugins/Dbx_mdbx/src/libmdbx/test/base.h b/plugins/Dbx_mdbx/src/libmdbx/test/base.h index bc82ff26c0..b23f776aa3 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/test/base.h +++ b/plugins/Dbx_mdbx/src/libmdbx/test/base.h @@ -80,6 +80,10 @@ #include "../src/defs.h" #include "../src/osal.h" +#if !defined(__thread) && (defined(_MSC_VER) || defined(__DMC__)) +#define __thread __declspec(thread) +#endif /* __thread */ + #ifdef _MSC_VER #pragma warning(pop) #pragma warning(disable : 4201) /* nonstandard extension used : \ |