diff options
author | George Hazan <ghazan@miranda.im> | 2018-06-20 16:16:03 +0300 |
---|---|---|
committer | George Hazan <ghazan@miranda.im> | 2018-06-20 16:16:03 +0300 |
commit | f559ca1427b16c1d9192f554490759a753c3bf3b (patch) | |
tree | bf0236f6603b3a4ddcf3e099b7de00b05e456275 /plugins/Dbx_mdbx | |
parent | 46ba46c99b1bf6849e9b8914ac9ed38aadeaf8e0 (diff) |
merge with libmdbx/xp
Diffstat (limited to 'plugins/Dbx_mdbx')
-rw-r--r-- | plugins/Dbx_mdbx/dbx_mdbx.vcxproj | 1 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/dbintf.cpp | 2 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/init.cpp | 4 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/README-RU.md | 4 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/README.md | 4 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/mdbx.h | 34 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/bits.h | 42 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/defs.h | 4 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c | 272 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c | 115 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/osal.c | 212 | ||||
-rw-r--r-- | plugins/Dbx_mdbx/src/libmdbx/src/osal.h | 66 |
12 files changed, 512 insertions, 248 deletions
diff --git a/plugins/Dbx_mdbx/dbx_mdbx.vcxproj b/plugins/Dbx_mdbx/dbx_mdbx.vcxproj index 54c0214bd5..0ef93cb590 100644 --- a/plugins/Dbx_mdbx/dbx_mdbx.vcxproj +++ b/plugins/Dbx_mdbx/dbx_mdbx.vcxproj @@ -46,6 +46,7 @@ <ClCompile>
<ExceptionHandling>Sync</ExceptionHandling>
<PreprocessorDefinitions Condition="'$(Configuration)'=='Debug'">MDB_DEBUG=5;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>MDBX_CONFIG_MANUAL_TLS_CALLBACK=1;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
</ItemDefinitionGroup>
</Project>
\ No newline at end of file diff --git a/plugins/Dbx_mdbx/src/dbintf.cpp b/plugins/Dbx_mdbx/src/dbintf.cpp index 5f8eaa4398..9fffe8f732 100644 --- a/plugins/Dbx_mdbx/src/dbintf.cpp +++ b/plugins/Dbx_mdbx/src/dbintf.cpp @@ -244,7 +244,7 @@ int CDbxMDBX::Map() if (rc != MDBX_SUCCESS)
return EGROKPRF_CANTREAD;
- unsigned int mode = MDBX_NOSUBDIR | MDBX_MAPASYNC | MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_COALESCE;
+ unsigned int mode = MDBX_NOSUBDIR | MDBX_MAPASYNC | MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_COALESCE | MDBX_EXCLUSIVE;
if (m_bReadOnly)
mode |= MDBX_RDONLY;
diff --git a/plugins/Dbx_mdbx/src/init.cpp b/plugins/Dbx_mdbx/src/init.cpp index 6a85913bde..8998211776 100644 --- a/plugins/Dbx_mdbx/src/init.cpp +++ b/plugins/Dbx_mdbx/src/init.cpp @@ -23,8 +23,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #include "stdafx.h"
-EXTERN_C void NTAPI tls_callback(PVOID module, DWORD reason, PVOID reserved);
-
CMPlugin g_plugin;
/////////////////////////////////////////////////////////////////////////////////////////
@@ -53,7 +51,7 @@ EXTERN_C void NTAPI tls_callback(PVOID module, DWORD reason, PVOID reserved); BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD reason, LPVOID reserved)
{
- tls_callback(hInstDLL, reason, reserved);
+ mdbx_dll_callback(hInstDLL, reason, reserved);
return TRUE;
}
diff --git a/plugins/Dbx_mdbx/src/libmdbx/README-RU.md b/plugins/Dbx_mdbx/src/libmdbx/README-RU.md index 233545552d..f4ae5e8f14 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/README-RU.md +++ b/plugins/Dbx_mdbx/src/libmdbx/README-RU.md @@ -591,8 +591,8 @@ _libmdbx_ при этом не ведет WAL, а передает весь ко 13. Исправленный вариант `mdbx_cursor_count()`, возвращающий корректное количество дубликатов для всех типов таблиц и любого положения курсора. -14. Возможность открыть БД в эксклюзивном режиме посредством -`mdbx_env_open_ex()`, например в целях её проверки. +14. Возможность открыть БД в эксклюзивном режиме посредством флага +`MDBX_EXCLUSIVE`, например в целях её проверки. 15. Возможность закрыть БД в "грязном" состоянии (без сброса данных и формирования сильной точки фиксации) посредством `mdbx_env_close_ex()`. diff --git a/plugins/Dbx_mdbx/src/libmdbx/README.md b/plugins/Dbx_mdbx/src/libmdbx/README.md index 4820975679..1a046717ee 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/README.md +++ b/plugins/Dbx_mdbx/src/libmdbx/README.md @@ -20,8 +20,6 @@ libmdbx 6. [Asynchronous lazy data flushing](https://sites.fas.harvard.edu/~cs265/papers/kathuria-2008.pdf) to disk(s); 7. etc... -Don't miss [Java Native Interface](https://github.com/castortech/mdbxjni) by [Castor Technologies](https://castortech.com/). - ----- Nowadays MDBX intended for Linux, and support Windows (since @@ -405,7 +403,7 @@ Improvements over LMDB 13. Fixed `mdbx_cursor_count()`, which returns correct count of duplicated for all table types and any cursor position. -14. Ability to open DB in exclusive mode via `mdbx_env_open_ex()`, e.g. for integrity check. +14. Ability to open DB in exclusive mode with `MDBX_EXCLUSIVE` flag, e.g. for integrity check. 15. Ability to close DB in "dirty" state (without data flush and creation of steady synchronization point) via `mdbx_env_close_ex()`. diff --git a/plugins/Dbx_mdbx/src/libmdbx/mdbx.h b/plugins/Dbx_mdbx/src/libmdbx/mdbx.h index 9758fe5738..2f28623424 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/mdbx.h +++ b/plugins/Dbx_mdbx/src/libmdbx/mdbx.h @@ -85,6 +85,11 @@ #include <windows.h> #include <winnt.h> + +#ifndef FSCTL_GET_EXTERNAL_BACKING +#define FSCTL_GET_EXTERNAL_BACKING CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 196, METHOD_BUFFERED, FILE_ANY_ACCESS) +#endif + #ifndef __mode_t_defined typedef unsigned short mode_t; #endif @@ -203,6 +208,24 @@ typedef struct mdbx_build_info { extern LIBMDBX_API const mdbx_version_info mdbx_version; extern LIBMDBX_API const mdbx_build_info mdbx_build; +#if defined(_WIN32) || defined(_WIN64) + +/* Dll initialization callback for ability to dynamically load MDBX DLL by + * LoadLibrary() on Windows versions before Windows Vista. This function MUST be + * called once from DllMain() for each reason (DLL_PROCESS_ATTACH, + * DLL_PROCESS_DETACH, DLL_THREAD_ATTACH and DLL_THREAD_DETACH). Do this + * carefully and ONLY when actual Windows version don't support initialization + * via "TLS Directory" (e.g .CRT$XL[A-Z] sections in executable or dll file). */ + +#ifndef MDBX_CONFIG_MANUAL_TLS_CALLBACK +#define MDBX_CONFIG_MANUAL_TLS_CALLBACK 0 +#endif +#if MDBX_CONFIG_MANUAL_TLS_CALLBACK +void LIBMDBX_API NTAPI mdbx_dll_callback(PVOID module, DWORD reason, + PVOID reserved); +#endif /* MDBX_CONFIG_MANUAL_TLS_CALLBACK */ +#endif /* Windows */ + /* The name of the lock file in the DB environment */ #define MDBX_LOCKNAME "/mdbx.lck" /* The name of the data file in the DB environment */ @@ -270,9 +293,8 @@ typedef int(MDBX_cmp_func)(const MDBX_val *a, const MDBX_val *b); #define MDBX_MAPASYNC 0x100000u /* tie reader locktable slots to MDBX_txn objects instead of to threads */ #define MDBX_NOTLS 0x200000u -/* don't do any locking, caller must manage their own locks - * WARNING: libmdbx don't support this mode. */ -#define MDBX_NOLOCK__UNSUPPORTED 0x400000u +/* open DB in exclusive/monopolistic mode. */ +#define MDBX_EXCLUSIVE 0x400000u /* don't do readahead */ #define MDBX_NORDAHEAD 0x800000u /* don't initialize malloc'd memory before writing to datafile */ @@ -652,8 +674,6 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv); * - MDBX_EAGAIN - the environment was locked by another process. */ LIBMDBX_API int mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, mode_t mode); -LIBMDBX_API int mdbx_env_open_ex(MDBX_env *env, const char *path, - unsigned flags, mode_t mode, int *exclusive); /* Copy an MDBX environment to the specified path, with options. * @@ -1637,9 +1657,7 @@ typedef int MDBX_pgvisitor_func(uint64_t pgno, unsigned pgnumber, void *ctx, LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, void *ctx); -typedef struct mdbx_canary { - uint64_t x, y, z, v; -} mdbx_canary; +typedef struct mdbx_canary { uint64_t x, y, z, v; } mdbx_canary; LIBMDBX_API int mdbx_canary_put(MDBX_txn *txn, const mdbx_canary *canary); LIBMDBX_API int mdbx_canary_get(MDBX_txn *txn, mdbx_canary *canary); diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/bits.h b/plugins/Dbx_mdbx/src/libmdbx/src/bits.h index ccd4a581b2..f535d749de 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/bits.h +++ b/plugins/Dbx_mdbx/src/libmdbx/src/bits.h @@ -23,11 +23,6 @@ # undef NDEBUG #endif -/* Features under development */ -#ifndef MDBX_DEVEL -# define MDBX_DEVEL 1 -#endif - /*----------------------------------------------------------------------------*/ /* Should be defined before any includes */ @@ -39,6 +34,9 @@ #endif #ifdef _MSC_VER +# if _MSC_VER < 1400 +# error "Microsoft Visual C++ 8.0 (Visual Studio 2005) or later version is required" +# endif # ifndef _CRT_SECURE_NO_WARNINGS # define _CRT_SECURE_NO_WARNINGS # endif @@ -144,9 +142,9 @@ #define MDBX_MAGIC UINT64_C(/* 56-bit prime */ 0x59659DBDEF4C11) /* The version number for a database's datafile format. */ -#define MDBX_DATA_VERSION ((MDBX_DEVEL) ? 255 : 2) +#define MDBX_DATA_VERSION 2 /* The version number for a database's lockfile format. */ -#define MDBX_LOCK_VERSION ((MDBX_DEVEL) ? 255 : 2) +#define MDBX_LOCK_VERSION 2 /* handle for the DB used to track free pages. */ #define FREE_DBI 0 @@ -372,19 +370,19 @@ typedef struct MDBX_page { #define PAGEHDRSZ ((unsigned)offsetof(MDBX_page, mp_data)) /* The maximum size of a database page. - * - * It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper. - * - * MDBX will use database pages < OS pages if needed. - * That causes more I/O in write transactions: The OS must - * know (read) the whole page before writing a partial page. - * - * Note that we don't currently support Huge pages. On Linux, - * regular data files cannot use Huge pages, and in general - * Huge pages aren't actually pageable. We rely on the OS - * demand-pager to read our data and page it out when memory - * pressure from other processes is high. So until OSs have - * actual paging support for Huge pages, they're not viable. */ +* +* It is 64K, but value-PAGEHDRSZ must fit in MDBX_page.mp_upper. +* +* MDBX will use database pages < OS pages if needed. +* That causes more I/O in write transactions: The OS must +* know (read) the whole page before writing a partial page. +* +* Note that we don't currently support Huge pages. On Linux, +* regular data files cannot use Huge pages, and in general +* Huge pages aren't actually pageable. We rely on the OS +* demand-pager to read our data and page it out when memory +* pressure from other processes is high. So until OSs have +* actual paging support for Huge pages, they're not viable. */ #define MAX_PAGESIZE 0x10000u #define MIN_PAGESIZE 512u @@ -470,8 +468,10 @@ typedef struct MDBX_lockinfo { (uint16_t)(MDBX_LOCKINFO_WHOLE_SIZE + MDBX_CACHELINE_SIZE - 1)) #define MDBX_DATA_MAGIC ((MDBX_MAGIC << 8) + MDBX_DATA_VERSION) +#define MDBX_DATA_DEBUG ((MDBX_MAGIC << 8) + 255) #define MDBX_LOCK_MAGIC ((MDBX_MAGIC << 8) + MDBX_LOCK_VERSION) +#define MDBX_LOCK_DEBUG ((MDBX_MAGIC << 8) + 255) /*----------------------------------------------------------------------------*/ /* Two kind lists of pages (aka PNL) */ @@ -784,7 +784,7 @@ struct MDBX_env { } me_dbgeo; /* */ #if defined(_WIN32) || defined(_WIN64) - MDBX_shlock me_remap_guard; + MDBX_srwlock me_remap_guard; /* Workaround for LockFileEx and WriteFile multithread bug */ CRITICAL_SECTION me_windowsbug_lock; #else diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/defs.h b/plugins/Dbx_mdbx/src/libmdbx/src/defs.h index 6da5a96363..b6076cc1b3 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/defs.h +++ b/plugins/Dbx_mdbx/src/libmdbx/src/defs.h @@ -103,10 +103,6 @@ /*----------------------------------------------------------------------------*/ -#if !defined(__thread) && (defined(_MSC_VER) || defined(__DMC__)) -# define __thread __declspec(thread) -#endif /* __thread */ - #ifndef __alwaysinline # if defined(__GNUC__) || __has_attribute(always_inline) # define __alwaysinline __inline __attribute__((always_inline)) diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c b/plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c index 209cfa07df..6e53212b20 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/lck-windows.c @@ -24,13 +24,17 @@ * LY */ -/*----------------------------------------------------------------------------*/ -/* rthc */ +static void mdbx_winnt_import(void); -void NTAPI tls_callback(PVOID module, DWORD reason, PVOID reserved) { +#if !MDBX_CONFIG_MANUAL_TLS_CALLBACK +static +#endif /* !MDBX_CONFIG_MANUAL_TLS_CALLBACK */ + void NTAPI + mdbx_dll_callback(PVOID module, DWORD reason, PVOID reserved) { (void)reserved; switch (reason) { case DLL_PROCESS_ATTACH: + mdbx_winnt_import(); mdbx_rthc_global_init(); break; case DLL_PROCESS_DETACH: @@ -45,6 +49,46 @@ void NTAPI tls_callback(PVOID module, DWORD reason, PVOID reserved) { } } +#if !MDBX_CONFIG_MANUAL_TLS_CALLBACK +/* *INDENT-OFF* */ +/* clang-format off */ +#if defined(_MSC_VER) +# pragma const_seg(push) +# pragma data_seg(push) + +# ifdef _WIN64 + /* kick a linker to create the TLS directory if not already done */ +# pragma comment(linker, "/INCLUDE:_tls_used") + /* Force some symbol references. */ +# pragma comment(linker, "/INCLUDE:mdbx_tls_anchor") + /* specific const-segment for WIN64 */ +# pragma const_seg(".CRT$XLB") + const +# else + /* kick a linker to create the TLS directory if not already done */ +# pragma comment(linker, "/INCLUDE:__tls_used") + /* Force some symbol references. */ +# pragma comment(linker, "/INCLUDE:_mdbx_tls_anchor") + /* specific data-segment for WIN32 */ +# pragma data_seg(".CRT$XLB") +# endif + + __declspec(allocate(".CRT$XLB")) PIMAGE_TLS_CALLBACK mdbx_tls_anchor = mdbx_dll_callback; +# pragma data_seg(pop) +# pragma const_seg(pop) + +#elif defined(__GNUC__) +# ifdef _WIN64 + const +# endif + PIMAGE_TLS_CALLBACK mdbx_tls_anchor __attribute__((section(".CRT$XLB"), used)) = mdbx_dll_callback; +#else +# error FIXME +#endif +/* *INDENT-ON* */ +/* clang-format on */ +#endif /* !MDBX_CONFIG_MANUAL_TLS_CALLBACK */ + /*----------------------------------------------------------------------------*/ #define LCK_SHARED 0 @@ -54,11 +98,17 @@ void NTAPI tls_callback(PVOID module, DWORD reason, PVOID reserved) { static __inline BOOL flock(mdbx_filehandle_t fd, DWORD flags, uint64_t offset, size_t bytes) { - return TRUE; + OVERLAPPED ov; + ov.hEvent = 0; + ov.Offset = (DWORD)offset; + ov.OffsetHigh = HIGH_DWORD(offset); + return LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov); } -static __inline BOOL funlock(mdbx_filehandle_t fd, uint64_t offset, size_t bytes) { - return TRUE; +static __inline BOOL funlock(mdbx_filehandle_t fd, uint64_t offset, + size_t bytes) { + return UnlockFile(fd, (DWORD)offset, HIGH_DWORD(offset), (DWORD)bytes, + HIGH_DWORD(bytes)); } /*----------------------------------------------------------------------------*/ @@ -82,9 +132,9 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { EnterCriticalSection(&env->me_windowsbug_lock); } - if (flock(env->me_fd, - dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) - : (LCK_EXCLUSIVE | LCK_WAITFOR), + if ((env->me_flags & MDBX_EXCLUSIVE) || + flock(env->me_fd, dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT) + : (LCK_EXCLUSIVE | LCK_WAITFOR), LCK_BODY)) return MDBX_SUCCESS; int rc = GetLastError(); @@ -93,7 +143,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } void mdbx_txn_unlock(MDBX_env *env) { - int rc = funlock(env->me_fd, LCK_BODY); + int rc = (env->me_flags & MDBX_EXCLUSIVE) ? TRUE + : funlock(env->me_fd, LCK_BODY); LeaveCriticalSection(&env->me_windowsbug_lock); if (!rc) mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError()); @@ -111,26 +162,28 @@ void mdbx_txn_unlock(MDBX_env *env) { #define LCK_UPPER LCK_UP_OFFSET, LCK_UP_LEN int mdbx_rdt_lock(MDBX_env *env) { - mdbx_shlock_acquireShared(&env->me_remap_guard); + mdbx_srwlock_AcquireShared(&env->me_remap_guard); if (env->me_lfd == INVALID_HANDLE_VALUE) return MDBX_SUCCESS; /* readonly database in readonly filesystem */ /* transite from S-? (used) to S-E (locked), e.g. exclusive lock upper-part */ - if (flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) + if ((env->me_flags & MDBX_EXCLUSIVE) || + flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) return MDBX_SUCCESS; int rc = GetLastError(); - mdbx_shlock_releaseShared(&env->me_remap_guard); + mdbx_srwlock_ReleaseShared(&env->me_remap_guard); return rc; } void mdbx_rdt_unlock(MDBX_env *env) { if (env->me_lfd != INVALID_HANDLE_VALUE) { /* transite from S-E (locked) to S-? (used), e.g. unlock upper-part */ - if (!funlock(env->me_lfd, LCK_UPPER)) + if ((env->me_flags & MDBX_EXCLUSIVE) == 0 && + !funlock(env->me_lfd, LCK_UPPER)) mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError()); } - mdbx_shlock_releaseShared(&env->me_remap_guard); + mdbx_srwlock_ReleaseShared(&env->me_remap_guard); } static int suspend_and_append(mdbx_handle_array_t **array, @@ -309,7 +362,7 @@ static int internal_seize_lck(HANDLE lfd) { "?-E(middle) >> S-E(locked)", rc); /* 8) now on S-E (locked) or still on ?-E (middle), - * transite to S-? (used) or ?-? (free) */ + * transite to S-? (used) or ?-? (free) */ if (!funlock(lfd, LCK_UPPER)) mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_, "X-E(locked/middle) >> X-?(used/free)", GetLastError()); @@ -322,6 +375,9 @@ int mdbx_lck_seize(MDBX_env *env) { int rc; assert(env->me_fd != INVALID_HANDLE_VALUE); + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_RESULT_TRUE /* files were must be opened non-shareable */; + if (env->me_lfd == INVALID_HANDLE_VALUE) { /* LY: without-lck mode (e.g. on read-only filesystem) */ mdbx_jitter4testing(false); @@ -364,6 +420,9 @@ int mdbx_lck_downgrade(MDBX_env *env, bool complete) { assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_lfd != INVALID_HANDLE_VALUE); + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_SUCCESS /* files were must be opened non-shareable */; + /* 1) must be at E-E (exclusive-write) */ if (!complete) { /* transite from E-E to E_? (exclusive-read) */ @@ -398,6 +457,10 @@ int mdbx_lck_upgrade(MDBX_env *env) { /* Transite from locked state (S-E) to exclusive-write (E-E) */ assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_lfd != INVALID_HANDLE_VALUE); + assert((env->me_flags & MDBX_EXCLUSIVE) == 0); + + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_RESULT_TRUE /* files were must be opened non-shareable */; /* 1) must be at S-E (locked), transite to ?_E (middle) */ if (!funlock(env->me_lfd, LCK_LOWER)) @@ -436,7 +499,49 @@ int mdbx_lck_upgrade(MDBX_env *env) { } void mdbx_lck_destroy(MDBX_env *env) { - SetLastError(ERROR_SUCCESS); + int rc; + + if (env->me_lfd != INVALID_HANDLE_VALUE) { + /* double `unlock` for robustly remove overlapped shared/exclusive locks */ + while (funlock(env->me_lfd, LCK_LOWER)) + ; + rc = GetLastError(); + assert(rc == ERROR_NOT_LOCKED); + (void)rc; + SetLastError(ERROR_SUCCESS); + + while (funlock(env->me_lfd, LCK_UPPER)) + ; + rc = GetLastError(); + assert(rc == ERROR_NOT_LOCKED); + (void)rc; + SetLastError(ERROR_SUCCESS); + } + + if (env->me_fd != INVALID_HANDLE_VALUE) { + /* explicitly unlock to avoid latency for other processes (windows kernel + * releases such locks via deferred queues) */ + while (funlock(env->me_fd, LCK_BODY)) + ; + rc = GetLastError(); + assert(rc == ERROR_NOT_LOCKED); + (void)rc; + SetLastError(ERROR_SUCCESS); + + while (funlock(env->me_fd, LCK_META)) + ; + rc = GetLastError(); + assert(rc == ERROR_NOT_LOCKED); + (void)rc; + SetLastError(ERROR_SUCCESS); + + while (funlock(env->me_fd, LCK_WHOLE)) + ; + rc = GetLastError(); + assert(rc == ERROR_NOT_LOCKED); + (void)rc; + SetLastError(ERROR_SUCCESS); + } } /*----------------------------------------------------------------------------*/ @@ -488,87 +593,120 @@ int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) { } //---------------------------------------------------------------------------- -// shared lock -// Copyright (C) 1995-2002 Brad Wilson - -typedef void (WINAPI *pfnSrwFunc)(PSRWLOCK); - -static pfnSrwFunc fnLockInit = 0, fnLockShared = 0, fnUnlockShared = 0, fnLockExcl = 0, fnUnlockExcl = 0; - -void mdbx_shlock_init(MDBX_shlock *lck) { - HINSTANCE hInst = GetModuleHandleA("kernel32.dll"); - fnLockInit = (pfnSrwFunc)GetProcAddress(hInst, "InitializeSRWLock"); - if (fnLockInit != NULL) { - fnLockShared = (pfnSrwFunc)GetProcAddress(hInst, "AcquireSRWLockShared"); - fnUnlockShared = (pfnSrwFunc)GetProcAddress(hInst, "ReleaseSRWLockShared"); - fnLockExcl = (pfnSrwFunc)GetProcAddress(hInst, "AcquireSRWLockExclusive"); - fnUnlockExcl = (pfnSrwFunc)GetProcAddress(hInst, "ReleaseSRWLockExclusive"); +// Stub for slim read-write lock +// Copyright (C) 1995-2002 Brad Wilson - fnLockInit(&lck->srwLock); - } else - lck->readerCount = lck->writerCount = 0; +static void WINAPI stub_srwlock_Init(MDBX_srwlock *srwl) { + srwl->readerCount = srwl->writerCount = 0; } -void mdbx_shlock_acquireShared(MDBX_shlock *lck) { - if (fnLockShared) { - fnLockShared(&lck->srwLock); - return; - } +static void WINAPI stub_srwlock_AcquireShared(MDBX_srwlock *srwl) { + while (true) { + assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); - while (1) { // If there's a writer already, spin without unnecessarily // interlocking the CPUs - - if (lck->writerCount != 0) { + if (srwl->writerCount != 0) { YieldProcessor(); continue; } // Add to the readers list - - _InterlockedIncrement((long *)&lck->readerCount); + _InterlockedIncrement(&srwl->readerCount); // Check for writers again (we may have been pre-empted). If // there are no writers writing or waiting, then we're done. - if (lck->writerCount == 0) + if (srwl->writerCount == 0) break; // Remove from the readers list, spin, try again - _InterlockedDecrement((long *)&lck->readerCount); + _InterlockedDecrement(&srwl->readerCount); YieldProcessor(); } } -void mdbx_shlock_releaseShared(MDBX_shlock *lck) { - if (fnUnlockShared) - fnUnlockShared(&lck->srwLock); - else - _InterlockedDecrement((long *)&lck->readerCount); +static void WINAPI stub_srwlock_ReleaseShared(MDBX_srwlock *srwl) { + assert(srwl->readerCount > 0); + _InterlockedDecrement(&srwl->readerCount); } -void mdbx_shlock_acquireExclusive(MDBX_shlock *lck) { - if (fnLockExcl) { - fnLockShared(&lck->srwLock); - return; - } +static void WINAPI stub_srwlock_AcquireExclusive(MDBX_srwlock *srwl) { + while (true) { + assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); - // See if we can become the writer (expensive, because it inter- - // locks the CPUs, so writing should be an infrequent process) - while (_InterlockedExchange((long *)&lck->writerCount, 1) == 1) { - YieldProcessor(); + // If there's a writer already, spin without unnecessarily + // interlocking the CPUs + if (srwl->writerCount != 0) { + YieldProcessor(); + continue; + } + + // See if we can become the writer (expensive, because it inter- + // locks the CPUs, so writing should be an infrequent process) + if (_InterlockedExchange(&srwl->writerCount, 1) == 0) + break; } // Now we're the writer, but there may be outstanding readers. // Spin until there aren't any more; new readers will wait now // that we're the writer. - while (lck->readerCount != 0) { + while (srwl->readerCount != 0) { + assert(srwl->writerCount >= 0 && srwl->readerCount >= 0); YieldProcessor(); } } -void mdbx_shlock_releaseExclusive(MDBX_shlock *lck) { - if (fnUnlockExcl) - fnUnlockExcl(&lck->srwLock); - else - lck->writerCount = 0; +static void WINAPI stub_srwlock_ReleaseExclusive(MDBX_srwlock *srwl) { + assert(srwl->writerCount == 1 && srwl->readerCount >= 0); + srwl->writerCount = 0; +} + +MDBX_srwlock_function mdbx_srwlock_Init, mdbx_srwlock_AcquireShared, + mdbx_srwlock_ReleaseShared, mdbx_srwlock_AcquireExclusive, + mdbx_srwlock_ReleaseExclusive; + +/*----------------------------------------------------------------------------*/ + +MDBX_GetFileInformationByHandleEx mdbx_GetFileInformationByHandleEx; +MDBX_GetVolumeInformationByHandleW mdbx_GetVolumeInformationByHandleW; +MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW; +MDBX_NtFsControlFile mdbx_NtFsControlFile; + +static void mdbx_winnt_import(void) { + const HINSTANCE hKernel32dll = GetModuleHandleA("kernel32.dll"); + const MDBX_srwlock_function init = + (MDBX_srwlock_function)GetProcAddress(hKernel32dll, "InitializeSRWLock"); + if (init != NULL) { + mdbx_srwlock_Init = init; + mdbx_srwlock_AcquireShared = (MDBX_srwlock_function)GetProcAddress( + hKernel32dll, "AcquireSRWLockShared"); + mdbx_srwlock_ReleaseShared = (MDBX_srwlock_function)GetProcAddress( + hKernel32dll, "ReleaseSRWLockShared"); + mdbx_srwlock_AcquireExclusive = (MDBX_srwlock_function)GetProcAddress( + hKernel32dll, "AcquireSRWLockExclusive"); + mdbx_srwlock_ReleaseExclusive = (MDBX_srwlock_function)GetProcAddress( + hKernel32dll, "ReleaseSRWLockExclusive"); + } else { + mdbx_srwlock_Init = stub_srwlock_Init; + mdbx_srwlock_AcquireShared = stub_srwlock_AcquireShared; + mdbx_srwlock_ReleaseShared = stub_srwlock_ReleaseShared; + mdbx_srwlock_AcquireExclusive = stub_srwlock_AcquireExclusive; + mdbx_srwlock_ReleaseExclusive = stub_srwlock_ReleaseExclusive; + } + + mdbx_GetFileInformationByHandleEx = + (MDBX_GetFileInformationByHandleEx)GetProcAddress( + hKernel32dll, "GetFileInformationByHandleEx"); + + mdbx_GetVolumeInformationByHandleW = + (MDBX_GetVolumeInformationByHandleW)GetProcAddress( + hKernel32dll, "GetVolumeInformationByHandleW"); + + mdbx_GetFinalPathNameByHandleW = + (MDBX_GetFinalPathNameByHandleW)GetProcAddress( + hKernel32dll, "GetFinalPathNameByHandleW"); + + const HINSTANCE hNtdll = GetModuleHandleA("ntdll.dll"); + mdbx_NtFsControlFile = + (MDBX_NtFsControlFile)GetProcAddress(hNtdll, "NtFsControlFile"); } diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c b/plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c index fa51bb2309..44b2093d0b 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/mdbx.c @@ -42,8 +42,8 @@ int __hot mdbx_e2k_memcmp_bug_workaround(const void *s1, const void *s2, size_t n) { if (unlikely(n > 42 - /* LY: align followed access if reasonable possible */ - && (((uintptr_t)s1) & 7) != 0 && + /* LY: align followed access if reasonable possible */ && + (((uintptr_t)s1) & 7) != 0 && (((uintptr_t)s1) & 7) == (((uintptr_t)s2) & 7))) { if (((uintptr_t)s1) & 1) { const int diff = *(uint8_t *)s1 - *(uint8_t *)s2; @@ -1134,7 +1134,7 @@ const char *__cold mdbx_strerror(int errnum) { const char *msg = __mdbx_strerr(errnum); if (!msg) { #ifdef _MSC_VER - static __thread char buffer[1024]; + static char buffer[1024]; size_t size = FormatMessageA( FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buffer, @@ -2003,7 +2003,7 @@ static int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno, /* Acquire guard in exclusive mode for: * - to avoid collision between read and write txns around env->me_dbgeo; * - to avoid attachment of new reading threads (see mdbx_rdt_lock); */ - mdbx_shlock_acquireExclusive(&env->me_remap_guard); + mdbx_srwlock_AcquireExclusive(&env->me_remap_guard); mdbx_handle_array_t *suspended = NULL; mdbx_handle_array_t array_onstack; int rc = MDBX_SUCCESS; @@ -2041,11 +2041,6 @@ static int mdbx_mapresize(MDBX_env *env, const pgno_t size_pgno, bailout: if (rc == MDBX_SUCCESS) { -#if defined(_WIN32) || defined(_WIN64) - assert(size_bytes == env->me_dxb_mmap.current); - assert(size_bytes <= env->me_dxb_mmap.filesize); - assert(limit_bytes == env->me_dxb_mmap.length); -#endif env->me_dbgeo.now = size_bytes; env->me_dbgeo.upper = limit_bytes; if (env->me_txn) { @@ -2083,7 +2078,7 @@ bailout: #if defined(_WIN32) || defined(_WIN64) int err = MDBX_SUCCESS; - mdbx_shlock_releaseExclusive(&env->me_remap_guard); + mdbx_srwlock_ReleaseExclusive(&env->me_remap_guard); if (suspended) { err = mdbx_resume_threads_after_remap(suspended); if (suspended != &array_onstack) @@ -4346,7 +4341,8 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta, return MDBX_INVALID; } - if (page.mp_meta.mm_magic_and_version != MDBX_DATA_MAGIC) { + if (page.mp_meta.mm_magic_and_version != MDBX_DATA_MAGIC && + page.mp_meta.mm_magic_and_version != MDBX_DATA_DEBUG) { mdbx_error("meta[%u] has invalid magic/version %" PRIx64, meta_number, page.mp_meta.mm_magic_and_version); return ((page.mp_meta.mm_magic_and_version >> 8) != MDBX_MAGIC) @@ -4733,8 +4729,9 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, mdbx_assert(env, !mdbx_meta_eq(env, pending, meta2)); mdbx_assert(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0); - mdbx_ensure(env, target == head || mdbx_meta_txnid_stable(env, target) < - pending->mm_txnid_a); + mdbx_ensure(env, + target == head || + mdbx_meta_txnid_stable(env, target) < pending->mm_txnid_a); if (env->me_flags & MDBX_WRITEMAP) { mdbx_jitter4testing(true); if (likely(target != head)) { @@ -4912,7 +4909,7 @@ int __cold mdbx_env_create(MDBX_env **penv) { goto bailout; #if defined(_WIN32) || defined(_WIN64) - mdbx_shlock_init(&env->me_remap_guard); + mdbx_srwlock_Init(&env->me_remap_guard); InitializeCriticalSection(&env->me_windowsbug_lock); #else rc = mdbx_fastmutex_init(&env->me_remap_guard); @@ -5583,7 +5580,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, assert(env->me_fd != INVALID_HANDLE_VALUE); assert(env->me_lfd == INVALID_HANDLE_VALUE); - int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd); + int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd, + (env->me_flags & MDBX_EXCLUSIVE) ? true : false); if (err != MDBX_SUCCESS) { if (err != MDBX_EROFS || (env->me_flags & MDBX_RDONLY) == 0) return err; @@ -5626,10 +5624,14 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, return err; size = wanna; } - } else if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) || - size < env->me_os_psize) { - mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size); - return MDBX_PROBLEM; + } else { + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_BUSY; + if (size > SSIZE_MAX || (size & (env->me_os_psize - 1)) || + size < env->me_os_psize) { + mdbx_notice("lck-file has invalid size %" PRIu64 " bytes", size); + return MDBX_PROBLEM; + } } const size_t maxreaders = @@ -5673,7 +5675,8 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, env->me_lck->mti_magic_and_version = MDBX_LOCK_MAGIC; env->me_lck->mti_os_and_format = MDBX_LOCK_FORMAT; } else { - if (env->me_lck->mti_magic_and_version != MDBX_LOCK_MAGIC) { + if (env->me_lck->mti_magic_and_version != MDBX_LOCK_MAGIC && + env->me_lck->mti_magic_and_version != MDBX_LOCK_DEBUG) { mdbx_error("lock region has invalid magic/version"); return ((env->me_lck->mti_magic_and_version >> 8) != MDBX_MAGIC) ? MDBX_INVALID @@ -5699,24 +5702,27 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, MDBX_COALESCE | MDBX_PAGEPERTURB) #define CHANGELESS \ (MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \ - MDBX_LIFORECLAIM) + MDBX_LIFORECLAIM | MDBX_EXCLUSIVE) #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE | CHANGELESS) #error "Persistent DB flags & env flags overlap, but both go in mm_flags" #endif -int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, - mode_t mode, int *exclusive) { +int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, + mode_t mode) { if (unlikely(!env || !path)) return MDBX_EINVAL; if (unlikely(env->me_signature != MDBX_ME_SIGNATURE)) return MDBX_EBADSIGN; - if (env->me_fd != INVALID_HANDLE_VALUE || - (flags & ~(CHANGEABLE | CHANGELESS))) + if (flags & ~(CHANGEABLE | CHANGELESS)) return MDBX_EINVAL; + if (env->me_fd != INVALID_HANDLE_VALUE || + (env->me_flags & MDBX_ENV_ACTIVE) != 0) + return MDBX_EPERM; + size_t len_full, len = strlen(path); if (flags & MDBX_NOSUBDIR) { len_full = len + sizeof(MDBX_LOCK_SUFFIX) + len + 1; @@ -5750,7 +5756,9 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, (env->me_dirtylist = calloc(MDBX_PNL_UM_SIZE, sizeof(MDBX_ID2))))) rc = MDBX_ENOMEM; } - env->me_flags = flags |= MDBX_ENV_ACTIVE; + + const uint32_t saved_me_flags = env->me_flags; + env->me_flags = flags | MDBX_ENV_ACTIVE; if (rc) goto bailout; @@ -5770,7 +5778,8 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, else oflags = O_RDWR | O_CREAT; - rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd); + rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd, + (env->me_flags & MDBX_EXCLUSIVE) ? true : false); if (rc != MDBX_SUCCESS) goto bailout; @@ -5791,7 +5800,7 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC; if (lck_rc == MDBX_RESULT_TRUE) { env->me_lck->mti_envmode = env->me_flags & (mode_flags | MDBX_RDONLY); - if (exclusive == NULL || *exclusive < 2) { + if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { /* LY: downgrade lock only if exclusive access not requested. * in case exclusive==1, just leave value as is. */ rc = mdbx_lck_downgrade(env, true); @@ -5803,10 +5812,6 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, if (rc != MDBX_SUCCESS) goto bailout; } else { - if (exclusive) { - /* LY: just indicate that is not an exclusive access. */ - *exclusive = 0; - } if ((env->me_flags & MDBX_RDONLY) == 0) { while (env->me_lck->mti_envmode == MDBX_RDONLY) { if (mdbx_atomic_compare_and_swap32(&env->me_lck->mti_envmode, @@ -5834,9 +5839,9 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, if ((flags & MDBX_RDONLY) == 0) { MDBX_txn *txn; int tsize = sizeof(MDBX_txn), - size = - tsize + env->me_maxdbs * (sizeof(MDBX_db) + sizeof(MDBX_cursor *) + - sizeof(unsigned) + 1); + size = tsize + + env->me_maxdbs * (sizeof(MDBX_db) + sizeof(MDBX_cursor *) + + sizeof(unsigned) + 1); if ((env->me_pbuf = calloc(1, env->me_psize)) && (txn = calloc(1, size))) { txn->mt_dbs = (MDBX_db *)((char *)txn + tsize); txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); @@ -5871,17 +5876,14 @@ int __cold mdbx_env_open_ex(MDBX_env *env, const char *path, unsigned flags, #endif bailout: - if (rc) + if (rc) { mdbx_env_close0(env); + env->me_flags = saved_me_flags; + } free(lck_pathname); return rc; } -int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags, - mode_t mode) { - return mdbx_env_open_ex(env, path, flags, mode, NULL); -} - /* Destroy resources from mdbx_env_open(), clear our readers & DBIs */ static void __cold mdbx_env_close0(MDBX_env *env) { if (!(env->me_flags & MDBX_ENV_ACTIVE)) @@ -5906,10 +5908,8 @@ static void __cold mdbx_env_close0(MDBX_env *env) { } mdbx_pnl_free(env->me_free_pgs); - if (env->me_flags & MDBX_ENV_TXKEY) { + if (env->me_flags & MDBX_ENV_TXKEY) mdbx_rthc_remove(env->me_txkey); - env->me_flags &= ~MDBX_ENV_TXKEY; - } if (env->me_map) { mdbx_munmap(&env->me_dxb_mmap); @@ -5925,7 +5925,6 @@ static void __cold mdbx_env_close0(MDBX_env *env) { if (env->me_lck) mdbx_munmap(&env->me_lck_mmap); - env->me_pid = 0; env->me_oldest = nullptr; mdbx_lck_destroy(env); @@ -5933,6 +5932,7 @@ static void __cold mdbx_env_close0(MDBX_env *env) { (void)mdbx_closefile(env->me_lfd); env->me_lfd = INVALID_HANDLE_VALUE; } + env->me_flags = 0; } int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) { @@ -5983,6 +5983,7 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) { mdbx_fastmutex_destroy(&env->me_remap_guard) == MDBX_SUCCESS); #endif /* Windows */ + env->me_pid = 0; env->me_signature = 0; free(env); @@ -6084,7 +6085,7 @@ static int __hot mdbx_cmp_int_ua(const MDBX_val *a, const MDBX_val *b) { } while (pa != a->iov_base); return diff; } -#else /* __BYTE_ORDER__ */ +#else /* __BYTE_ORDER__ */ return memcmp(a->iov_base, b->iov_base, a->iov_len); #endif /* __BYTE_ORDER__ */ #endif /* UNALIGNED_OK */ @@ -6314,7 +6315,7 @@ static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret, mapped: p = pgno2page(env, pgno); - /* TODO: check p->mp_validator here */ +/* TODO: check p->mp_validator here */ done: *ret = p; @@ -10037,8 +10038,9 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) return MDBX_EINVAL; - if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE | - MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) + if (unlikely(flags & + ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE | + MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) return MDBX_EINVAL; if (unlikely(txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) @@ -10496,8 +10498,8 @@ int __cold mdbx_env_copy(MDBX_env *env, const char *path, unsigned flags) { /* The destination path must exist, but the destination file must not. * We don't want the OS to cache the writes, since the source data is * already in the OS cache. */ - int rc = - mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666, &newfd); + int rc = mdbx_openfile(lck_pathname, O_WRONLY | O_CREAT | O_EXCL, 0666, + &newfd, true); if (rc == MDBX_SUCCESS) { if (env->me_psize >= env->me_os_psize) { #ifdef F_NOCACHE /* __APPLE__ */ @@ -10678,9 +10680,9 @@ int __cold mdbx_env_info(MDBX_env *env, MDBX_envinfo *arg, size_t bytes) { } static MDBX_cmp_func *mdbx_default_keycmp(unsigned flags) { - return (flags & MDBX_REVERSEKEY) - ? mdbx_cmp_memnr - : (flags & MDBX_INTEGERKEY) ? mdbx_cmp_int_a2 : mdbx_cmp_memn; + return (flags & MDBX_REVERSEKEY) ? mdbx_cmp_memnr : (flags & MDBX_INTEGERKEY) + ? mdbx_cmp_int_a2 + : mdbx_cmp_memn; } static MDBX_cmp_func *mdbx_default_datacmp(unsigned flags) { @@ -11870,8 +11872,9 @@ int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *new_data, if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) return MDBX_EINVAL; - if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE | - MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) + if (unlikely(flags & + ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE | + MDBX_APPEND | MDBX_APPENDDUP | MDBX_CURRENT))) return MDBX_EINVAL; if (unlikely(txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/osal.c b/plugins/Dbx_mdbx/src/libmdbx/src/osal.c index 76433b716d..1a57d750b3 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/osal.c +++ b/plugins/Dbx_mdbx/src/libmdbx/src/osal.c @@ -17,7 +17,6 @@ #include "./bits.h" #if defined(_WIN32) || defined(_WIN64) -#include <winternl.h> static int waitstatus2errcode(DWORD result) { switch (result) { @@ -105,13 +104,31 @@ extern NTSTATUS NTAPI NtFreeVirtualMemory(IN HANDLE ProcessHandle, IN OUT PSIZE_T RegionSize, IN ULONG FreeType); +#ifndef WOF_CURRENT_VERSION +typedef struct _WOF_EXTERNAL_INFO { + DWORD Version; + DWORD Provider; +} WOF_EXTERNAL_INFO, *PWOF_EXTERNAL_INFO; +#endif /* WOF_CURRENT_VERSION */ + +#ifndef WIM_PROVIDER_CURRENT_VERSION +#define WIM_PROVIDER_HASH_SIZE 20 + +typedef struct _WIM_PROVIDER_EXTERNAL_INFO { + DWORD Version; + DWORD Flags; + LARGE_INTEGER DataSourceId; + BYTE ResourceHash[WIM_PROVIDER_HASH_SIZE]; +} WIM_PROVIDER_EXTERNAL_INFO, *PWIM_PROVIDER_EXTERNAL_INFO; +#endif /* WIM_PROVIDER_CURRENT_VERSION */ + #ifndef FILE_PROVIDER_CURRENT_VERSION typedef struct _FILE_PROVIDER_EXTERNAL_INFO_V1 { ULONG Version; ULONG Algorithm; ULONG Flags; } FILE_PROVIDER_EXTERNAL_INFO_V1, *PFILE_PROVIDER_EXTERNAL_INFO_V1; -#endif +#endif /* FILE_PROVIDER_CURRENT_VERSION */ #ifndef STATUS_OBJECT_NOT_EXTERNALLY_BACKED #define STATUS_OBJECT_NOT_EXTERNALLY_BACKED ((NTSTATUS)0xC000046DL) @@ -120,14 +137,6 @@ typedef struct _FILE_PROVIDER_EXTERNAL_INFO_V1 { #define STATUS_INVALID_DEVICE_REQUEST ((NTSTATUS)0xC0000010L) #endif -extern NTSTATUS -NtFsControlFile(IN HANDLE FileHandle, IN OUT HANDLE Event, - IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, - IN OUT PVOID ApcContext, OUT PIO_STATUS_BLOCK IoStatusBlock, - IN ULONG FsControlCode, IN OUT PVOID InputBuffer, - IN ULONG InputBufferLength, OUT OPTIONAL PVOID OutputBuffer, - IN ULONG OutputBufferLength); - #endif /* _WIN32 || _WIN64 */ /*----------------------------------------------------------------------------*/ @@ -400,19 +409,20 @@ int mdbx_fastmutex_release(mdbx_fastmutex_t *fastmutex) { /*----------------------------------------------------------------------------*/ int mdbx_openfile(const char *pathname, int flags, mode_t mode, - mdbx_filehandle_t *fd) { + mdbx_filehandle_t *fd, bool exclusive) { *fd = INVALID_HANDLE_VALUE; #if defined(_WIN32) || defined(_WIN64) (void)mode; - DWORD DesiredAccess; - DWORD ShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE; + DWORD DesiredAccess, ShareMode; DWORD FlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; switch (flags & (O_RDONLY | O_WRONLY | O_RDWR)) { default: return ERROR_INVALID_PARAMETER; case O_RDONLY: DesiredAccess = GENERIC_READ; + ShareMode = + exclusive ? FILE_SHARE_READ : (FILE_SHARE_READ | FILE_SHARE_WRITE); break; case O_WRONLY: /* assume for MDBX_env_copy() and friends output */ DesiredAccess = GENERIC_WRITE; @@ -421,6 +431,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode, break; case O_RDWR: DesiredAccess = GENERIC_READ | GENERIC_WRITE; + ShareMode = exclusive ? 0 : (FILE_SHARE_READ | FILE_SHARE_WRITE); break; } @@ -459,7 +470,7 @@ int mdbx_openfile(const char *pathname, int flags, mode_t mode, } } #else - + (void)exclusive; #ifdef O_CLOEXEC flags |= O_CLOEXEC; #endif @@ -735,51 +746,118 @@ int mdbx_msync(mdbx_mmap_t *map, size_t offset, size_t length, int async) { #endif } -int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { - assert(size <= limit); +int mdbx_check4nonlocal(mdbx_filehandle_t handle, int flags) { #if defined(_WIN32) || defined(_WIN64) - NTSTATUS rc; - map->length = 0; - map->current = 0; - map->section = NULL; - map->address = nullptr; - - if (GetFileType(map->fd) != FILE_TYPE_DISK) + if (GetFileType(handle) != FILE_TYPE_DISK) return ERROR_FILE_OFFLINE; -#if defined(_WIN64) && defined(WOF_CURRENT_VERSION) - struct { - WOF_EXTERNAL_INFO wof_info; - union { - WIM_PROVIDER_EXTERNAL_INFO wim_info; - FILE_PROVIDER_EXTERNAL_INFO_V1 file_info; - }; - size_t reserved_for_microsoft_madness[42]; - } GetExternalBacking_OutputBuffer; - IO_STATUS_BLOCK StatusBlock; - rc = NtFsControlFile(map->fd, NULL, NULL, NULL, &StatusBlock, - FSCTL_GET_EXTERNAL_BACKING, NULL, 0, - &GetExternalBacking_OutputBuffer, - sizeof(GetExternalBacking_OutputBuffer)); - if (rc != STATUS_OBJECT_NOT_EXTERNALLY_BACKED && - rc != STATUS_INVALID_DEVICE_REQUEST) - return NT_SUCCESS(rc) ? ERROR_FILE_OFFLINE : ntstatus2errcode(rc); -#endif + if (mdbx_GetFileInformationByHandleEx) { + FILE_REMOTE_PROTOCOL_INFO RemoteProtocolInfo; + if (mdbx_GetFileInformationByHandleEx(handle, FileRemoteProtocolInfo, + &RemoteProtocolInfo, + sizeof(RemoteProtocolInfo))) { + + if ((RemoteProtocolInfo.Flags & REMOTE_PROTOCOL_INFO_FLAG_OFFLINE) && + !(flags & MDBX_RDONLY)) + return ERROR_FILE_OFFLINE; + if (!(RemoteProtocolInfo.Flags & REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK) && + !(flags & MDBX_EXCLUSIVE)) + return ERROR_REMOTE_STORAGE_MEDIA_ERROR; + } + } - WCHAR PathBuffer[INT16_MAX]; - typedef BOOL (WINAPI *pfnGetVolumeInformationByHandle)(HANDLE, LPWSTR, DWORD, LPDWORD, LPDWORD, LPDWORD, LPWSTR, DWORD); - pfnGetVolumeInformationByHandle pvol = (pfnGetVolumeInformationByHandle)GetProcAddress(GetModuleHandleA("kernel32.dll"), "GetVolumeInformationByHandleW"); - if (pvol) { + if (mdbx_NtFsControlFile) { + NTSTATUS rc; + struct { + WOF_EXTERNAL_INFO wof_info; + union { + WIM_PROVIDER_EXTERNAL_INFO wim_info; + FILE_PROVIDER_EXTERNAL_INFO_V1 file_info; + }; + size_t reserved_for_microsoft_madness[42]; + } GetExternalBacking_OutputBuffer; + IO_STATUS_BLOCK StatusBlock; + rc = mdbx_NtFsControlFile(handle, NULL, NULL, NULL, &StatusBlock, + FSCTL_GET_EXTERNAL_BACKING, NULL, 0, + &GetExternalBacking_OutputBuffer, + sizeof(GetExternalBacking_OutputBuffer)); + if (NT_SUCCESS(rc)) { + if (!(flags & MDBX_EXCLUSIVE)) + return ERROR_REMOTE_STORAGE_MEDIA_ERROR; + } else if (rc != STATUS_OBJECT_NOT_EXTERNALLY_BACKED && + rc != STATUS_INVALID_DEVICE_REQUEST) + return ntstatus2errcode(rc); + } + + if (mdbx_GetVolumeInformationByHandleW && mdbx_GetFinalPathNameByHandleW) { + WCHAR PathBuffer[INT16_MAX]; DWORD VolumeSerialNumber, FileSystemFlags; - if (!pvol(map->fd, PathBuffer, INT16_MAX, &VolumeSerialNumber, NULL, &FileSystemFlags, NULL, 0)) + if (!mdbx_GetVolumeInformationByHandleW(handle, PathBuffer, INT16_MAX, + &VolumeSerialNumber, NULL, + &FileSystemFlags, NULL, 0)) return GetLastError(); - + if ((flags & MDBX_RDONLY) == 0) { - if (FileSystemFlags & (FILE_SEQUENTIAL_WRITE_ONCE | FILE_READ_ONLY_VOLUME | - FILE_VOLUME_IS_COMPRESSED)) - return ERROR_FILE_OFFLINE; + if (FileSystemFlags & (FILE_SEQUENTIAL_WRITE_ONCE | + FILE_READ_ONLY_VOLUME | FILE_VOLUME_IS_COMPRESSED)) + return ERROR_REMOTE_STORAGE_MEDIA_ERROR; + } + + if (!mdbx_GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, + FILE_NAME_NORMALIZED | VOLUME_NAME_NT)) + return GetLastError(); + + if (_wcsnicmp(PathBuffer, L"\\Device\\Mup\\", 12) == 0) { + if (!(flags & MDBX_EXCLUSIVE)) + return ERROR_REMOTE_STORAGE_MEDIA_ERROR; + } else if (mdbx_GetFinalPathNameByHandleW(handle, PathBuffer, INT16_MAX, + FILE_NAME_NORMALIZED | + VOLUME_NAME_DOS)) { + UINT DriveType = GetDriveTypeW(PathBuffer); + if (DriveType == DRIVE_NO_ROOT_DIR && + wcsncmp(PathBuffer, L"\\\\?\\", 4) == 0 && + wcsncmp(PathBuffer + 5, L":\\", 2) == 0) { + PathBuffer[7] = 0; + DriveType = GetDriveTypeW(PathBuffer + 4); + } + switch (DriveType) { + case DRIVE_CDROM: + if (flags & MDBX_RDONLY) + break; + // fall through + case DRIVE_UNKNOWN: + case DRIVE_NO_ROOT_DIR: + case DRIVE_REMOTE: + default: + if (!(flags & MDBX_EXCLUSIVE)) + return ERROR_REMOTE_STORAGE_MEDIA_ERROR; + // fall through + case DRIVE_REMOVABLE: + case DRIVE_FIXED: + case DRIVE_RAMDISK: + break; + } } } +#else + (void)handle; + /* TODO: check for NFS handle ? */ + (void)flags; +#endif + return MDBX_SUCCESS; +} + +int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { + assert(size <= limit); +#if defined(_WIN32) || defined(_WIN64) + map->length = 0; + map->current = 0; + map->section = NULL; + map->address = nullptr; + + NTSTATUS rc = mdbx_check4nonlocal(map->fd, flags); + if (rc != MDBX_SUCCESS) + return rc; rc = mdbx_filesize(map->fd, &map->filesize); if (rc != MDBX_SUCCESS) @@ -796,14 +874,13 @@ int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { SectionSize.QuadPart = size; rc = NtCreateSection( &map->section, - /* DesiredAccess */ - (flags & MDBX_WRITEMAP) + /* DesiredAccess */ (flags & MDBX_WRITEMAP) ? SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | SECTION_MAP_WRITE : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, /* ObjectAttributes */ NULL, /* MaximumSize (InitialSize) */ &SectionSize, - /* SectionPageProtection */ - (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, + /* SectionPageProtection */ (flags & MDBX_RDONLY) ? PAGE_READONLY + : PAGE_READWRITE, /* AllocationAttributes */ SEC_RESERVE, map->fd); if (!NT_SUCCESS(rc)) return ntstatus2errcode(rc); @@ -816,8 +893,8 @@ int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { /* SectionOffset */ NULL, &ViewSize, /* InheritDisposition */ ViewUnmap, /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, - /* Win32Protect */ - (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); + /* Win32Protect */ (flags & MDBX_WRITEMAP) ? PAGE_READWRITE + : PAGE_READONLY); if (!NT_SUCCESS(rc)) { NtClose(map->section); map->section = 0; @@ -852,6 +929,11 @@ int mdbx_munmap(mdbx_mmap_t *map) { if (!NT_SUCCESS(rc)) ntstatus2errcode(rc); + if (map->filesize != map->current && + mdbx_filesize(map->fd, &map->filesize) == MDBX_SUCCESS && + map->filesize != map->current) + (void)mdbx_ftruncate(map->fd, map->current); + map->length = 0; map->current = 0; map->address = nullptr; @@ -877,11 +959,8 @@ int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size, size_t limit) { /* growth rw-section */ SectionSize.QuadPart = size; status = NtExtendSection(map->section, &SectionSize); - if (NT_SUCCESS(status)) { - map->current = size; - if (map->filesize < size) - map->filesize = size; - } + if (NT_SUCCESS(status)) + map->filesize = map->current = size; return ntstatus2errcode(status); } @@ -956,15 +1035,14 @@ retry_file_and_section: SectionSize.QuadPart = size; status = NtCreateSection( &map->section, - /* DesiredAccess */ - (flags & MDBX_WRITEMAP) + /* DesiredAccess */ (flags & MDBX_WRITEMAP) ? SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE | SECTION_MAP_WRITE : SECTION_QUERY | SECTION_MAP_READ | SECTION_EXTEND_SIZE, /* ObjectAttributes */ NULL, /* MaximumSize (InitialSize) */ &SectionSize, - /* SectionPageProtection */ - (flags & MDBX_RDONLY) ? PAGE_READONLY : PAGE_READWRITE, + /* SectionPageProtection */ (flags & MDBX_RDONLY) ? PAGE_READONLY + : PAGE_READWRITE, /* AllocationAttributes */ SEC_RESERVE, map->fd); if (!NT_SUCCESS(status)) @@ -988,8 +1066,8 @@ retry_mapview:; /* SectionOffset */ NULL, &ViewSize, /* InheritDisposition */ ViewUnmap, /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, - /* Win32Protect */ - (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY); + /* Win32Protect */ (flags & MDBX_WRITEMAP) ? PAGE_READWRITE + : PAGE_READONLY); if (!NT_SUCCESS(status)) { if (status == /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018 && diff --git a/plugins/Dbx_mdbx/src/libmdbx/src/osal.h b/plugins/Dbx_mdbx/src/libmdbx/src/osal.h index be3d4980e6..41e53222d7 100644 --- a/plugins/Dbx_mdbx/src/libmdbx/src/osal.h +++ b/plugins/Dbx_mdbx/src/libmdbx/src/osal.h @@ -65,9 +65,11 @@ /* Systems includes */ #if defined(_WIN32) || defined(_WIN64) +#define WIN32_LEAN_AND_MEAN #include <tlhelp32.h> #include <windows.h> #include <winnt.h> +#include <winternl.h> #define HAVE_SYS_STAT_H #define HAVE_SYS_TYPES_H typedef HANDLE mdbx_thread_t; @@ -475,7 +477,7 @@ int mdbx_filesize_sync(mdbx_filehandle_t fd); int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length); int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length); int mdbx_openfile(const char *pathname, int flags, mode_t mode, - mdbx_filehandle_t *fd); + mdbx_filehandle_t *fd, bool exclusive); int mdbx_closefile(mdbx_filehandle_t fd); typedef struct mdbx_mmap_param { @@ -508,6 +510,7 @@ int mdbx_suspend_threads_before_remap(MDBX_env *env, int mdbx_resume_threads_after_remap(mdbx_handle_array_t *array); #endif /* Windows */ int mdbx_msync(mdbx_mmap_t *map, size_t offset, size_t length, int async); +int mdbx_check4nonlocal(mdbx_filehandle_t handle, int flags); static __inline mdbx_pid_t mdbx_getpid(void) { #if defined(_WIN32) || defined(_WIN64) @@ -560,21 +563,52 @@ LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); int mdbx_rpid_set(MDBX_env *env); int mdbx_rpid_clear(MDBX_env *env); -typedef struct MDBX_shlock { - union { - struct { - __declspec(align(64)) long volatile readerCount; - __declspec(align(64)) long volatile writerCount; - }; - RTL_SRWLOCK srwLock; - }; -} MDBX_shlock; - -void mdbx_shlock_init(MDBX_shlock *lck); -void mdbx_shlock_acquireShared(MDBX_shlock *lck); -void mdbx_shlock_releaseShared(MDBX_shlock *lck); -void mdbx_shlock_acquireExclusive(MDBX_shlock *lck); -void mdbx_shlock_releaseExclusive(MDBX_shlock *lck); +#if defined(_WIN32) || defined(_WIN64) +typedef struct MDBX_srwlock { + union { + struct { + long volatile readerCount; + long volatile writerCount; + }; + RTL_SRWLOCK native; + }; +} MDBX_srwlock; + +typedef void(WINAPI *MDBX_srwlock_function)(MDBX_srwlock *); +extern MDBX_srwlock_function mdbx_srwlock_Init, mdbx_srwlock_AcquireShared, + mdbx_srwlock_ReleaseShared, mdbx_srwlock_AcquireExclusive, + mdbx_srwlock_ReleaseExclusive; + +typedef BOOL(WINAPI *MDBX_GetFileInformationByHandleEx)( + _In_ HANDLE hFile, _In_ FILE_INFO_BY_HANDLE_CLASS FileInformationClass, + _Out_ LPVOID lpFileInformation, _In_ DWORD dwBufferSize); +extern MDBX_GetFileInformationByHandleEx mdbx_GetFileInformationByHandleEx; + +typedef BOOL(WINAPI *MDBX_GetVolumeInformationByHandleW)( + _In_ HANDLE hFile, _Out_opt_ LPWSTR lpVolumeNameBuffer, + _In_ DWORD nVolumeNameSize, _Out_opt_ LPDWORD lpVolumeSerialNumber, + _Out_opt_ LPDWORD lpMaximumComponentLength, + _Out_opt_ LPDWORD lpFileSystemFlags, + _Out_opt_ LPWSTR lpFileSystemNameBuffer, _In_ DWORD nFileSystemNameSize); + +extern MDBX_GetVolumeInformationByHandleW mdbx_GetVolumeInformationByHandleW; + +typedef DWORD(WINAPI *MDBX_GetFinalPathNameByHandleW)(_In_ HANDLE hFile, + _Out_ LPWSTR lpszFilePath, + _In_ DWORD cchFilePath, + _In_ DWORD dwFlags); +extern MDBX_GetFinalPathNameByHandleW mdbx_GetFinalPathNameByHandleW; + +typedef NTSTATUS(NTAPI *MDBX_NtFsControlFile)( + IN HANDLE FileHandle, IN OUT HANDLE Event, + IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, IN OUT PVOID ApcContext, + OUT PIO_STATUS_BLOCK IoStatusBlock, IN ULONG FsControlCode, + IN OUT PVOID InputBuffer, IN ULONG InputBufferLength, + OUT OPTIONAL PVOID OutputBuffer, IN ULONG OutputBufferLength); + +extern MDBX_NtFsControlFile mdbx_NtFsControlFile; + +#endif /* Windows */ /* Checks reader by pid. * |