diff options
author | George Hazan <ghazan@miranda.im> | 2017-10-08 23:19:54 +0300 |
---|---|---|
committer | George Hazan <ghazan@miranda.im> | 2017-10-08 23:20:01 +0300 |
commit | 7aeec32a33811541b330a75352958267f22fa0a3 (patch) | |
tree | 397d770b0956933ea4ec4963943ea9827e2cf881 /plugins/Dbx_mdb/src/mdbx/osal.c | |
parent | 73b775b4863e6498bc7794c9de27ef88529c835f (diff) |
moving lmdb to mdbx engine
Diffstat (limited to 'plugins/Dbx_mdb/src/mdbx/osal.c')
-rw-r--r-- | plugins/Dbx_mdb/src/mdbx/osal.c | 1002 |
1 files changed, 1002 insertions, 0 deletions
diff --git a/plugins/Dbx_mdb/src/mdbx/osal.c b/plugins/Dbx_mdb/src/mdbx/osal.c new file mode 100644 index 0000000000..d9026cfadf --- /dev/null +++ b/plugins/Dbx_mdb/src/mdbx/osal.c @@ -0,0 +1,1002 @@ +/* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ + +/* + * Copyright 2015-2017 Leonid Yuriev <leo@yuriev.ru> + * and other libmdbx authors: please see AUTHORS file. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. + */ + +#include "./bits.h" + +#if defined(_WIN32) || defined(_WIN64) +#include <winternl.h> + +static int waitstatus2errcode(DWORD result) { + switch (result) { + case WAIT_OBJECT_0: + return MDBX_SUCCESS; + case WAIT_FAILED: + return GetLastError(); + case WAIT_ABANDONED: + return ERROR_ABANDONED_WAIT_0; + case WAIT_IO_COMPLETION: + return ERROR_USER_APC; + case WAIT_TIMEOUT: + return ERROR_TIMEOUT; + default: + return ERROR_UNHANDLED_ERROR; + } +} + +/* Map a result from an NTAPI call to WIN32 error code. */ +static int ntstatus2errcode(NTSTATUS status) { + DWORD dummy; + OVERLAPPED ov; + memset(&ov, 0, sizeof(ov)); + ov.Internal = status; + return GetOverlappedResult(NULL, &ov, &dummy, FALSE) ? MDBX_SUCCESS + : GetLastError(); +} + +/* We use native NT APIs to setup the memory map, so that we can + * let the DB file grow incrementally instead of always preallocating + * the full size. These APIs are defined in <wdm.h> and <ntifs.h> + * but those headers are meant for driver-level development and + * conflict with the regular user-level headers, so we explicitly + * declare them here. Using these APIs also means we must link to + * ntdll.dll, which is not linked by default in user code. */ +#pragma comment(lib, "ntdll.lib") + +extern NTSTATUS NTAPI NtCreateSection( + OUT PHANDLE SectionHandle, IN ACCESS_MASK DesiredAccess, + IN OPTIONAL POBJECT_ATTRIBUTES ObjectAttributes, + IN OPTIONAL PLARGE_INTEGER MaximumSize, IN ULONG SectionPageProtection, + IN ULONG AllocationAttributes, IN OPTIONAL HANDLE FileHandle); + +extern NTSTATUS NTAPI NtExtendSection(IN HANDLE SectionHandle, + IN PLARGE_INTEGER NewSectionSize); + +typedef enum _SECTION_INHERIT { ViewShare = 1, ViewUnmap = 2 } SECTION_INHERIT; + +extern NTSTATUS NTAPI NtMapViewOfSection( + IN HANDLE SectionHandle, IN HANDLE ProcessHandle, IN OUT PVOID *BaseAddress, + IN ULONG_PTR ZeroBits, IN SIZE_T CommitSize, + IN OUT OPTIONAL PLARGE_INTEGER SectionOffset, IN OUT PSIZE_T ViewSize, + IN SECTION_INHERIT InheritDisposition, IN ULONG AllocationType, + IN ULONG Win32Protect); + +extern NTSTATUS NTAPI NtUnmapViewOfSection(IN HANDLE ProcessHandle, + IN OPTIONAL PVOID BaseAddress); + +extern NTSTATUS NTAPI NtClose(HANDLE Handle); + +extern NTSTATUS NTAPI NtAllocateVirtualMemory( + IN HANDLE ProcessHandle, IN OUT PVOID *BaseAddress, IN ULONG ZeroBits, + IN OUT PULONG RegionSize, IN ULONG AllocationType, IN ULONG Protect); + +extern NTSTATUS NTAPI NtFreeVirtualMemory(IN HANDLE ProcessHandle, + IN PVOID *BaseAddress, + IN OUT PULONG RegionSize, + IN ULONG FreeType); + +#ifndef FILE_PROVIDER_CURRENT_VERSION +typedef struct _FILE_PROVIDER_EXTERNAL_INFO_V1 { + ULONG Version; + ULONG Algorithm; + ULONG Flags; +} FILE_PROVIDER_EXTERNAL_INFO_V1, *PFILE_PROVIDER_EXTERNAL_INFO_V1; +#endif + +#ifndef STATUS_OBJECT_NOT_EXTERNALLY_BACKED +#define STATUS_OBJECT_NOT_EXTERNALLY_BACKED ((NTSTATUS)0xC000046DL) +#endif +#ifndef STATUS_INVALID_DEVICE_REQUEST +#define STATUS_INVALID_DEVICE_REQUEST ((NTSTATUS)0xC0000010L) +#endif + +extern NTSTATUS +NtFsControlFile(IN HANDLE FileHandle, IN OUT HANDLE Event, + IN OUT PVOID /* PIO_APC_ROUTINE */ ApcRoutine, + IN OUT PVOID ApcContext, OUT PIO_STATUS_BLOCK IoStatusBlock, + IN ULONG FsControlCode, IN OUT PVOID InputBuffer, + IN ULONG InputBufferLength, OUT OPTIONAL PVOID OutputBuffer, + IN ULONG OutputBufferLength); + +#endif /* _WIN32 || _WIN64 */ + +/*----------------------------------------------------------------------------*/ + +#ifndef _MSC_VER +/* Prototype should match libc runtime. ISO POSIX (2003) & LSB 3.1 */ +__nothrow __noreturn void __assert_fail(const char *assertion, const char *file, + unsigned line, const char *function); +#else +__extern_C __declspec(dllimport) void __cdecl _assert(char const *message, + char const *filename, + unsigned line); +#endif /* _MSC_VER */ + +#ifndef mdbx_assert_fail +void __cold mdbx_assert_fail(const MDBX_env *env, const char *msg, + const char *func, int line) { +#if MDBX_DEBUG + if (env && env->me_assert_func) { + env->me_assert_func(env, msg, func, line); + return; + } +#else + (void)env; +#endif /* MDBX_DEBUG */ + + if (mdbx_debug_logger) + mdbx_debug_log(MDBX_DBG_ASSERT, func, line, "assert: %s\n", msg); +#ifndef _MSC_VER + __assert_fail(msg, "mdbx", line, func); +#else + _assert(msg, func, line); +#endif /* _MSC_VER */ +} +#endif /* mdbx_assert_fail */ + +__cold void mdbx_panic(const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); +#ifdef _MSC_VER + if (IsDebuggerPresent()) { + OutputDebugString("\r\n" FIXME "\r\n"); + FatalExit(ERROR_UNHANDLED_ERROR); + } +#elif _XOPEN_SOURCE >= 700 || _POSIX_C_SOURCE >= 200809L || \ + (__GLIBC_PREREQ(1, 0) && !__GLIBC_PREREQ(2, 10) && defined(_GNU_SOURCE)) + vdprintf(STDERR_FILENO, fmt, ap); +#else +#error FIXME +#endif + va_end(ap); + abort(); +} + +/*----------------------------------------------------------------------------*/ + +#ifndef mdbx_asprintf +int mdbx_asprintf(char **strp, const char *fmt, ...) { + va_list ap, ones; + + va_start(ap, fmt); + va_copy(ones, ap); +#ifdef _MSC_VER + int needed = _vscprintf(fmt, ap); +#elif defined(vsnprintf) || defined(_BSD_SOURCE) || _XOPEN_SOURCE >= 500 || \ + defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L + int needed = vsnprintf(nullptr, 0, fmt, ap); +#else +#error FIXME +#endif + va_end(ap); + + if (unlikely(needed < 0 || needed >= INT_MAX)) { + *strp = nullptr; + va_end(ones); + return needed; + } + + *strp = malloc(needed + 1); + if (unlikely(*strp == nullptr)) { + va_end(ones); + SetLastError(MDBX_ENOMEM); + return -1; + } + +#if defined(vsnprintf) || defined(_BSD_SOURCE) || _XOPEN_SOURCE >= 500 || \ + defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L + int actual = vsnprintf(*strp, needed + 1, fmt, ones); +#else +#error FIXME +#endif + va_end(ones); + + assert(actual == needed); + if (unlikely(actual < 0)) { + free(*strp); + *strp = nullptr; + } + return actual; +} +#endif /* mdbx_asprintf */ + +#ifndef mdbx_memalign_alloc +int mdbx_memalign_alloc(size_t alignment, size_t bytes, void **result) { +#if _MSC_VER + *result = _aligned_malloc(bytes, alignment); + return *result ? MDBX_SUCCESS : MDBX_ENOMEM /* ERROR_OUTOFMEMORY */; +#elif __GLIBC_PREREQ(2, 16) || __STDC_VERSION__ >= 201112L + *result = memalign(alignment, bytes); + return *result ? MDBX_SUCCESS : errno; +#elif _POSIX_VERSION >= 200112L + *result = nullptr; + return posix_memalign(result, alignment, bytes); +#else +#error FIXME +#endif +} +#endif /* mdbx_memalign_alloc */ + +#ifndef mdbx_memalign_free +void mdbx_memalign_free(void *ptr) { +#if _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} +#endif /* mdbx_memalign_free */ + +/*----------------------------------------------------------------------------*/ + +int mdbx_condmutex_init(mdbx_condmutex_t *condmutex) { +#if defined(_WIN32) || defined(_WIN64) + int rc = MDBX_SUCCESS; + condmutex->event = NULL; + condmutex->mutex = CreateMutex(NULL, FALSE, NULL); + if (!condmutex->mutex) + return GetLastError(); + + condmutex->event = CreateEvent(NULL, FALSE, FALSE, NULL); + if (!condmutex->event) { + rc = GetLastError(); + (void)CloseHandle(condmutex->mutex); + condmutex->mutex = NULL; + } + return rc; +#else + memset(condmutex, 0, sizeof(mdbx_condmutex_t)); + int rc = pthread_mutex_init(&condmutex->mutex, NULL); + if (rc == 0) { + rc = pthread_cond_init(&condmutex->cond, NULL); + if (rc != 0) + (void)pthread_mutex_destroy(&condmutex->mutex); + } + return rc; +#endif +} + +static bool is_allzeros(const void *ptr, size_t bytes) { + const uint8_t *u8 = ptr; + for (size_t i = 0; i < bytes; ++i) + if (u8[i] != 0) + return false; + return true; +} + +int mdbx_condmutex_destroy(mdbx_condmutex_t *condmutex) { + int rc = MDBX_EINVAL; +#if defined(_WIN32) || defined(_WIN64) + if (condmutex->event) { + rc = CloseHandle(condmutex->event) ? MDBX_SUCCESS : GetLastError(); + if (rc == MDBX_SUCCESS) + condmutex->event = NULL; + } + if (condmutex->mutex) { + rc = CloseHandle(condmutex->mutex) ? MDBX_SUCCESS : GetLastError(); + if (rc == MDBX_SUCCESS) + condmutex->mutex = NULL; + } +#else + if (!is_allzeros(&condmutex->cond, sizeof(condmutex->cond))) { + rc = pthread_cond_destroy(&condmutex->cond); + if (rc == 0) + memset(&condmutex->cond, 0, sizeof(condmutex->cond)); + } + if (!is_allzeros(&condmutex->mutex, sizeof(condmutex->mutex))) { + rc = pthread_mutex_destroy(&condmutex->mutex); + if (rc == 0) + memset(&condmutex->mutex, 0, sizeof(condmutex->mutex)); + } +#endif + return rc; +} + +int mdbx_condmutex_lock(mdbx_condmutex_t *condmutex) { +#if defined(_WIN32) || defined(_WIN64) + DWORD code = WaitForSingleObject(condmutex->mutex, INFINITE); + return waitstatus2errcode(code); +#else + return pthread_mutex_lock(&condmutex->mutex); +#endif +} + +int mdbx_condmutex_unlock(mdbx_condmutex_t *condmutex) { +#if defined(_WIN32) || defined(_WIN64) + return ReleaseMutex(condmutex->mutex) ? MDBX_SUCCESS : GetLastError(); +#else + return pthread_mutex_unlock(&condmutex->mutex); +#endif +} + +int mdbx_condmutex_signal(mdbx_condmutex_t *condmutex) { +#if defined(_WIN32) || defined(_WIN64) + return SetEvent(condmutex->event) ? MDBX_SUCCESS : GetLastError(); +#else + return pthread_cond_signal(&condmutex->cond); +#endif +} + +int mdbx_condmutex_wait(mdbx_condmutex_t *condmutex) { +#if defined(_WIN32) || defined(_WIN64) + DWORD code = + SignalObjectAndWait(condmutex->mutex, condmutex->event, INFINITE, FALSE); + if (code == WAIT_OBJECT_0) + code = WaitForSingleObject(condmutex->mutex, INFINITE); + return waitstatus2errcode(code); +#else + return pthread_cond_wait(&condmutex->cond, &condmutex->mutex); +#endif +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_fastmutex_init(mdbx_fastmutex_t *fastmutex) { +#if defined(_WIN32) || defined(_WIN64) + InitializeCriticalSection(fastmutex); + return MDBX_SUCCESS; +#else + return pthread_mutex_init(fastmutex, NULL); +#endif +} + +int mdbx_fastmutex_destroy(mdbx_fastmutex_t *fastmutex) { +#if defined(_WIN32) || defined(_WIN64) + DeleteCriticalSection(fastmutex); + return MDBX_SUCCESS; +#else + return pthread_mutex_destroy(fastmutex); +#endif +} + +int mdbx_fastmutex_acquire(mdbx_fastmutex_t *fastmutex) { +#if defined(_WIN32) || defined(_WIN64) + EnterCriticalSection(fastmutex); + return MDBX_SUCCESS; +#else + return pthread_mutex_lock(fastmutex); +#endif +} + +int mdbx_fastmutex_release(mdbx_fastmutex_t *fastmutex) { +#if defined(_WIN32) || defined(_WIN64) + LeaveCriticalSection(fastmutex); + return MDBX_SUCCESS; +#else + return pthread_mutex_unlock(fastmutex); +#endif +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_openfile(const char *pathname, int flags, mode_t mode, + mdbx_filehandle_t *fd) { + *fd = INVALID_HANDLE_VALUE; +#if defined(_WIN32) || defined(_WIN64) + (void)mode; + + DWORD DesiredAccess; + DWORD ShareMode = FILE_SHARE_READ | FILE_SHARE_WRITE; + DWORD FlagsAndAttributes = FILE_ATTRIBUTE_NORMAL; + switch (flags & (O_RDONLY | O_WRONLY | O_RDWR)) { + default: + return ERROR_INVALID_PARAMETER; + case O_RDONLY: + DesiredAccess = GENERIC_READ; + break; + case O_WRONLY: /* assume for MDBX_env_copy() and friends output */ + DesiredAccess = GENERIC_WRITE; + ShareMode = 0; + FlagsAndAttributes |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH; + break; + case O_RDWR: + DesiredAccess = GENERIC_READ | GENERIC_WRITE; + break; + } + + DWORD CreationDisposition; + switch (flags & (O_EXCL | O_CREAT)) { + default: + return ERROR_INVALID_PARAMETER; + case 0: + CreationDisposition = OPEN_EXISTING; + break; + case O_EXCL | O_CREAT: + CreationDisposition = CREATE_NEW; + FlagsAndAttributes |= FILE_ATTRIBUTE_NOT_CONTENT_INDEXED; + break; + case O_CREAT: + CreationDisposition = OPEN_ALWAYS; + FlagsAndAttributes |= FILE_ATTRIBUTE_NOT_CONTENT_INDEXED; + break; + } + + *fd = CreateFileA(pathname, DesiredAccess, ShareMode, NULL, + CreationDisposition, FlagsAndAttributes, NULL); + + if (*fd == INVALID_HANDLE_VALUE) + return GetLastError(); + if ((flags & O_CREAT) && GetLastError() != ERROR_ALREADY_EXISTS) { + /* set FILE_ATTRIBUTE_NOT_CONTENT_INDEXED for new file */ + DWORD FileAttributes = GetFileAttributesA(pathname); + if (FileAttributes == INVALID_FILE_ATTRIBUTES || + !SetFileAttributesA(pathname, FileAttributes | + FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)) { + int rc = GetLastError(); + CloseHandle(*fd); + *fd = INVALID_HANDLE_VALUE; + return rc; + } + } +#else + +#ifdef O_CLOEXEC + flags |= O_CLOEXEC; +#endif + *fd = open(pathname, flags, mode); + if (*fd < 0) + return errno; +#if defined(FD_CLOEXEC) && defined(F_GETFD) + flags = fcntl(*fd, F_GETFD); + if (flags >= 0) + (void)fcntl(*fd, F_SETFD, flags | FD_CLOEXEC); +#endif +#endif + return MDBX_SUCCESS; +} + +int mdbx_closefile(mdbx_filehandle_t fd) { +#if defined(_WIN32) || defined(_WIN64) + return CloseHandle(fd) ? MDBX_SUCCESS : GetLastError(); +#else + return (close(fd) == 0) ? MDBX_SUCCESS : errno; +#endif +} + +int mdbx_pread(mdbx_filehandle_t fd, void *buf, size_t bytes, uint64_t offset) { + if (bytes > MAX_WRITE) + return MDBX_EINVAL; +#if defined(_WIN32) || defined(_WIN64) + + OVERLAPPED ov; + ov.hEvent = 0; + ov.Offset = (DWORD)offset; + ov.OffsetHigh = HIGH_DWORD(offset); + + DWORD read = 0; + if (unlikely(!ReadFile(fd, buf, (DWORD)bytes, &read, &ov))) { + int rc = GetLastError(); + return (rc == MDBX_SUCCESS) ? /* paranoia */ ERROR_READ_FAULT : rc; + } +#else + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), + "libmdbx requires 64-bit file I/O on 64-bit systems"); + intptr_t read = pread(fd, buf, bytes, offset); + if (read < 0) { + int rc = errno; + return (rc == MDBX_SUCCESS) ? /* paranoia */ MDBX_EIO : rc; + } +#endif + return (bytes == (size_t)read) ? MDBX_SUCCESS : MDBX_ENODATA; +} + +int mdbx_pwrite(mdbx_filehandle_t fd, const void *buf, size_t bytes, + uint64_t offset) { +#if defined(_WIN32) || defined(_WIN64) + if (bytes > MAX_WRITE) + return ERROR_INVALID_PARAMETER; + + OVERLAPPED ov; + ov.hEvent = 0; + ov.Offset = (DWORD)offset; + ov.OffsetHigh = HIGH_DWORD(offset); + + DWORD written; + if (likely(WriteFile(fd, buf, (DWORD)bytes, &written, &ov))) + return (bytes == written) ? MDBX_SUCCESS : MDBX_EIO /* ERROR_WRITE_FAULT */; + return GetLastError(); +#else + int rc; + intptr_t written; + do { + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), + "libmdbx requires 64-bit file I/O on 64-bit systems"); + written = pwrite(fd, buf, bytes, offset); + if (likely(bytes == (size_t)written)) + return MDBX_SUCCESS; + rc = errno; + } while (rc == EINTR); + return (written < 0) ? rc : MDBX_EIO /* Use which error code (ENOSPC)? */; +#endif +} + +int mdbx_pwritev(mdbx_filehandle_t fd, struct iovec *iov, int iovcnt, + uint64_t offset, size_t expected_written) { +#if defined(_WIN32) || defined(_WIN64) + size_t written = 0; + for (int i = 0; i < iovcnt; ++i) { + int rc = mdbx_pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + written += iov[i].iov_len; + offset += iov[i].iov_len; + } + return (expected_written == written) ? MDBX_SUCCESS + : MDBX_EIO /* ERROR_WRITE_FAULT */; +#else + int rc; + intptr_t written; + do { + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), + "libmdbx requires 64-bit file I/O on 64-bit systems"); + written = pwritev(fd, iov, iovcnt, offset); + if (likely(expected_written == (size_t)written)) + return MDBX_SUCCESS; + rc = errno; + } while (rc == EINTR); + return (written < 0) ? rc : MDBX_EIO /* Use which error code? */; +#endif +} + +int mdbx_write(mdbx_filehandle_t fd, const void *buf, size_t bytes) { +#ifdef SIGPIPE + sigset_t set, old; + sigemptyset(&set); + sigaddset(&set, SIGPIPE); + int rc = pthread_sigmask(SIG_BLOCK, &set, &old); + if (rc != 0) + return rc; +#endif + + const char *ptr = buf; + for (;;) { + size_t chunk = (MAX_WRITE < bytes) ? MAX_WRITE : bytes; +#if defined(_WIN32) || defined(_WIN64) + DWORD written; + if (unlikely(!WriteFile(fd, ptr, (DWORD)chunk, &written, NULL))) + return GetLastError(); +#else + intptr_t written = write(fd, ptr, chunk); + if (written < 0) { + int rc = errno; +#ifdef SIGPIPE + if (rc == EPIPE) { + /* Collect the pending SIGPIPE, otherwise at least OS X + * gives it to the process on thread-exit (ITS#8504). */ + int tmp; + sigwait(&set, &tmp); + written = 0; + continue; + } + pthread_sigmask(SIG_SETMASK, &old, NULL); +#endif + return rc; + } +#endif + if (likely(bytes == (size_t)written)) { +#ifdef SIGPIPE + pthread_sigmask(SIG_SETMASK, &old, NULL); +#endif + return MDBX_SUCCESS; + } + ptr += written; + bytes -= written; + } +} + +int mdbx_filesync(mdbx_filehandle_t fd, bool filesize_changed) { +#if defined(_WIN32) || defined(_WIN64) + (void)filesize_changed; + return FlushFileBuffers(fd) ? MDBX_SUCCESS : GetLastError(); +#elif __GLIBC_PREREQ(2, 16) || _BSD_SOURCE || _XOPEN_SOURCE || \ + (__GLIBC_PREREQ(2, 8) && _POSIX_C_SOURCE >= 200112L) + for (;;) { +/* LY: It is no reason to use fdatasync() here, even in case + * no such bug in a kernel. Because "no-bug" mean that a kernel + * internally do nearly the same, e.g. fdatasync() == fsync() + * when no-kernel-bug and file size was changed. + * + * So, this code is always safe and without appreciable + * performance degradation. + * + * For more info about of a corresponding fdatasync() bug + * see http://www.spinics.net/lists/linux-ext4/msg33714.html */ +#if _POSIX_C_SOURCE >= 199309L || _XOPEN_SOURCE >= 500 || \ + defined(_POSIX_SYNCHRONIZED_IO) + if (!filesize_changed && fdatasync(fd) == 0) + return MDBX_SUCCESS; +#else + (void)filesize_changed; +#endif + if (fsync(fd) == 0) + return MDBX_SUCCESS; + int rc = errno; + if (rc != EINTR) + return rc; + } +#else +#error FIXME +#endif +} + +int mdbx_filesize_sync(mdbx_filehandle_t fd) { +#if defined(_WIN32) || defined(_WIN64) + (void)fd; + /* Nothing on Windows (i.e. newer 100% steady) */ + return MDBX_SUCCESS; +#else + for (;;) { + if (fsync(fd) == 0) + return MDBX_SUCCESS; + int rc = errno; + if (rc != EINTR) + return rc; + } +#endif +} + +int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length) { +#if defined(_WIN32) || defined(_WIN64) + BY_HANDLE_FILE_INFORMATION info; + if (!GetFileInformationByHandle(fd, &info)) + return GetLastError(); + *length = info.nFileSizeLow | (uint64_t)info.nFileSizeHigh << 32; +#else + struct stat st; + + STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(uint64_t), + "libmdbx requires 64-bit file I/O on 64-bit systems"); + if (fstat(fd, &st)) + return errno; + + *length = st.st_size; +#endif + return MDBX_SUCCESS; +} + +int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length) { +#if defined(_WIN32) || defined(_WIN64) + LARGE_INTEGER li; + li.QuadPart = length; + return (SetFilePointerEx(fd, li, NULL, FILE_BEGIN) && SetEndOfFile(fd)) + ? MDBX_SUCCESS + : GetLastError(); +#else + STATIC_ASSERT_MSG(sizeof(off_t) >= sizeof(size_t), + "libmdbx requires 64-bit file I/O on 64-bit systems"); + return ftruncate(fd, length) == 0 ? MDBX_SUCCESS : errno; +#endif +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_thread_key_create(mdbx_thread_key_t *key) { +#if defined(_WIN32) || defined(_WIN64) + *key = TlsAlloc(); + return (*key != TLS_OUT_OF_INDEXES) ? MDBX_SUCCESS : GetLastError(); +#else + return pthread_key_create(key, mdbx_rthc_dtor); +#endif +} + +void mdbx_thread_key_delete(mdbx_thread_key_t key) { +#if defined(_WIN32) || defined(_WIN64) + mdbx_ensure(NULL, TlsFree(key)); +#else + mdbx_ensure(NULL, pthread_key_delete(key) == 0); +#endif +} + +void *mdbx_thread_rthc_get(mdbx_thread_key_t key) { +#if defined(_WIN32) || defined(_WIN64) + return TlsGetValue(key); +#else + return pthread_getspecific(key); +#endif +} + +void mdbx_thread_rthc_set(mdbx_thread_key_t key, const void *value) { +#if defined(_WIN32) || defined(_WIN64) + mdbx_ensure(NULL, TlsSetValue(key, (void *)value)); +#else + mdbx_ensure(NULL, pthread_setspecific(key, value) == 0); +#endif +} + +int mdbx_thread_create(mdbx_thread_t *thread, + THREAD_RESULT(THREAD_CALL *start_routine)(void *), + void *arg) { +#if defined(_WIN32) || defined(_WIN64) + *thread = CreateThread(NULL, 0, start_routine, arg, 0, NULL); + return *thread ? MDBX_SUCCESS : GetLastError(); +#else + return pthread_create(thread, NULL, start_routine, arg); +#endif +} + +int mdbx_thread_join(mdbx_thread_t thread) { +#if defined(_WIN32) || defined(_WIN64) + DWORD code = WaitForSingleObject(thread, INFINITE); + return waitstatus2errcode(code); +#else + void *unused_retval = &unused_retval; + return pthread_join(thread, &unused_retval); +#endif +} + +/*----------------------------------------------------------------------------*/ + +int mdbx_msync(mdbx_mmap_t *map, size_t offset, size_t length, int async) { + uint8_t *ptr = (uint8_t *)map->address + offset; +#if defined(_WIN32) || defined(_WIN64) + if (FlushViewOfFile(ptr, length) && (async || FlushFileBuffers(map->fd))) + return MDBX_SUCCESS; + return GetLastError(); +#else + const int mode = async ? MS_ASYNC : MS_SYNC; + return (msync(ptr, length, mode) == 0) ? MDBX_SUCCESS : errno; +#endif +} + +int mdbx_mmap(int flags, mdbx_mmap_t *map, size_t must, size_t limit) { + assert(must <= limit); +#if defined(_WIN32) || defined(_WIN64) + map->length = 0; + map->current = 0; + map->section = NULL; + map->address = nullptr; + + if (GetFileType(map->fd) != FILE_TYPE_DISK) + return ERROR_FILE_OFFLINE; + + FILE_REMOTE_PROTOCOL_INFO RemoteProtocolInfo; + if (GetFileInformationByHandleEx(map->fd, FileRemoteProtocolInfo, + &RemoteProtocolInfo, + sizeof(RemoteProtocolInfo))) { + if ((RemoteProtocolInfo.Flags & (REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK | + REMOTE_PROTOCOL_INFO_FLAG_OFFLINE)) != + REMOTE_PROTOCOL_INFO_FLAG_LOOPBACK) + return ERROR_FILE_OFFLINE; + } + + NTSTATUS rc; +#ifdef _WIN64 + struct { + WOF_EXTERNAL_INFO wof_info; + union { + WIM_PROVIDER_EXTERNAL_INFO wim_info; + FILE_PROVIDER_EXTERNAL_INFO_V1 file_info; + }; + size_t reserved_for_microsoft_madness[42]; + } GetExternalBacking_OutputBuffer; + IO_STATUS_BLOCK StatusBlock; + rc = NtFsControlFile(map->fd, NULL, NULL, NULL, &StatusBlock, + FSCTL_GET_EXTERNAL_BACKING, NULL, 0, + &GetExternalBacking_OutputBuffer, + sizeof(GetExternalBacking_OutputBuffer)); + if (rc != STATUS_OBJECT_NOT_EXTERNALLY_BACKED && + rc != STATUS_INVALID_DEVICE_REQUEST) + return NT_SUCCESS(rc) ? ERROR_FILE_OFFLINE : ntstatus2errcode(rc); +#endif + + WCHAR PathBuffer[INT16_MAX]; + DWORD VolumeSerialNumber, FileSystemFlags; + if (!GetVolumeInformationByHandleW(map->fd, PathBuffer, INT16_MAX, + &VolumeSerialNumber, NULL, + &FileSystemFlags, NULL, 0)) + return GetLastError(); + + if ((flags & MDBX_RDONLY) == 0) { + if (FileSystemFlags & (FILE_SEQUENTIAL_WRITE_ONCE | FILE_READ_ONLY_VOLUME | + FILE_VOLUME_IS_COMPRESSED)) + return ERROR_FILE_OFFLINE; + } + + if (!GetFinalPathNameByHandleW(map->fd, PathBuffer, INT16_MAX, + FILE_NAME_NORMALIZED | VOLUME_NAME_NT)) + return GetLastError(); + + if (_wcsnicmp(PathBuffer, L"\\Device\\Mup\\", 12) == 0) + return ERROR_FILE_OFFLINE; + + if (GetFinalPathNameByHandleW(map->fd, PathBuffer, INT16_MAX, + FILE_NAME_NORMALIZED | VOLUME_NAME_DOS)) { + UINT DriveType = GetDriveTypeW(PathBuffer); + if (DriveType == DRIVE_NO_ROOT_DIR && + wcsncmp(PathBuffer, L"\\\\?\\", 4) == 0 && + wcsncmp(PathBuffer + 5, L":\\", 2) == 0) { + PathBuffer[7] = 0; + DriveType = GetDriveTypeW(PathBuffer + 4); + } + switch (DriveType) { + case DRIVE_CDROM: + if (flags & MDBX_RDONLY) + break; + // fall through + case DRIVE_UNKNOWN: + case DRIVE_NO_ROOT_DIR: + case DRIVE_REMOTE: + default: + return ERROR_FILE_OFFLINE; + case DRIVE_REMOVABLE: + case DRIVE_FIXED: + case DRIVE_RAMDISK: + break; + } + } + + rc = NtCreateSection( + &map->section, + /* DesiredAccess */ SECTION_MAP_READ | SECTION_EXTEND_SIZE | + ((flags & MDBX_WRITEMAP) ? SECTION_MAP_WRITE : 0), + /* ObjectAttributes */ NULL, /* MaximumSize */ NULL, + /* SectionPageProtection */ (flags & MDBX_RDONLY) ? PAGE_READONLY + : PAGE_READWRITE, + /* AllocationAttributes */ SEC_RESERVE, map->fd); + + if (!NT_SUCCESS(rc)) + return ntstatus2errcode(rc); + + map->address = nullptr; + SIZE_T ViewSize = (flags & MDBX_RDONLY) ? must : limit; + rc = NtMapViewOfSection( + map->section, GetCurrentProcess(), &map->address, + /* ZeroBits */ 0, + /* CommitSize */ must, + /* SectionOffset */ NULL, &ViewSize, + /* InheritDisposition */ ViewUnmap, + /* AllocationType */ (flags & MDBX_RDONLY) ? 0 : MEM_RESERVE, + /* Win32Protect */ (flags & MDBX_WRITEMAP) ? PAGE_READWRITE + : PAGE_READONLY); + + if (!NT_SUCCESS(rc)) { + NtClose(map->section); + map->section = 0; + map->address = nullptr; + return ntstatus2errcode(rc); + } + assert(map->address != MAP_FAILED); + + uint64_t filesize; + rc = mdbx_filesize(map->fd, &filesize); + if (rc != MDBX_SUCCESS) { + NtClose(map->section); + NtUnmapViewOfSection(GetCurrentProcess(), map->address); + map->section = 0; + map->address = nullptr; + return rc; + } + + map->current = (must < filesize) ? must : (size_t)filesize; + map->length = ViewSize; + return MDBX_SUCCESS; +#else + (void)must; + map->address = mmap( + NULL, limit, (flags & MDBX_WRITEMAP) ? PROT_READ | PROT_WRITE : PROT_READ, + MAP_SHARED, map->fd, 0); + if (likely(map->address != MAP_FAILED)) { + map->length = limit; + return MDBX_SUCCESS; + } + map->length = 0; + map->address = nullptr; + return errno; +#endif +} + +int mdbx_munmap(mdbx_mmap_t *map) { +#if defined(_WIN32) || defined(_WIN64) + if (map->section) + NtClose(map->section); + NTSTATUS rc = NtUnmapViewOfSection(GetCurrentProcess(), map->address); + if (!NT_SUCCESS(rc)) + ntstatus2errcode(rc); + map->length = 0; + map->current = 0; + map->address = nullptr; +#else + if (unlikely(munmap(map->address, map->length))) + return errno; + map->length = 0; + map->address = nullptr; +#endif + return MDBX_SUCCESS; +} + +int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t atleast, size_t limit) { + assert(atleast <= limit); +#if defined(_WIN32) || defined(_WIN64) + if (limit < map->length) { + /* Windows is unable shrinking a mapped section */ + return ERROR_USER_MAPPED_FILE; + } + if (limit > map->length) { + /* extend */ + LARGE_INTEGER new_size; + new_size.QuadPart = limit; + NTSTATUS rc = NtExtendSection(map->section, &new_size); + if (!NT_SUCCESS(rc)) + return ntstatus2errcode(rc); + map->length = limit; + } + if (atleast < map->current) { + /* Windows is unable shrinking a mapped file */ + uint8_t *ptr = (uint8_t *)map->address + atleast; + if (!VirtualFree(ptr, map->current - atleast, MEM_DECOMMIT)) + return MDBX_RESULT_TRUE; + + map->current = atleast; + int rc = mdbx_ftruncate(map->fd, atleast); + return (rc != MDBX_SUCCESS) ? MDBX_RESULT_TRUE : rc; + } + if (atleast > map->current) { + /* growth */ + uint8_t *ptr = (uint8_t *)map->address + map->current; + if (ptr != + VirtualAlloc(ptr, atleast - map->current, MEM_COMMIT, + (flags & MDBX_WRITEMAP) ? PAGE_READWRITE : PAGE_READONLY)) + return GetLastError(); + map->current = atleast; + } + + uint64_t filesize; + int rc = mdbx_filesize(map->fd, &filesize); + if (rc != MDBX_SUCCESS) + return rc; + if (filesize < atleast) { + rc = mdbx_ftruncate(map->fd, atleast); + if (rc != MDBX_SUCCESS) + return rc; + } + return MDBX_SUCCESS; +#else + (void)flags; + if (limit != map->length) { + void *ptr = mremap(map->address, map->length, limit, MREMAP_MAYMOVE); + if (ptr == MAP_FAILED) + return errno; + map->address = ptr; + map->length = limit; + } + return mdbx_ftruncate(map->fd, atleast); +#endif +} + +/*----------------------------------------------------------------------------*/ + +__cold void mdbx_osal_jitter(bool tiny) { + for (;;) { +#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ + defined(__x86_64__) + const unsigned salt = 277u * (unsigned)__rdtsc(); +#else + const unsigned salt = rand(); +#endif + + const unsigned coin = salt % (tiny ? 29u : 43u); + if (coin < 43 / 3) + break; +#if defined(_WIN32) || defined(_WIN64) + SwitchToThread(); + if (coin > 43 * 2 / 3) + Sleep(1); +#else + sched_yield(); + if (coin > 43 * 2 / 3) + usleep(coin); +#endif + } +} |