diff options
Diffstat (limited to 'libs/libmdbx/src')
49 files changed, 3928 insertions, 1827 deletions
diff --git a/libs/libmdbx/src/.circleci/config.yml b/libs/libmdbx/src/.circleci/config.yml index 91e11a4bfe..6d46ecefd3 100644 --- a/libs/libmdbx/src/.circleci/config.yml +++ b/libs/libmdbx/src/.circleci/config.yml @@ -2,7 +2,7 @@ version: 2 jobs: build: docker: - - image: circleci/buildpack-deps:artful + - image: circleci/buildpack-deps:20.04 environment: - TESTDB: /tmp/test.db - TESTLOG: /tmp/test.log diff --git a/libs/libmdbx/src/.cirrus.yml b/libs/libmdbx/src/.cirrus.yml new file mode 100644 index 0000000000..c08e316c37 --- /dev/null +++ b/libs/libmdbx/src/.cirrus.yml @@ -0,0 +1,6 @@ +freebsd_instance: + image_family: freebsd-12-1-snap + +task: + install_script: pkg install -y gmake bash git + script: git fetch --tags && gmake check diff --git a/libs/libmdbx/src/.github/workflows/release-assets.yml b/libs/libmdbx/src/.github/workflows/release-assets.yml new file mode 100644 index 0000000000..934ab93b39 --- /dev/null +++ b/libs/libmdbx/src/.github/workflows/release-assets.yml @@ -0,0 +1,54 @@ +# Based on the https://github.com/actions/upload-release-asset example + +on: + push: + # Sequence of patterns matched against refs/tags + tags: + - 'v*' # Push events to matching v*, i.e. 
v1.0, v20.15.10 + +name: Upload Release Asset + +jobs: + build: + name: Upload Release Asset + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Build assets + run: | + make release-assets + - id: name + run: | + echo "::set-output name=tarball::$(ls *.tar.gz)" + echo "::set-output name=zip::$(ls *.zip)" + - name: Create Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }} + release_name: Release ${{ github.ref }} + draft: true + prerelease: true + - name: Upload tarball + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ steps.name.outputs.tarball }} + asset_name: amalgamated.tar.gz + # asset_label: Amalgamated source tarball + asset_content_type: application/tar+gzip + - name: Upload zip + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.create_release.outputs.upload_url }} + asset_path: ${{ steps.name.outputs.zip }} + asset_name: amalgamated.zip + # asset_label: Amalgamated source zip-archive + asset_content_type: application/zip diff --git a/libs/libmdbx/src/.travis.yml b/libs/libmdbx/src/.travis.yml index c1bd6da5e9..e736d01341 100644 --- a/libs/libmdbx/src/.travis.yml +++ b/libs/libmdbx/src/.travis.yml @@ -1,61 +1,88 @@ language: c cpp +sudo: false + +env: + global: + - secure: 
"M+W+heGGyRQJoBq2W0uqWVrpL4KBXmL0MFL7FSs7f9vmAaDyEgziUXeZRj3GOKzW4kTef3LpIeiu9SmvqSMoQivGGiomZShqPVl045o/OUgRCAT7Al1RLzEZ0efSHpIPf0PZ6byEf6GR2ML76OfuL6JxTVdnz8iVyO2sgLE1HbX1VeB+wgd/jfMeOBhCCXskfK6MLyZihfMYsiYZYSaV98ZDhDLSlzuuRIgzb0bMi8aL6AErs0WLW0NelRBeHkKPYfAUc85pdQHscgrJw6Rh/zT6+8BQ/q5f4IgWhiu4xoRg3Ngl7SNoedRQh93ADM3UG2iGl6HDFpVORaXcFWKAtuYY+kHQ0HB84BRYpQmeBuXNpltsfxQ3d1Q3u0RlE45zRvmr2+X1mFnkcNUAWISLPbsOUlriDQM8irGwRpho77/uYnRC00bJsHW//s6+uPf9zrAw1nI4f0y3PAWukGF/xs6HAI3FZPsuSSnx18Tj3Opgbc9Spop+V3hkhdiJoPGpNKTkFX4ZRXfkPgoRVJmtp4PpbpH0Ps/mCriKjMEfGGi0HcVCi0pEGLXiecdqJ5KPg5+22zNycEujQBJcNTKd9shN+R3glrbmhAxTEzGdGwxXXJ2ybwJ2PWJLMYZ7g98nLyX+uQPaA3BlsbYJHNeS5283/9pJsd9DzfHKsN2nFSc=" + +addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - cmake + - clang-format + update: true matrix: include: - os: linux - dist: trusty + dist: focal + compiler: gcc env: CC=cc CXX=c++ - os: linux - dist: trusty + dist: focal compiler: clang env: CC=clang CXX=clang++ - os: linux - dist: xenial + dist: bionic compiler: gcc - env: CC=gcc CXX=g++ + env: CC=cc CXX=c++ - os: linux dist: bionic compiler: clang env: CC=clang CXX=clang++ + - os: linux + dist: xenial + compiler: gcc + env: CC=cc CXX=c++ + - os: linux + dist: xenial + compiler: clang + env: CC=clang CXX=clang++ - os: osx - osx_image: xcode11 + osx_image: xcode11.3 env: CC=cc CXX=c++ - os: osx osx_image: xcode9.4 env: CC=cc CXX=c++ -script: > - if [ "${COVERITY_SCAN_BRANCH}" != 1 ]; then - git fetch --unshallow --tags --prune && - git submodule foreach --recursive git fetch --unshallow --tags --prune && - (if which clang-format-6.0 > /dev/null && make reformat && [[ -n $(git diff) ]]; - then - echo "You must run 'make reformat' before submitting a pull request"; - echo ""; - git diff; - exit -1; - fi) && +before_script: | + if [ "${TRAVIS_BRANCH}" = "coverity_scan" ]; then + # call Coverity Scan manually of addons.coverity_scan for first job only + if [ "${TRAVIS_JOB_NUMBER}" = "${TRAVIS_BUILD_NUMBER}.1" ]; then + 
export COVERITY_SCAN_BRANCH=1 + echo -n | openssl s_client -connect scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca- + curl -s 'https://scan.coverity.com/scripts/travisci_build_coverity_scan.sh' -o coverity_scan.sh + else + echo 'echo "Skip CoverityScan for unrelated os/compiler"' > coverity_scan.sh + fi + fi + +script: | + ${CC} --version + ${CXX} --version + git fetch --unshallow --tags --prune || exit 1 + if [ ! -s ./coverity_scan.sh ]; then make --keep-going all && MALLOC_CHECK_=7 MALLOC_PERTURB_=42 make --keep-going check else - [ ! -s cov-int/scm_log.txt ] || cat cov-int/scm_log.txt; - fi && sleep 3 + COVERITY_SCAN_PROJECT_NAME="ReOpen/libmdbx" \ + COVERITY_SCAN_NOTIFICATION_EMAIL="leo@yuriev.ru" \ + COVERITY_SCAN_BUILD_COMMAND_PREPEND="" \ + COVERITY_SCAN_BUILD_COMMAND="make MDBX_OPTIONS=-DMDBX_DEBUG=2 build-test" \ + COVERITY_SCAN_BRANCH_PATTERN="$TRAVIS_BRANCH" \ + bash ./coverity_scan.sh || cat cov-int/scm_log.txt + fi -env: - global: - - secure: "M+W+heGGyRQJoBq2W0uqWVrpL4KBXmL0MFL7FSs7f9vmAaDyEgziUXeZRj3GOKzW4kTef3LpIeiu9SmvqSMoQivGGiomZShqPVl045o/OUgRCAT7Al1RLzEZ0efSHpIPf0PZ6byEf6GR2ML76OfuL6JxTVdnz8iVyO2sgLE1HbX1VeB+wgd/jfMeOBhCCXskfK6MLyZihfMYsiYZYSaV98ZDhDLSlzuuRIgzb0bMi8aL6AErs0WLW0NelRBeHkKPYfAUc85pdQHscgrJw6Rh/zT6+8BQ/q5f4IgWhiu4xoRg3Ngl7SNoedRQh93ADM3UG2iGl6HDFpVORaXcFWKAtuYY+kHQ0HB84BRYpQmeBuXNpltsfxQ3d1Q3u0RlE45zRvmr2+X1mFnkcNUAWISLPbsOUlriDQM8irGwRpho77/uYnRC00bJsHW//s6+uPf9zrAw1nI4f0y3PAWukGF/xs6HAI3FZPsuSSnx18Tj3Opgbc9Spop+V3hkhdiJoPGpNKTkFX4ZRXfkPgoRVJmtp4PpbpH0Ps/mCriKjMEfGGi0HcVCi0pEGLXiecdqJ5KPg5+22zNycEujQBJcNTKd9shN+R3glrbmhAxTEzGdGwxXXJ2ybwJ2PWJLMYZ7g98nLyX+uQPaA3BlsbYJHNeS5283/9pJsd9DzfHKsN2nFSc=" - -before_install: - - echo -n | openssl s_client -connect scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca- - - ${CC} --version - - ${CXX} --version - -addons: - coverity_scan: - project: - name: "ReOpen/libmdbx" - 
version: 0.1 - description: "Build submitted via Travis CI" - notification_email: leo@yuriev.ru - build_command_prepend: "git fetch --unshallow --tags --prune && make dist" - build_command: "make MDBX_OPTIONS=-DMDBX_DEBUG=2 -C dist all" - branch_pattern: coverity_scan +after_script: | + if [ "${TRAVIS_BRANCH}" != "coverity_scan" -a "${TRAVIS_JOB_NUMBER}" = "${TRAVIS_BUILD_NUMBER}.1" ] && make reformat && [[ -n $(git diff) ]]; then + echo "You must run 'make reformat' before submitting a pull request" + echo "-------------------------------------------------------------------------------" + git diff + sleep 1 + echo "-------------------------------------------------------------------------------" + sleep 1 + exit -1 + fi + echo "-------------------------------------------------------------------------------" + sleep 1 diff --git a/libs/libmdbx/src/CMakeLists.txt b/libs/libmdbx/src/CMakeLists.txt index a17bd35ab6..40355c27d3 100644 --- a/libs/libmdbx/src/CMakeLists.txt +++ b/libs/libmdbx/src/CMakeLists.txt @@ -38,6 +38,9 @@ cmake_minimum_required(VERSION 3.8.2) cmake_policy(PUSH) cmake_policy(VERSION 3.8.2) +if(NOT CMAKE_VERSION VERSION_LESS 3.15) + cmake_policy(SET CMP0091 NEW) +endif() if(NOT CMAKE_VERSION VERSION_LESS 3.13) cmake_policy(SET CMP0077 NEW) endif() @@ -253,17 +256,23 @@ else() add_custom_target(ctags DEPENDS tags) endif(CTAGS) - # Enable 'make reformat' target. 
- find_program(CLANG_FORMAT - NAMES clang-format-11.0 clang-format-10.0 clang-format-9.0 clang-format-8.0 clang-format) - if(CLANG_FORMAT AND UNIX) - add_custom_target(reformat - VERBATIM - COMMAND - git ls-files | - grep -E \\.\(c|cxx|cc|cpp|h|hxx|hpp\)\(\\.in\)?\$ | - xargs ${CLANG_FORMAT} -i --style=file - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + if(UNIX) + find_program(CLANG_FORMAT + NAMES clang-format-12 clang-format-11 clang-format-10 clang-format) + if(CLANG_FORMAT) + execute_process(COMMAND ${CLANG_FORMAT} "--version" OUTPUT_VARIABLE clang_format_version_info) + string(REGEX MATCH "version ([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" clang_format_version_info CLANG_FORMAT_VERSION) + if(clang_format_version_info AND NOT CLANG_FORMAT_VERSION VERSION_LESS 10.0) + # Enable 'make reformat' target. + add_custom_target(reformat + VERBATIM + COMMAND + git ls-files | + grep -E \\.\(c|cxx|cc|cpp|h|hxx|hpp\)\(\\.in\)?\$ | + xargs ${CLANG_FORMAT} -i --style=file + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) + endif() + endif() endif() if(NOT "${PROJECT_BINARY_DIR}" STREQUAL "${PROJECT_SOURCE_DIR}") @@ -369,6 +378,7 @@ add_mdbx_option(MDBX_BUILD_SHARED_LIBRARY "Build libmdbx as shared library (DLL) add_mdbx_option(MDBX_BUILD_TOOLS "Build MDBX tools (mdbx_chk/stat/dump/load/copy)" ${MDBX_BUILD_TOOLS_DEFAULT}) add_mdbx_option(MDBX_TXN_CHECKOWNER "Checking transaction matches the calling thread inside libmdbx's API" ON) add_mdbx_option(MDBX_TXN_CHECKPID "Paranoid checking PID inside libmdbx's API" AUTO) +add_mdbx_option(MDBX_HUGE_TRANSACTIONS "Support for huge write-transactions" OFF) mark_as_advanced(MDBX_TXN_CHECKPID) if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") add_mdbx_option(MDBX_DISABLE_GNU_SOURCE "Don't use GNU/Linux libc extensions" OFF) @@ -410,7 +420,7 @@ endif() ################################################################################ # Get version -fetch_version(MDBX "${CMAKE_CURRENT_SOURCE_DIR}/VERSION" FALSE) +fetch_version(MDBX "${CMAKE_CURRENT_SOURCE_DIR}" 
FALSE) message(STATUS "libmdbx version is ${MDBX_VERSION}") # sources list @@ -459,7 +469,7 @@ macro(libmdbx_setup_libs TARGET MODE) target_link_libraries(${TARGET} ${MODE} ${CMAKE_THREAD_LIBS_INIT}) if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") target_link_libraries(${TARGET} ${MODE} ntdll.lib) - if(MDBX_NTDLL_EXTRA_IMPLIB) + if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_AVOID_CRT) target_link_libraries(${TARGET} ${MODE} ntdll_extra) endif() elseif(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Solaris") @@ -498,6 +508,13 @@ if(MDBX_BUILD_SHARED_LIBRARY) target_compile_definitions(mdbx PRIVATE LIBMDBX_EXPORTS MDBX_BUILD_SHARED_LIBRARY=1 INTERFACE LIBMDBX_IMPORTS) target_setup_options(mdbx) libmdbx_setup_libs(mdbx PRIVATE) + if(MSVC) + if(MDBX_NTDLL_EXTRA_IMPLIB AND MDBX_AVOID_CRT) + set_property(TARGET mdbx PROPERTY LINKER_FLAGS "/NODEFAULTLIB") + else() + set_property(TARGET mdbx PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL") + endif() + endif() if(CC_HAS_VISIBILITY AND (LTO_ENABLED OR INTERPROCEDURAL_OPTIMIZATION)) set_target_properties(mdbx PROPERTIES LINK_FLAGS "-fvisibility=hidden") endif() @@ -557,45 +574,21 @@ endif() if(MDBX_BUILD_SHARED_LIBRARY) if(CMAKE_VERSION VERSION_LESS 3.12) install(TARGETS mdbx EXPORT libmdbx - RUNTIME - DESTINATION bin - COMPONENT runtime - LIBRARY - DESTINATION lib - COMPONENT runtime - OBJECTS - DESTINATION lib - COMPONENT devel - ARCHIVE - DESTINATION lib - COMPONENT devel - PUBLIC_HEADER - DESTINATION include - COMPONENT devel - INCLUDES - DESTINATION include - COMPONENT devel) + RUNTIME DESTINATION bin COMPONENT runtime + LIBRARY DESTINATION bin COMPONENT runtime + OBJECTS DESTINATION lib COMPONENT devel + ARCHIVE DESTINATION lib COMPONENT devel + PUBLIC_HEADER DESTINATION include COMPONENT devel + INCLUDES DESTINATION include COMPONENT devel) else() install(TARGETS mdbx EXPORT libmdbx - RUNTIME - DESTINATION bin - COMPONENT runtime - LIBRARY - DESTINATION lib - COMPONENT runtime + 
RUNTIME DESTINATION bin COMPONENT runtime + LIBRARY DESTINATION bin COMPONENT runtime NAMELINK_COMPONENT devel - OBJECTS - DESTINATION lib - COMPONENT devel - ARCHIVE - DESTINATION lib - COMPONENT devel - PUBLIC_HEADER - DESTINATION include - COMPONENT devel - INCLUDES - DESTINATION include - COMPONENT devel) + OBJECTS DESTINATION lib COMPONENT devel + ARCHIVE DESTINATION lib COMPONENT devel + PUBLIC_HEADER DESTINATION include COMPONENT devel + INCLUDES DESTINATION include COMPONENT devel) endif() endif(MDBX_BUILD_SHARED_LIBRARY) @@ -626,45 +619,21 @@ endif(MDBX_BUILD_TOOLS) if(MDBX_INSTALL_STATIC) if(CMAKE_VERSION VERSION_LESS 3.12) install(TARGETS mdbx-static EXPORT libmdbx - RUNTIME - DESTINATION bin - COMPONENT runtime - LIBRARY - DESTINATION lib - COMPONENT runtime - OBJECTS - DESTINATION lib - COMPONENT devel - ARCHIVE - DESTINATION lib - COMPONENT devel - PUBLIC_HEADER - DESTINATION include - COMPONENT devel - INCLUDES - DESTINATION include - COMPONENT devel) + RUNTIME DESTINATION bin COMPONENT runtime + LIBRARY DESTINATION bin COMPONENT runtime + OBJECTS DESTINATION lib COMPONENT devel + ARCHIVE DESTINATION lib COMPONENT devel + PUBLIC_HEADER DESTINATION include COMPONENT devel + INCLUDES DESTINATION include COMPONENT devel) else() install(TARGETS mdbx-static EXPORT libmdbx - RUNTIME - DESTINATION bin - COMPONENT runtime - LIBRARY - DESTINATION lib - COMPONENT runtime + RUNTIME DESTINATION bin COMPONENT runtime + LIBRARY DESTINATION bin COMPONENT runtime NAMELINK_COMPONENT devel - OBJECTS - DESTINATION lib - COMPONENT devel - ARCHIVE - DESTINATION lib - COMPONENT devel - PUBLIC_HEADER - DESTINATION include - COMPONENT devel - INCLUDES - DESTINATION include - COMPONENT devel) + OBJECTS DESTINATION lib COMPONENT devel + ARCHIVE DESTINATION lib COMPONENT devel + PUBLIC_HEADER DESTINATION include COMPONENT devel + INCLUDES DESTINATION include COMPONENT devel) endif() endif(MDBX_INSTALL_STATIC) diff --git a/libs/libmdbx/src/ChangeLog.md 
b/libs/libmdbx/src/ChangeLog.md new file mode 100644 index 0000000000..1c4beb635b --- /dev/null +++ b/libs/libmdbx/src/ChangeLog.md @@ -0,0 +1,113 @@ +v0.9.x (in the development): + - TODO: Native bindings for C++. + - TODO: Packages for AltLinux, Fedora/RHEL, Debian/Ubuntu. + +v0.8.1 2020-06-12: + - Minor change in versioning. The last number in the version now means the number of commits since the last release/tag. + - Provide ChangeLog file. + - Fix for using libmdbx as C-only sub-project with CMake. + - Fix `mdbx_env_set_geometry()` for the case when it is called from an opened environment outside of a write transaction. + - Add support for huge transactions and `MDBX_HUGE_TRANSACTIONS` build-option (default `OFF`). + - Refine LTO (link time optimization) for clang. + - Force enabling exceptions handling for MSVC (`/EHsc` option). + +v0.8.0 2020-06-05: + - Support for Android/Bionic. + - Support for iOS. + - Auto-handling `MDBX_NOSUBDIR` while opening for any existing database. + - Engage github-actions to make release-assets. + - Clarify API description. + - Extended keygen-cases in stochastic test. + - Fix fetching of first/lower key from LEAF2-page during page merge. + - Fix missing comma in array of error messages. + - Fix div-by-zero while copy-with-compaction for non-resizeable environment. + - Fixes & enhancements for custom-comparators. + - Fix `MDBX_AVOID_CRT` option and missing `ntdll.def`. + - Fix `mdbx_env_close()` to work correctly when called concurrently from several threads. + - Fix null-deref in ASAN-enabled builds while opening environment with error and/or read-only. + - Fix AddressSanitizer errors after closing environment. + - Fix/workaround to avoid GCC 10.x pedantic warnings. + - Fix using `ENODATA` for FreeBSD. + - Avoid invalidation of DBI-handle(s) when it is just closing. + - Avoid using `pwritev()` for single-writes (up to 10% speedup for some kernels & scenarios). + - Avoiding `MDBX_UTTERLY_NOSYNC` as result of flags merge. 
+ - Add `mdbx_dbi_dupsort_depthmask()` function. + - Add `MDBX_CP_FORCE_RESIZEABLE` option. + - Add deprecated `MDBX_MAP_RESIZED` for compatibility. + - Add `MDBX_BUILD_TOOLS` option (default `ON`). + - Refine `mdbx_dbi_open_ex()` to safely open the same handle concurrently from different threads. + - Truncate lck-file during environment closing. So a zero-length lck-file indicates that the environment was closed properly. + - Refine `mdbx_update_gc()` for huge transactions with small sizes of database page. + - Extend dump/load to support all MDBX attributes. + - Avoid upsertion of the same key-value data, fix related assertions. + - Rework min/max length checking for keys & values. + - Checking the order of keys on all pages during checking. + - Support `CFLAGS_EXTRA` make-option for convenience. + - Preserve last txnid while copy with compactification. + - Auto-reset running transaction in mdbx_txn_renew(). + - Automatically abort errored transaction in mdbx_txn_commit(). + - Auto-choose pagesize for large databases. + - Rearrange source files, rework build, options-support by CMake. + - Crutch for WSL1 (Windows subsystem for Linux). + - Refine install/uninstall targets. + - Support for Valgrind 3.14 and later. + - Add check-analyzer check-ubsan check-asan check-leak targets to Makefile. + - Minor fix/workaround to avoid UBSAN traps for `memcpy(ptr, NULL, 0)`. + - Avoid some GCC-analyzer false-positive warnings. + +v0.7.0 2020-03-18: + - Workarounds for Wine (Windows compatibility layer for Linux). + - `MDBX_MAP_RESIZED` renamed to `MDBX_UNABLE_EXTEND_MAPSIZE`. + - Clarify API description, fix typos. + - Speedup runtime checks in debug/checked builds. + - Added checking for read/write transactions overlapping for the same thread, added `MDBX_TXN_OVERLAPPING` error and `MDBX_DBG_LEGACY_OVERLAP` option. + - Added `mdbx_key_from_jsonInteger()`, `mdbx_key_from_double()`, `mdbx_key_from_float()`, `mdbx_key_from_int64()` and `mdbx_key_from_int32()` functions. 
See `mdbx.h` for description. + - Fix compatibility (use zero for invalid DBI). + - Refine/clarify error messages. + - Avoids extra error messages "bad txn" from mdbx_chk when DB is corrupted. + +v0.6.0 2020-01-21: + - Fix `mdbx_load` utility for custom comparators. + - Fix checks related to `MDBX_APPEND` flag inside `mdbx_cursor_put()`. + - Refine/fix dbi_bind() internals. + - Refine/fix handling STATUS_CONFLICTING_ADDRESSES. + - Rework `MDBX_DBG_DUMP` option to avoid disk I/O performance degradation. + - Add built-in help to test tool. + - Fix `mdbx_env_set_geometry()` for large page size. + - Fix env_set_geometry() for large pagesize. + - Clarify API description & comments, fix typos. + +v0.5.0 2019-12-31: + - Fix returning MDBX_RESULT_TRUE from page_alloc(). + - Fix false-positive ASAN issue. + - Fix assertion for `MDBX_NOTLS` option. + - Rework MADV_DONTNEED threshold. + - Fix `mdbx_chk` utility for don't checking some numbers if walking of B-tree was disabled. + - Use page's mp_txnid for basic integrity checking. + - Add MDBX_FORCE_ASSERTIONS built-time option. + - Rework MDBX_DBG_DUMP to avoid performance degradation. + - Rename MDBX_NOSYNC to MDBX_SAFE_NOSYNC for clarity. + - Interpret `ERROR_ACCESS_DENIED` from `OpenProcess()` as 'process exists'. + - Avoid using FILE_FLAG_NO_BUFFERING for compatibility with small database pages. + - Added install section for CMake. + +v0.4.0 2019-12-02: + - Support for Mac OSX, FreeBSD, NetBSD, OpenBSD, DragonFly BSD, OpenSolaris, OpenIndiana (AIX and HP-UX pending). + - Use bootid for decisions of rollback. + - Counting retired pages and extended transaction info. + - Add MDBX_ACCEDE flag for database opening. + - Using OFD-locks and tracking for in-process multi-opening. + - Hot backup into pipe. + - Support for cmake & amalgamated sources. + - Fastest internal sort implementation. + - New internal dirty-list implementation with lazy sorting. + - Support for lazy-sync-to-disk with polling. + - Extended key length. 
+ - Last update transaction number for each sub-database. + - Automatic read ahead enabling/disabling. + - More auto-compactification. + - Using -fsanitize=undefined and -Wpedantic options. + - Rework page merging. + - Nested transactions. + - API description. + - Checking for non-local filesystems to avoid DB corruption. diff --git a/libs/libmdbx/src/GNUmakefile b/libs/libmdbx/src/GNUmakefile index 6aeb1f92ee..22a1c7874d 100644 --- a/libs/libmdbx/src/GNUmakefile +++ b/libs/libmdbx/src/GNUmakefile @@ -26,7 +26,7 @@ CFLAGS ?= -O2 -g -Wall -Werror -Wextra -Wpedantic -ffunction-sections -fPIC -fv # -Wno-tautological-compare # HINT: Try append '--no-as-needed,-lrt' for ability to built with modern glibc, but then run with the old. -LIBS ?= $(shell uname | grep -qi SunOS && echo "-lkstat") $(shell uname | grep -qi -e Darwin -e OpenBSD || echo "-lrt") +LIBS ?= $(shell uname | grep -qi SunOS && echo "-lkstat") $(shell uname | grep -qi -e Darwin -e OpenBSD || echo "-lrt") $(shell uname | grep -qi Windows && echo "-lntdll") LDFLAGS ?= $(shell $(LD) --help 2>/dev/null | grep -q -- --gc-sections && echo '-Wl,--gc-sections,-z,relro,-O1')$(shell $(LD) --help 2>/dev/null | grep -q -- -dead_strip && echo '-Wl,-dead_strip') EXE_LDFLAGS ?= -pthread @@ -113,64 +113,91 @@ define uname2titer esac endef -DIST_EXTRA := LICENSE README.md CMakeLists.txt GNUmakefile Makefile VERSION config.h.in \ +DIST_EXTRA := LICENSE README.md CMakeLists.txt GNUmakefile Makefile ChangeLog.md VERSION config.h.in ntdll.def \ $(addprefix man1/, $(MANPAGES)) cmake/compiler.cmake cmake/profile.cmake cmake/utils.cmake DIST_SRC := mdbx.h mdbx.c $(addsuffix .c, $(TOOLS)) TEST_DB ?= $(shell [ -d /dev/shm ] && echo /dev/shm || echo /tmp)/mdbx-test.db -TEST_LOG ?= $(shell [ -d /dev/shm ] && echo /dev/shm || echo /tmp)/mdbx-test.log +TEST_LOG ?= $(shell [ -d /dev/shm ] && echo /dev/shm || echo /tmp)/mdbx-test.log.gz TEST_OSAL := $(shell $(uname2osal)) TEST_ITER := $(shell $(uname2titer)) TEST_SRC := 
test/osal-$(TEST_OSAL).cc $(filter-out $(wildcard test/osal-*.cc), $(wildcard test/*.cc)) TEST_INC := $(wildcard test/*.h) TEST_OBJ := $(patsubst %.cc,%.o,$(TEST_SRC)) CXX ?= g++ -CXXSTD ?= $(shell $(CXX) -std=c++27 -c test/test.cc -o /dev/null 2>/dev/null && echo -std=c++17 || echo -std=c++11) +CXXSTD ?= $(shell $(CXX) -std=c++17 -c test/test.cc -o /dev/null 2>/dev/null && echo -std=c++17 || echo -std=c++11) CXXFLAGS := $(CXXSTD) $(filter-out -std=gnu11,$(CFLAGS)) TAR ?= $(shell which gnu-tar || echo tar) +ZIP ?= $(shell which zip || echo "echo 'Please install zip'") +CLANG_FORMAT ?= $(shell (which clang-format-12 || which clang-format-11 || which clang-format-10 || which clang-format) 2>/dev/null) + +reformat: + @if [ -n "$(CLANG_FORMAT)" ]; then \ + git ls-files | grep -E '\.(c|cxx|cc|cpp|h|hxx|hpp)(\.in)?$$' | xargs -r $(CLANG_FORMAT) -i --style=file; \ + else \ + echo "clang-format version 8..12 not found for 'reformat'"; \ + fi MAN_SRCDIR := src/man1/ ALLOY_DEPS := $(wildcard src/*) -MDBX_VERSION_GIT = ${shell set -o pipefail; git describe --tags | sed -n 's|^v*\([0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}\)\(.*\)|\1|p' || echo 'Please fetch tags and/or install latest git version'} -MDBX_REVISION_GIT = $(shell git rev-list --count --no-merges HEAD || echo 'Please fetch tags and/or install latest git version') +MDBX_GIT_VERSION = ${shell set -o pipefail; git describe --tags | sed -n 's|^v*\([0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}\)\(.*\)|\1|p' || echo 'Please fetch tags and/or install latest git version'} +MDBX_GIT_REVISION = $(shell git rev-list --count HEAD ^`git tag --sort=-version:refname | sed -n '/^\(v[0-9]\+\.[0-9]\+\.[0-9]\+\)*/p;q'`) MDBX_GIT_TIMESTAMP = $(shell git show --no-patch --format=%cI HEAD || echo 'Please install latest get version') MDBX_GIT_DESCRIBE = $(shell git describe --tags --long --dirty=-dirty || echo 'Please fetch tags and/or install latest git version') MDBX_VERSION_SUFFIX = $(shell set -o pipefail; echo -n '$(MDBX_GIT_DESCRIBE)' | tr -c 
-s '[a-zA-Z0-9]' _) MDBX_BUILD_SOURCERY = $(shell set -o pipefail; $(MAKE) -s src/version.c && (openssl dgst -r -sha256 src/version.c || sha256sum src/version.c || shasum -a 256 src/version.c) 2>/dev/null | cut -d ' ' -f 1 || echo 'Please install openssl or sha256sum or shasum')_$(MDBX_VERSION_SUFFIX) +MDBX_DIST_DIR = libmdbx-$(MDBX_VERSION_SUFFIX) -check: test mdbx_example dist +check: test dist -test: all mdbx_example mdbx_test +test: build-test rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \ - (./mdbx_test --progress --console=no --repeat=$(TEST_ITER) --pathname=$(TEST_DB) --dont-cleanup-after basic && \ + (./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=$(TEST_ITER) --pathname=$(TEST_DB) --dont-cleanup-after basic && \ ./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=12 --pathname=$(TEST_DB) --dont-cleanup-after basic) \ - | tee -a $(TEST_LOG) | tail -n 42) \ + | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \ && ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy -mdbx_example: mdbx.h example/example-mdbx.c libmdbx.$(SO_SUFFIX) - $(CC) $(CFLAGS) -I. 
example/example-mdbx.c ./libmdbx.$(SO_SUFFIX) -o $@ - test-singleprocess: all mdbx_test rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \ - (./mdbx_test --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --hill && \ + (./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --hill && \ ./mdbx_test --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \ ./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=42 --pathname=$(TEST_DB) --dont-cleanup-after --nested) \ - | tee -a $(TEST_LOG) | tail -n 42) \ + | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \ && ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy test-fault: all mdbx_test - rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --inject-writefault=42 --dump-config --dont-cleanup-after basic | tee -a $(TEST_LOG) | tail -n 42) \ + rm -f $(TEST_DB) $(TEST_LOG) && (set -o pipefail; ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --inject-writefault=42 --dump-config --dont-cleanup-after basic \ + | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) \ ; ./mdbx_chk -vvnw $(TEST_DB) && ([ ! -e $(TEST_DB)-copy ] || ./mdbx_chk -vvn $(TEST_DB)-copy) VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt -memcheck test-valgrind: all mdbx_test - @echo "$(MDBX_OPTIONS)" | grep -q MDBX_USE_VALGRIND || echo "WARNING: Please build libmdbx with -DMDBX_USE_VALGRIND to avoid false-positives from Valgrind !!!" 
>&2 - rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG) && (set -o pipefail; \ - ($(VALGRIND) ./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after basic && \ +memcheck test-valgrind: + $(MAKE) clean && $(MAKE) CFLAGS_EXTRA="-Ofast -DMDBX_USE_VALGRIND" build-test && \ + rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG) && (set -o pipefail; ( \ + $(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after basic && \ $(VALGRIND) ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \ - $(VALGRIND) ./mdbx_test --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after basic) \ - | tee -a $(TEST_LOG) | tail -n 42) \ - && $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && ./mdbx_chk -vvn $(TEST_DB)-copy + $(VALGRIND) ./mdbx_test --mode=-writemap,-mapasync,-lifo --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after basic && \ + $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && \ + $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB)-copy \ + ) | tee >(gzip --stdout > $(TEST_LOG)) | tail -n 42) + +gcc-analyzer: + @echo "NOTE: There a lot of false-positive warnings at 2020-05-01 by pre-release GCC-10 (20200328, Red Hat 10.0.1-0.11)" + $(MAKE) --always-make CFLAGS_EXTRA="-Og -fanalyzer -Wno-error" build-test + +test-ubsan: + $(MAKE) clean && $(MAKE) CFLAGS_EXTRA="-Ofast -fsanitize=undefined -fsanitize-undefined-trap-on-error" check + +test-asan: + $(MAKE) clean && $(MAKE) CFLAGS_EXTRA="-Os -fsanitize=address" check + +test-leak: + $(MAKE) clean && $(MAKE) CFLAGS_EXTRA="-fsanitize=leak" check + +mdbx_example: mdbx.h example/example-mdbx.c libmdbx.$(SO_SUFFIX) + $(CC) $(CFLAGS) -I. 
example/example-mdbx.c ./libmdbx.$(SO_SUFFIX) -o $@ + +build-test: all mdbx_example mdbx_test define test-rule $(patsubst %.cc,%.o,$(1)): $(1) $(TEST_INC) mdbx.h $(lastword $(MAKEFILE_LIST)) @@ -193,10 +220,10 @@ src/version.c: src/version.c.in $(lastword $(MAKEFILE_LIST)) $(git_DIR)/HEAD $(g -e "s|@MDBX_GIT_TREE@|$(shell git show --no-patch --format=%T HEAD || echo 'Please install latest get version')|" \ -e "s|@MDBX_GIT_COMMIT@|$(shell git show --no-patch --format=%H HEAD || echo 'Please install latest get version')|" \ -e "s|@MDBX_GIT_DESCRIBE@|$(MDBX_GIT_DESCRIBE)|" \ - -e "s|\$${MDBX_VERSION_MAJOR}|$(shell echo '$(MDBX_VERSION_GIT)' | cut -d . -f 1)|" \ - -e "s|\$${MDBX_VERSION_MINOR}|$(shell echo '$(MDBX_VERSION_GIT)' | cut -d . -f 2)|" \ - -e "s|\$${MDBX_VERSION_RELEASE}|$(shell echo '$(MDBX_VERSION_GIT)' | cut -d . -f 3)|" \ - -e "s|\$${MDBX_VERSION_REVISION}|$(MDBX_REVISION_GIT)|" \ + -e "s|\$${MDBX_VERSION_MAJOR}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . -f 1)|" \ + -e "s|\$${MDBX_VERSION_MINOR}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . -f 2)|" \ + -e "s|\$${MDBX_VERSION_RELEASE}|$(shell echo '$(MDBX_GIT_VERSION)' | cut -d . 
-f 3)|" \ + -e "s|\$${MDBX_VERSION_REVISION}|$(MDBX_GIT_REVISION)|" \ src/version.c.in > $@ src/config.h: src/version.c $(lastword $(MAKEFILE_LIST)) @@ -213,13 +240,18 @@ mdbx-dylib.o: src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) $(lastword $( mdbx-static.o: src/config.h src/version.c src/alloy.c $(ALLOY_DEPS) $(lastword $(MAKEFILE_LIST)) $(CC) $(CFLAGS) $(MDBX_OPTIONS) '-DMDBX_CONFIG_H="config.h"' -ULIBMDBX_EXPORTS -c src/alloy.c -o $@ -.PHONY: dist +.PHONY: dist release-assets dist: libmdbx-sources-$(MDBX_VERSION_SUFFIX).tar.gz $(lastword $(MAKEFILE_LIST)) -libmdbx-sources-$(MDBX_VERSION_SUFFIX).tar.gz: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) $(addprefix dist/man1/,$(MANPAGES)) - $(TAR) -c $(shell LC_ALL=C $(TAR) --help | grep -q -- '--owner' && echo '--owner=0 --group=0') -f - -C dist $(DIST_SRC) $(DIST_EXTRA) | gzip -c > $@ \ +release-assets: libmdbx-sources-$(MDBX_VERSION_SUFFIX).tar.gz libmdbx-sources-$(MDBX_VERSION_SUFFIX).zip + +libmdbx-sources-$(MDBX_VERSION_SUFFIX).tar.gz: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) + $(TAR) -c $(shell LC_ALL=C $(TAR) --help | grep -q -- '--owner' && echo '--owner=0 --group=0') -f - -C dist $(DIST_SRC) $(DIST_EXTRA) | gzip -c -9 > $@ \ && rm dist/@tmp-shared_internals.inc +libmdbx-sources-$(MDBX_VERSION_SUFFIX).zip: $(addprefix dist/, $(DIST_SRC) $(DIST_EXTRA)) + rm -rf $@ && (cd dist && $(ZIP) -9 ../$@ $(DIST_SRC) $(DIST_EXTRA)) || rm -rf $@ + dist/mdbx.h: mdbx.h src/version.c $(lastword $(MAKEFILE_LIST)) mkdir -p dist && cp $< $@ @@ -255,10 +287,13 @@ dist/$(1): $(1) mkdir -p $$(dir $$@) && sed -e '/^#> dist-cutoff-begin/,/^#< dist-cutoff-end/d' $$< > $$@ endef -$(foreach file,$(filter-out man1/% VERSION %.in,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) +$(foreach file,$(filter-out man1/% VERSION %.in ntdll.def,$(DIST_EXTRA)),$(eval $(call dist-extra-rule,$(file)))) dist/VERSION: src/version.c - mkdir -p dist/ && echo "$(MDBX_VERSION_GIT).$(MDBX_REVISION_GIT)" > $@ + mkdir -p dist/ && echo 
"$(MDBX_GIT_VERSION).$(MDBX_GIT_REVISION)" > $@ + +dist/ntdll.def: src/ntdll.def + mkdir -p dist/cmake/ && cp $< $@ dist/config.h.in: src/config.h.in mkdir -p dist/cmake/ && cp $< $@ diff --git a/libs/libmdbx/src/Makefile b/libs/libmdbx/src/Makefile index 0595d23524..1898655ec2 100644 --- a/libs/libmdbx/src/Makefile +++ b/libs/libmdbx/src/Makefile @@ -1,4 +1,4 @@ -all install mdbx tools strip clean test check dist test-singleprocess test-fault memcheck test-valgrind cross-gcc cross-qemu bench bench-quartet clean-bench: +all bench bench-quartet build-test check clean clean-bench cross-gcc cross-qemu dist gcc-analyzer install mdbx memcheck reformat release-assets strip test test-asan test-fault test-leak test-singleprocess test-ubsan test-valgrind tools: @CC=$(CC) \ CXX=`if test -n "$(CXX)" && which "$(CXX)" > /dev/null; then echo "$(CXX)"; elif test -n "$(CCC)" && which "$(CCC)" > /dev/null; then echo "$(CCC)"; else echo "c++"; fi` \ `which gmake || which gnumake || echo 'echo "GNU Make is required"; exit 2;'` \ diff --git a/libs/libmdbx/src/README.md b/libs/libmdbx/src/README.md index 9685648bb4..6f02ab467d 100644 --- a/libs/libmdbx/src/README.md +++ b/libs/libmdbx/src/README.md @@ -61,8 +61,10 @@ _MithrilDB_ is rightly relevant name. 
[![https://t.me/libmdbx](https://raw.githubusercontent.com/wiki/erthink/libmdbx/img/telegram.png)](https://t.me/libmdbx) [![Build Status](https://travis-ci.org/erthink/libmdbx.svg?branch=master)](https://travis-ci.org/erthink/libmdbx) -[![Build status](https://ci.appveyor.com/api/projects/status/ue94mlopn50dqiqg/branch/master?svg=true)](https://ci.appveyor.com/project/erthink/libmdbx/branch/master) +[![Build status](https://ci.appveyor.com/api/projects/status/ue94mlopn50dqiqg/branch/master?svg=true)](https://ci.appveyor.com/project/leo-yuriev/libmdbx/branch/master) +[![CircleCI](https://circleci.com/gh/erthink/libmdbx/tree/master.svg?style=svg)](https://circleci.com/gh/erthink/libmdbx/tree/master) [![Coverity Scan Status](https://scan.coverity.com/projects/12915/badge.svg)](https://scan.coverity.com/projects/reopen-libmdbx) +[![Build Status](https://api.cirrus-ci.com/github/erthink/libmdbx.svg)](https://cirrus-ci.com/github/erthink/libmdbx) *The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо.* @@ -263,7 +265,7 @@ pre-opening is not needed. > contrary to LMDB. Nevertheless, the `MDBX_UTTERLY_NOSYNC` mode available to match LMDB behaviour, > and for a special use-cases. -6. On **MacOS** the `fcntl(F_FULLFSYNC)` syscall is used _by +6. On **MacOS & iOS** the `fcntl(F_FULLFSYNC)` syscall is used _by default_ to synchronize data with the disk, as this is [the only way to guarantee data durability](https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fsync.2.html) @@ -384,8 +386,8 @@ the basic tests. ### Windows For build _libmdbx_ on Windows the _original_ CMake and [Microsoft Visual -Studio](https://en.wikipedia.org/wiki/Microsoft_Visual_Studio) are -recommended. +Studio 2019](https://en.wikipedia.org/wiki/Microsoft_Visual_Studio) are +recommended. Otherwise do not forget add `ntdll.lib` to linking. Building by MinGW, MSYS or Cygwin is potentially possible. 
However, these scripts are not tested and will probably require you to modify the @@ -484,7 +486,7 @@ Here showed sum of performance metrics in 3 benchmarks: ## Read Scalability Summary performance with concurrent read/search queries in 1-2-4-8 -threads on 4 CPU cores machine. +threads on machine with 4 logical CPU in HyperThreading mode (i.e. actually 2 physical CPU cores). ![Comparison #2: Read Scalability](https://raw.githubusercontent.com/wiki/erthink/libmdbx/img/perf-slide-2.png) diff --git a/libs/libmdbx/src/appveyor.yml b/libs/libmdbx/src/appveyor.yml index efdeae648d..15fd7dbb25 100644 --- a/libs/libmdbx/src/appveyor.yml +++ b/libs/libmdbx/src/appveyor.yml @@ -1,4 +1,4 @@ -version: 0.7.0.{build} +version: 0.8.0.{build} environment: matrix: diff --git a/libs/libmdbx/src/cmake/compiler.cmake b/libs/libmdbx/src/cmake/compiler.cmake index a3bca1ac29..544a22d9fb 100644 --- a/libs/libmdbx/src/cmake/compiler.cmake +++ b/libs/libmdbx/src/cmake/compiler.cmake @@ -64,11 +64,11 @@ endmacro(check_compiler_flag) # We support building with Clang and gcc. First check # what we're using for build. 
-if(CMAKE_C_COMPILER_LOADED AND CMAKE_C_COMPILER_ID STREQUAL "Clang") +if(CMAKE_C_COMPILER_LOADED AND CMAKE_C_COMPILER_ID MATCHES ".*[Cc][Ll][Aa][Nn][Gg].*") set(CMAKE_COMPILER_IS_CLANG ON) set(CMAKE_COMPILER_IS_GNUCC OFF) endif() -if(CMAKE_CXX_COMPILER_LOADED AND CMAKE_CXx_COMPILER_ID STREQUAL "Clang") +if(CMAKE_CXX_COMPILER_LOADED AND CMAKE_CXX_COMPILER_ID MATCHES ".*[Cc][Ll][Aa][Nn][Gg].*") set(CMAKE_COMPILER_IS_CLANG ON) set(CMAKE_COMPILER_IS_GNUCXX OFF) endif() @@ -161,40 +161,47 @@ if(NOT DEFINED IOS) endif() endif() +if(NOT DEFINED CMAKE_TARGET_BITNESS) + if (CMAKE_SIZEOF_VOID_P LESS 4) + set(CMAKE_TARGET_BITNESS 16) + elseif (CMAKE_SIZEOF_VOID_P LESS 8) + set(CMAKE_TARGET_BITNESS 32) + else() + set(CMAKE_TARGET_BITNESS 64) + endif() +endif() + if(CMAKE_COMPILER_IS_ELBRUSC OR CMAKE_SYSTEM_PROCESSOR MATCHES "e2k.*|E2K.*|elbrus.*|ELBRUS.*") set(E2K TRUE) set(CMAKE_SYSTEM_ARCH "Elbrus") -elseif((MSVC64 OR MINGW64) AND CMAKE_SIZEOF_VOID_P EQUAL 8) +elseif((MSVC64 OR MINGW64) AND CMAKE_TARGET_BITNESS EQUAL 64) set(X86_64 TRUE) set(CMAKE_SYSTEM_ARCH "x86_64") elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING)) set(X86_32 TRUE) set(CMAKE_SYSTEM_ARCH "x86") -elseif(CMAKE_COMPILER_IS_ELBRUSC OR CMAKE_SYSTEM_PROCESSOR MATCHES "e2k.*|E2K.*|elbrus.*|ELBRUS.*") - set(E2K TRUE) - set(CMAKE_SYSTEM_ARCH "Elbrus") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*" AND CMAKE_SIZEOF_VOID_P EQUAL 8) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*" AND CMAKE_TARGET_BITNESS EQUAL 64) set(X86_64 TRUE) set(CMAKE_SYSTEM_ARCH "x86_64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*") set(X86_32 TRUE) set(CMAKE_SYSTEM_ARCH "x86") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|ARM64.*)" AND CMAKE_SIZEOF_VOID_P EQUAL 8) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|ARM64.*)" AND CMAKE_TARGET_BITNESS EQUAL 64) set(AARCH64 TRUE) set(CMAKE_SYSTEM_ARCH "ARM64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)") 
set(ARM32 TRUE) set(CMAKE_SYSTEM_ARCH "ARM") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le.*" AND CMAKE_SIZEOF_VOID_P EQUAL 8) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le.*" AND CMAKE_TARGET_BITNESS EQUAL 64) set(PPC64LE TRUE) set(CMAKE_SYSTEM_ARCH "PPC64LE") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64.*" AND CMAKE_SIZEOF_VOID_P EQUAL 8) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64.*" AND CMAKE_TARGET_BITNESS EQUAL 64) set(PPC64 TRUE) set(CMAKE_SYSTEM_ARCH "PPC64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc).*") set(PPC32 TRUE) set(CMAKE_SYSTEM_ARCH "PPC") -elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(mips|MIPS)64.*" AND CMAKE_SIZEOF_VOID_P EQUAL 8) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(mips|MIPS)64.*" AND CMAKE_TARGET_BITNESS EQUAL 64) set(MIPS64 TRUE) set(CMAKE_SYSTEM_ARCH "MIPS64") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(mips|MIPS).*") @@ -213,7 +220,9 @@ else() check_compiler_flag("-Wextra" CC_HAS_WEXTRA) check_compiler_flag("-Werror" CC_HAS_WERROR) check_compiler_flag("-fexceptions" CC_HAS_FEXCEPTIONS) - check_cxx_compiler_flag("-fcxx-exceptions" CC_HAS_FCXX_EXCEPTIONS) + if(CMAKE_CXX_COMPILER_LOADED) + check_cxx_compiler_flag("-fcxx-exceptions" CC_HAS_FCXX_EXCEPTIONS) + endif() check_compiler_flag("-funwind-tables" CC_HAS_FUNWIND_TABLES) check_compiler_flag("-fno-omit-frame-pointer" CC_HAS_FNO_OMIT_FRAME_POINTER) check_compiler_flag("-fno-common" CC_HAS_FNO_COMMON) @@ -229,10 +238,17 @@ else() # Check for an omp support set(CMAKE_REQUIRED_FLAGS "-fopenmp -Werror") - check_cxx_source_compiles("int main(void) { - #pragma omp parallel - return 0; - }" HAVE_OPENMP) + if(CMAKE_CXX_COMPILER_LOADED) + check_cxx_source_compiles("int main(void) { + #pragma omp parallel + return 0; + }" HAVE_OPENMP) + else() + check_c_source_compiles("int main(void) { + #pragma omp parallel + return 0; + }" HAVE_OPENMP) + endif() set(CMAKE_REQUIRED_FLAGS "") endif() @@ -312,61 +328,81 @@ if(CMAKE_COMPILER_IS_CLANG) 
execute_process(COMMAND ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} -print-search-dirs OUTPUT_VARIABLE clang_search_dirs RESULT_VARIABLE clang_probe_result ERROR_QUIET) - unset(clang_bindir) - unset(clang_libdir) + unset(clang_bindirs) + unset(clang_libdirs) if(clang_probe_result EQUAL 0) - string(REGEX MATCH "^(.*programs: =)([^:]*:)*([^:]+/llvm[-.0-9]+/bin[^:]*)(:[^:]*)*(\n.+)$" clang_bindir_valid ${clang_search_dirs}) - if(clang_bindir_valid) - string(REGEX REPLACE "^(.*programs: =)([^:]*:)*([^:]+/llvm[-.0-9]+/bin[^:]*)(:[^:]*)*(\n.+)$" "\\3" clang_bindir ${clang_search_dirs}) - if(CMAKE_SYSTEM_NAME STREQUAL "WINDOWS") - set(clang_libdir ${clang_bindir}) - else() - get_filename_component(clang_libdir "${clang_bindir}/../lib" REALPATH) - endif() + string(REGEX MATCH "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" regexp_valid ${clang_search_dirs}) + if(regexp_valid) + string(REGEX REPLACE "(^|\n.*)(.*programs: =)([^\n]+)((\n.*)|$)" "\\3" list ${clang_search_dirs}) + string(REPLACE ":" ";" list "${list}") + #set(clang_bindirs "") + foreach(dir IN LISTS list) + get_filename_component(dir "${dir}" REALPATH) + list(APPEND clang_bindirs "${dir}") + endforeach() + list(REMOVE_DUPLICATES clang_bindirs) + endif() + string(REGEX MATCH "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" regexp_valid ${clang_search_dirs}) + if(regexp_valid) + string(REGEX REPLACE "(^|\n.*)(.*libraries: =)([^\n]+)((\n.*)|$)" "\\3" list ${clang_search_dirs}) + string(REPLACE ":" ";" list "${list}") + #set(clang_libdirs "") + foreach(dir IN LISTS list) + get_filename_component(dir "${dir}" REALPATH) + list(APPEND clang_libdirs "${dir}") + endforeach() + list(REMOVE_DUPLICATES clang_libdirs) endif() else() - get_filename_component(clang_bindir ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} DIRECTORY) + get_filename_component(clang_bindirs ${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER} DIRECTORY) if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows") - set(clang_libdir ${clang_bindir}) + set(clang_libdirs ${clang_bindirs}) else() - 
get_filename_component(clang_libdir "${clang_bindir}/../lib" REALPATH) + get_filename_component(clang_libdirs "${CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER}/../lib" REALPATH) endif() endif() - if(clang_bindir AND clang_libdir) - message(STATUS "Found CLANG/LLVM directories: ${clang_bindir}, ${clang_libdir}") + if(clang_bindirs AND clang_libdirs) + message(STATUS "Found CLANG/LLVM directories: ${clang_bindirs}, ${clang_libdirs}") else() message(STATUS "Could NOT find CLANG/LLVM directories (bin and/or lib).") endif() - if(NOT CMAKE_CLANG_LD AND clang_bindir) - find_program(CMAKE_CLANG_LD NAMES lld-link ld.lld ld64.lld lld-link llvm-link link llvm-ld ld PATHS ${clang_bindir} NO_DEFAULT_PATH) + if(NOT CMAKE_CLANG_LD AND clang_bindirs) + find_program(CMAKE_CLANG_LD NAMES lld-link ld.lld "ld${CMAKE_TARGET_BITNESS}.lld" lld llvm-link llvm-ld PATHS ${clang_bindirs} NO_DEFAULT_PATH) endif() - if(NOT CMAKE_CLANG_AR AND clang_bindir) - find_program(CMAKE_CLANG_AR NAMES llvm-ar ar PATHS ${clang_bindir} NO_DEFAULT_PATH) + if(NOT CMAKE_CLANG_AR AND clang_bindirs) + find_program(CMAKE_CLANG_AR NAMES llvm-ar ar PATHS ${clang_bindirs} NO_DEFAULT_PATH) endif() - if(NOT CMAKE_CLANG_NM AND clang_bindir) - find_program(CMAKE_CLANG_NM NAMES llvm-nm nm PATHS ${clang_bindir} NO_DEFAULT_PATH) + if(NOT CMAKE_CLANG_NM AND clang_bindirs) + find_program(CMAKE_CLANG_NM NAMES llvm-nm nm PATHS ${clang_bindirs} NO_DEFAULT_PATH) endif() - if(NOT CMAKE_CLANG_RANLIB AND clang_bindir) - find_program(CMAKE_CLANG_RANLIB NAMES llvm-ranlib ranlib PATHS ${clang_bindir} NO_DEFAULT_PATH) + if(NOT CMAKE_CLANG_RANLIB AND clang_bindirs) + find_program(CMAKE_CLANG_RANLIB NAMES llvm-ranlib ranlib PATHS ${clang_bindirs} NO_DEFAULT_PATH) endif() set(clang_lto_plugin_name "LLVMgold${CMAKE_SHARED_LIBRARY_SUFFIX}") - if(NOT CMAKE_LD_GOLD AND clang_bindir) - find_program(CMAKE_LD_GOLD NAMES ld.gold PATHS) + if(NOT CMAKE_LD_GOLD AND clang_bindirs) + find_program(CMAKE_LD_GOLD NAMES ld.gold PATHS ${clang_bindirs}) endif() - 
if(NOT CLANG_LTO_PLUGIN AND clang_libdir) - find_file(CLANG_LTO_PLUGIN ${clang_lto_plugin_name} PATH ${clang_libdir} NO_DEFAULT_PATH) + if(NOT CLANG_LTO_PLUGIN AND clang_libdirs) + find_file(CLANG_LTO_PLUGIN ${clang_lto_plugin_name} PATHS ${clang_libdirs} NO_DEFAULT_PATH) endif() + if(CLANG_LTO_PLUGIN) message(STATUS "Found CLANG/LLVM's plugin for LTO: ${CLANG_LTO_PLUGIN}") else() message(STATUS "Could NOT find CLANG/LLVM's plugin (${clang_lto_plugin_name}) for LTO.") endif() - if(CMAKE_CLANG_LD AND CMAKE_CLANG_AR AND CMAKE_CLANG_NM AND CMAKE_CLANG_RANLIB) - message(STATUS "Found CLANG/LLVM's binutils for LTO: ${CMAKE_CLANG_AR}, ${CMAKE_CLANG_RANLIB}") + if(CMAKE_CLANG_LD) + message(STATUS "Found CLANG/LLVM's linker for LTO: ${CMAKE_CLANG_LD}") + else() + message(STATUS "Could NOT find CLANG/LLVM's linker (lld, llvm-ld, llvm-link) for LTO.") + endif() + + if(CMAKE_CLANG_AR AND CMAKE_CLANG_RANLIB AND CMAKE_CLANG_NM) + message(STATUS "Found CLANG/LLVM's binutils for LTO: ${CMAKE_CLANG_AR}, ${CMAKE_CLANG_RANLIB}, ${CMAKE_CLANG_NM}") else() message(STATUS "Could NOT find CLANG/LLVM's binutils (ar, ranlib, nm) for LTO.") endif() @@ -378,12 +414,19 @@ if(CMAKE_COMPILER_IS_CLANG) unset(clang_search_dirs) endif() - if((CLANG_LTO_PLUGIN AND CMAKE_LD_GOLD) OR - (CMAKE_CLANG_LD AND CMAKE_CLANG_AR AND CMAKE_CLANG_NM AND CMAKE_CLANG_RANLIB)) + if (CMAKE_CLANG_AR AND CMAKE_CLANG_NM AND CMAKE_CLANG_RANLIB + AND ((CLANG_LTO_PLUGIN AND CMAKE_LD_GOLD) + OR (CMAKE_CLANG_LD + AND NOT (CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux" + AND CMAKE_SYSTEM_NAME STREQUAL "Linux")) + OR APPLE)) set(CLANG_LTO_AVAILABLE TRUE) message(STATUS "Link-Time Optimization by CLANG/LLVM is available") elseif(CMAKE_TOOLCHAIN_FILE AND NOT CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 7.0) set(CLANG_LTO_AVAILABLE TRUE) + if(NOT CMAKE_CLANG_LD) + set(CMAKE_CLANG_LD ${CMAKE_LINKER}) + endif() if(NOT CMAKE_CLANG_AR) set(CMAKE_CLANG_AR ${CMAKE_AR}) endif() @@ -442,9 +485,6 @@ endif() 
macro(setup_compile_flags) # save initial C/CXX flags if(NOT INITIAL_CMAKE_FLAGS_SAVED) - if(MSVC) - string(REGEX REPLACE "^(.*)(/EHsc)( *)(.*)$" "\\1/EHs\\3\\4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - endif() set(INITIAL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) set(INITIAL_CMAKE_C_FLAGS ${CMAKE_C_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) set(INITIAL_CMAKE_EXE_LINKER_FLAGS ${CMAKE_EXE_LINKER_FLAGS} CACHE STRING "Initial CMake's flags" FORCE) @@ -468,6 +508,22 @@ macro(setup_compile_flags) if(CC_HAS_FCXX_EXCEPTIONS) add_compile_flags("CXX" "-fcxx-exceptions -frtti") endif() + if(MSVC) + # checks for /EHa or /clr options exists, + # i.e. is enabled structured async WinNT exceptions + string(REGEX MATCH "^(.* )*[-/]EHc*a( .*)*$" msvc_async_eh_enabled "${CXX_FLAGS}" "${C_FLAGS}") + string(REGEX MATCH "^(.* )*[-/]clr( .*)*$" msvc_clr_enabled "${CXX_FLAGS}" "${C_FLAGS}") + # remote any /EH? options + string(REGEX REPLACE "( *[-/]-*EH[csa]+ *)+" "" CXX_FLAGS "${CXX_FLAGS}") + string(REGEX REPLACE "( *[-/]-*EH[csa]+ *)+" "" C_FLAGS "${C_FLAGS}") + if (msvc_clr_enabled STREQUAL "") + if(NOT msvc_async_eh_enabled STREQUAL "") + add_compile_flags("C;CXX" "/EHa") + else() + add_compile_flags("C;CXX" "/EHsc") + endif() + endif() + endif(MSVC) if(CC_HAS_WNO_ATTRIBUTES AND CMAKE_COMPILER_IS_GNU${CMAKE_PRIMARY_LANG} AND CMAKE_${CMAKE_PRIMARY_LANG}_COMPILER_VERSION VERSION_LESS 9) diff --git a/libs/libmdbx/src/cmake/utils.cmake b/libs/libmdbx/src/cmake/utils.cmake index 13add097dc..8e98267f4e 100644 --- a/libs/libmdbx/src/cmake/utils.cmake +++ b/libs/libmdbx/src/cmake/utils.cmake @@ -61,7 +61,7 @@ macro(set_source_files_compile_flags) unset(_lang) endmacro(set_source_files_compile_flags) -macro(fetch_version name version_file parent_scope) +macro(fetch_version name source_root_directory parent_scope) set(${name}_VERSION "") set(${name}_GIT_DESCRIBE "") set(${name}_GIT_TIMESTAMP "") @@ -69,11 +69,11 @@ macro(fetch_version name 
version_file parent_scope) set(${name}_GIT_COMMIT "") set(${name}_GIT_REVISION 0) set(${name}_GIT_VERSION "") - if(GIT AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") + if(GIT AND EXISTS "${source_root_directory}/.git") execute_process(COMMAND ${GIT} describe --tags --long --dirty=-dirty OUTPUT_VARIABLE ${name}_GIT_DESCRIBE OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE rc) if(rc OR "${name}_GIT_DESCRIBE" STREQUAL "") message(FATAL_ERROR "Please fetch tags and/or install latest version of git ('describe --tags --long --dirty' failed)") @@ -82,13 +82,13 @@ macro(fetch_version name version_file parent_scope) execute_process(COMMAND ${GIT} show --no-patch --format=%cI HEAD OUTPUT_VARIABLE ${name}_GIT_TIMESTAMP OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE rc) if(rc OR "${name}_GIT_TIMESTAMP" STREQUAL "%cI") execute_process(COMMAND ${GIT} show --no-patch --format=%ci HEAD OUTPUT_VARIABLE ${name}_GIT_TIMESTAMP OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE rc) if(rc OR "${name}_GIT_TIMESTAMP" STREQUAL "%ci") message(FATAL_ERROR "Please install latest version of git ('show --no-patch --format=%cI HEAD' failed)") @@ -98,7 +98,7 @@ macro(fetch_version name version_file parent_scope) execute_process(COMMAND ${GIT} show --no-patch --format=%T HEAD OUTPUT_VARIABLE ${name}_GIT_TREE OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE rc) if(rc OR "${name}_GIT_TREE" STREQUAL "") message(FATAL_ERROR "Please install latest version of git ('show --no-patch --format=%T HEAD' failed)") @@ -107,19 +107,36 @@ macro(fetch_version name version_file parent_scope) execute_process(COMMAND ${GIT} show --no-patch --format=%H 
HEAD OUTPUT_VARIABLE ${name}_GIT_COMMIT OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE rc) if(rc OR "${name}_GIT_COMMIT" STREQUAL "") message(FATAL_ERROR "Please install latest version of git ('show --no-patch --format=%H HEAD' failed)") endif() - execute_process(COMMAND ${GIT} rev-list --count --no-merges HEAD + execute_process(COMMAND ${GIT} tag --sort=-version:refname + OUTPUT_VARIABLE tag_list + OUTPUT_STRIP_TRAILING_WHITESPACE + WORKING_DIRECTORY ${source_root_directory} + RESULT_VARIABLE rc) + if(rc) + message(FATAL_ERROR "Please install latest version of git ('tag --sort=-version:refname' failed)") + endif() + string(REGEX REPLACE "\n" ";" tag_list "${tag_list}") + set(last_release_tag "") + set(git_revlist_arg "HEAD") + foreach(tag IN LISTS tag_list) + if(NOT last_release_tag) + string(REGEX MATCH "^v[0-9]+(\.[0-9]+)+" last_release_tag "${tag}") + set(git_revlist_arg "${tag}..HEAD") + endif() + endforeach(tag) + execute_process(COMMAND ${GIT} rev-list --count "${git_revlist_arg}" OUTPUT_VARIABLE ${name}_GIT_REVISION OUTPUT_STRIP_TRAILING_WHITESPACE - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${source_root_directory} RESULT_VARIABLE rc) if(rc OR "${name}_GIT_REVISION" STREQUAL "") - message(FATAL_ERROR "Please install latest version of git ('rev-list --count --no-merges HEAD' failed)") + message(FATAL_ERROR "Please install latest version of git ('rev-list --count ${git_revlist_arg}' failed)") endif() string(REGEX MATCH "^(v)?([0-9]+)\\.([0-9]+)\\.([0-9]+)(.*)?" 
git_version_valid "${${name}_GIT_DESCRIBE}") @@ -136,8 +153,8 @@ macro(fetch_version name version_file parent_scope) endif() endif() - if(NOT ${name}_GIT_VERSION OR NOT ${name}_GIT_TIMESTAMP OR NOT ${name}_GIT_REVISION) - if(GIT AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git") + if(NOT ${name}_GIT_VERSION OR NOT ${name}_GIT_TIMESTAMP OR ${name}_GIT_REVISION STREQUAL "") + if(GIT AND EXISTS "${source_root_directory}/.git") message(WARNING "Unable to retrive ${name} version from git.") endif() set(${name}_GIT_VERSION "0;0;0;0") @@ -145,8 +162,9 @@ macro(fetch_version name version_file parent_scope) set(${name}_GIT_REVISION 0) # Try to get version from VERSION file + set(version_file "${source_root_directory}/VERSION") if(EXISTS "${version_file}") - file(STRINGS "${version_file}" ${name}_VERSION) + file(STRINGS "${version_file}" ${name}_VERSION LIMIT_COUNT 1 LIMIT_INPUT 42) endif() if(NOT ${name}_VERSION) @@ -168,32 +186,19 @@ macro(fetch_version name version_file parent_scope) list(GET ${name}_VERSION_LIST 2 "${name}_VERSION_RELEASE") list(GET ${name}_VERSION_LIST 3 "${name}_VERSION_REVISION") - if(parent_scope) - set(${name}_VERSION_MAJOR ${${name}_VERSION_MAJOR} PARENT_SCOPE) - set(${name}_VERSION_MINOR ${${name}_VERSION_MINOR} PARENT_SCOPE) - set(${name}_VERSION_RELEASE ${${name}_VERSION_RELEASE} PARENT_SCOPE) - set(${name}_VERSION_REVISION ${${name}_VERSION_REVISION} PARENT_SCOPE) - set(${name}_VERSION ${${name}_VERSION} PARENT_SCOPE) - - set(${name}_GIT_DESCRIBE ${${name}_GIT_DESCRIBE} PARENT_SCOPE) - set(${name}_GIT_TIMESTAMP ${${name}_GIT_TIMESTAMP} PARENT_SCOPE) - set(${name}_GIT_TREE ${${name}_GIT_TREE} PARENT_SCOPE) - set(${name}_GIT_COMMIT ${${name}_GIT_COMMIT} PARENT_SCOPE) - set(${name}_GIT_REVISION ${${name}_GIT_REVISION} PARENT_SCOPE) - set(${name}_GIT_VERSION ${${name}_GIT_VERSION} PARENT_SCOPE) - else() - set(${name}_VERSION_MAJOR ${${name}_VERSION_MAJOR}) - set(${name}_VERSION_MINOR ${${name}_VERSION_MINOR}) - set(${name}_VERSION_RELEASE 
${${name}_VERSION_RELEASE}) - set(${name}_VERSION_REVISION ${${name}_VERSION_REVISION}) - set(${name}_VERSION ${${name}_VERSION}) - - set(${name}_GIT_DESCRIBE ${${name}_GIT_DESCRIBE}) - set(${name}_GIT_TIMESTAMP ${${name}_GIT_TIMESTAMP}) - set(${name}_GIT_TREE ${${name}_GIT_TREE}) - set(${name}_GIT_COMMIT ${${name}_GIT_COMMIT}) - set(${name}_GIT_REVISION ${${name}_GIT_REVISION}) - set(${name}_GIT_VERSION ${${name}_GIT_VERSION}) + if(${parent_scope}) + set(${name}_VERSION_MAJOR "${${name}_VERSION_MAJOR}" PARENT_SCOPE) + set(${name}_VERSION_MINOR "${${name}_VERSION_MINOR}" PARENT_SCOPE) + set(${name}_VERSION_RELEASE "${${name}_VERSION_RELEASE}" PARENT_SCOPE) + set(${name}_VERSION_REVISION "${${name}_VERSION_REVISION}" PARENT_SCOPE) + set(${name}_VERSION "${${name}_VERSION}" PARENT_SCOPE) + + set(${name}_GIT_DESCRIBE "${${name}_GIT_DESCRIBE}" PARENT_SCOPE) + set(${name}_GIT_TIMESTAMP "${${name}_GIT_TIMESTAMP}" PARENT_SCOPE) + set(${name}_GIT_TREE "${${name}_GIT_TREE}" PARENT_SCOPE) + set(${name}_GIT_COMMIT "${${name}_GIT_COMMIT}" PARENT_SCOPE) + set(${name}_GIT_REVISION "${${name}_GIT_REVISION}" PARENT_SCOPE) + set(${name}_GIT_VERSION "${${name}_GIT_VERSION}" PARENT_SCOPE) endif() endmacro(fetch_version) diff --git a/libs/libmdbx/src/config.h b/libs/libmdbx/src/config.h index 9d21e964d5..23fea98bd0 100644 --- a/libs/libmdbx/src/config.h +++ b/libs/libmdbx/src/config.h @@ -12,6 +12,7 @@ /* #undef MDBX_FORCE_ASSERTIONS */ /* Common */ +#define MDBX_HUGE_TRANSACTIONS 0 #define MDBX_TXN_CHECKOWNER 1 #define MDBX_TXN_CHECKPID_AUTO #ifndef MDBX_TXN_CHECKPID_AUTO @@ -27,8 +28,8 @@ #endif /* Windows */ -#define MDBX_AVOID_CRT 0 #define MDBX_CONFIG_MANUAL_TLS_CALLBACK 1 +#define MDBX_AVOID_CRT 0 /* MacOS & iOS */ #define MDBX_OSX_SPEED_INSTEADOF_DURABILITY 0 @@ -41,12 +42,12 @@ #endif /* Build Info */ -#define MDBX_BUILD_TIMESTAMP "2020-04-25T08:31:34Z" +#define MDBX_BUILD_TIMESTAMP "2020-06-14T10:50:19Z" #define MDBX_BUILD_TARGET "x86_64-Windows" /* #undef MDBX_BUILD_TYPE */ 
-#define MDBX_BUILD_COMPILER "MSVC-19.25.28614.0" -#define MDBX_BUILD_FLAGS "/DWIN32 /D_WINDOWS /W3 /Gy /W4 /utf-8 /WX /GL MDBX_BUILD_SHARED_LIBRARY=0" -#define MDBX_BUILD_SOURCERY 51d296e8c57b5c01204315d0ea2160b23cf3eb40509644354cc038e3f67d61d4_v0_7_0_39_gca8fa31 +#define MDBX_BUILD_COMPILER "MSVC-19.26.28806.0" +#define MDBX_BUILD_FLAGS "/DWIN32 /D_WINDOWS /W3 /EHsc /Gy /W4 /utf-8 /WX /GL MDBX_BUILD_SHARED_LIBRARY=0" +#define MDBX_BUILD_SOURCERY d9a19ecb1e6bd3a67910c162b2e9c2dc476d665c134d041af742fa0196b19a3e_v0_8_1_1_g0c5496d /* *INDENT-ON* */ /* clang-format on */ diff --git a/libs/libmdbx/src/mdbx.h b/libs/libmdbx/src/mdbx.h index c7a6ab3f43..0c6b3de65b 100644 --- a/libs/libmdbx/src/mdbx.h +++ b/libs/libmdbx/src/mdbx.h @@ -658,9 +658,9 @@ typedef pthread_t mdbx_tid_t; /*----------------------------------------------------------------------------*/ -/* MDBX version 0.7.0, released 2020-03-18 */ +/* MDBX version 0.8.0, released 2020-06-05 */ #define MDBX_VERSION_MAJOR 0 -#define MDBX_VERSION_MINOR 7 +#define MDBX_VERSION_MINOR 8 #ifndef LIBMDBX_API #if defined(LIBMDBX_EXPORTS) @@ -924,7 +924,9 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf, * - without MDBX_NOSUBDIR = in a filesystem we have the MDBX-directory with * given pathname, within that a pair of MDBX-files with predefined names. * - * This flag affects only at environment opening and can't be changed after. */ + * This flag affects only at new environment creating by mdbx_env_open(), + * otherwise at opening an existing environment libmdbx will choice this + * automatically. */ #define MDBX_NOSUBDIR 0x4000u /* MDBX_RDONLY = read only mode. 
@@ -1375,6 +1377,7 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf, /**** ENVIRONMENT COPY FLAGS **************************************************/ /* Compacting: Omit free space from copy, and renumber all pages sequentially */ #define MDBX_CP_COMPACT 1u +#define MDBX_CP_FORCE_RESIZEABLE 2u /**** CURSOR OPERATIONS ******************************************************** * @@ -1385,7 +1388,8 @@ typedef enum MDBX_cursor_op { MDBX_FIRST_DUP, /* MDBX_DUPSORT-only: Position at first data item * of current key. */ MDBX_GET_BOTH, /* MDBX_DUPSORT-only: Position at key/data pair. */ - MDBX_GET_BOTH_RANGE, /* MDBX_DUPSORT-only: position at key, nearest data. */ + MDBX_GET_BOTH_RANGE, /* MDBX_DUPSORT-only: Position at given key and at first + * data greater than or equal to specified data. */ MDBX_GET_CURRENT, /* Return key/data at current cursor position */ MDBX_GET_MULTIPLE, /* MDBX_DUPFIXED-only: Return up to a page of duplicate * data items from current cursor position. @@ -2430,20 +2434,37 @@ LIBMDBX_API uint64_t mdbx_txn_id(const MDBX_txn *txn); /* Commit all the operations of a transaction into the database. * - * The transaction handle is freed. It and its cursors must not be used again - * after this call, except with mdbx_cursor_renew() and mdbx_cursor_close(). - * - * A cursor must be closed explicitly always, before or after its transaction - * ends. It can be reused with mdbx_cursor_renew() before finally closing it. + * If the current thread is not eligible to manage the transaction then + * the MDBX_THREAD_MISMATCH error will returned. Otherwise the transaction + * will be committed and its handle is freed. If the transaction cannot + * be committed, it will be aborted with the corresponding error returned. 
+ * Thus, a result other than MDBX_THREAD_MISMATCH means that the transaction + * is terminated: + * - Resources are released; + * - Transaction handle is invalid; + * - Cursor(s) associated with transaction must not be used, except with + * mdbx_cursor_renew() and mdbx_cursor_close(). + * Such cursor(s) must be closed explicitly by mdbx_cursor_close() before + * or after transaction commit, either can be reused with mdbx_cursor_renew() + * until it will be explicitly closed by mdbx_cursor_close(). * * [in] txn A transaction handle returned by mdbx_txn_begin(). * * Returns A non-zero error value on failure and 0 on success, some * possible errors are: - * - MDBX_EINVAL = an invalid parameter was specified. - * - MDBX_ENOSPC = no more disk space. - * - MDBX_EIO = a low-level I/O error occurred while writing. - * - MDBX_ENOMEM = out of memory. */ + * - MDBX_RESULT_TRUE = transaction was aborted since it should be aborted + * due to previous errors. + * - MDBX_PANIC = a fatal error occurred earlier and the environment + * must be shut down. + * - MDBX_BAD_TXN = transaction is already fihished or never began. + * - MDBX_EBADSIGN = transaction object has invalid signature, + * e.g. transaction was already terminated + * or memory was corrupted. + * - MDBX_THREAD_MISMATCH = given transaction is not owned by current thread. + * - MDBX_EINVAL = transaction handle is NULL. + * - MDBX_ENOSPC = no more disk space. + * - MDBX_EIO = a system-level I/O error occurred while writing. + * - MDBX_ENOMEM = out of memory. */ LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn); /* Abandon all the operations of the transaction instead of saving them. @@ -2451,12 +2472,30 @@ LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn); * The transaction handle is freed. It and its cursors must not be used again * after this call, except with mdbx_cursor_renew() and mdbx_cursor_close(). * - * A cursor must be closed explicitly always, before or after its transaction - * ends. 
It can be reused with mdbx_cursor_renew() before finally closing it. + * If the current thread is not eligible to manage the transaction then + * the MDBX_THREAD_MISMATCH error will returned. Otherwise the transaction + * will be aborted and its handle is freed. Thus, a result other than + * MDBX_THREAD_MISMATCH means that the transaction is terminated: + * - Resources are released; + * - Transaction handle is invalid; + * - Cursor(s) associated with transaction must not be used, except with + * mdbx_cursor_renew() and mdbx_cursor_close(). + * Such cursor(s) must be closed explicitly by mdbx_cursor_close() before + * or after transaction abort, either can be reused with mdbx_cursor_renew() + * until it will be explicitly closed by mdbx_cursor_close(). * * [in] txn A transaction handle returned by mdbx_txn_begin(). * - * Returns A non-zero error value on failure and 0 on success. */ + * Returns A non-zero error value on failure and 0 on success, some + * possible errors are: + * - MDBX_PANIC = a fatal error occurred earlier and the environment + * must be shut down. + * - MDBX_BAD_TXN = transaction is already fihished or never began. + * - MDBX_EBADSIGN = transaction object has invalid signature, + * e.g. transaction was already terminated + * or memory was corrupted. + * - MDBX_THREAD_MISMATCH = given transaction is not owned by current thread. + * - MDBX_EINVAL = transaction handle is NULL. */ LIBMDBX_API int mdbx_txn_abort(MDBX_txn *txn); /* Reset a read-only transaction. @@ -2479,7 +2518,16 @@ LIBMDBX_API int mdbx_txn_abort(MDBX_txn *txn); * * [in] txn A transaction handle returned by mdbx_txn_begin(). * - * Returns A non-zero error value on failure and 0 on success. */ + * Returns A non-zero error value on failure and 0 on success, some + * possible errors are: + * - MDBX_PANIC = a fatal error occurred earlier and the environment + * must be shut down. + * - MDBX_BAD_TXN = transaction is already fihished or never began. 
+ * - MDBX_EBADSIGN = transaction object has invalid signature, + * e.g. transaction was already terminated + * or memory was corrupted. + * - MDBX_THREAD_MISMATCH = given transaction is not owned by current thread. + * - MDBX_EINVAL = transaction handle is NULL. */ LIBMDBX_API int mdbx_txn_reset(MDBX_txn *txn); /* Renew a read-only transaction. @@ -2492,9 +2540,14 @@ LIBMDBX_API int mdbx_txn_reset(MDBX_txn *txn); * * Returns A non-zero error value on failure and 0 on success, some * possible errors are: - * - MDBX_PANIC = a fatal error occurred earlier and the environment - * must be shut down. - * - MDBX_EINVAL = an invalid parameter was specified. */ + * - MDBX_PANIC = a fatal error occurred earlier and the environment + * must be shut down. + * - MDBX_BAD_TXN = transaction is already fihished or never began. + * - MDBX_EBADSIGN = transaction object has invalid signature, + * e.g. transaction was already terminated + * or memory was corrupted. + * - MDBX_THREAD_MISMATCH = transaction is running by other thread. + * - MDBX_EINVAL = transaction handle is NULL. */ LIBMDBX_API int mdbx_txn_renew(MDBX_txn *txn); /* The fours integers markers (aka "canary") associated with the environment. @@ -2669,6 +2722,21 @@ __inline uint32_t mdbx_key_from_int32(const int32_t i32) { LIBMDBX_API int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *stat, size_t bytes); +/* Retrieve depth (bitmask) information of nested dupsort (multi-value) B+trees + * for given database. + * + * [in] txn A transaction handle returned by mdbx_txn_begin(). + * [in] dbi A database handle returned by mdbx_dbi_open(). + * [out] mask The address of an uint32_t value where the bitmask + * will be stored. + * + * Returns A non-zero error value on failure and 0 on success, some + * possible errors are: + * - MDBX_EINVAL = an invalid parameter was specified. + * - MDBX_RESULT_TRUE = the dbi isn't a dupsort (multi-value) database. 
*/ +LIBMDBX_API int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, + uint32_t *mask); + /* Retrieve the DB flags and status for a database handle. * * [in] txn A transaction handle returned by mdbx_txn_begin(). @@ -2910,7 +2978,8 @@ LIBMDBX_API int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * * NOTE: The data parameter is NOT ignored regardless the database does * support sorted duplicate data items or not. If the data parameter - * is non-NULL only the matching data item will be deleted. + * is non-NULL only the matching data item will be deleted. Otherwise, if data + * parameter is NULL, any/all value(s) for specified key will be deleted. * * This function will return MDBX_NOTFOUND if the specified key/data * pair is not in the database. @@ -3025,14 +3094,15 @@ LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, * [in] key The key operated on. * [in,out] data The data operated on. * [in] flags Options for this operation. This parameter - * must be set to 0 or one of the values described here: + * must be set to 0 or by bitwise OR'ing together one or more of + * the values described here: * * - MDBX_CURRENT * Replace the item at the current cursor position. The key parameter * must still be provided, and must match it, otherwise the function * return MDBX_EKEYMISMATCH. * - * NOTE: MDBX unlike LMDB allows you to change the size of the data and + * NOTE: MDBX allows (unlike LMDB) you to change the size of the data and * automatically handles reordering for sorted duplicates (MDBX_DUPSORT). * * - MDBX_NODUPDATA @@ -3097,8 +3167,8 @@ LIBMDBX_API int mdbx_cursor_put(MDBX_cursor *cursor, const MDBX_val *key, * or one of the values described here. * * - MDBX_NODUPDATA - * Delete all of the data items for the current key. This flag may only - * be specified if the database was opened with MDBX_DUPSORT. + * Delete all of the data items for the current key. 
This flag has effect + * only for database(s) was created with MDBX_DUPSORT. * * Returns A non-zero error value on failure and 0 on success, some * possible errors are: @@ -3286,8 +3356,8 @@ LIBMDBX_API int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr); * [in] increment Value to increase the sequence, * must be 0 for read-only transactions. * - * Returns A non-zero error value on failure and 0 on success, some - * possible errors are: + * Returns A non-zero error value on failure and 0 on success, + * some possible errors are: * - MDBX_RESULT_TRUE = Increasing the sequence has resulted in an overflow * and therefore cannot be executed. */ LIBMDBX_API int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, @@ -3483,7 +3553,7 @@ MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx, /* B-tree traversal function. */ LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, - void *ctx); + void *ctx, int dont_check_keys_ordering); /**** Attribute support functions for Nexenta *********************************/ #ifdef MDBX_NEXENTA_ATTRS diff --git a/libs/libmdbx/src/src/config.h.in b/libs/libmdbx/src/src/config.h.in index 12f3dc635c..871944e115 100644 --- a/libs/libmdbx/src/src/config.h.in +++ b/libs/libmdbx/src/src/config.h.in @@ -12,6 +12,7 @@ #cmakedefine MDBX_FORCE_ASSERTIONS /* Common */ +#cmakedefine01 MDBX_HUGE_TRANSACTIONS #cmakedefine01 MDBX_TXN_CHECKOWNER #cmakedefine MDBX_TXN_CHECKPID_AUTO #ifndef MDBX_TXN_CHECKPID_AUTO diff --git a/libs/libmdbx/src/src/core.c b/libs/libmdbx/src/src/core.c index 098da86f45..21a0e46fcb 100644 --- a/libs/libmdbx/src/src/core.c +++ b/libs/libmdbx/src/src/core.c @@ -388,16 +388,15 @@ __cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, unsigned flags) { if (flags & MDBX_INTEGERDUP) return 8 /* sizeof(uint64_t) */; - if (flags & - (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP)) + if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP)) 
return BRANCH_NODEMAX(pagesize) - NODESIZE; const unsigned page_ln2 = log2n(pagesize); const size_t hard = 0x7FF00000ul; const size_t hard_pages = hard >> page_ln2; - const size_t limit = (hard_pages < MDBX_DPL_TXNFULL) + const size_t limit = (hard_pages < MDBX_DPL_TXNFULL / 3) ? hard - : ((size_t)MDBX_DPL_TXNFULL << page_ln2); + : ((size_t)MDBX_DPL_TXNFULL / 3 << page_ln2); return (limit < MAX_MAPSIZE) ? limit / 2 : MAX_MAPSIZE / 2; } @@ -468,6 +467,25 @@ static __pure_function __always_inline size_t branch_size(const MDBX_env *env, return node_bytes + sizeof(indx_t); } +static __pure_function __always_inline uint16_t +flags_db2sub(uint16_t db_flags) { + uint16_t sub_flags = db_flags & MDBX_DUPFIXED; + + /* MDBX_INTEGERDUP => MDBX_INTEGERKEY */ +#define SHIFT_INTEGERDUP_TO_INTEGERKEY 2 + STATIC_ASSERT((MDBX_INTEGERDUP >> SHIFT_INTEGERDUP_TO_INTEGERKEY) == + MDBX_INTEGERKEY); + sub_flags |= (db_flags & MDBX_INTEGERDUP) >> SHIFT_INTEGERDUP_TO_INTEGERKEY; + + /* MDBX_REVERSEDUP => MDBX_REVERSEKEY */ +#define SHIFT_REVERSEDUP_TO_REVERSEKEY 5 + STATIC_ASSERT((MDBX_REVERSEDUP >> SHIFT_REVERSEDUP_TO_REVERSEKEY) == + MDBX_REVERSEKEY); + sub_flags |= (db_flags & MDBX_REVERSEDUP) >> SHIFT_REVERSEDUP_TO_REVERSEKEY; + + return sub_flags; +} + /*----------------------------------------------------------------------------*/ static __pure_function __always_inline size_t pgno2bytes(const MDBX_env *env, @@ -571,6 +589,8 @@ static __pure_function __always_inline MDBX_node *page_node(const MDBX_page *mp, * There are no node headers, keys are stored contiguously. 
*/ static __pure_function __always_inline void * page_leaf2key(const MDBX_page *mp, unsigned i, size_t keysize) { + assert((mp->mp_flags & (P_BRANCH | P_LEAF | P_LEAF2 | P_OVERFLOW | P_META)) == + (P_LEAF | P_LEAF2)); assert(mp->mp_leaf2_ksize == keysize); (void)keysize; return (char *)mp + PAGEHDRSZ + (i * mp->mp_leaf2_ksize); @@ -3062,14 +3082,15 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode); static int __must_check_result mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **mp, int *lvl); static int __must_check_result mdbx_page_search_root(MDBX_cursor *mc, - MDBX_val *key, int modify); + const MDBX_val *key, + int modify); #define MDBX_PS_MODIFY 1 #define MDBX_PS_ROOTONLY 2 #define MDBX_PS_FIRST 4 #define MDBX_PS_LAST 8 -static int __must_check_result mdbx_page_search(MDBX_cursor *mc, MDBX_val *key, - int flags); +static int __must_check_result mdbx_page_search(MDBX_cursor *mc, + const MDBX_val *key, int flags); static int __must_check_result mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst); static int __must_check_result mdbx_page_flush(MDBX_txn *txn, @@ -3088,7 +3109,8 @@ static int __must_check_result mdbx_sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending); static int mdbx_env_close0(MDBX_env *env); -static MDBX_node *mdbx_node_search(MDBX_cursor *mc, MDBX_val *key, int *exactp); +static MDBX_node *mdbx_node_search(MDBX_cursor *mc, const MDBX_val *key, + int *exactp); static int __must_check_result mdbx_node_add_branch(MDBX_cursor *mc, unsigned indx, @@ -3124,10 +3146,11 @@ mdbx_audit(MDBX_txn *txn) { return mdbx_audit_ex(txn, 0, (txn->mt_flags & MDBX_RDONLY) != 0); } -static int __must_check_result mdbx_page_check(MDBX_env *env, +static int __must_check_result mdbx_page_check(MDBX_cursor *const mc, const MDBX_page *const mp, - bool maybe_unfinished); -static int __must_check_result mdbx_cursor_check(MDBX_cursor *mc, bool pending); + unsigned options); +static int __must_check_result mdbx_cursor_check(MDBX_cursor *mc, + 
unsigned options); static int __must_check_result mdbx_cursor_del0(MDBX_cursor *mc); static int __must_check_result mdbx_del0(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, @@ -3160,9 +3183,15 @@ static void mdbx_cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst); static int __must_check_result mdbx_drop0(MDBX_cursor *mc, int subs); static int __must_check_result mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi); +static int __must_check_result mdbx_setup_dbx(MDBX_dbx *const dbx, + const MDBX_db *const db, + const unsigned pagesize); static MDBX_cmp_func mdbx_cmp_memn, mdbx_cmp_memnr, mdbx_cmp_int_align4, - mdbx_cmp_int_align2, mdbx_cmp_int_unaligned; + mdbx_cmp_int_align2, mdbx_cmp_int_unaligned, mdbx_cmp_lenfast; + +static MDBX_cmp_func *mdbx_default_keycmp(unsigned flags); +static MDBX_cmp_func *mdbx_default_datacmp(unsigned flags); static const char *__mdbx_strerr(int errnum) { /* Table of descriptions for MDBX errors */ @@ -3185,7 +3214,7 @@ static const char *__mdbx_strerr(int errnum) { "MDBX_PAGE_FULL: Internal error - Page has no more space", "MDBX_UNABLE_EXTEND_MAPSIZE: Database engine was unable to extend" " mapping, e.g. 
since address space is unavailable or busy," - " or Operation system not supported such operations" + " or Operation system not supported such operations", "MDBX_INCOMPATIBLE: Environment or database is not compatible" " with the requested operation or the specified flags", "MDBX_BAD_RSLOT: Invalid reuse of reader locktable slot," @@ -3972,11 +4001,12 @@ static int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) { return MDBX_SUCCESS; } + mp->mp_pgno = pgno; mp->mp_flags = P_LOOSE | P_DIRTY; mp->mp_next = txn->tw.loose_pages; txn->tw.loose_pages = mp; txn->tw.loose_count++; - if (unlikely(txn->mt_next_pgno == mp->mp_pgno + 1)) + if (unlikely(txn->mt_next_pgno == pgno + 1)) mdbx_refund(txn); return MDBX_SUCCESS; @@ -4034,9 +4064,19 @@ static int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) { static __must_check_result __always_inline int mdbx_retire_pgno(MDBX_cursor *mc, const pgno_t pgno) { MDBX_page *mp; - int rc = mdbx_page_get(mc, pgno, &mp, NULL); - if (likely(rc == MDBX_SUCCESS)) - rc = mdbx_page_retire(mc, mp); + int rc; + if (mdbx_audit_enabled()) { + const unsigned save_flags = mc->mc_flags; + mc->mc_flags |= C_RETIRING; + rc = mdbx_page_get(mc, pgno, &mp, NULL); + if (likely(rc == MDBX_SUCCESS)) + rc = mdbx_page_retire(mc, mp); + mc->mc_flags = (mc->mc_flags & ~C_RETIRING) | (save_flags & C_RETIRING); + } else { + rc = mdbx_page_get(mc, pgno, &mp, NULL); + if (likely(rc == MDBX_SUCCESS)) + rc = mdbx_page_retire(mc, mp); + } return rc; } @@ -4088,7 +4128,7 @@ mark_done: if (all) { /* Mark dirty root pages */ for (i = 0; i < txn->mt_numdbs; i++) { - if (txn->mt_dbflags[i] & DB_DIRTY) { + if (txn->mt_dbstate[i] & DB_DIRTY) { pgno_t pgno = txn->mt_dbs[i].md_root; if (pgno == P_INVALID) continue; @@ -4161,8 +4201,17 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key, if (txn->tw.dirtyroom > i) return MDBX_SUCCESS; + /* Less aggressive spill - we originally spilled the entire dirty list, + * with a few exceptions for cursor pages and DB root 
pages. But this + * turns out to be a lot of wasted effort because in a large txn many + * of those pages will need to be used again. So now we spill only 1/8th + * of the dirty pages. Testing revealed this to be a good tradeoff, + * better than 1/2, 1/4, or 1/10. */ + if (need < MDBX_DPL_TXNFULL / 8) + need = MDBX_DPL_TXNFULL / 8; + if (!txn->tw.spill_pages) { - txn->tw.spill_pages = mdbx_pnl_alloc(MDBX_DPL_TXNFULL / 8); + txn->tw.spill_pages = mdbx_pnl_alloc(need); if (unlikely(!txn->tw.spill_pages)) return MDBX_ENOMEM; } else { @@ -4181,15 +4230,6 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - /* Less aggressive spill - we originally spilled the entire dirty list, - * with a few exceptions for cursor pages and DB root pages. But this - * turns out to be a lot of wasted effort because in a large txn many - * of those pages will need to be used again. So now we spill only 1/8th - * of the dirty pages. Testing revealed this to be a good tradeoff, - * better than 1/2, 1/4, or 1/10. */ - if (need < MDBX_DPL_TXNFULL / 8) - need = MDBX_DPL_TXNFULL / 8; - /* Save the page IDs of all the pages we're flushing */ /* flush from the tail forward, this saves a lot of shifting later on. */ for (i = dl->length; i && need; i--) { @@ -5035,7 +5075,7 @@ skip_cache: goto fail; } last = unaligned_peek_u64(4, key.iov_base); - if (unlikely(last < 1 || last >= SAFE64_INVALID_THRESHOLD)) { + if (unlikely(last < MIN_TXNID || last > MAX_TXNID)) { rc = MDBX_CORRUPTED; goto fail; } @@ -5133,7 +5173,7 @@ skip_cache: } /* Don't try to coalesce too much. */ - if (unlikely(re_len > MDBX_DPL_TXNFULL / 4)) + if (unlikely(re_len > MDBX_DPL_TXNFULL / 42)) break; if (re_len /* current size */ >= env->me_maxgc_ov1page || (re_len > prev_re_len && re_len - prev_re_len /* delta from prev */ >= @@ -5671,7 +5711,7 @@ static int mdbx_cursor_shadow(MDBX_txn *src, MDBX_txn *dst) { * user may not use mc until dst ends. 
But we need a valid * txn pointer here for cursor fixups to keep working. */ mc->mc_txn = dst; - mc->mc_dbflag = &dst->mt_dbflags[i]; + mc->mc_dbstate = &dst->mt_dbstate[i]; if ((mx = mc->mc_xcursor) != NULL) { *(MDBX_xcursor *)(bk + 1) = *mx; mx->mx_cursor.mc_txn = dst; @@ -5710,7 +5750,7 @@ static void mdbx_cursors_eot(MDBX_txn *txn, unsigned merge) { mc->mc_backup = bk->mc_backup; mc->mc_txn = bk->mc_txn; mc->mc_db = bk->mc_db; - mc->mc_dbflag = bk->mc_dbflag; + mc->mc_dbstate = bk->mc_dbstate; if ((mx = mc->mc_xcursor) != NULL) mx->mx_cursor.mc_txn = bk->mc_txn; } else { @@ -5752,7 +5792,7 @@ static pgno_t mdbx_find_largest_this(MDBX_env *env, pgno_t largest) { goto retry; if (largest < snap_pages && lck->mti_oldest_reader <= /* ignore pending updates */ snap_txnid && - snap_txnid < SAFE64_INVALID_THRESHOLD) + snap_txnid <= MAX_TXNID) largest = snap_pages; } } @@ -5959,8 +5999,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) { } } - if (unlikely(txn->mt_txnid == 0 || - txn->mt_txnid >= SAFE64_INVALID_THRESHOLD)) { + if (unlikely(txn->mt_txnid < MIN_TXNID || txn->mt_txnid > MAX_TXNID)) { mdbx_error("%s", "environment corrupted by died writer, must shutdown!"); rc = MDBX_CORRUPTED; goto bailout; @@ -5982,7 +6021,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) { if (lck->mti_readers[i].mr_pid == env->me_pid && unlikely(lck->mti_readers[i].mr_tid == tid)) { const txnid_t txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (txnid >= MIN_TXNID && txnid < SAFE64_INVALID_THRESHOLD) + if (txnid >= MIN_TXNID && txnid <= MAX_TXNID) return MDBX_TXN_OVERLAPPING; } } @@ -6010,7 +6049,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) { txn->mt_canary = meta->mm_canary; const txnid_t snap = mdbx_meta_txnid_stable(env, meta); txn->mt_txnid = safe64_txnid_next(snap); - if (unlikely(txn->mt_txnid >= SAFE64_INVALID_THRESHOLD)) { + if (unlikely(txn->mt_txnid > MAX_TXNID)) { mdbx_debug("%s", "txnid overflow!"); rc = MDBX_TXN_FULL; goto 
bailout; @@ -6043,11 +6082,11 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) { for (unsigned i = CORE_DBS; i < txn->mt_numdbs; i++) { unsigned x = env->me_dbflags[i]; txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS; - txn->mt_dbflags[i] = + txn->mt_dbstate[i] = (x & MDBX_VALID) ? DB_VALID | DB_USRVALID | DB_STALE : 0; } - txn->mt_dbflags[MAIN_DBI] = DB_VALID | DB_USRVALID; - txn->mt_dbflags[FREE_DBI] = DB_VALID; + txn->mt_dbstate[MAIN_DBI] = DB_VALID | DB_USRVALID; + txn->mt_dbstate[FREE_DBI] = DB_VALID; if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { mdbx_warning("%s", "environment had fatal error, must shutdown!"); @@ -6138,8 +6177,6 @@ static __always_inline int check_txn_rw(const MDBX_txn *txn, int bad_bits) { } int mdbx_txn_renew(MDBX_txn *txn) { - int rc; - if (unlikely(!txn)) return MDBX_EINVAL; @@ -6149,8 +6186,12 @@ int mdbx_txn_renew(MDBX_txn *txn) { if (unlikely((txn->mt_flags & MDBX_RDONLY) == 0)) return MDBX_EINVAL; - if (unlikely(txn->mt_owner != 0)) - return MDBX_THREAD_MISMATCH; + int rc; + if (unlikely(txn->mt_owner != 0 || !(txn->mt_flags & MDBX_TXN_FINISHED))) { + rc = mdbx_txn_reset(txn); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } rc = mdbx_txn_renew0(txn, MDBX_RDONLY); if (rc == MDBX_SUCCESS) { @@ -6238,7 +6279,7 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags, memset(txn, 0, tsize); txn->mt_dbxs = env->me_dbxs; /* static */ txn->mt_dbs = (MDBX_db *)((char *)txn + tsize); - txn->mt_dbflags = (uint8_t *)txn + size - env->me_maxdbs; + txn->mt_dbstate = (uint8_t *)txn + size - env->me_maxdbs; txn->mt_flags = flags; txn->mt_env = env; @@ -6286,9 +6327,9 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags, txn->mt_numdbs = parent->mt_numdbs; txn->mt_owner = parent->mt_owner; memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); - /* Copy parent's mt_dbflags, but clear DB_NEW */ + /* Copy parent's mt_dbstate, but clear DB_NEW */ for (unsigned i = 0; i < txn->mt_numdbs; 
i++) - txn->mt_dbflags[i] = parent->mt_dbflags[i] & ~(DB_FRESH | DB_CREAT); + txn->mt_dbstate[i] = parent->mt_dbstate[i] & ~(DB_FRESH | DB_CREAT); mdbx_tassert(parent, parent->mt_parent || parent->tw.dirtyroom + parent->tw.dirtylist->length == @@ -6462,7 +6503,7 @@ static void mdbx_dbis_update(MDBX_txn *txn, int keep) { if (n) { bool locked = false; MDBX_env *env = txn->mt_env; - uint8_t *tdbflags = txn->mt_dbflags; + uint8_t *tdbflags = txn->mt_dbstate; for (unsigned i = n; --i >= CORE_DBS;) { if (likely((tdbflags[i] & DB_CREAT) == 0)) @@ -6714,16 +6755,16 @@ static __cold int mdbx_audit_ex(MDBX_txn *txn, unsigned retired_stored, mdbx_tassert(txn, rc == MDBX_NOTFOUND); for (MDBX_dbi i = FREE_DBI; i < txn->mt_numdbs; i++) - txn->mt_dbflags[i] &= ~DB_AUDITED; + txn->mt_dbstate[i] &= ~DB_AUDITED; pgno_t count = 0; for (MDBX_dbi i = FREE_DBI; i <= MAIN_DBI; i++) { - if (!(txn->mt_dbflags[i] & DB_VALID)) + if (!(txn->mt_dbstate[i] & DB_VALID)) continue; rc = mdbx_cursor_init(&cx.outer, txn, i); if (unlikely(rc != MDBX_SUCCESS)) return rc; - txn->mt_dbflags[i] |= DB_AUDITED; + txn->mt_dbstate[i] |= DB_AUDITED; if (txn->mt_dbs[i].md_root == P_INVALID) continue; count += txn->mt_dbs[i].md_branch_pages + txn->mt_dbs[i].md_leaf_pages + @@ -6743,14 +6784,14 @@ static __cold int mdbx_audit_ex(MDBX_txn *txn, unsigned retired_stored, memcpy(db = &db_copy, node_data(node), sizeof(db_copy)); if ((txn->mt_flags & MDBX_RDONLY) == 0) { for (MDBX_dbi k = txn->mt_numdbs; --k > MAIN_DBI;) { - if ((txn->mt_dbflags[k] & DB_VALID) && + if ((txn->mt_dbstate[k] & DB_VALID) && /* txn->mt_dbxs[k].md_name.iov_len > 0 && */ node_ks(node) == txn->mt_dbxs[k].md_name.iov_len && memcmp(node_key(node), txn->mt_dbxs[k].md_name.iov_base, node_ks(node)) == 0) { - txn->mt_dbflags[k] |= DB_AUDITED; - if (txn->mt_dbflags[k] & DB_DIRTY) { - mdbx_tassert(txn, (txn->mt_dbflags[k] & DB_STALE) == 0); + txn->mt_dbstate[k] |= DB_AUDITED; + if (txn->mt_dbstate[k] & DB_DIRTY) { + mdbx_tassert(txn, 
(txn->mt_dbstate[k] & DB_STALE) == 0); db = txn->mt_dbs + k; } break; @@ -6767,9 +6808,9 @@ static __cold int mdbx_audit_ex(MDBX_txn *txn, unsigned retired_stored, } for (MDBX_dbi i = FREE_DBI; i < txn->mt_numdbs; i++) { - if ((txn->mt_dbflags[i] & (DB_VALID | DB_AUDITED | DB_STALE)) != DB_VALID) + if ((txn->mt_dbstate[i] & (DB_VALID | DB_AUDITED | DB_STALE)) != DB_VALID) continue; - if (F_ISSET(txn->mt_dbflags[i], DB_DIRTY | DB_CREAT)) { + if (F_ISSET(txn->mt_dbstate[i], DB_DIRTY | DB_CREAT)) { count += txn->mt_dbs[i].md_branch_pages + txn->mt_dbs[i].md_leaf_pages + txn->mt_dbs[i].md_overflow_pages; } else { @@ -6778,7 +6819,7 @@ static __cold int mdbx_audit_ex(MDBX_txn *txn, unsigned retired_stored, txn->mt_parent ? "nested-" : "", txn->mt_txnid, i, (int)txn->mt_dbxs[i].md_name.iov_len, (const char *)txn->mt_dbxs[i].md_name.iov_base, - txn->mt_dbflags[i]); + txn->mt_dbstate[i]); } } @@ -6866,29 +6907,32 @@ static int mdbx_update_gc(MDBX_txn *txn) { mdbx_trace("\n>>> @%" PRIaTXN, txn->mt_txnid); unsigned retired_stored = 0, loop = 0; - MDBX_cursor mc; - int rc = mdbx_cursor_init(&mc, txn, FREE_DBI); + MDBX_cursor_couple couple; + int rc = mdbx_cursor_init(&couple.outer, txn, FREE_DBI); if (unlikely(rc != MDBX_SUCCESS)) goto bailout_notracking; - mc.mc_flags |= C_RECLAIMING; - mc.mc_next = txn->mt_cursors[FREE_DBI]; - txn->mt_cursors[FREE_DBI] = &mc; + couple.outer.mc_flags |= C_RECLAIMING; + couple.outer.mc_next = txn->mt_cursors[FREE_DBI]; + txn->mt_cursors[FREE_DBI] = &couple.outer; retry: + ++loop; +retry_noaccount: mdbx_trace("%s", " >> restart"); mdbx_tassert( txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, txn->mt_next_pgno)); mdbx_tassert(txn, mdbx_dirtylist_check(txn)); mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == MDBX_DPL_TXNFULL); - if (unlikely(/* paranoia */ ++loop > 42)) { + if (unlikely(/* paranoia */ loop > ((MDBX_DEBUG > 0) ? 
9 : 99))) { mdbx_error("too more loops %u, bailout", loop); rc = MDBX_PROBLEM; goto bailout; } - rc = mdbx_prep_backlog(txn, &mc, MDBX_PNL_SIZEOF(txn->tw.retired_pages)); + rc = mdbx_prep_backlog(txn, &couple.outer, + MDBX_PNL_SIZEOF(txn->tw.retired_pages)); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -6915,18 +6959,18 @@ retry: cleaned_gc_slot > 0 && cleaned_gc_id < *env->me_oldest); key.iov_base = &cleaned_gc_id; key.iov_len = sizeof(cleaned_gc_id); - rc = mdbx_cursor_get(&mc, &key, NULL, MDBX_SET); + rc = mdbx_cursor_get(&couple.outer, &key, NULL, MDBX_SET); if (rc == MDBX_NOTFOUND) continue; if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - rc = mdbx_prep_backlog(txn, &mc, 0); + rc = mdbx_prep_backlog(txn, &couple.outer, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; mdbx_tassert(txn, cleaned_gc_id < *env->me_oldest); mdbx_trace("%s.cleanup-reclaimed-id [%u]%" PRIaTXN, dbg_prefix_mode, cleaned_gc_slot, cleaned_gc_id); - rc = mdbx_cursor_del(&mc, 0); + rc = mdbx_cursor_del(&couple.outer, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } while (cleaned_gc_slot < MDBX_PNL_SIZE(txn->tw.lifo_reclaimed)); @@ -6938,7 +6982,7 @@ retry: while (cleaned_gc_id <= txn->tw.last_reclaimed) { gc_rid = cleaned_gc_id; settled = 0; - rc = mdbx_cursor_first(&mc, &key, NULL); + rc = mdbx_cursor_first(&couple.outer, &key, NULL); if (unlikely(rc != MDBX_SUCCESS)) { if (rc == MDBX_NOTFOUND) break; @@ -6949,15 +6993,14 @@ retry: goto bailout; } cleaned_gc_id = unaligned_peek_u64(4, key.iov_base); - if (unlikely(cleaned_gc_id < 1 || - cleaned_gc_id >= SAFE64_INVALID_THRESHOLD)) { + if (unlikely(cleaned_gc_id < MIN_TXNID || cleaned_gc_id > MAX_TXNID)) { rc = MDBX_CORRUPTED; goto bailout; } if (cleaned_gc_id > txn->tw.last_reclaimed) break; if (cleaned_gc_id < txn->tw.last_reclaimed) { - rc = mdbx_prep_backlog(txn, &mc, 0); + rc = mdbx_prep_backlog(txn, &couple.outer, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } @@ -6965,7 +7008,7 @@ retry: mdbx_tassert(txn, 
cleaned_gc_id < *env->me_oldest); mdbx_trace("%s.cleanup-reclaimed-id %" PRIaTXN, dbg_prefix_mode, cleaned_gc_id); - rc = mdbx_cursor_del(&mc, 0); + rc = mdbx_cursor_del(&couple.outer, 0); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } @@ -7064,9 +7107,10 @@ retry: if (retired_stored < MDBX_PNL_SIZE(txn->tw.retired_pages)) { if (unlikely(!retired_stored)) { /* Make sure last page of GC is touched and on retired-list */ - mc.mc_flags &= ~C_RECLAIMING; - rc = mdbx_page_search(&mc, NULL, MDBX_PS_LAST | MDBX_PS_MODIFY); - mc.mc_flags |= C_RECLAIMING; + couple.outer.mc_flags &= ~C_RECLAIMING; + rc = mdbx_page_search(&couple.outer, NULL, + MDBX_PS_LAST | MDBX_PS_MODIFY); + couple.outer.mc_flags |= C_RECLAIMING; if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND) goto bailout; } @@ -7075,8 +7119,8 @@ retry: key.iov_base = &txn->mt_txnid; do { data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages); - mdbx_prep_backlog(txn, &mc, data.iov_len); - rc = mdbx_cursor_put(&mc, &key, &data, MDBX_RESERVE); + mdbx_prep_backlog(txn, &couple.outer, data.iov_len); + rc = mdbx_cursor_put(&couple.outer, &key, &data, MDBX_RESERVE); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; /* Retry if tw.retired_pages[] grew during the Put() */ @@ -7102,7 +7146,9 @@ retry: if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) { mdbx_trace("%s.reclaimed-list changed %u -> %u, retry", dbg_prefix_mode, amount, (unsigned)MDBX_PNL_SIZE(txn->tw.reclaimed_pglist)); - goto retry /* rare case, but avoids GC fragmentation and one loop. */; + goto retry_noaccount /* rare case, but avoids GC fragmentation and one + cycle. */ + ; } continue; } @@ -7146,10 +7192,10 @@ retry: env->me_maxgc_ov1page) { /* LY: need just a txn-id for save page list. 
*/ - mc.mc_flags &= ~C_RECLAIMING; + couple.outer.mc_flags &= ~C_RECLAIMING; bool need_cleanup = false; do { - rc = mdbx_page_alloc(&mc, 0, NULL, MDBX_ALLOC_GC); + rc = mdbx_page_alloc(&couple.outer, 0, NULL, MDBX_ALLOC_GC); if (likely(rc == MDBX_SUCCESS)) { mdbx_trace("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode, MDBX_PNL_LAST(txn->tw.lifo_reclaimed)); @@ -7161,7 +7207,7 @@ retry: left > ((unsigned)MDBX_PNL_SIZE(txn->tw.lifo_reclaimed) - reused_gc_slot) * env->me_maxgc_ov1page); - mc.mc_flags |= C_RECLAIMING; + couple.outer.mc_flags |= C_RECLAIMING; if (likely(rc == MDBX_SUCCESS)) { mdbx_trace("%s: got enough from GC.", dbg_prefix_mode); @@ -7198,7 +7244,7 @@ retry: break; } - mdbx_tassert(txn, gc_rid > 1 && gc_rid < SAFE64_INVALID_THRESHOLD); + mdbx_tassert(txn, gc_rid >= MIN_TXNID && gc_rid <= MAX_TXNID); rc = mdbx_txl_append(&txn->tw.lifo_reclaimed, --gc_rid); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -7234,14 +7280,14 @@ retry: mdbx_tassert(txn, txn->tw.lifo_reclaimed == NULL); if (unlikely(gc_rid == 0)) { gc_rid = mdbx_find_oldest(txn) - 1; - rc = mdbx_cursor_get(&mc, &key, NULL, MDBX_FIRST); + rc = mdbx_cursor_get(&couple.outer, &key, NULL, MDBX_FIRST); if (rc == MDBX_SUCCESS) { if (unlikely(key.iov_len != sizeof(txnid_t))) { rc = MDBX_CORRUPTED; goto bailout; } txnid_t gc_first = unaligned_peek_u64(4, key.iov_base); - if (unlikely(gc_first < 1 || gc_first >= SAFE64_INVALID_THRESHOLD)) { + if (unlikely(gc_first < MIN_TXNID || gc_first > MAX_TXNID)) { rc = MDBX_CORRUPTED; goto bailout; } @@ -7330,8 +7376,9 @@ retry: data.iov_len = (chunk + 1) * sizeof(pgno_t); mdbx_trace("%s.reserve: %u [%u...%u] @%" PRIaTXN, dbg_prefix_mode, chunk, settled + 1, settled + chunk + 1, reservation_gc_id); - mdbx_prep_backlog(txn, &mc, data.iov_len); - rc = mdbx_cursor_put(&mc, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE); + mdbx_prep_backlog(txn, &couple.outer, data.iov_len); + rc = mdbx_cursor_put(&couple.outer, &key, &data, + MDBX_RESERVE | MDBX_NOOVERWRITE); 
mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, txn->mt_next_pgno)); if (unlikely(rc != MDBX_SUCCESS)) @@ -7346,7 +7393,7 @@ retry: unlikely(amount < MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) { mdbx_notice("** restart: reclaimed-list growth %u -> %u", amount, (unsigned)MDBX_PNL_SIZE(txn->tw.reclaimed_pglist)); - goto retry; + goto retry_noaccount; } continue; @@ -7376,7 +7423,7 @@ retry: unsigned left = amount; if (txn->tw.lifo_reclaimed == nullptr) { mdbx_tassert(txn, lifo == 0); - rc = mdbx_cursor_first(&mc, &key, &data); + rc = mdbx_cursor_first(&couple.outer, &key, &data); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } else { @@ -7412,7 +7459,7 @@ retry: dbg_prefix_mode, fill_gc_id, filled_gc_slot); key.iov_base = &fill_gc_id; key.iov_len = sizeof(fill_gc_id); - rc = mdbx_cursor_get(&mc, &key, &data, MDBX_SET_KEY); + rc = mdbx_cursor_get(&couple.outer, &key, &data, MDBX_SET_KEY); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } @@ -7425,7 +7472,7 @@ retry: key.iov_len = sizeof(fill_gc_id); mdbx_tassert(txn, data.iov_len >= sizeof(pgno_t) * 2); - mc.mc_flags |= C_GCFREEZE; + couple.outer.mc_flags |= C_GCFREEZE; unsigned chunk = (unsigned)(data.iov_len / sizeof(pgno_t)) - 1; if (unlikely(chunk > left)) { mdbx_trace("%s: chunk %u > left %u, @%" PRIaTXN, dbg_prefix_mode, chunk, @@ -7434,20 +7481,21 @@ retry: chunk - left > env->me_maxgc_ov1page) { data.iov_len = (left + 1) * sizeof(pgno_t); if (loop < 7) - mc.mc_flags &= ~C_GCFREEZE; + couple.outer.mc_flags &= ~C_GCFREEZE; } chunk = left; } - rc = mdbx_cursor_put(&mc, &key, &data, MDBX_CURRENT | MDBX_RESERVE); - mc.mc_flags &= ~C_GCFREEZE; + rc = mdbx_cursor_put(&couple.outer, &key, &data, + MDBX_CURRENT | MDBX_RESERVE); + couple.outer.mc_flags &= ~C_GCFREEZE; if (unlikely(rc != MDBX_SUCCESS)) goto bailout; clean_reserved_gc_pnl(env, data); if (unlikely(txn->tw.loose_count || amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) { - mdbx_notice("** restart: reclaimed-list changed (%u -> %u, 
%u)", amount, - MDBX_PNL_SIZE(txn->tw.reclaimed_pglist), + mdbx_notice("** restart: reclaimed-list growth (%u -> %u, loose +%u)", + amount, MDBX_PNL_SIZE(txn->tw.reclaimed_pglist), txn->tw.loose_count); goto retry; } @@ -7482,7 +7530,7 @@ retry: if (txn->tw.lifo_reclaimed == nullptr) { mdbx_tassert(txn, lifo == 0); - rc = mdbx_cursor_next(&mc, &key, &data, MDBX_NEXT); + rc = mdbx_cursor_next(&couple.outer, &key, &data, MDBX_NEXT); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } else { @@ -7507,7 +7555,7 @@ retry: cleaned_gc_slot == MDBX_PNL_SIZE(txn->tw.lifo_reclaimed)); bailout: - txn->mt_cursors[FREE_DBI] = mc.mc_next; + txn->mt_cursors[FREE_DBI] = couple.outer.mc_next; bailout_notracking: MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) = 0; @@ -7519,17 +7567,25 @@ static int mdbx_flush_iov(MDBX_txn *const txn, struct iovec *iov, unsigned iov_items, size_t iov_off, size_t iov_bytes) { MDBX_env *const env = txn->mt_env; - int rc = mdbx_pwritev(env->me_lazy_fd, iov, iov_items, iov_off, iov_bytes); - if (unlikely(rc != MDBX_SUCCESS)) { - mdbx_error("Write error: %s", mdbx_strerror(rc)); - txn->mt_flags |= MDBX_TXN_ERROR; - } - - for (unsigned i = 0; i < iov_items; i++) - mdbx_dpage_free(env, (MDBX_page *)iov[i].iov_base, - bytes2pgno(env, iov[i].iov_len)); + mdbx_assert(env, iov_items > 0); + if (likely(iov_items == 1)) { + mdbx_assert(env, iov->iov_len == iov_bytes); + int rc = mdbx_pwrite(env->me_lazy_fd, iov->iov_base, iov_bytes, iov_off); + mdbx_dpage_free(env, (MDBX_page *)iov->iov_base, + bytes2pgno(env, iov_bytes)); + return rc; + } else { + int rc = mdbx_pwritev(env->me_lazy_fd, iov, iov_items, iov_off, iov_bytes); + if (unlikely(rc != MDBX_SUCCESS)) { + mdbx_error("Write error: %s", mdbx_strerror(rc)); + txn->mt_flags |= MDBX_TXN_ERROR; + } - return rc; + for (unsigned i = 0; i < iov_items; i++) + mdbx_dpage_free(env, (MDBX_page *)iov[i].iov_base, + bytes2pgno(env, iov[i].iov_len)); + return rc; + } } /* Flush (some) dirty pages to the map, after clearing their 
dirty flag. @@ -7645,34 +7701,41 @@ static __cold bool mdbx_txn_import_dbi(MDBX_txn *txn, MDBX_dbi dbi) { mdbx_compiler_barrier(); for (unsigned i = CORE_DBS; i < snap_numdbs; ++i) { if (i >= txn->mt_numdbs) - txn->mt_dbflags[i] = 0; - if (!(txn->mt_dbflags[i] & DB_USRVALID) && + txn->mt_dbstate[i] = 0; + if (!(txn->mt_dbstate[i] & DB_USRVALID) && (env->me_dbflags[i] & MDBX_VALID)) { txn->mt_dbs[i].md_flags = env->me_dbflags[i] & PERSISTENT_FLAGS; - txn->mt_dbflags[i] = DB_VALID | DB_USRVALID | DB_STALE; + txn->mt_dbstate[i] = DB_VALID | DB_USRVALID | DB_STALE; mdbx_tassert(txn, txn->mt_dbxs[i].md_cmp != NULL); } } txn->mt_numdbs = snap_numdbs; mdbx_ensure(env, mdbx_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - return txn->mt_dbflags[dbi] & DB_USRVALID; + return txn->mt_dbstate[dbi] & DB_USRVALID; } /* Check txn and dbi arguments to a function */ static __always_inline bool mdbx_txn_dbi_exists(MDBX_txn *txn, MDBX_dbi dbi, unsigned validity) { - if (likely(dbi < txn->mt_numdbs && (txn->mt_dbflags[dbi] & validity))) + if (likely(dbi < txn->mt_numdbs && (txn->mt_dbstate[dbi] & validity))) return true; return mdbx_txn_import_dbi(txn, dbi); } int mdbx_txn_commit(MDBX_txn *txn) { - int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD); + STATIC_ASSERT(MDBX_TXN_FINISHED == + MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR); + int rc = check_txn(txn, MDBX_TXN_FINISHED); if (unlikely(rc != MDBX_SUCCESS)) return rc; + if (unlikely(txn->mt_flags & MDBX_TXN_ERROR)) { + rc = MDBX_RESULT_TRUE; + goto fail; + } + MDBX_env *env = txn->mt_env; #if MDBX_TXN_CHECKPID if (unlikely(env->me_pid != mdbx_getpid())) { @@ -7736,12 +7799,12 @@ int mdbx_txn_commit(MDBX_txn *txn) { /* Update parent's DB table. 
*/ memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); parent->mt_numdbs = txn->mt_numdbs; - parent->mt_dbflags[FREE_DBI] = txn->mt_dbflags[FREE_DBI]; - parent->mt_dbflags[MAIN_DBI] = txn->mt_dbflags[MAIN_DBI]; + parent->mt_dbstate[FREE_DBI] = txn->mt_dbstate[FREE_DBI]; + parent->mt_dbstate[MAIN_DBI] = txn->mt_dbstate[MAIN_DBI]; for (unsigned i = CORE_DBS; i < txn->mt_numdbs; i++) { /* preserve parent's DB_NEW status */ - parent->mt_dbflags[i] = - txn->mt_dbflags[i] | (parent->mt_dbflags[i] & (DB_CREAT | DB_FRESH)); + parent->mt_dbstate[i] = + txn->mt_dbstate[i] | (parent->mt_dbstate[i] & (DB_CREAT | DB_FRESH)); } /* Remove refunded pages from parent's dirty & spill lists */ @@ -7958,7 +8021,7 @@ int mdbx_txn_commit(MDBX_txn *txn) { if (txn->tw.dirtylist->length == 0 && (txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { for (int i = txn->mt_numdbs; --i >= 0;) - mdbx_tassert(txn, (txn->mt_dbflags[i] & DB_DIRTY) == 0); + mdbx_tassert(txn, (txn->mt_dbstate[i] & DB_DIRTY) == 0); rc = MDBX_SUCCESS; goto done; } @@ -7970,15 +8033,15 @@ int mdbx_txn_commit(MDBX_txn *txn) { /* Update DB root pointers */ if (txn->mt_numdbs > CORE_DBS) { - MDBX_cursor mc; + MDBX_cursor_couple couple; MDBX_val data; data.iov_len = sizeof(MDBX_db); - rc = mdbx_cursor_init(&mc, txn, MAIN_DBI); + rc = mdbx_cursor_init(&couple.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) goto fail; for (MDBX_dbi i = CORE_DBS; i < txn->mt_numdbs; i++) { - if (txn->mt_dbflags[i] & DB_DIRTY) { + if (txn->mt_dbstate[i] & DB_DIRTY) { if (unlikely(TXN_DBI_CHANGED(txn, i))) { rc = MDBX_BAD_DBI; goto fail; @@ -7986,8 +8049,9 @@ int mdbx_txn_commit(MDBX_txn *txn) { MDBX_db *db = &txn->mt_dbs[i]; db->md_mod_txnid = txn->mt_txnid; data.iov_base = db; - WITH_CURSOR_TRACKING(mc, - rc = mdbx_cursor_put(&mc, &txn->mt_dbxs[i].md_name, + WITH_CURSOR_TRACKING(couple.outer, + rc = mdbx_cursor_put(&couple.outer, + &txn->mt_dbxs[i].md_name, &data, F_SUBDATA)); if (unlikely(rc != MDBX_SUCCESS)) 
goto fail; @@ -8837,7 +8901,7 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, } /* get untouched params from DB */ - if (pagesize < 0) + if (pagesize <= 0 || pagesize >= INT_MAX) pagesize = env->me_psize; if (size_lower < 0) size_lower = pgno2bytes(env, head->mm_geo.lower); @@ -8867,19 +8931,34 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, if (unlikely(inside_txn)) return MDBX_PANIC; - if (pagesize < 0) { - pagesize = env->me_os_psize; - if ((uintptr_t)pagesize > MAX_PAGESIZE) - pagesize = MAX_PAGESIZE; - mdbx_assert(env, (uintptr_t)pagesize >= MIN_PAGESIZE); + /* is requested some auto-value for pagesize ? */ + if (pagesize >= INT_MAX /* maximal */) + pagesize = MAX_PAGESIZE; + else if (pagesize <= 0) { + if (pagesize < 0 /* default */) { + pagesize = env->me_os_psize; + if ((uintptr_t)pagesize > MAX_PAGESIZE) + pagesize = MAX_PAGESIZE; + mdbx_assert(env, (uintptr_t)pagesize >= MIN_PAGESIZE); + } else if (pagesize == 0 /* minimal */) + pagesize = MIN_PAGESIZE; + + /* choose pagesize */ + intptr_t max_size = (size_now > size_lower) ? size_now : size_lower; + max_size = (size_upper > max_size) ? 
size_upper : max_size; + if (max_size < 0 /* default */) + max_size = DEFAULT_MAPSIZE; + else if (max_size == 0 /* minimal */) + max_size = MIN_MAPSIZE; + else if (max_size >= (intptr_t)MAX_MAPSIZE /* maximal */) + max_size = MAX_MAPSIZE; + + while (max_size > pagesize * (int64_t)MAX_PAGENO && + pagesize < MAX_PAGESIZE) + pagesize <<= 1; } } - if (pagesize == 0) - pagesize = MIN_PAGESIZE; - else if (pagesize == INTPTR_MAX) - pagesize = MAX_PAGESIZE; - if (pagesize < (intptr_t)MIN_PAGESIZE || pagesize > (intptr_t)MAX_PAGESIZE || !is_powerof2(pagesize)) { rc = MDBX_EINVAL; @@ -8891,6 +8970,8 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, if (MIN_MAPSIZE / pagesize < MIN_PAGENO) size_lower = MIN_PAGENO * pagesize; } + if (size_lower == INTPTR_MAX) + size_lower = MAX_MAPSIZE; if (size_now <= 0) { size_now = DEFAULT_MAPSIZE; @@ -8899,6 +8980,8 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, if (size_upper >= size_lower && size_now > size_upper) size_now = size_upper; } + if (size_now == INTPTR_MAX) + size_now = MAX_MAPSIZE; if (size_upper <= 0) { if ((size_t)size_now >= MAX_MAPSIZE / 2) @@ -8932,8 +9015,8 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, goto bailout; } - const size_t unit = - (env->me_os_psize > (size_t)pagesize) ? env->me_os_psize : pagesize; + const size_t unit = (env->me_os_psize > (size_t)pagesize) ? 
env->me_os_psize + : (size_t)pagesize; size_lower = ceil_powerof2(size_lower, unit); size_upper = ceil_powerof2(size_upper, unit); size_now = ceil_powerof2(size_now, unit); @@ -9100,6 +9183,7 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now, env->me_txn->mt_geo = new_geo; env->me_txn->mt_flags |= MDBX_TXN_DIRTY; } else { + meta.mm_geo = new_geo; mdbx_meta_set_txnid( env, &meta, safe64_txnid_next(mdbx_meta_txnid_stable(env, head))); rc = mdbx_sync_locked(env, env->me_flags, &meta); @@ -9227,8 +9311,9 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) { meta.mm_geo.upper * pagesize, meta.mm_geo.grow * pagesize, meta.mm_geo.shrink * pagesize, meta.mm_psize); if (unlikely(err != MDBX_SUCCESS)) { - mdbx_error("%s", "could not use present dbsize-params from db"); - return MDBX_INCOMPATIBLE; + mdbx_error("%s: err %d", "could not apply preconfigured db-geometry", + err); + return (err == MDBX_EINVAL) ? MDBX_INCOMPATIBLE : err; } } else if (env->me_dbgeo.now) { /* silently growth to last used page */ @@ -9259,8 +9344,9 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) { env->me_dbgeo.upper, env->me_dbgeo.grow, env->me_dbgeo.shrink, meta.mm_psize); if (unlikely(err != MDBX_SUCCESS)) { - mdbx_error("%s", "could not apply preconfigured dbsize-params to db"); - return MDBX_INCOMPATIBLE; + mdbx_error("%s: err %d", "could not apply preconfigured db-geometry", + err); + return (err == MDBX_EINVAL) ? 
MDBX_INCOMPATIBLE : err; } /* update meta fields */ @@ -9726,6 +9812,16 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname, mdbx_jitter4testing(false); lck->mti_magic_and_version = MDBX_LOCK_MAGIC; lck->mti_os_and_format = MDBX_LOCK_FORMAT; + err = mdbx_msync(&env->me_lck_mmap, 0, (size_t)size, false); + if (unlikely(err != MDBX_SUCCESS)) { + mdbx_error("initial-%s for lck-file failed", "msync"); + goto bailout; + } + err = mdbx_filesync(env->me_lck_mmap.fd, MDBX_SYNC_SIZE); + if (unlikely(err != MDBX_SUCCESS)) { + mdbx_error("initial-%s for lck-file failed", "fsync"); + goto bailout; + } } else { if (lck->mti_magic_and_version != MDBX_LOCK_MAGIC) { mdbx_error("%s", "lock region has invalid magic/version"); @@ -9896,6 +9992,19 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { #error "Persistent DB flags & env flags overlap, but both go in mm_flags" #endif +/* Merge flags and avoid false MDBX_UTTERLY_NOSYNC */ +static uint32_t merge_flags(const uint32_t a, const uint32_t b) { + uint32_t r = a | b; + if (F_ISSET(r, MDBX_UTTERLY_NOSYNC) && !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && + !F_ISSET(b, MDBX_UTTERLY_NOSYNC)) + r -= (r & MDBX_WRITEMAP) ? 
MDBX_UTTERLY_NOSYNC ^ MDBX_MAPASYNC + : MDBX_UTTERLY_NOSYNC ^ MDBX_SAFE_NOSYNC; + assert(!(F_ISSET(r, MDBX_UTTERLY_NOSYNC) && + !F_ISSET(a, MDBX_UTTERLY_NOSYNC) && + !F_ISSET(b, MDBX_UTTERLY_NOSYNC))); + return r; +} + int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags, mode_t mode) { if (unlikely(!env || !pathname)) @@ -9911,6 +10020,71 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags, (env->me_flags & MDBX_ENV_ACTIVE) != 0) return MDBX_EPERM; +#if defined(_WIN32) || defined(_WIN64) + const size_t wlen = mbstowcs(nullptr, pathname, INT_MAX); + if (wlen < 1 || wlen > /* MAX_PATH */ INT16_MAX) + return ERROR_INVALID_NAME; + wchar_t *const pathnameW = _alloca((wlen + 1) * sizeof(wchar_t)); + if (wlen != mbstowcs(pathnameW, pathname, wlen + 1)) + return ERROR_INVALID_NAME; +#endif /* Windows */ + + /* pickup previously mdbx_env_set_flags(), + * but avoid MDBX_UTTERLY_NOSYNC by disjunction */ + flags = merge_flags(flags, env->me_flags); + +#if defined(_WIN32) || defined(_WIN64) + const DWORD dwAttrib = GetFileAttributesW(pathnameW); + if (dwAttrib == INVALID_FILE_ATTRIBUTES) { + int rc = GetLastError(); + if (rc != MDBX_ENOFILE) + return rc; + if (mode == 0 || (flags & MDBX_RDONLY) != 0) + /* can't open existing */ + return rc; + + /* auto-create directory if requested */ + if ((flags & MDBX_NOSUBDIR) == 0 && !CreateDirectoryW(pathnameW, nullptr)) { + rc = GetLastError(); + if (rc != ERROR_ALREADY_EXISTS) + return rc; + } + } else { + /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ + flags |= MDBX_NOSUBDIR; + if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY) + flags -= MDBX_NOSUBDIR; + } +#else + struct stat st; + if (stat(pathname, &st)) { + int rc = errno; + if (rc != MDBX_ENOFILE) + return rc; + if (mode == 0 || (flags & MDBX_RDONLY) != 0) + /* can't open existing */ + return rc; + + /* auto-create directory if requested */ + const mode_t dir_mode = + (/* inherit read/write permissions for group and 
others */ mode & + (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | + /* always add read/write/search for owner */ S_IRWXU | + ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) | + ((mode & S_IROTH) ? /* +search if readable by others */ S_IXOTH : 0); + if ((flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) { + rc = errno; + if (rc != EEXIST) + return rc; + } + } else { + /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ + flags |= MDBX_NOSUBDIR; + if (S_ISDIR(st.st_mode)) + flags -= MDBX_NOSUBDIR; + } +#endif + size_t len_full, len = strlen(pathname); if (flags & MDBX_NOSUBDIR) { len_full = len + sizeof(MDBX_LOCK_SUFFIX) + len + 1; @@ -9933,7 +10107,6 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags, } int rc = MDBX_SUCCESS; - flags |= env->me_flags; if (flags & MDBX_RDONLY) { /* LY: silently ignore irrelevant flags when * we're only getting read access */ @@ -9976,36 +10149,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags, } env->me_dbxs[FREE_DBI].md_cmp = mdbx_cmp_int_align4; /* aligned MDBX_INTEGERKEY */ - - if ((flags & (MDBX_RDONLY | MDBX_NOSUBDIR)) == 0 && mode != 0) { -#if defined(_WIN32) || defined(_WIN64) - const size_t wlen = mbstowcs(nullptr, pathname, INT_MAX); - if (wlen < 1 || wlen > /* MAX_PATH */ INT16_MAX) - return ERROR_INVALID_NAME; - wchar_t *const pathnameW = _alloca((wlen + 1) * sizeof(wchar_t)); - if (wlen != mbstowcs(pathnameW, pathname, wlen + 1)) { - rc = ERROR_INVALID_NAME; - goto bailout; - } - if (!CreateDirectoryW(pathnameW, nullptr)) { - rc = GetLastError(); - if (rc != ERROR_ALREADY_EXISTS) - goto bailout; - } -#else - const mode_t dir_mode = - (/* inherit read/write permissions for group and others */ mode & - (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) | - /* always add read/write/search for owner */ S_IRWXU | - ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) | - ((mode & S_IROTH) ? 
/* +search if readable by others */ S_IXOTH : 0); - if (mkdir(pathname, dir_mode)) { - rc = errno; - if (rc != EEXIST) - goto bailout; - } -#endif - } + env->me_dbxs[FREE_DBI].md_dcmp = mdbx_cmp_lenfast; rc = mdbx_openfile(F_ISSET(flags, MDBX_RDONLY) ? MDBX_OPEN_DXB_READ : MDBX_OPEN_DXB_LAZY, @@ -10031,7 +10175,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags, #if !(defined(_WIN32) || defined(_WIN64)) if (mode == 0) { - struct stat st; + /* pickup mode for lck-file */ if (fstat(env->me_lazy_fd, &st)) { rc = errno; goto bailout; @@ -10039,7 +10183,6 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags, mode = st.st_mode; } #endif /* !Windows */ - const int lck_rc = mdbx_setup_lck(env, lck_pathname, mode); if (MDBX_IS_ERROR(lck_rc)) { rc = lck_rc; @@ -10123,7 +10266,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags, txn->mt_dbs = (MDBX_db *)((char *)txn + tsize); txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs); txn->mt_dbiseqs = (unsigned *)(txn->mt_cursors + env->me_maxdbs); - txn->mt_dbflags = (uint8_t *)(txn->mt_dbiseqs + env->me_maxdbs); + txn->mt_dbstate = (uint8_t *)(txn->mt_dbiseqs + env->me_maxdbs); txn->mt_env = env; txn->mt_dbxs = env->me_dbxs; txn->mt_flags = MDBX_TXN_FINISHED; @@ -10176,7 +10319,6 @@ static int __cold mdbx_env_close0(MDBX_env *env) { return MDBX_SUCCESS; } - env->me_signature = 0; env->me_flags &= ~MDBX_ENV_ACTIVE; env->me_oldest = nullptr; env->me_sync_timestamp = nullptr; @@ -10289,6 +10431,7 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) { #endif } + env->me_signature = 0; rc = mdbx_env_close0(env) ? 
MDBX_PANIC : rc; mdbx_ensure(env, mdbx_fastmutex_destroy(&env->me_dbi_lock) == MDBX_SUCCESS); #if defined(_WIN32) || defined(_WIN64) @@ -10400,13 +10543,19 @@ static int __hot mdbx_cmp_memnr(const MDBX_val *a, const MDBX_val *b) { return CMP2INT(a->iov_len, b->iov_len); } +/* Fast non-lexically comparator */ +static int __hot mdbx_cmp_lenfast(const MDBX_val *a, const MDBX_val *b) { + int diff = CMP2INT(a->iov_len, b->iov_len); + return likely(diff) ? diff : memcmp(a->iov_base, b->iov_base, a->iov_len); +} + /* Search for key within a page, using binary search. * Returns the smallest entry larger or equal to the key. * If exactp is non-null, stores whether the found entry was an exact match * in *exactp (1 or 0). * Updates the cursor index with the index of the found entry. * If no entry larger or equal to the key is found, returns NULL. */ -static MDBX_node *__hot mdbx_node_search(MDBX_cursor *mc, MDBX_val *key, +static MDBX_node *__hot mdbx_node_search(MDBX_cursor *mc, const MDBX_val *key, int *exactp) { MDBX_page *mp = mc->mc_pg[mc->mc_top]; const int nkeys = page_numkeys(mp); @@ -10614,7 +10763,7 @@ dirty: } if (mdbx_audit_enabled()) { - int err = mdbx_page_check(env, p, true); + int err = mdbx_page_check(mc, p, C_UPDATING); if (unlikely(err != MDBX_SUCCESS)) return err; } @@ -10630,7 +10779,7 @@ corrupted: /* Finish mdbx_page_search() / mdbx_page_search_lowest(). * The cursor is at the root page, set up the rest of it. 
*/ -__hot static int mdbx_page_search_root(MDBX_cursor *mc, MDBX_val *key, +__hot static int mdbx_page_search_root(MDBX_cursor *mc, const MDBX_val *key, int flags) { MDBX_page *mp = mc->mc_pg[mc->mc_top]; int rc; @@ -10709,25 +10858,54 @@ __hot static int mdbx_page_search_root(MDBX_cursor *mc, MDBX_val *key, return MDBX_SUCCESS; } +static int mdbx_setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, + const unsigned pagesize) { + if (unlikely(!dbx->md_cmp)) { + dbx->md_cmp = mdbx_default_keycmp(db->md_flags); + dbx->md_dcmp = mdbx_default_datacmp(db->md_flags); + } + + dbx->md_klen_min = + (db->md_flags & MDBX_INTEGERKEY) ? 4 /* sizeof(uint32_t) */ : 0; + dbx->md_klen_max = mdbx_limits_keysize_max(pagesize, db->md_flags); + assert(dbx->md_klen_max != (unsigned)-1); + + dbx->md_vlen_min = (db->md_flags & MDBX_INTEGERDUP) + ? 4 /* sizeof(uint32_t) */ + : ((db->md_flags & MDBX_DUPFIXED) ? 1 : 0); + dbx->md_vlen_max = mdbx_limits_valsize_max(pagesize, db->md_flags); + assert(dbx->md_vlen_max != (unsigned)-1); + + if ((db->md_flags & (MDBX_DUPFIXED | MDBX_INTEGERDUP)) != 0 && db->md_xsize) { + if (unlikely(db->md_xsize < dbx->md_vlen_min || + db->md_xsize > dbx->md_vlen_max)) + return MDBX_CORRUPTED; + dbx->md_vlen_min = dbx->md_vlen_max = db->md_xsize; + } + return MDBX_SUCCESS; +} + static int mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi) { - MDBX_cursor mc; + MDBX_cursor_couple couple; if (unlikely(TXN_DBI_CHANGED(txn, dbi))) return MDBX_BAD_DBI; - int rc = mdbx_cursor_init(&mc, txn, MAIN_DBI); + int rc = mdbx_cursor_init(&couple.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; - rc = mdbx_page_search(&mc, &txn->mt_dbxs[dbi].md_name, 0); + + MDBX_dbx *const dbx = &txn->mt_dbxs[dbi]; + rc = mdbx_page_search(&couple.outer, &dbx->md_name, 0); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_NOTFOUND) ? 
MDBX_BAD_DBI : rc; MDBX_val data; int exact = 0; - MDBX_node *node = mdbx_node_search(&mc, &txn->mt_dbxs[dbi].md_name, &exact); + MDBX_node *node = mdbx_node_search(&couple.outer, &dbx->md_name, &exact); if (unlikely(!exact)) return MDBX_BAD_DBI; if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) return MDBX_INCOMPATIBLE; /* not a named DB */ - rc = mdbx_node_read(&mc, node, &data); + rc = mdbx_node_read(&couple.outer, node, &data); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -10737,11 +10915,16 @@ static int mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi) { uint16_t md_flags = UNALIGNED_PEEK_16(data.iov_base, MDBX_db, md_flags); /* The txn may not know this DBI, or another process may * have dropped and recreated the DB with other flags. */ - if (unlikely((txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS) != md_flags)) + MDBX_db *const db = &txn->mt_dbs[dbi]; + if (unlikely((db->md_flags & PERSISTENT_FLAGS) != md_flags)) return MDBX_INCOMPATIBLE; - memcpy(&txn->mt_dbs[dbi], data.iov_base, sizeof(MDBX_db)); - txn->mt_dbflags[dbi] &= ~DB_STALE; + memcpy(db, data.iov_base, sizeof(MDBX_db)); + rc = mdbx_setup_dbx(dbx, db, txn->mt_env->me_psize); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + txn->mt_dbstate[dbi] &= ~DB_STALE; return MDBX_SUCCESS; } @@ -10779,7 +10962,8 @@ __hot static int mdbx_page_search_lowest(MDBX_cursor *mc) { * lookups. * * Returns 0 on success, non-zero on failure. 
*/ -__hot static int mdbx_page_search(MDBX_cursor *mc, MDBX_val *key, int flags) { +__hot static int mdbx_page_search(MDBX_cursor *mc, const MDBX_val *key, + int flags) { int rc; pgno_t root; @@ -10791,7 +10975,7 @@ __hot static int mdbx_page_search(MDBX_cursor *mc, MDBX_val *key, int flags) { } /* Make sure we're using an up-to-date root */ - if (unlikely(*mc->mc_dbflag & DB_STALE)) { + if (unlikely(*mc->mc_dbstate & DB_STALE)) { rc = mdbx_fetch_sdb(mc->mc_txn, mc->mc_dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -11215,13 +11399,34 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, exactp = exactp ? exactp : &stub_exactp; - if ((mc->mc_db->md_flags & MDBX_INTEGERKEY) && - unlikely(key->iov_len != sizeof(uint32_t) && - key->iov_len != sizeof(uint64_t))) { - mdbx_cassert(mc, !"key-size is invalid for MDBX_INTEGERKEY"); + if (unlikely(key->iov_len < mc->mc_dbx->md_klen_min || + key->iov_len > mc->mc_dbx->md_klen_max)) { + mdbx_cassert(mc, !"Invalid key-size"); return MDBX_BAD_VALSIZE; } + MDBX_val aligned_key = *key; + uint64_t aligned_keybytes; + if (mc->mc_db->md_flags & MDBX_INTEGERKEY) { + switch (aligned_key.iov_len) { + default: + mdbx_cassert(mc, !"key-size is invalid for MDBX_INTEGERKEY"); + return MDBX_BAD_VALSIZE; + case 4: + if (unlikely(3 & (uintptr_t)aligned_key.iov_base)) + /* copy instead of return error to avoid break compatibility */ + aligned_key.iov_base = + memcpy(&aligned_keybytes, aligned_key.iov_base, 4); + break; + case 8: + if (unlikely(7 & (uintptr_t)aligned_key.iov_base)) + /* copy instead of return error to avoid break compatibility */ + aligned_key.iov_base = + memcpy(&aligned_keybytes, aligned_key.iov_base, 8); + break; + } + } + if (mc->mc_xcursor) mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); @@ -11242,7 +11447,7 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, node = page_node(mp, 0); get_key(node, &nodekey); } - rc = mc->mc_dbx->md_cmp(key, &nodekey); 
+ rc = mc->mc_dbx->md_cmp(&aligned_key, &nodekey); if (unlikely(rc == 0)) { /* Probably happens rarely, but first node on the page * was the one we wanted. */ @@ -11260,7 +11465,7 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, node = page_node(mp, nkeys - 1); get_key(node, &nodekey); } - rc = mc->mc_dbx->md_cmp(key, &nodekey); + rc = mc->mc_dbx->md_cmp(&aligned_key, &nodekey); if (rc == 0) { /* last node was the one we wanted */ mdbx_cassert(mc, nkeys >= 1 && nkeys <= UINT16_MAX + 1); @@ -11278,7 +11483,7 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, node = page_node(mp, mc->mc_ki[mc->mc_top]); get_key(node, &nodekey); } - rc = mc->mc_dbx->md_cmp(key, &nodekey); + rc = mc->mc_dbx->md_cmp(&aligned_key, &nodekey); if (rc == 0) { /* current node was the one we wanted */ *exactp = 1; @@ -11315,7 +11520,7 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mc->mc_pg[0] = 0; } - rc = mdbx_page_search(mc, key, 0); + rc = mdbx_page_search(mc, &aligned_key, 0); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -11323,7 +11528,7 @@ static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, mdbx_cassert(mc, IS_LEAF(mp)); set2: - node = mdbx_node_search(mc, key, exactp); + node = mdbx_node_search(mc, &aligned_key, exactp); if (exactp != &stub_exactp && !*exactp) { /* MDBX_SET specified and not an exact match. */ return MDBX_NOTFOUND; @@ -11362,25 +11567,43 @@ set1: if (op == MDBX_SET || op == MDBX_SET_KEY || op == MDBX_SET_RANGE) { rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); } else { - int ex2, *ex2p; - if (op == MDBX_GET_BOTH) { - ex2p = &ex2; - ex2 = 0; - } else { - ex2p = NULL; - } + int ex2 = 0, *ex2p = (op == MDBX_GET_BOTH) ? 
&ex2 : NULL; rc = mdbx_cursor_set(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_SET_RANGE, ex2p); if (unlikely(rc != MDBX_SUCCESS)) return rc; } } else if (op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE) { + if (unlikely(data->iov_len < mc->mc_dbx->md_vlen_min || + data->iov_len > mc->mc_dbx->md_vlen_max)) { + mdbx_cassert(mc, !"Invalid data-size"); + return MDBX_BAD_VALSIZE; + } + MDBX_val aligned_data = *data; + uint64_t aligned_databytes; + if (mc->mc_db->md_flags & MDBX_INTEGERDUP) { + switch (aligned_data.iov_len) { + default: + mdbx_cassert(mc, !"data-size is invalid for MDBX_INTEGERDUP"); + return MDBX_BAD_VALSIZE; + case 4: + if (unlikely(3 & (uintptr_t)aligned_data.iov_base)) + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = + memcpy(&aligned_databytes, aligned_data.iov_base, 4); + break; + case 8: + if (unlikely(7 & (uintptr_t)aligned_data.iov_base)) + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = + memcpy(&aligned_databytes, aligned_data.iov_base, 8); + break; + } + } MDBX_val olddata; if (unlikely((rc = mdbx_node_read(mc, node, &olddata)) != MDBX_SUCCESS)) return rc; - if (unlikely(mc->mc_dbx->md_dcmp == NULL)) - return MDBX_EINVAL; - rc = mc->mc_dbx->md_dcmp(data, &olddata); + rc = mc->mc_dbx->md_dcmp(&aligned_data, &olddata); if (rc) { if (op != MDBX_GET_BOTH_RANGE || rc > 0) return MDBX_NOTFOUND; @@ -11668,7 +11891,7 @@ static int mdbx_cursor_touch(MDBX_cursor *mc) { int rc = MDBX_SUCCESS; if (mc->mc_dbi >= CORE_DBS && - (*mc->mc_dbflag & (DB_DIRTY | DB_DUPDATA)) == 0) { + (*mc->mc_dbstate & (DB_DIRTY | DB_DUPDATA)) == 0) { mdbx_cassert(mc, (mc->mc_flags & C_RECLAIMING) == 0); /* Touch DB record of named DB */ MDBX_cursor_couple cx; @@ -11680,7 +11903,7 @@ static int mdbx_cursor_touch(MDBX_cursor *mc) { rc = mdbx_page_search(&cx.outer, &mc->mc_dbx->md_name, MDBX_PS_MODIFY); if (unlikely(rc)) return rc; - *mc->mc_dbflag |= DB_DIRTY; + *mc->mc_dbstate |= DB_DIRTY; } 
mc->mc_top = 0; if (mc->mc_snum) { @@ -11727,7 +11950,8 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, } if (flags & MDBX_RESERVE) { - if (unlikely(mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_REVERSEDUP))) + if (unlikely(mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | + MDBX_INTEGERDUP | MDBX_DUPFIXED))) return MDBX_INCOMPATIBLE; data->iov_base = nullptr; } @@ -11738,37 +11962,64 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, if (unlikely(mc->mc_txn->mt_flags & (MDBX_RDONLY | MDBX_TXN_BLOCKED))) return (mc->mc_txn->mt_flags & MDBX_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN; - if ((mc->mc_flags & C_SUB) == 0) { - if (unlikely(key->iov_len > (size_t)((mc->mc_db->md_flags & MDBX_DUPSORT) - ? env->me_maxkey_ds - : env->me_maxkey_nd) || - data->iov_len > ((mc->mc_db->md_flags & MDBX_DUPSORT) - ? env->me_maxval_ds - : env->me_maxval_nd))) { + uint64_t aligned_keybytes, aligned_databytes; + MDBX_val aligned_key, aligned_data; + if (likely((mc->mc_flags & C_SUB) == 0)) { + if (unlikely(key->iov_len < mc->mc_dbx->md_klen_min || + key->iov_len > mc->mc_dbx->md_klen_max)) { + mdbx_cassert(mc, !"Invalid key-size"); + return MDBX_BAD_VALSIZE; + } + if (unlikely(data->iov_len < mc->mc_dbx->md_vlen_min || + data->iov_len > mc->mc_dbx->md_vlen_max)) { + mdbx_cassert(mc, !"Invalid data-size"); return MDBX_BAD_VALSIZE; } - if ((mc->mc_db->md_flags & MDBX_INTEGERKEY)) { - if (unlikely(key->iov_len != sizeof(uint32_t) && - key->iov_len != sizeof(uint64_t))) { + if (mc->mc_db->md_flags & MDBX_INTEGERKEY) { + switch (key->iov_len) { + default: mdbx_cassert(mc, !"key-size is invalid for MDBX_INTEGERKEY"); return MDBX_BAD_VALSIZE; - } - if (unlikely(3 & (uintptr_t)key->iov_base)) { - mdbx_cassert(mc, !"key-alignment is invalid for MDBX_INTEGERKEY"); - return MDBX_BAD_VALSIZE; + case 4: + if (unlikely(3 & (uintptr_t)key->iov_base)) { + /* copy instead of return error to avoid break compatibility */ + aligned_key.iov_base = + 
memcpy(&aligned_keybytes, key->iov_base, aligned_key.iov_len = 4); + key = &aligned_key; + } + break; + case 8: + if (unlikely(7 & (uintptr_t)key->iov_base)) { + /* copy instead of return error to avoid break compatibility */ + aligned_key.iov_base = + memcpy(&aligned_keybytes, key->iov_base, aligned_key.iov_len = 8); + key = &aligned_key; + } + break; } } - - if ((mc->mc_db->md_flags & MDBX_INTEGERDUP)) { - if (unlikely(data->iov_len != sizeof(uint32_t) && - data->iov_len != sizeof(uint64_t))) { - mdbx_cassert(mc, !"data-size is invalid for MDBX_INTEGERDUP"); - return MDBX_BAD_VALSIZE; - } - if (unlikely(3 & (uintptr_t)data->iov_base)) { - mdbx_cassert(mc, !"data-alignment is invalid for MDBX_INTEGERDUP"); + if (mc->mc_db->md_flags & MDBX_INTEGERDUP) { + switch (data->iov_len) { + default: + mdbx_cassert(mc, !"data-size is invalid for MDBX_INTEGERKEY"); return MDBX_BAD_VALSIZE; + case 4: + if (unlikely(3 & (uintptr_t)data->iov_base)) { + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = memcpy(&aligned_databytes, data->iov_base, + aligned_data.iov_len = 4); + data = &aligned_data; + } + break; + case 8: + if (unlikely(7 & (uintptr_t)data->iov_base)) { + /* copy instead of return error to avoid break compatibility */ + aligned_data.iov_base = memcpy(&aligned_databytes, data->iov_base, + aligned_data.iov_len = 8); + data = &aligned_data; + } + break; } } } @@ -11827,12 +12078,10 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, rc = MDBX_NO_ROOT; } else if ((flags & MDBX_CURRENT) == 0) { int exact = 0; - MDBX_val d2; if ((flags & MDBX_APPEND) != 0 && mc->mc_db->md_entries > 0) { - MDBX_val k2; - rc = mdbx_cursor_last(mc, &k2, &d2); + rc = mdbx_cursor_last(mc, &dkey, &olddata); if (rc == 0) { - rc = mc->mc_dbx->md_cmp(key, &k2); + rc = mc->mc_dbx->md_cmp(key, &dkey); if (rc > 0) { rc = MDBX_NOTFOUND; mc->mc_ki[mc->mc_top]++; @@ -11842,15 +12091,26 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val 
*key, MDBX_val *data, } } } else { - rc = mdbx_cursor_set(mc, (MDBX_val *)key, &d2, MDBX_SET, &exact); + rc = mdbx_cursor_set(mc, (MDBX_val *)key, &olddata, MDBX_SET, &exact); } if ((flags & MDBX_NOOVERWRITE) && (rc == MDBX_SUCCESS || rc == MDBX_EKEYMISMATCH)) { mdbx_debug("duplicate key [%s]", DKEY(key)); - *data = d2; + *data = olddata; return MDBX_KEYEXIST; } - if (rc && unlikely(rc != MDBX_NOTFOUND)) + if (likely(rc == MDBX_SUCCESS)) { + if (exact) { + if (mc->mc_flags & C_SUB) { + mdbx_assert(env, data->iov_len == 0); + return (flags & MDBX_NODUPDATA) ? MDBX_KEYEXIST : MDBX_SUCCESS; + } + if (!(flags & MDBX_RESERVE) && + unlikely(mc->mc_dbx->md_dcmp(data, &olddata) == 0)) + return ((flags & MDBX_NODUPDATA) && mc->mc_xcursor) ? MDBX_KEYEXIST + : MDBX_SUCCESS; + } + } else if (unlikely(rc != MDBX_NOTFOUND)) return rc; } @@ -11868,7 +12128,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, return rc2; } - if (rc == MDBX_NO_ROOT) { + if (unlikely(rc == MDBX_NO_ROOT)) { MDBX_page *np; /* new database, write a root leaf page */ mdbx_debug("%s", "allocating new root leaf page"); @@ -11880,7 +12140,22 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, return rc2; mc->mc_db->md_root = np->mp_pgno; mc->mc_db->md_depth++; - *mc->mc_dbflag |= DB_DIRTY; + if (mc->mc_db->md_flags & MDBX_INTEGERKEY) { + assert(key->iov_len >= mc->mc_dbx->md_klen_min && + key->iov_len <= mc->mc_dbx->md_klen_max); + mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = key->iov_len; + } + if (mc->mc_db->md_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) { + assert(data->iov_len >= mc->mc_dbx->md_vlen_min && + data->iov_len <= mc->mc_dbx->md_vlen_max); + mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = data->iov_len; + assert(mc->mc_xcursor != NULL); + mc->mc_db->md_xsize = mc->mc_xcursor->mx_db.md_xsize = + (unsigned)data->iov_len; + mc->mc_xcursor->mx_dbx.md_klen_min = mc->mc_xcursor->mx_dbx.md_klen_max = + data->iov_len; + } + *mc->mc_dbstate 
|= DB_DIRTY; if ((mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_DUPFIXED)) == MDBX_DUPFIXED) np->mp_flags |= P_LEAF2; mc->mc_flags |= C_INITIALIZED; @@ -11904,7 +12179,8 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, /* See note inside leaf_size() */ env->me_branch_nodemax) { /* Too big for a node, insert in sub-DB. Set up an empty * "old sub-page" for prep_subDB to expand to a full page. */ - fp->mp_leaf2_ksize = (uint16_t)data->iov_len /* used if MDBX_DUPFIXED */; + fp->mp_leaf2_ksize = + (mc->mc_db->md_flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; fp->mp_lower = fp->mp_upper = 0; olddata.iov_len = PAGEHDRSZ; goto prep_subDB; @@ -11940,7 +12216,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, } if (mdbx_audit_enabled()) { - int err = mdbx_cursor_check(mc, false); + int err = mdbx_cursor_check(mc, 0); if (unlikely(err != MDBX_SUCCESS)) return err; } @@ -11949,7 +12225,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, more:; if (mdbx_audit_enabled()) { - int err = mdbx_cursor_check(mc, false); + int err = mdbx_cursor_check(mc, 0); if (unlikely(err != MDBX_SUCCESS)) return err; } @@ -12021,7 +12297,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, memcpy(page_data(omp), data->iov_base, data->iov_len); if (mdbx_audit_enabled()) { - int err = mdbx_cursor_check(mc, false); + int err = mdbx_cursor_check(mc, 0); if (unlikely(err != MDBX_SUCCESS)) return err; } @@ -12131,12 +12407,10 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, fp_flags &= ~P_SUBP; prep_subDB: nested_dupdb.md_xsize = 0; - nested_dupdb.md_flags = 0; + nested_dupdb.md_flags = flags_db2sub(mc->mc_db->md_flags); if (mc->mc_db->md_flags & MDBX_DUPFIXED) { fp_flags |= P_LEAF2; nested_dupdb.md_xsize = fp->mp_leaf2_ksize; - if (mc->mc_db->md_flags & MDBX_INTEGERDUP) - nested_dupdb.md_flags = MDBX_INTEGERKEY; } nested_dupdb.md_depth = 1; 
nested_dupdb.md_branch_pages = 0; @@ -12214,7 +12488,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, } if (mdbx_audit_enabled()) { - int err = mdbx_cursor_check(mc, false); + int err = mdbx_cursor_check(mc, 0); if (unlikely(err != MDBX_SUCCESS)) return err; } @@ -12237,7 +12511,7 @@ new_sub: nflags |= MDBX_SPLIT_REPLACE; rc = mdbx_page_split(mc, key, rdata, P_INVALID, nflags); if (rc == MDBX_SUCCESS && mdbx_audit_enabled()) - rc = mdbx_cursor_check(mc, false); + rc = mdbx_cursor_check(mc, 0); } else { /* There is room already in this leaf page. */ if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { @@ -12325,8 +12599,10 @@ new_sub: } mdbx_cassert(mc, mc->mc_xcursor->mx_db.md_entries < PTRDIFF_MAX); ecount = (size_t)mc->mc_xcursor->mx_db.md_entries; - if (flags & MDBX_APPENDDUP) - xflags |= MDBX_APPEND; +#define SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND 1 + STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) == + MDBX_APPEND); + xflags |= (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; rc = mdbx_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags); if (flags & F_SUBDATA) { void *db = node_data(node); @@ -12358,7 +12634,7 @@ new_sub: } } if (rc == MDBX_SUCCESS && mdbx_audit_enabled()) - rc = mdbx_cursor_check(mc, false); + rc = mdbx_cursor_check(mc, 0); return rc; bad_sub: if (unlikely(rc == MDBX_KEYEXIST)) @@ -12674,14 +12950,16 @@ static int __must_check_result mdbx_node_add_leaf(MDBX_cursor *mc, memcpy(nodedata, data->iov_base, sizeof(pgno_t)); else if (unlikely(flags & MDBX_RESERVE)) data->iov_base = nodedata; - else if (likely(nodedata != data->iov_base)) + else if (likely(nodedata != data->iov_base && + data->iov_len /* to avoid UBSAN traps*/ != 0)) memcpy(nodedata, data->iov_base, data->iov_len); } else { poke_pgno(nodedata, largepage->mp_pgno); nodedata = page_data(largepage); if (unlikely(flags & MDBX_RESERVE)) data->iov_base = nodedata; - else if (likely(nodedata != data->iov_base)) + else if 
(likely(nodedata != data->iov_base && + data->iov_len /* to avoid UBSAN traps*/ != 0)) memcpy(nodedata, data->iov_base, data->iov_len); } return MDBX_SUCCESS; @@ -12746,6 +13024,13 @@ static void mdbx_node_del(MDBX_cursor *mc, size_t ksize) { mp->mp_lower -= sizeof(indx_t); mdbx_cassert(mc, (size_t)UINT16_MAX - mp->mp_upper >= sz); mp->mp_upper += (indx_t)sz; + +#if MDBX_DEBUG > 0 + if (mdbx_audit_enabled()) { + int page_check_err = mdbx_page_check(mc, mp, C_UPDATING); + mdbx_cassert(mc, page_check_err == MDBX_SUCCESS); + } +#endif } /* Compact the main page after deleting a node on a subpage. @@ -12819,14 +13104,16 @@ static int mdbx_xcursor_init0(MDBX_cursor *mc) { mx->mx_cursor.mc_db = &mx->mx_db; mx->mx_cursor.mc_dbx = &mx->mx_dbx; mx->mx_cursor.mc_dbi = mc->mc_dbi; - mx->mx_cursor.mc_dbflag = &mx->mx_dbflag; + mx->mx_cursor.mc_dbstate = &mx->mx_dbstate; mx->mx_cursor.mc_snum = 0; mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags = C_SUB; + mx->mx_cursor.mc_flags = C_SUB | (mc->mc_flags & (C_COPYING | C_SKIPORD)); mx->mx_dbx.md_name.iov_len = 0; mx->mx_dbx.md_name.iov_base = NULL; mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp; mx->mx_dbx.md_dcmp = NULL; + mx->mx_dbx.md_klen_min = INT_MAX; + mx->mx_dbx.md_vlen_min = mx->mx_dbx.md_klen_max = mx->mx_dbx.md_vlen_max = 0; return MDBX_SUCCESS; } @@ -12847,13 +13134,11 @@ static int mdbx_xcursor_init1(MDBX_cursor *mc, MDBX_node *node) { mx->mx_cursor.mc_pg[0] = 0; mx->mx_cursor.mc_snum = 0; mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags = C_SUB; + mx->mx_cursor.mc_flags = C_SUB | (mc->mc_flags & (C_COPYING | C_SKIPORD)); } else { if (unlikely(node_ds(node) <= PAGEHDRSZ)) return MDBX_CORRUPTED; MDBX_page *fp = node_data(node); - mx->mx_db.md_xsize = 0; - mx->mx_db.md_flags = 0; mx->mx_db.md_depth = 1; mx->mx_db.md_branch_pages = 0; mx->mx_db.md_leaf_pages = 1; @@ -12862,18 +13147,32 @@ static int mdbx_xcursor_init1(MDBX_cursor *mc, MDBX_node *node) { mx->mx_db.md_root = fp->mp_pgno; mx->mx_cursor.mc_snum = 1; 
mx->mx_cursor.mc_top = 0; - mx->mx_cursor.mc_flags = C_INITIALIZED | C_SUB; + mx->mx_cursor.mc_flags = + C_INITIALIZED | C_SUB | (mc->mc_flags & (C_COPYING | C_SKIPORD)); mx->mx_cursor.mc_pg[0] = fp; mx->mx_cursor.mc_ki[0] = 0; - if (mc->mc_db->md_flags & MDBX_DUPFIXED) { - mx->mx_db.md_xsize = fp->mp_leaf2_ksize; - if (mc->mc_db->md_flags & MDBX_INTEGERDUP) - mx->mx_db.md_flags = MDBX_INTEGERKEY; - } + mx->mx_db.md_flags = flags_db2sub(mc->mc_db->md_flags); + mx->mx_db.md_xsize = + (mc->mc_db->md_flags & MDBX_DUPFIXED) ? fp->mp_leaf2_ksize : 0; + } + + if (unlikely(mx->mx_db.md_xsize != mc->mc_db->md_xsize)) { + if (unlikely(mc->mc_db->md_xsize != 0)) + return MDBX_CORRUPTED; + if (unlikely((mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) + return MDBX_CORRUPTED; + if (unlikely(mx->mx_db.md_xsize < mc->mc_dbx->md_vlen_min || + mx->mx_db.md_xsize > mc->mc_dbx->md_vlen_max)) + return MDBX_CORRUPTED; + mc->mc_db->md_xsize = mx->mx_db.md_xsize; + mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = mx->mx_db.md_xsize; } + mx->mx_dbx.md_klen_min = mc->mc_dbx->md_vlen_min; + mx->mx_dbx.md_klen_max = mc->mc_dbx->md_vlen_max; + mdbx_debug("Sub-db -%u root page %" PRIaPGNO, mx->mx_cursor.mc_dbi, mx->mx_db.md_root); - mx->mx_dbflag = DB_VALID | DB_USRVALID | DB_DUPDATA; + mx->mx_dbstate = DB_VALID | DB_USRVALID | DB_DUPDATA; return MDBX_SUCCESS; } @@ -12895,54 +13194,69 @@ static int mdbx_xcursor_init2(MDBX_cursor *mc, MDBX_xcursor *src_mx, mx->mx_cursor.mc_top = 0; mx->mx_cursor.mc_flags |= C_INITIALIZED; mx->mx_cursor.mc_ki[0] = 0; - mx->mx_dbflag = DB_VALID | DB_USRVALID | DB_DUPDATA; - mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp; - } else if (!(mx->mx_cursor.mc_flags & C_INITIALIZED)) { - return MDBX_SUCCESS; + mx->mx_dbstate = DB_VALID | DB_USRVALID | DB_DUPDATA; } + + mx->mx_dbx.md_klen_min = src_mx->mx_dbx.md_klen_min; + mx->mx_dbx.md_klen_max = src_mx->mx_dbx.md_klen_max; + mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp; mx->mx_db = src_mx->mx_db; mx->mx_cursor.mc_pg[0] = 
src_mx->mx_cursor.mc_pg[0]; - mdbx_debug("Sub-db -%u root page %" PRIaPGNO, mx->mx_cursor.mc_dbi, - mx->mx_db.md_root); + if (mx->mx_cursor.mc_flags & C_INITIALIZED) { + mdbx_debug("Sub-db -%u root page %" PRIaPGNO, mx->mx_cursor.mc_dbi, + mx->mx_db.md_root); + } return MDBX_SUCCESS; } -/* Initialize a cursor for a given transaction and database. */ -static int mdbx_cursor_init(MDBX_cursor *mc, MDBX_txn *txn, MDBX_dbi dbi) { - mc->mc_signature = MDBX_MC_SIGNATURE; - mc->mc_next = NULL; - mc->mc_backup = NULL; - mc->mc_dbi = dbi; - mc->mc_txn = txn; - mc->mc_db = &txn->mt_dbs[dbi]; - mc->mc_dbx = &txn->mt_dbxs[dbi]; - mc->mc_dbflag = &txn->mt_dbflags[dbi]; - mc->mc_snum = 0; - mc->mc_top = 0; - mc->mc_pg[0] = 0; - mc->mc_flags = 0; - mc->mc_ki[0] = 0; - mc->mc_xcursor = NULL; - - if (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) { - STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); - MDBX_xcursor *mx = &container_of(mc, MDBX_cursor_couple, outer)->inner; - mdbx_tassert(txn, mx != NULL); - mx->mx_cursor.mc_signature = MDBX_MC_SIGNATURE; - mc->mc_xcursor = mx; - int rc = mdbx_xcursor_init0(mc); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } +static __inline int mdbx_couple_init(MDBX_cursor_couple *couple, + const MDBX_dbi dbi, MDBX_txn *const txn, + MDBX_db *const db, MDBX_dbx *const dbx, + uint8_t *const dbstate) { + couple->outer.mc_signature = MDBX_MC_SIGNATURE; + couple->outer.mc_next = NULL; + couple->outer.mc_backup = NULL; + couple->outer.mc_dbi = dbi; + couple->outer.mc_txn = txn; + couple->outer.mc_db = db; + couple->outer.mc_dbx = dbx; + couple->outer.mc_dbstate = dbstate; + couple->outer.mc_snum = 0; + couple->outer.mc_top = 0; + couple->outer.mc_pg[0] = 0; + couple->outer.mc_flags = 0; + couple->outer.mc_ki[0] = 0; + couple->outer.mc_xcursor = NULL; int rc = MDBX_SUCCESS; - if (unlikely(*mc->mc_dbflag & DB_STALE)) { - rc = mdbx_page_search(mc, NULL, MDBX_PS_ROOTONLY); + if (unlikely(*couple->outer.mc_dbstate & DB_STALE)) { + rc = 
mdbx_page_search(&couple->outer, NULL, MDBX_PS_ROOTONLY); rc = (rc != MDBX_NOTFOUND) ? rc : MDBX_SUCCESS; + } else if (unlikely(couple->outer.mc_dbx->md_klen_max == 0)) { + rc = mdbx_setup_dbx(couple->outer.mc_dbx, couple->outer.mc_db, + txn->mt_env->me_psize); + } + + if (couple->outer.mc_db->md_flags & MDBX_DUPSORT) { + couple->inner.mx_cursor.mc_signature = MDBX_MC_SIGNATURE; + couple->outer.mc_xcursor = &couple->inner; + rc = mdbx_xcursor_init0(&couple->outer); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + couple->inner.mx_dbx.md_klen_min = couple->outer.mc_dbx->md_vlen_min; + couple->inner.mx_dbx.md_klen_max = couple->outer.mc_dbx->md_vlen_max; } return rc; } +/* Initialize a cursor for a given transaction and database. */ +static int mdbx_cursor_init(MDBX_cursor *mc, MDBX_txn *txn, MDBX_dbi dbi) { + STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); + return mdbx_couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, + &txn->mt_dbs[dbi], &txn->mt_dbxs[dbi], + &txn->mt_dbstate[dbi]); +} + int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { if (unlikely(!ret)) return MDBX_EINVAL; @@ -13139,7 +13453,7 @@ static int mdbx_update_key(MDBX_cursor *mc, const MDBX_val *key) { mdbx_node_del(mc, 0); int rc = mdbx_page_split(mc, key, NULL, pgno, MDBX_SPLIT_REPLACE); if (rc == MDBX_SUCCESS && mdbx_audit_enabled()) - rc = mdbx_cursor_check(mc, true); + rc = mdbx_cursor_check(mc, C_UPDATING); return rc; } @@ -13163,7 +13477,8 @@ static int mdbx_update_key(MDBX_cursor *mc, const MDBX_val *key) { /* But even if no shift was needed, update ksize */ node_set_ks(node, key->iov_len); - memcpy(node_key(node), key->iov_base, key->iov_len); + if (likely(key->iov_len /* to avoid UBSAN traps*/ != 0)) + memcpy(node_key(node), key->iov_base, key->iov_len); return MDBX_SUCCESS; } @@ -13193,7 +13508,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { key4move.iov_base = node_key(srcnode); if 
(csrc->mc_ki[csrc->mc_top] == 0) { - const uint16_t snum = csrc->mc_snum; + const unsigned snum = csrc->mc_snum; mdbx_cassert(csrc, snum > 0); /* must find the lowest key below src */ rc = mdbx_page_search_lowest(csrc); @@ -13225,7 +13540,7 @@ static int mdbx_node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, int fromleft) { } if (cdst->mc_ki[cdst->mc_top] == 0) { - const uint16_t snum = cdst->mc_snum; + const unsigned snum = cdst->mc_snum; mdbx_cassert(csrc, snum > 0); MDBX_cursor mn; mdbx_cursor_copy(cdst, &mn); @@ -13532,9 +13847,20 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { rc = mdbx_page_search_lowest(&mn); if (unlikely(rc)) return rc; - MDBX_node *lowest = page_node(mn.mc_pg[mn.mc_top], 0); - key.iov_len = node_ks(lowest); - key.iov_base = node_key(lowest); + + const MDBX_page *mp = mn.mc_pg[mn.mc_top]; + if (likely(!IS_LEAF2(mp))) { + mdbx_cassert(&mn, IS_LEAF(mp)); + const MDBX_node *lowest = page_node(mp, 0); + key.iov_len = node_ks(lowest); + key.iov_base = node_key(lowest); + } else { + mdbx_cassert(&mn, mn.mc_top > csrc->mc_top); + key.iov_len = mp->mp_leaf2_ksize; + key.iov_base = page_leaf2key(mp, mn.mc_ki[mn.mc_top], key.iov_len); + } + mdbx_cassert(&mn, key.iov_len >= csrc->mc_dbx->md_klen_min); + mdbx_cassert(&mn, key.iov_len <= csrc->mc_dbx->md_klen_max); const size_t dst_room = page_room(pdst); const size_t src_used = page_used(cdst->mc_txn->mt_env, psrc); @@ -13630,7 +13956,7 @@ static int mdbx_page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { mdbx_cassert(cdst, cdst->mc_snum == cdst->mc_top + 1); MDBX_page *const top_page = cdst->mc_pg[cdst->mc_top]; const indx_t top_indx = cdst->mc_ki[cdst->mc_top]; - const uint16_t save_snum = cdst->mc_snum; + const unsigned save_snum = cdst->mc_snum; const uint16_t save_depth = cdst->mc_db->md_depth; mdbx_cursor_pop(cdst); rc = mdbx_rebalance(cdst); @@ -13948,8 +14274,13 @@ static int mdbx_rebalance(MDBX_cursor *mc) { } } - if (nkeys >= minkeys) + if (nkeys >= minkeys) { +#if 
MDBX_DEBUG > 0 + if (mdbx_audit_enabled()) + return mdbx_cursor_check(mc, C_UPDATING); +#endif return MDBX_SUCCESS; + } if (left && (!right || page_room(left) > page_room(right))) { /* try merge with left */ @@ -13985,8 +14316,10 @@ static int mdbx_rebalance(MDBX_cursor *mc) { return MDBX_PROBLEM; } -static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp, - bool maybe_unfinished) { +static __cold int mdbx_page_check(MDBX_cursor *const mc, + const MDBX_page *const mp, unsigned options) { + options |= mc->mc_flags & (C_COPYING | C_UPDATING | C_RETIRING | C_SKIPORD); + MDBX_env *const env = mc->mc_txn->mt_env; const unsigned nkeys = page_numkeys(mp); char *const end_of_page = (char *)mp + env->me_psize; mdbx_assert(env, mp->mp_pgno >= MIN_PAGENO && mp->mp_pgno <= MAX_PAGENO); @@ -14001,19 +14334,41 @@ static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp, return MDBX_CORRUPTED; return MDBX_SUCCESS; } - if (!(IS_DIRTY(mp) && maybe_unfinished)) { + if ((options & C_UPDATING) == 0 || !IS_DIRTY(mp)) { mdbx_assert(env, nkeys >= 2 || !IS_BRANCH(mp)); if (unlikely(nkeys < 2 && IS_BRANCH(mp))) return MDBX_CORRUPTED; } - for (unsigned i = IS_LEAF(mp) ? 
0 : 1; i < nkeys; ++i) { + MDBX_val here, prev = {0, 0}; + for (unsigned i = 0; i < nkeys; ++i) { if (IS_LEAF2(mp)) { const size_t ksize = mp->mp_leaf2_ksize; - const char *const key = page_leaf2key(mp, i, ksize); + char *const key = page_leaf2key(mp, i, ksize); mdbx_assert(env, key + ksize <= end_of_page); if (unlikely(end_of_page < key + ksize)) return MDBX_CORRUPTED; + + if ((options & C_COPYING) == 0) { + if (unlikely(ksize != mc->mc_dbx->md_klen_min)) { + mdbx_assert(env, ksize >= mc->mc_dbx->md_klen_min); + mdbx_assert(env, ksize <= mc->mc_dbx->md_klen_max); + if (unlikely(ksize < mc->mc_dbx->md_klen_min || + ksize > mc->mc_dbx->md_klen_max)) + return MDBX_CORRUPTED; + mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = ksize; + } + if ((options & C_SKIPORD) == 0) { + here.iov_len = ksize; + here.iov_base = key; + if (prev.iov_base) { + mdbx_assert(env, mc->mc_dbx->md_cmp(&here, &prev) > 0); + if (unlikely(mc->mc_dbx->md_cmp(&here, &prev) <= 0)) + return MDBX_CORRUPTED; + } + prev = here; + } + } } else { const MDBX_node *const node = page_node(mp, i); const char *node_end = (char *)node + NODESIZE; @@ -14026,12 +14381,59 @@ static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp, mdbx_assert(env, key + ksize <= end_of_page); if (unlikely(end_of_page < key + ksize)) return MDBX_CORRUPTED; + + if ((options & C_COPYING) == 0) { + mdbx_assert(env, ksize >= mc->mc_dbx->md_klen_min); + mdbx_assert(env, ksize <= mc->mc_dbx->md_klen_max); + if (unlikely(ksize < mc->mc_dbx->md_klen_min || + ksize > mc->mc_dbx->md_klen_max)) + return MDBX_CORRUPTED; + + if ((options & C_SKIPORD) == 0) { + here.iov_base = key; + here.iov_len = ksize; + if (prev.iov_base) { + mdbx_assert(env, mc->mc_dbx->md_cmp(&here, &prev) > 0); + if (unlikely(mc->mc_dbx->md_cmp(&here, &prev) <= 0)) + return MDBX_CORRUPTED; + } + prev = here; + } + } } - if (IS_BRANCH(mp)) + if (IS_BRANCH(mp)) { + if ((options & C_RETIRING) == 0) { + const pgno_t ref = node_pgno(node); + 
mdbx_assert(env, ref >= MIN_PAGENO); + mdbx_assert(env, ref < mc->mc_txn->mt_next_pgno); + if (unlikely(ref < MIN_PAGENO || ref >= mc->mc_txn->mt_next_pgno)) + return MDBX_CORRUPTED; + } continue; + } if (node_flags(node) == F_BIGDATA /* data on large-page */) { + const size_t dsize = node_ds(node); + if ((options & C_COPYING) == 0) { + mdbx_assert(env, dsize > mc->mc_dbx->md_vlen_min); + mdbx_assert(env, dsize <= mc->mc_dbx->md_vlen_max); + if (unlikely(dsize <= mc->mc_dbx->md_vlen_min || + dsize > mc->mc_dbx->md_vlen_max)) + return MDBX_CORRUPTED; + } + if ((options & C_RETIRING) == 0) { + MDBX_page *lp; + int err = mdbx_page_get(mc, node_largedata_pgno(node), &lp, NULL); + if (unlikely(err != MDBX_SUCCESS)) + return err; + mdbx_assert(env, IS_OVERFLOW(lp)); + mdbx_assert(env, number_of_ovpages(env, dsize) == lp->mp_pages); + if (unlikely(!IS_OVERFLOW(lp) || + number_of_ovpages(env, dsize) != lp->mp_pages)) + return MDBX_CORRUPTED; + } continue; } + const size_t dsize = node_ds(node); const char *const data = node_data(node); mdbx_assert(env, data + dsize <= end_of_page); @@ -14043,6 +14445,13 @@ static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp, mdbx_assert(env, false); return MDBX_CORRUPTED; case 0 /* usual */: + if ((options & C_COPYING) == 0) { + mdbx_assert(env, dsize >= mc->mc_dbx->md_vlen_min); + mdbx_assert(env, dsize <= mc->mc_dbx->md_vlen_max); + if (unlikely(dsize < mc->mc_dbx->md_vlen_min || + dsize > mc->mc_dbx->md_vlen_max)) + return MDBX_CORRUPTED; + } break; case F_SUBDATA /* sub-db */: mdbx_assert(env, dsize >= sizeof(MDBX_db)); @@ -14071,7 +14480,11 @@ static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp, return MDBX_CORRUPTED; } + MDBX_val sub_here, sub_prev = {0, 0}; for (int j = 0; j < nsubkeys; j++) { + mdbx_assert(env, IS_LEAF(sp)); + if (unlikely(!IS_LEAF(sp))) + return MDBX_CORRUPTED; if (IS_LEAF2(sp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */ size_t sub_ksize = 
sp->mp_leaf2_ksize; @@ -14079,10 +14492,30 @@ static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp, mdbx_assert(env, sub_key + sub_ksize <= end_of_subpage); if (unlikely(end_of_subpage < sub_key + sub_ksize)) return MDBX_CORRUPTED; + + if ((options & C_COPYING) == 0) { + if (unlikely(sub_ksize != mc->mc_dbx->md_vlen_min)) { + mdbx_assert(env, sub_ksize >= mc->mc_dbx->md_vlen_min); + mdbx_assert(env, sub_ksize <= mc->mc_dbx->md_vlen_max); + if (unlikely(sub_ksize < mc->mc_dbx->md_vlen_min || + sub_ksize > mc->mc_dbx->md_vlen_max)) + return MDBX_CORRUPTED; + mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = sub_ksize; + } + if ((options & C_SKIPORD) == 0) { + sub_here.iov_len = sub_ksize; + sub_here.iov_base = sub_key; + if (sub_prev.iov_base) { + mdbx_assert(env, + mc->mc_dbx->md_dcmp(&sub_prev, &sub_here) < 0); + if (unlikely(mc->mc_dbx->md_dcmp(&sub_prev, &sub_here) >= + 0)) + return MDBX_CORRUPTED; + } + sub_prev = sub_here; + } + } } else { - mdbx_assert(env, IS_LEAF(sp)); - if (unlikely(!IS_LEAF(sp))) - return MDBX_CORRUPTED; const MDBX_node *const sub_node = page_node(sp, j); const char *sub_node_end = (char *)sub_node + NODESIZE; mdbx_assert(env, sub_node_end <= end_of_subpage); @@ -14096,6 +14529,31 @@ static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp, char *sub_key = node_key(sub_node); size_t sub_dsize = node_ds(sub_node); char *sub_data = node_data(sub_node); + + if ((options & C_COPYING) == 0) { + mdbx_assert(env, sub_ksize >= mc->mc_dbx->md_vlen_min); + mdbx_assert(env, sub_ksize <= mc->mc_dbx->md_vlen_max); + if (unlikely(sub_ksize < mc->mc_dbx->md_vlen_min || + sub_ksize > mc->mc_dbx->md_vlen_max)) + return MDBX_CORRUPTED; + + if ((options & C_SKIPORD) == 0) { + sub_here.iov_len = sub_ksize; + sub_here.iov_base = sub_key; + if (sub_prev.iov_base) { + mdbx_assert(env, + mc->mc_dbx->md_dcmp(&sub_prev, &sub_here) < 0); + if (unlikely(mc->mc_dbx->md_dcmp(&sub_prev, &sub_here) >= + 0)) + return 
MDBX_CORRUPTED; + } + sub_prev = sub_here; + } + } + mdbx_assert(env, sub_dsize == 0); + if (unlikely(sub_dsize != 0)) + return MDBX_CORRUPTED; + mdbx_assert(env, sub_key + sub_ksize <= end_of_subpage); if (unlikely(end_of_subpage < sub_key + sub_ksize)) return MDBX_CORRUPTED; @@ -14112,7 +14570,7 @@ static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp, return MDBX_SUCCESS; } -static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) { +static __cold int mdbx_cursor_check(MDBX_cursor *mc, unsigned options) { mdbx_tassert(mc->mc_txn, mc->mc_txn->mt_parent || mc->mc_txn->tw.dirtyroom + mc->mc_txn->tw.dirtylist->length == @@ -14120,13 +14578,13 @@ static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) { mdbx_cassert(mc, mc->mc_top == mc->mc_snum - 1); if (unlikely(mc->mc_top != mc->mc_snum - 1)) return MDBX_CURSOR_FULL; - mdbx_cassert(mc, pending ? mc->mc_snum <= mc->mc_db->md_depth - : mc->mc_snum == mc->mc_db->md_depth); - if (unlikely(pending ? mc->mc_snum > mc->mc_db->md_depth - : mc->mc_snum != mc->mc_db->md_depth)) + mdbx_cassert(mc, (options & C_UPDATING) ? mc->mc_snum <= mc->mc_db->md_depth + : mc->mc_snum == mc->mc_db->md_depth); + if (unlikely((options & C_UPDATING) ? mc->mc_snum > mc->mc_db->md_depth + : mc->mc_snum != mc->mc_db->md_depth)) return MDBX_CURSOR_FULL; - for (int n = 0; n < mc->mc_snum; ++n) { + for (int n = 0; n < (int)mc->mc_snum; ++n) { MDBX_page *mp = mc->mc_pg[n]; const unsigned nkeys = page_numkeys(mp); const bool expect_branch = (n < mc->mc_db->md_depth - 1) ? 
true : false; @@ -14136,7 +14594,7 @@ static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) { mdbx_cassert(mc, branch == expect_branch); if (unlikely(branch != expect_branch)) return MDBX_CURSOR_FULL; - if (!pending) { + if ((options & C_UPDATING) == 0) { mdbx_cassert(mc, nkeys > mc->mc_ki[n] || (!branch && nkeys == mc->mc_ki[n] && (mc->mc_flags & C_EOF) != 0)); @@ -14150,7 +14608,7 @@ static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) { return MDBX_CURSOR_FULL; } - int err = mdbx_page_check(mc->mc_txn->mt_env, mp, pending); + int err = mdbx_page_check(mc, mp, options); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -14170,7 +14628,7 @@ static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) { mdbx_cassert(mc, nested_leaf == expect_nested_leaf); if (unlikely(nested_leaf != expect_nested_leaf)) return MDBX_CURSOR_FULL; - err = mdbx_page_check(mc->mc_txn->mt_env, np, pending); + err = mdbx_page_check(mc, np, options); if (unlikely(err != MDBX_SUCCESS)) return err; } @@ -14312,7 +14770,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) { if (unlikely(rc)) mc->mc_txn->mt_flags |= MDBX_TXN_ERROR; else if (mdbx_audit_enabled()) - rc = mdbx_cursor_check(mc, false); + rc = mdbx_cursor_check(mc, 0); return rc; } @@ -14403,7 +14861,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey, unsigned newindx = mc->mc_ki[mc->mc_top]; unsigned nkeys = page_numkeys(mp); if (mdbx_audit_enabled()) { - rc = mdbx_cursor_check(mc, true); + rc = mdbx_cursor_check(mc, C_UPDATING); if (unlikely(rc != MDBX_SUCCESS)) return rc; } @@ -14607,10 +15065,10 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey, mdbx_debug("separator is %d [%s]", split_indx, DKEY(&sepkey)); if (mdbx_audit_enabled()) { - rc = mdbx_cursor_check(mc, true); + rc = mdbx_cursor_check(mc, C_UPDATING); if (unlikely(rc != MDBX_SUCCESS)) goto done; - rc = mdbx_cursor_check(&mn, true); + rc = mdbx_cursor_check(&mn, C_UPDATING); if (unlikely(rc != 
MDBX_SUCCESS)) goto done; } @@ -14627,9 +15085,9 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey, mn, rc = mdbx_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0)); if (unlikely(rc != MDBX_SUCCESS)) goto done; - mdbx_cassert(mc, mc->mc_snum - snum == mc->mc_db->md_depth - depth); + mdbx_cassert(mc, (int)mc->mc_snum - snum == mc->mc_db->md_depth - depth); if (mdbx_audit_enabled()) { - rc = mdbx_cursor_check(mc, true); + rc = mdbx_cursor_check(mc, C_UPDATING); if (unlikely(rc != MDBX_SUCCESS)) goto done; } @@ -14919,7 +15377,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, typedef struct mdbx_copy { MDBX_env *mc_env; MDBX_txn *mc_txn; - mdbx_condmutex_t mc_condmutex; + mdbx_condpair_t mc_condpair; uint8_t *mc_wbuf[2]; uint8_t *mc_over[2]; size_t mc_wlen[2]; @@ -14939,10 +15397,10 @@ static THREAD_RESULT __cold THREAD_CALL mdbx_env_copythr(void *arg) { uint8_t *ptr; int toggle = 0; - mdbx_condmutex_lock(&my->mc_condmutex); + mdbx_condpair_lock(&my->mc_condpair); while (!my->mc_error) { while (!my->mc_new && !my->mc_error) { - int err = mdbx_condmutex_wait(&my->mc_condmutex); + int err = mdbx_condpair_wait(&my->mc_condpair, true); if (err != MDBX_SUCCESS) { my->mc_error = err; goto bailout; @@ -14972,10 +15430,10 @@ static THREAD_RESULT __cold THREAD_CALL mdbx_env_copythr(void *arg) { toggle ^= 1; /* Return the empty buffer to provider */ my->mc_new--; - mdbx_condmutex_signal(&my->mc_condmutex); + mdbx_condpair_signal(&my->mc_condpair, false); } bailout: - mdbx_condmutex_unlock(&my->mc_condmutex); + mdbx_condpair_unlock(&my->mc_condpair); return (THREAD_RESULT)0; } @@ -14984,15 +15442,15 @@ bailout: * [in] my control structure. * [in] adjust (1 to hand off 1 buffer) | (MDBX_EOF when ending). 
*/ static int __cold mdbx_env_cthr_toggle(mdbx_copy *my, int adjust) { - mdbx_condmutex_lock(&my->mc_condmutex); + mdbx_condpair_lock(&my->mc_condpair); my->mc_new += (short)adjust; - mdbx_condmutex_signal(&my->mc_condmutex); + mdbx_condpair_signal(&my->mc_condpair, true); while (!my->mc_error && (my->mc_new & 2) /* both buffers in use */) { - int err = mdbx_condmutex_wait(&my->mc_condmutex); + int err = mdbx_condpair_wait(&my->mc_condpair, false); if (err != MDBX_SUCCESS) my->mc_error = err; } - mdbx_condmutex_unlock(&my->mc_condmutex); + mdbx_condpair_unlock(&my->mc_condpair); my->mc_toggle ^= (adjust & 1); /* Both threads reset mc_wlen, to be safe from threading errors */ @@ -15005,7 +15463,7 @@ static int __cold mdbx_env_cthr_toggle(mdbx_copy *my, int adjust) { * [in,out] pg database root. * [in] flags includes F_DUPDATA if it is a sorted-duplicate sub-DB. */ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { - MDBX_cursor mc; + MDBX_cursor_couple couple; MDBX_page *mo, *mp, *leaf; char *buf, *ptr; int rc, toggle; @@ -15015,25 +15473,28 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { if (*pg == P_INVALID) return MDBX_SUCCESS; - memset(&mc, 0, sizeof(mc)); - mc.mc_snum = 1; - mc.mc_txn = my->mc_txn; + memset(&couple, 0, sizeof(couple)); + couple.outer.mc_snum = 1; + couple.outer.mc_txn = my->mc_txn; + couple.outer.mc_flags = couple.inner.mx_cursor.mc_flags = + C_COPYING | C_SKIPORD; - rc = mdbx_page_get(&mc, *pg, &mc.mc_pg[0], NULL); + rc = mdbx_page_get(&couple.outer, *pg, &couple.outer.mc_pg[0], NULL); if (unlikely(rc != MDBX_SUCCESS)) return rc; - rc = mdbx_page_search_root(&mc, NULL, MDBX_PS_FIRST); + rc = mdbx_page_search_root(&couple.outer, NULL, MDBX_PS_FIRST); if (unlikely(rc != MDBX_SUCCESS)) return rc; /* Make cursor pages writable */ - buf = ptr = mdbx_malloc(pgno2bytes(my->mc_env, mc.mc_snum)); + buf = ptr = mdbx_malloc(pgno2bytes(my->mc_env, couple.outer.mc_snum)); if (buf == NULL) return 
MDBX_ENOMEM; - for (i = 0; i < mc.mc_top; i++) { - mdbx_page_copy((MDBX_page *)ptr, mc.mc_pg[i], my->mc_env->me_psize); - mc.mc_pg[i] = (MDBX_page *)ptr; + for (i = 0; i < couple.outer.mc_top; i++) { + mdbx_page_copy((MDBX_page *)ptr, couple.outer.mc_pg[i], + my->mc_env->me_psize); + couple.outer.mc_pg[i] = (MDBX_page *)ptr; ptr += my->mc_env->me_psize; } @@ -15041,9 +15502,9 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { leaf = (MDBX_page *)ptr; toggle = my->mc_toggle; - while (mc.mc_snum > 0) { + while (couple.outer.mc_snum > 0) { unsigned n; - mp = mc.mc_pg[mc.mc_top]; + mp = couple.outer.mc_pg[couple.outer.mc_top]; n = page_numkeys(mp); if (IS_LEAF(mp)) { @@ -15055,7 +15516,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { /* Need writable leaf */ if (mp != leaf) { - mc.mc_pg[mc.mc_top] = leaf; + couple.outer.mc_pg[couple.outer.mc_top] = leaf; mdbx_page_copy(leaf, mp, my->mc_env->me_psize); mp = leaf; node = page_node(mp, i); @@ -15063,7 +15524,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { const pgno_t pgno = node_largedata_pgno(node); poke_pgno(node_data(node), my->mc_next_pgno); - rc = mdbx_page_get(&mc, pgno, &omp, NULL); + rc = mdbx_page_get(&couple.outer, pgno, &omp, NULL); if (unlikely(rc != MDBX_SUCCESS)) goto done; if (my->mc_wlen[toggle] >= MDBX_WBUF) { @@ -15075,7 +15536,6 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); memcpy(mo, omp, my->mc_env->me_psize); mo->mp_pgno = my->mc_next_pgno; - mo->mp_txnid = MIN_TXNID; my->mc_next_pgno += omp->mp_pages; my->mc_wlen[toggle] += my->mc_env->me_psize; if (omp->mp_pages > 1) { @@ -15094,7 +15554,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { /* Need writable leaf */ if (mp != leaf) { - mc.mc_pg[mc.mc_top] = leaf; + couple.outer.mc_pg[couple.outer.mc_top] = leaf; mdbx_page_copy(leaf, mp, 
my->mc_env->me_psize); mp = leaf; node = page_node(mp, i); @@ -15112,23 +15572,26 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { } } } else { - mc.mc_ki[mc.mc_top]++; - if (mc.mc_ki[mc.mc_top] < n) { + couple.outer.mc_ki[couple.outer.mc_top]++; + if (couple.outer.mc_ki[couple.outer.mc_top] < n) { again: - rc = mdbx_page_get(&mc, node_pgno(page_node(mp, mc.mc_ki[mc.mc_top])), - &mp, NULL); + rc = mdbx_page_get( + &couple.outer, + node_pgno(page_node(mp, couple.outer.mc_ki[couple.outer.mc_top])), + &mp, NULL); if (unlikely(rc != MDBX_SUCCESS)) goto done; - mc.mc_top++; - mc.mc_snum++; - mc.mc_ki[mc.mc_top] = 0; + couple.outer.mc_top++; + couple.outer.mc_snum++; + couple.outer.mc_ki[couple.outer.mc_top] = 0; if (IS_BRANCH(mp)) { /* Whenever we advance to a sibling branch page, * we must proceed all the way down to its first leaf. */ - mdbx_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize); + mdbx_page_copy(couple.outer.mc_pg[couple.outer.mc_top], mp, + my->mc_env->me_psize); goto again; } else - mc.mc_pg[mc.mc_top] = mp; + couple.outer.mc_pg[couple.outer.mc_top] = mp; continue; } } @@ -15140,14 +15603,14 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) { } mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]); mdbx_page_copy(mo, mp, my->mc_env->me_psize); - mo->mp_txnid = MIN_TXNID; mo->mp_pgno = my->mc_next_pgno++; my->mc_wlen[toggle] += my->mc_env->me_psize; - if (mc.mc_top) { + if (couple.outer.mc_top) { /* Update parent if there is one */ - node_set_pgno(page_node(mc.mc_pg[mc.mc_top - 1], mc.mc_ki[mc.mc_top - 1]), + node_set_pgno(page_node(couple.outer.mc_pg[couple.outer.mc_top - 1], + couple.outer.mc_ki[couple.outer.mc_top - 1]), mo->mp_pgno); - mdbx_cursor_pop(&mc); + mdbx_cursor_pop(&couple.outer); } else { /* Otherwise we're done */ *pg = mo->mp_pgno; @@ -15161,19 +15624,15 @@ done: static __cold void compact_fixup_meta(MDBX_env *env, MDBX_meta *meta) { /* Calculate filesize taking in account 
shrink/growing thresholds */ - if (meta->mm_geo.next > meta->mm_geo.now) { - const pgno_t aligned = pgno_align2os_pgno( - env, - pgno_add(meta->mm_geo.next, - meta->mm_geo.grow - meta->mm_geo.next % meta->mm_geo.grow)); - meta->mm_geo.now = aligned; - } else if (meta->mm_geo.next < meta->mm_geo.now) { + if (meta->mm_geo.next != meta->mm_geo.now) { meta->mm_geo.now = meta->mm_geo.next; const pgno_t aligner = meta->mm_geo.grow ? meta->mm_geo.grow : meta->mm_geo.shrink; - const pgno_t aligned = pgno_align2os_pgno( - env, meta->mm_geo.next + aligner - meta->mm_geo.next % aligner); - meta->mm_geo.now = aligned; + if (aligner) { + const pgno_t aligned = pgno_align2os_pgno( + env, meta->mm_geo.next + aligner - meta->mm_geo.next % aligner); + meta->mm_geo.now = aligned; + } } if (meta->mm_geo.now < meta->mm_geo.lower) @@ -15186,14 +15645,32 @@ static __cold void compact_fixup_meta(MDBX_env *env, MDBX_meta *meta) { meta->mm_datasync_sign = mdbx_meta_sign(meta); } +/* Make resizeable */ +static __cold void make_sizeable(MDBX_meta *meta) { + meta->mm_geo.lower = MIN_PAGENO; + if (meta->mm_geo.grow == 0) { + const size_t step = 1 + (meta->mm_geo.upper - meta->mm_geo.lower) / 42; + meta->mm_geo.grow = (step < UINT16_MAX) ? (uint16_t)step : UINT16_MAX; + } + if (meta->mm_geo.shrink == 0) { + const size_t step = meta->mm_geo.grow + meta->mm_geo.grow; + meta->mm_geo.shrink = (step < UINT16_MAX) ? (uint16_t)step : UINT16_MAX; + } +} + /* Copy environment with compaction. 
*/ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, mdbx_filehandle_t fd, uint8_t *buffer, - const bool dest_is_pipe) { + const bool dest_is_pipe, const int flags) { const size_t meta_bytes = pgno2bytes(env, NUM_METAS); uint8_t *const data_buffer = buffer + ceil_powerof2(meta_bytes, env->me_os_psize); MDBX_meta *const meta = mdbx_init_metas(env, buffer); + mdbx_meta_set_txnid(env, meta, read_txn->mt_txnid); + + if (flags & MDBX_CP_FORCE_RESIZEABLE) + make_sizeable(meta); + /* copy canary sequenses if present */ if (read_txn->mt_canary.v) { meta->mm_canary = read_txn->mt_canary; @@ -15216,13 +15693,13 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, /* Count free pages + GC pages. Subtract from last_pg * to find the new last_pg, which also becomes the new root. */ pgno_t freecount = 0; - MDBX_cursor mc; + MDBX_cursor_couple couple; MDBX_val key, data; - int rc = mdbx_cursor_init(&mc, read_txn, FREE_DBI); + int rc = mdbx_cursor_init(&couple.outer, read_txn, FREE_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; - while ((rc = mdbx_cursor_get(&mc, &key, &data, MDBX_NEXT)) == 0) + while ((rc = mdbx_cursor_get(&couple.outer, &key, &data, MDBX_NEXT)) == 0) freecount += *(pgno_t *)data.iov_base; if (unlikely(rc != MDBX_NOTFOUND)) return rc; @@ -15238,7 +15715,7 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, mdbx_copy ctx; memset(&ctx, 0, sizeof(ctx)); - rc = mdbx_condmutex_init(&ctx.mc_condmutex); + rc = mdbx_condpair_init(&ctx.mc_condpair); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -15261,7 +15738,7 @@ static int __cold mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, rc = mdbx_env_cwalk(&ctx, &root, 0); mdbx_env_cthr_toggle(&ctx, 1 | MDBX_EOF); thread_err = mdbx_thread_join(thread); - mdbx_condmutex_destroy(&ctx.mc_condmutex); + mdbx_condpair_destroy(&ctx.mc_condpair); } if (unlikely(thread_err != MDBX_SUCCESS)) return thread_err; @@ -15321,7 +15798,7 @@ static int __cold 
mdbx_env_compact(MDBX_env *env, MDBX_txn *read_txn, /* Copy environment as-is. */ static int __cold mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, mdbx_filehandle_t fd, uint8_t *buffer, - const bool dest_is_pipe) { + const bool dest_is_pipe, const int flags) { /* We must start the actual read txn after blocking writers */ int rc = mdbx_txn_end(read_txn, MDBX_END_RESET_TMP); if (unlikely(rc != MDBX_SUCCESS)) @@ -15345,9 +15822,12 @@ static int __cold mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, memcpy(buffer, env->me_map, meta_bytes); MDBX_meta *const headcopy = /* LY: get pointer to the spanshot copy */ (MDBX_meta *)(buffer + ((uint8_t *)mdbx_meta_head(env) - env->me_map)); + mdbx_txn_unlock(env); + + if (flags & MDBX_CP_FORCE_RESIZEABLE) + make_sizeable(headcopy); /* Update signature to steady */ headcopy->mm_datasync_sign = mdbx_meta_sign(headcopy); - mdbx_txn_unlock(env); /* Copy the data */ const size_t whole_size = pgno_align2os_bytes(env, read_txn->mt_end_pgno); @@ -15463,9 +15943,8 @@ int __cold mdbx_env_copy2fd(MDBX_env *env, mdbx_filehandle_t fd, if (likely(rc == MDBX_SUCCESS)) { memset(buffer, 0, pgno2bytes(env, NUM_METAS)); - rc = (flags & MDBX_CP_COMPACT) - ? mdbx_env_compact(env, read_txn, fd, buffer, dest_is_pipe) - : mdbx_env_copy_asis(env, read_txn, fd, buffer, dest_is_pipe); + rc = ((flags & MDBX_CP_COMPACT) ? 
mdbx_env_compact : mdbx_env_copy_asis)( + env, read_txn, fd, buffer, dest_is_pipe, flags); } mdbx_txn_abort(read_txn); @@ -15540,7 +16019,7 @@ int __cold mdbx_env_set_flags(MDBX_env *env, unsigned flags, int onoff) { return rc; if (onoff) - env->me_flags |= flags; + env->me_flags = merge_flags(env->me_flags, flags); else env->me_flags &= ~flags; @@ -15671,6 +16150,56 @@ int __cold mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, } } +int __cold mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, + uint32_t *mask) { + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(!mask)) + return MDBX_EINVAL; + + if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_VALID))) + return MDBX_EINVAL; + + MDBX_cursor_couple cx; + rc = mdbx_cursor_init(&cx.outer, txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if ((cx.outer.mc_db->md_flags & MDBX_DUPSORT) == 0) + return MDBX_RESULT_TRUE; + + MDBX_val key, data; + rc = mdbx_cursor_first(&cx.outer, &key, &data); + *mask = 0; + while (rc == MDBX_SUCCESS) { + const MDBX_node *node = page_node(cx.outer.mc_pg[cx.outer.mc_top], + cx.outer.mc_ki[cx.outer.mc_top]); + const MDBX_db *db = node_data(node); + const unsigned flags = node_flags(node); + switch (flags) { + case F_BIGDATA: + case 0: + /* single-value entry, deep = 0 */ + *mask |= 1 << 0; + break; + case F_DUPDATA: + /* single sub-page, deep = 1 */ + *mask |= 1 << 1; + break; + case F_DUPDATA | F_SUBDATA: + /* sub-tree */ + *mask |= 1 << unaligned_peek_u16(1, &db->md_depth); + break; + default: + return MDBX_CORRUPTED; + } + rc = mdbx_cursor_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP); + } + + return (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; +} + int __cold mdbx_env_info(MDBX_env *env, MDBX_envinfo *arg, size_t bytes) { return mdbx_env_info_ex(env, NULL, arg, bytes); } @@ -15807,7 +16336,7 @@ static MDBX_cmp_func *mdbx_default_keycmp(unsigned flags) { static MDBX_cmp_func *mdbx_default_datacmp(unsigned flags) { return !(flags & MDBX_DUPSORT) - ? mdbx_cmp_memn + ? mdbx_cmp_lenfast : ((flags & MDBX_INTEGERDUP) ? mdbx_cmp_int_unaligned : ((flags & MDBX_REVERSEDUP) ? mdbx_cmp_memnr @@ -15825,9 +16354,10 @@ static int mdbx_dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, * = assume that a properly create request with custom flags; */ if ((user_flags ^ txn->mt_dbs[dbi].md_flags) & PERSISTENT_FLAGS) { - /* flags ara differs, check other conditions */ - if (!user_flags && (!keycmp || keycmp == txn->mt_dbxs[dbi].md_cmp) && - (!datacmp || datacmp == txn->mt_dbxs[dbi].md_dcmp)) { + /* flags are differs, check other conditions */ + if ((!user_flags && (!keycmp || keycmp == txn->mt_dbxs[dbi].md_cmp) && + (!datacmp || datacmp == txn->mt_dbxs[dbi].md_dcmp)) || + user_flags == MDBX_ACCEDE) { /* no comparators were provided and flags are zero, * seems that is case #1 above */ user_flags = txn->mt_dbs[dbi].md_flags; @@ -15866,18 +16396,30 @@ static int mdbx_dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - if (unlikely(!dbi || (user_flags & ~VALID_FLAGS) != 0)) - return MDBX_EINVAL; - *dbi = 0; + int rc = MDBX_EINVAL; + if (unlikely(!dbi)) + return rc; - int rc = check_txn(txn, MDBX_TXN_BLOCKED); - if (unlikely(rc != MDBX_SUCCESS)) + if (unlikely((user_flags & ~VALID_FLAGS) != 0)) { + early_bailout: + *dbi = 0; return rc; + } + + rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + goto early_bailout; - switch (user_flags & - (MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT | MDBX_REVERSEDUP)) { + switch 
(user_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT | + MDBX_REVERSEDUP | MDBX_ACCEDE)) { + case MDBX_ACCEDE: + if ((user_flags & MDBX_CREATE) == 0) + break; + __fallthrough /* fall through */; default: - return MDBX_EINVAL; + rc = MDBX_EINVAL; + goto early_bailout; + case MDBX_DUPSORT: case MDBX_DUPSORT | MDBX_REVERSEDUP: case MDBX_DUPSORT | MDBX_DUPFIXED: @@ -15891,8 +16433,9 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags, /* main table? */ if (!table_name) { rc = mdbx_dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp); - if (likely(rc == MDBX_SUCCESS)) - *dbi = MAIN_DBI; + if (unlikely(rc != MDBX_SUCCESS)) + goto early_bailout; + *dbi = MAIN_DBI; return rc; } @@ -15914,69 +16457,88 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags, } if (len == txn->mt_dbxs[scan].md_name.iov_len && !strncmp(table_name, txn->mt_dbxs[scan].md_name.iov_base, len)) { + rc = mdbx_dbi_bind(txn, scan, user_flags, keycmp, datacmp); + if (unlikely(rc != MDBX_SUCCESS)) + goto early_bailout; *dbi = scan; - return mdbx_dbi_bind(txn, scan, user_flags, keycmp, datacmp); + return rc; } } /* Fail, if no free slot and max hit */ MDBX_env *env = txn->mt_env; - if (unlikely(slot >= env->me_maxdbs)) - return MDBX_DBS_FULL; + if (unlikely(slot >= env->me_maxdbs)) { + rc = MDBX_DBS_FULL; + goto early_bailout; + } /* Cannot mix named table with some main-table flags */ if (unlikely(txn->mt_dbs[MAIN_DBI].md_flags & - (MDBX_DUPSORT | MDBX_INTEGERKEY))) - return (user_flags & MDBX_CREATE) ? MDBX_INCOMPATIBLE : MDBX_NOTFOUND; + (MDBX_DUPSORT | MDBX_INTEGERKEY))) { + rc = (user_flags & MDBX_CREATE) ? 
MDBX_INCOMPATIBLE : MDBX_NOTFOUND; + goto early_bailout; + } /* Find the DB info */ int exact = 0; MDBX_val key, data; key.iov_len = len; key.iov_base = (void *)table_name; - MDBX_cursor mc; - rc = mdbx_cursor_init(&mc, txn, MAIN_DBI); + MDBX_cursor_couple couple; + rc = mdbx_cursor_init(&couple.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) - return rc; - rc = mdbx_cursor_set(&mc, &key, &data, MDBX_SET, &exact); + goto early_bailout; + rc = mdbx_cursor_set(&couple.outer, &key, &data, MDBX_SET, &exact); if (unlikely(rc != MDBX_SUCCESS)) { if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE)) - return rc; + goto early_bailout; } else { /* make sure this is actually a table */ - MDBX_node *node = page_node(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); - if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) - return MDBX_INCOMPATIBLE; - if (unlikely(data.iov_len < sizeof(MDBX_db))) - return MDBX_CORRUPTED; + MDBX_node *node = page_node(couple.outer.mc_pg[couple.outer.mc_top], + couple.outer.mc_ki[couple.outer.mc_top]); + if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) { + rc = MDBX_INCOMPATIBLE; + goto early_bailout; + } + if (unlikely(data.iov_len < sizeof(MDBX_db))) { + rc = MDBX_CORRUPTED; + goto early_bailout; + } } - if (rc != MDBX_SUCCESS && unlikely(txn->mt_flags & MDBX_RDONLY)) - return MDBX_EACCESS; + if (rc != MDBX_SUCCESS && unlikely(txn->mt_flags & MDBX_RDONLY)) { + rc = MDBX_EACCESS; + goto early_bailout; + } /* Done here so we cannot fail after creating a new DB */ char *namedup = mdbx_strdup(table_name); - if (unlikely(!namedup)) - return MDBX_ENOMEM; + if (unlikely(!namedup)) { + rc = MDBX_ENOMEM; + goto early_bailout; + } int err = mdbx_fastmutex_acquire(&env->me_dbi_lock); if (unlikely(err != MDBX_SUCCESS)) { + rc = err; mdbx_free(namedup); - return err; + goto early_bailout; } if (txn->mt_numdbs < env->me_numdbs) { + /* Import handles from env */ for (unsigned i = txn->mt_numdbs; i < 
env->me_numdbs; ++i) { - txn->mt_dbflags[i] = 0; + txn->mt_dbstate[i] = 0; if (env->me_dbflags[i] & MDBX_VALID) { txn->mt_dbs[i].md_flags = env->me_dbflags[i] & PERSISTENT_FLAGS; - txn->mt_dbflags[i] = DB_VALID | DB_USRVALID | DB_STALE; + txn->mt_dbstate[i] = DB_VALID | DB_USRVALID | DB_STALE; mdbx_tassert(txn, txn->mt_dbxs[i].md_cmp != NULL); } } txn->mt_numdbs = env->me_numdbs; } + /* Rescan after mutex acquisition & import handles */ for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { if (!txn->mt_dbxs[scan].md_name.iov_len) { /* Remember this free slot */ @@ -15985,15 +16547,17 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags, } if (len == txn->mt_dbxs[scan].md_name.iov_len && !strncmp(table_name, txn->mt_dbxs[scan].md_name.iov_base, len)) { - *dbi = scan; rc = mdbx_dbi_bind(txn, scan, user_flags, keycmp, datacmp); - goto bailout; + if (unlikely(rc != MDBX_SUCCESS)) + goto later_bailout; + *dbi = scan; + goto later_exit; } } if (unlikely(slot >= env->me_maxdbs)) { rc = MDBX_DBS_FULL; - goto bailout; + goto later_bailout; } unsigned dbflag = DB_FRESH | DB_VALID | DB_USRVALID; @@ -16006,28 +16570,29 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags, db_dummy.md_flags = user_flags & PERSISTENT_FLAGS; data.iov_len = sizeof(db_dummy); data.iov_base = &db_dummy; - WITH_CURSOR_TRACKING( - mc, - rc = mdbx_cursor_put(&mc, &key, &data, F_SUBDATA | MDBX_NOOVERWRITE)); + WITH_CURSOR_TRACKING(couple.outer, + rc = mdbx_cursor_put(&couple.outer, &key, &data, + F_SUBDATA | MDBX_NOOVERWRITE)); if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + goto later_bailout; dbflag |= DB_DIRTY | DB_CREAT; } /* Got info, register DBI in this txn */ - txn->mt_dbxs[slot].md_cmp = nullptr; - txn->mt_dbxs[slot].md_dcmp = nullptr; + memset(txn->mt_dbxs + slot, 0, sizeof(MDBX_dbx)); txn->mt_dbs[slot] = *(MDBX_db *)data.iov_base; env->me_dbflags[slot] = 0; rc = mdbx_dbi_bind(txn, slot, user_flags, keycmp, datacmp); if 
(unlikely(rc != MDBX_SUCCESS)) { mdbx_tassert(txn, (dbflag & DB_CREAT) == 0); - bailout: + later_bailout: + *dbi = 0; + later_exit: mdbx_free(namedup); } else { - txn->mt_dbflags[slot] = (uint8_t)dbflag; + txn->mt_dbstate[slot] = (uint8_t)dbflag; txn->mt_dbxs[slot].md_name.iov_base = namedup; txn->mt_dbxs[slot].md_name.iov_len = len; txn->mt_numdbs += (slot == txn->mt_numdbs); @@ -16037,7 +16602,7 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags, if (env->me_numdbs <= slot) env->me_numdbs = slot + 1; } else { - env->me_dbiseqs[slot] += 1; + env->me_dbiseqs[slot]++; } txn->mt_dbiseqs[slot] = env->me_dbiseqs[slot]; *dbi = slot; @@ -16071,7 +16636,7 @@ int __cold mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED)) return MDBX_BAD_TXN; - if (unlikely(txn->mt_dbflags[dbi] & DB_STALE)) { + if (unlikely(txn->mt_dbstate[dbi] & DB_STALE)) { rc = mdbx_fetch_sdb(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -16093,7 +16658,6 @@ static int mdbx_dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { env->me_dbflags[dbi] = 0; env->me_dbxs[dbi].md_name.iov_len = 0; mdbx_compiler_barrier(); - env->me_dbiseqs[dbi]++; env->me_dbxs[dbi].md_name.iov_base = NULL; mdbx_free(ptr); return MDBX_SUCCESS; @@ -16130,7 +16694,7 @@ int mdbx_dbi_flags_ex(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, return MDBX_EINVAL; *flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS; - *state = txn->mt_dbflags[dbi] & (DB_FRESH | DB_CREAT | DB_DIRTY | DB_STALE); + *state = txn->mt_dbstate[dbi] & (DB_FRESH | DB_CREAT | DB_DIRTY | DB_STALE); return MDBX_SUCCESS; } @@ -16272,13 +16836,14 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del) { if (del && dbi >= CORE_DBS) { rc = mdbx_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); if (likely(rc == MDBX_SUCCESS)) { - txn->mt_dbflags[dbi] = DB_STALE; + txn->mt_dbstate[dbi] = DB_STALE; MDBX_env *env = txn->mt_env; rc = 
mdbx_fastmutex_acquire(&env->me_dbi_lock); if (unlikely(rc != MDBX_SUCCESS)) { txn->mt_flags |= MDBX_TXN_ERROR; goto bailout; } + env->me_dbiseqs[dbi]++; mdbx_dbi_close_locked(env, dbi); mdbx_ensure(env, mdbx_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); @@ -16287,7 +16852,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del) { } } else { /* reset the DB record, mark it dirty */ - txn->mt_dbflags[dbi] |= DB_DIRTY; + txn->mt_dbstate[dbi] |= DB_DIRTY; txn->mt_dbs[dbi].md_depth = 0; txn->mt_dbs[dbi].md_branch_pages = 0; txn->mt_dbs[dbi].md_leaf_pages = 0; @@ -16761,21 +17326,23 @@ int mdbx_txn_straggler(const MDBX_txn *txn, int *percent) typedef struct mdbx_walk_ctx { void *mw_user; MDBX_pgvisitor_func *mw_visitor; - MDBX_cursor mw_cursor; + MDBX_txn *mw_txn; + MDBX_cursor *mw_cursor; + bool mw_dont_check_keys_ordering; } mdbx_walk_ctx_t; +static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db, + const char *name, int deep); /* Depth-first tree traversal. */ -static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, - pgno_t pgno, int deep) { - if (unlikely(pgno == P_INVALID)) - return MDBX_SUCCESS; /* empty db */ - +static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno, + const char *name, int deep) { + assert(pgno != P_INVALID); MDBX_page *mp; - int rc = mdbx_page_get(&ctx->mw_cursor, pgno, &mp, NULL); + int rc = mdbx_page_get(ctx->mw_cursor, pgno, &mp, NULL); if (unlikely(rc != MDBX_SUCCESS)) return rc; - rc = mdbx_page_check(ctx->mw_cursor.mc_txn->mt_env, mp, false); + rc = mdbx_page_check(ctx->mw_cursor, mp, 0); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -16832,10 +17399,10 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, const pgno_t large_pgno = node_largedata_pgno(node); MDBX_page *op; - rc = mdbx_page_get(&ctx->mw_cursor, large_pgno, &op, NULL); + rc = mdbx_page_get(ctx->mw_cursor, large_pgno, &op, NULL); if (unlikely(rc != MDBX_SUCCESS)) return rc; - rc = 
mdbx_page_check(ctx->mw_cursor.mc_txn->mt_env, op, false); + rc = mdbx_page_check(ctx->mw_cursor, op, 0); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -16848,12 +17415,12 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, const size_t over_header = PAGEHDRSZ; const size_t over_payload = node_ds(node); const size_t over_unused = - pgno2bytes(ctx->mw_cursor.mc_txn->mt_env, op->mp_pages) - + pgno2bytes(ctx->mw_cursor->mc_txn->mt_env, op->mp_pages) - over_payload - over_header; rc = ctx->mw_visitor( - large_pgno, op->mp_pages, ctx->mw_user, deep, dbi, - pgno2bytes(ctx->mw_cursor.mc_txn->mt_env, op->mp_pages), + large_pgno, op->mp_pages, ctx->mw_user, deep, name, + pgno2bytes(ctx->mw_cursor->mc_txn->mt_env, op->mp_pages), MDBX_page_large, 1, over_payload, over_header, over_unused); } break; @@ -16909,7 +17476,7 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, } } - rc = ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, dbi, node_ds(node), + rc = ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, name, node_ds(node), subtype, nsubkeys, subpayload_size, subheader_size, subunused_size + subalign_bytes); header_size += subheader_size; @@ -16926,8 +17493,8 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, return rc; } - rc = ctx->mw_visitor(mp->mp_pgno, 1, ctx->mw_user, deep, dbi, - ctx->mw_cursor.mc_txn->mt_env->me_psize, type, nkeys, + rc = ctx->mw_visitor(mp->mp_pgno, 1, ctx->mw_user, deep, name, + ctx->mw_cursor->mc_txn->mt_env->me_psize, type, nkeys, payload_size, header_size, unused_size + align_bytes); if (unlikely(rc != MDBX_SUCCESS)) @@ -16939,7 +17506,7 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, MDBX_node *node = page_node(mp, i); if (type == MDBX_page_branch) { - rc = mdbx_env_walk(ctx, dbi, node_pgno(node), deep + 1); + rc = mdbx_walk_tree(ctx, node_pgno(node), name, deep + 1); if (unlikely(rc != MDBX_SUCCESS)) { if (rc != MDBX_RESULT_TRUE) return rc; @@ 
-16959,17 +17526,17 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, if (unlikely(namelen == 0 || node_ds(node) != sizeof(MDBX_db))) return MDBX_CORRUPTED; - char namebuf_onstask[142]; - char *const name = (namelen < sizeof(namebuf_onstask)) - ? namebuf_onstask - : mdbx_malloc(namelen + 1); - if (name) { - memcpy(name, node_key(node), namelen); - name[namelen] = 0; + char namebuf_onstask[64]; + char *const sub_name = (namelen < sizeof(namebuf_onstask)) + ? namebuf_onstask + : mdbx_malloc(namelen + 1); + if (sub_name) { + memcpy(sub_name, node_key(node), namelen); + sub_name[namelen] = 0; memcpy(&db, node_data(node), sizeof(db)); - rc = mdbx_env_walk(ctx, name, db.md_root, deep + 1); - if (name != namebuf_onstask) - mdbx_free(name); + rc = mdbx_walk_sdb(ctx, &db, sub_name, deep + 1); + if (sub_name != namebuf_onstask) + mdbx_free(sub_name); } else { rc = MDBX_ENOMEM; } @@ -16979,8 +17546,19 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, if (unlikely(node_ds(node) != sizeof(MDBX_db))) return MDBX_CORRUPTED; + if (unlikely(ctx->mw_cursor->mc_xcursor == NULL)) + return MDBX_CORRUPTED; + memcpy(&db, node_data(node), sizeof(db)); - rc = mdbx_env_walk(ctx, dbi, db.md_root, deep + 1); + assert(ctx->mw_cursor->mc_xcursor == + &container_of(ctx->mw_cursor, MDBX_cursor_couple, outer)->inner); + ctx->mw_cursor = &ctx->mw_cursor->mc_xcursor->mx_cursor; + rc = mdbx_walk_tree(ctx, db.md_root, name, deep + 1); + MDBX_xcursor *inner_xcursor = + container_of(ctx->mw_cursor, MDBX_xcursor, mx_cursor); + MDBX_cursor_couple *couple = + container_of(inner_xcursor, MDBX_cursor_couple, inner); + ctx->mw_cursor = &couple->outer; break; } @@ -16991,18 +17569,41 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, return MDBX_SUCCESS; } +static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db, + const char *name, int deep) { + if (unlikely(db->md_root == P_INVALID)) + return MDBX_SUCCESS; /* empty db */ + 
+ MDBX_cursor_couple couple; + MDBX_dbx dbx = {.md_klen_min = INT_MAX}; + uint8_t dbstate = DB_VALID | DB_AUDITED; + int rc = mdbx_couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbstate); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (ctx->mw_dont_check_keys_ordering) { + couple.outer.mc_flags |= C_SKIPORD; + couple.inner.mx_cursor.mc_flags |= C_SKIPORD; + } + couple.outer.mc_next = ctx->mw_cursor; + ctx->mw_cursor = &couple.outer; + rc = mdbx_walk_tree(ctx, db->md_root, name, deep); + ctx->mw_cursor = couple.outer.mc_next; + return rc; +} + int __cold mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, - void *user) { + void *user, int dont_check_keys_ordering) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; mdbx_walk_ctx_t ctx; memset(&ctx, 0, sizeof(ctx)); - ctx.mw_cursor.mc_snum = 1; - ctx.mw_cursor.mc_txn = txn; + ctx.mw_txn = txn; ctx.mw_user = user; ctx.mw_visitor = visitor; + ctx.mw_dont_check_keys_ordering = dont_check_keys_ordering != 0; rc = visitor(0, NUM_METAS, user, 0, MDBX_PGWALK_META, pgno2bytes(txn->mt_env, NUM_METAS), MDBX_page_meta, NUM_METAS, @@ -17010,10 +17611,9 @@ int __cold mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, (txn->mt_env->me_psize - sizeof(MDBX_meta) - PAGEHDRSZ) * NUM_METAS); if (!MDBX_IS_ERROR(rc)) - rc = mdbx_env_walk(&ctx, MDBX_PGWALK_GC, txn->mt_dbs[FREE_DBI].md_root, 0); + rc = mdbx_walk_sdb(&ctx, &txn->mt_dbs[FREE_DBI], MDBX_PGWALK_GC, 0); if (!MDBX_IS_ERROR(rc)) - rc = - mdbx_env_walk(&ctx, MDBX_PGWALK_MAIN, txn->mt_dbs[MAIN_DBI].md_root, 0); + rc = mdbx_walk_sdb(&ctx, &txn->mt_dbs[MAIN_DBI], MDBX_PGWALK_MAIN, 0); if (!MDBX_IS_ERROR(rc)) rc = visitor(P_INVALID, 0, user, INT_MIN, NULL, 0, MDBX_page_void, 0, 0, 0, 0); @@ -17112,7 +17712,7 @@ int mdbx_cursor_eof(const MDBX_cursor *mc) { struct diff_result { ptrdiff_t diff; - int level; + unsigned level; int root_nkeys; }; @@ -17203,7 +17803,7 @@ __hot static ptrdiff_t estimate(const MDBX_db *db, * 
level-N: branch-page(s) => scale = leaf-factor * leaf-level: leaf-page(s) => scale = 1 */ - ptrdiff_t btree_power = db->md_depth - 2 - dr->level; + ptrdiff_t btree_power = (ptrdiff_t)db->md_depth - 2 - (ptrdiff_t)dr->level; if (btree_power < 0) return dr->diff; @@ -17339,11 +17939,6 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return mdbx_estimate_distance(cursor, &next.outer, distance_items); } -static int mdbx_is_samedata(const MDBX_val *a, const MDBX_val *b) { - return a->iov_len == b->iov_len && - memcmp(a->iov_base, b->iov_base, a->iov_len) == 0; -} - int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, MDBX_val *begin_data, MDBX_val *end_key, MDBX_val *end_data, ptrdiff_t *size_items) { @@ -17410,7 +18005,8 @@ int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, end_key = begin_key; } if (end_key && !begin_data && !end_data && - (begin_key == end_key || mdbx_is_samedata(begin_key, end_key))) { + (begin_key == end_key || + begin.outer.mc_dbx->md_cmp(begin_key, end_key) == 0)) { /* LY: single key case */ int exact = 0; rc = mdbx_cursor_set(&begin.outer, begin_key, NULL, MDBX_SET, &exact); @@ -17566,7 +18162,7 @@ int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, if (new_data) { /* обновление конкретного дубликата */ - if (mdbx_is_samedata(old_data, new_data)) + if (cx.outer.mc_dbx->md_dcmp(old_data, new_data) == 0) /* если данные совпадают, то ничего делать не надо */ goto bailout; } @@ -17589,8 +18185,7 @@ int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_page *page = cx.outer.mc_pg[cx.outer.mc_top]; if (txn->mt_dbs[dbi].md_flags & MDBX_DUPSORT) { if (flags & MDBX_CURRENT) { - /* для не-уникальных ключей позволяем update/delete только если ключ - * один */ + /* disallow update/delete for multi-values */ MDBX_node *node = page_node(page, cx.outer.mc_ki[cx.outer.mc_top]); if (F_ISSET(node_flags(node), F_DUPDATA)) { mdbx_tassert(txn, 
XCURSOR_INITED(&cx.outer) && @@ -17601,7 +18196,8 @@ int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, } } /* если данные совпадают, то ничего делать не надо */ - if (new_data && mdbx_is_samedata(&present_data, new_data)) { + if (new_data && + cx.outer.mc_dbx->md_dcmp(&present_data, new_data) == 0) { *old_data = *new_data; goto bailout; } @@ -17610,14 +18206,15 @@ int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * но здесь это в любом случае допустимо, так как мы * проверили что для ключа есть только одно значение. */ } else if ((flags & MDBX_NODUPDATA) && - mdbx_is_samedata(&present_data, new_data)) { + cx.outer.mc_dbx->md_dcmp(&present_data, new_data) == 0) { /* если данные совпадают и установлен MDBX_NODUPDATA */ rc = MDBX_KEYEXIST; goto bailout; } } else { /* если данные совпадают, то ничего делать не надо */ - if (new_data && mdbx_is_samedata(&present_data, new_data)) { + if (new_data && + cx.outer.mc_dbx->md_dcmp(&present_data, new_data) == 0) { *old_data = *new_data; goto bailout; } @@ -17736,7 +18333,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, if (unlikely(TXN_DBI_CHANGED(txn, dbi))) return MDBX_BAD_DBI; - if (unlikely(txn->mt_dbflags[dbi] & DB_STALE)) { + if (unlikely(txn->mt_dbstate[dbi] & DB_STALE)) { rc = mdbx_fetch_sdb(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -17757,7 +18354,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, mdbx_tassert(txn, new > dbs->md_seq); dbs->md_seq = new; txn->mt_flags |= MDBX_TXN_DIRTY; - txn->mt_dbflags[dbi] |= DB_DIRTY; + txn->mt_dbstate[dbi] |= DB_DIRTY; } return MDBX_SUCCESS; @@ -17784,6 +18381,7 @@ __cold intptr_t mdbx_limits_dbsize_max(intptr_t pagesize) { !is_powerof2((size_t)pagesize))) return -1; + STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX); const uint64_t limit = MAX_PAGENO * (uint64_t)pagesize; return (limit < (intptr_t)MAX_MAPSIZE) ? 
(intptr_t)limit : (intptr_t)MAX_MAPSIZE; @@ -17797,6 +18395,7 @@ __cold intptr_t mdbx_limits_txnsize_max(intptr_t pagesize) { !is_powerof2((size_t)pagesize))) return -1; + STATIC_ASSERT(MAX_MAPSIZE < INTPTR_MAX); const uint64_t limit = pagesize * (uint64_t)(MDBX_DPL_TXNFULL - 1); return (limit < (intptr_t)MAX_MAPSIZE) ? (intptr_t)limit : (intptr_t)MAX_MAPSIZE; @@ -18033,7 +18632,7 @@ int mdbx_set_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, return MDBX_EINVAL; if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE)) - return MDBX_VERSION_MISMATCH; + return MDBX_EBADSIGN; if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) return MDBX_EINVAL; @@ -18214,6 +18813,9 @@ __dll_export #else #error "FIXME: Unsupported byte order" #endif /* __BYTE_ORDER__ */ +#if MDBX_HUGE_TRANSACTIONS + " MDBX_HUGE_TRANSACTIONS=YES" +#endif /* MDBX_HUGE_TRANSACTIONS */ " MDBX_TXN_CHECKPID=" MDBX_TXN_CHECKPID_CONFIG " MDBX_TXN_CHECKOWNER=" MDBX_TXN_CHECKOWNER_CONFIG " MDBX_64BIT_ATOMIC=" MDBX_64BIT_ATOMIC_CONFIG diff --git a/libs/libmdbx/src/src/internals.h b/libs/libmdbx/src/src/internals.h index 7ef818ca35..c751e912af 100644 --- a/libs/libmdbx/src/src/internals.h +++ b/libs/libmdbx/src/src/internals.h @@ -207,10 +207,13 @@ typedef uint32_t pgno_t; #define MAX_PAGENO UINT32_C(0x7FFFffff) #define MIN_PAGENO NUM_METAS +#define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000) + /* A transaction ID. */ typedef uint64_t txnid_t; #define PRIaTXN PRIi64 #define MIN_TXNID UINT64_C(1) +#define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1) #define INVALID_TXNID UINT64_MAX /* LY: for testing non-atomic 64-bit txnid on 32-bit arches. * #define MDBX_TXNID_STEP (UINT32_MAX / 3) */ @@ -251,8 +254,6 @@ typedef union mdbx_safe64 { }; } mdbx_safe64_t; -#define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000) - /* Information about a single database in the environment. 
*/ typedef struct MDBX_db { uint16_t md_flags; /* see mdbx_dbi_open */ @@ -646,9 +647,16 @@ typedef MDBX_DP *MDBX_DPL; #define MDBX_PNL_GRANULATE 1024 #define MDBX_PNL_INITIAL \ (MDBX_PNL_GRANULATE - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) + +#if MDBX_HUGE_TRANSACTIONS +#define MDBX_PNL_MAX \ + ((1u << 26) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) +#define MDBX_DPL_TXNFULL (MDBX_PNL_MAX / 2) +#else #define MDBX_PNL_MAX \ ((1u << 24) - 2 - MDBX_ASSUME_MALLOC_OVERHEAD / sizeof(pgno_t)) #define MDBX_DPL_TXNFULL (MDBX_PNL_MAX / 4) +#endif /* MDBX_HUGE_TRANSACTIONS */ #define MDBX_TXL_GRANULATE 32 #define MDBX_TXL_INITIAL \ @@ -681,9 +689,12 @@ typedef MDBX_DP *MDBX_DPL; * The information here is mostly static/read-only. There is * only a single copy of this record in the environment. */ typedef struct MDBX_dbx { - MDBX_val md_name; /* name of the database */ - MDBX_cmp_func *md_cmp; /* function for comparing keys */ - MDBX_cmp_func *md_dcmp; /* function for comparing data items */ + MDBX_val md_name; /* name of the database */ + MDBX_cmp_func *md_cmp; /* function for comparing keys */ + MDBX_cmp_func *md_dcmp; /* function for comparing data items */ + size_t md_klen_min, md_klen_max; /* min/max key length for the database */ + size_t md_vlen_min, + md_vlen_max; /* min/max value/data length for the database */ } MDBX_dbx; /* A database transaction. 
@@ -726,6 +737,8 @@ struct MDBX_txn { MDBX_db *mt_dbs; /* Array of sequence numbers for each DB handle */ unsigned *mt_dbiseqs; + /* In write txns, array of cursors for each DB */ + MDBX_cursor **mt_cursors; /* Transaction DB Flags */ #define DB_DIRTY MDBX_TBL_DIRTY /* DB was written in this txn */ @@ -736,10 +749,8 @@ struct MDBX_txn { #define DB_USRVALID 0x20 /* As DB_VALID, but not set for FREE_DBI */ #define DB_DUPDATA 0x40 /* DB is MDBX_DUPSORT data */ #define DB_AUDITED 0x80 /* Internal flag for accounting during audit */ - /* In write txns, array of cursors for each DB */ - MDBX_cursor **mt_cursors; /* Array of flags for each DB */ - uint8_t *mt_dbflags; + uint8_t *mt_dbstate; /* Number of DB records in use, or 0 when the txn is finished. * This number only ever increments until the txn finishes; we * don't decrement it when individual DB handles are closed. */ @@ -822,18 +833,26 @@ struct MDBX_cursor { MDBX_db *mc_db; /* The database auxiliary record for this cursor */ MDBX_dbx *mc_dbx; - /* The mt_dbflag for this database */ - uint8_t *mc_dbflag; - uint16_t mc_snum; /* number of pushed pages */ - uint16_t mc_top; /* index of top page, normally mc_snum-1 */ - /* Cursor state flags. */ -#define C_INITIALIZED 0x01 /* cursor has been initialized and is valid */ -#define C_EOF 0x02 /* No more data */ -#define C_SUB 0x04 /* Cursor is a sub-cursor */ -#define C_DEL 0x08 /* last op was a cursor_del */ -#define C_UNTRACK 0x10 /* Un-track cursor when closing */ -#define C_RECLAIMING 0x20 /* GC lookup is prohibited */ -#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */ + /* The mt_dbstate for this database */ + uint8_t *mc_dbstate; + unsigned mc_snum; /* number of pushed pages */ + unsigned mc_top; /* index of top page, normally mc_snum-1 */ + + /* Cursor state flags. 
*/ +#define C_INITIALIZED 0x01 /* cursor has been initialized and is valid */ +#define C_EOF 0x02 /* No more data */ +#define C_SUB 0x04 /* Cursor is a sub-cursor */ +#define C_DEL 0x08 /* last op was a cursor_del */ +#define C_UNTRACK 0x10 /* Un-track cursor when closing */ +#define C_RECLAIMING 0x20 /* GC lookup is prohibited */ +#define C_GCFREEZE 0x40 /* reclaimed_pglist must not be updated */ + + /* Cursor checing flags. */ +#define C_COPYING 0x100 /* skip key-value length check (copying simplify) */ +#define C_UPDATING 0x200 /* update/rebalance pending */ +#define C_RETIRING 0x400 /* refs to child pages may be invalid */ +#define C_SKIPORD 0x800 /* don't check keys ordering */ + unsigned mc_flags; /* see mdbx_cursor */ MDBX_page *mc_pg[CURSOR_STACK]; /* stack of pushed pages */ indx_t mc_ki[CURSOR_STACK]; /* stack of page indices */ @@ -850,8 +869,8 @@ typedef struct MDBX_xcursor { MDBX_db mx_db; /* The auxiliary DB record for this Dup DB */ MDBX_dbx mx_dbx; - /* The mt_dbflag for this Dup DB */ - uint8_t mx_dbflag; + /* The mt_dbstate for this Dup DB */ + uint8_t mx_dbstate; } MDBX_xcursor; typedef struct MDBX_cursor_couple { @@ -913,7 +932,7 @@ struct MDBX_env { MDBX_page *me_dpages; /* list of malloc'd blocks for re-use */ /* PNL of pages that became unused in a write txn */ MDBX_PNL me_retired_pages; - /* MDBX_DP of pages written during a write txn. Length MDBX_DPL_TXNFULL. */ + /* MDBX_DP of pages written during a write txn. */ MDBX_DPL me_dirtylist; /* Number of freelist items that can fit in a single overflow page */ unsigned me_maxgc_ov1page; @@ -1235,11 +1254,6 @@ MDBX_INTERNAL_FUNC void mdbx_rthc_thread_dtor(void *ptr); * Used in pages of type P_BRANCH and P_LEAF without P_LEAF2. * We guarantee 2-byte alignment for 'MDBX_node's. * - * mn_lo and mn_hi are used for data size on leaf nodes, and for child - * pgno on branch nodes. On 64 bit platforms, mn_flags is also used - * for pgno. (Branch nodes have no flags). 
Lo and hi are in host byte - * order in case some accesses can be optimized to 32-bit word access. - * * Leaf node flags describe node contents. F_BIGDATA says the node's * data part is the page number of an overflow page with actual data. * F_DUPDATA and F_SUBDATA can be combined giving duplicate data in @@ -1247,9 +1261,6 @@ MDBX_INTERNAL_FUNC void mdbx_rthc_thread_dtor(void *ptr); typedef struct MDBX_node { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ union { - struct { - uint16_t mn_lo, mn_hi; /* part of data size or pgno */ - }; uint32_t mn_dsize; uint32_t mn_pgno32; }; @@ -1263,9 +1274,6 @@ typedef struct MDBX_node { union { uint32_t mn_pgno32; uint32_t mn_dsize; - struct { - uint16_t mn_hi, mn_lo; /* part of data size or pgno */ - }; }; #endif /* __BYTE_ORDER__ */ @@ -1280,11 +1288,11 @@ typedef struct MDBX_node { } MDBX_node; #define MDBX_VALID 0x8000 /* DB handle is valid, for me_dbflags */ -#define PERSISTENT_FLAGS (0xffff & ~(MDBX_VALID)) +#define PERSISTENT_FLAGS (0xffff & ~(MDBX_VALID | MDBX_NOSUBDIR)) /* mdbx_dbi_open() flags */ #define VALID_FLAGS \ (MDBX_REVERSEKEY | MDBX_DUPSORT | MDBX_INTEGERKEY | MDBX_DUPFIXED | \ - MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_CREATE) + MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_CREATE | MDBX_ACCEDE) /* max number of pages to commit in one writev() call */ #define MDBX_COMMIT_PAGES 64 diff --git a/libs/libmdbx/src/src/lck-posix.c b/libs/libmdbx/src/src/lck-posix.c index 6b6127bd4b..9038739f1e 100644 --- a/libs/libmdbx/src/src/lck-posix.c +++ b/libs/libmdbx/src/src/lck-posix.c @@ -477,8 +477,10 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env, mdbx_assert(env, rc == 0); if (rc == 0) { + const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages == 0; mdbx_munmap(&env->me_lck_mmap); - rc = ftruncate(env->me_lfd, 0) ? errno : 0; + if (synced) + rc = ftruncate(env->me_lfd, 0) ? 
errno : 0; } mdbx_jitter4testing(false); diff --git a/libs/libmdbx/src/src/lck-windows.c b/libs/libmdbx/src/src/lck-windows.c index 19a71cc337..8ec4659a28 100644 --- a/libs/libmdbx/src/src/lck-windows.c +++ b/libs/libmdbx/src/src/lck-windows.c @@ -133,10 +133,9 @@ static __inline BOOL funlock(mdbx_filehandle_t fd, uint64_t offset, #define LCK_MAXLEN (1u + (size_t)(MAXSSIZE_T)) #define LCK_META_OFFSET 0 -#define LCK_META_LEN 0x10000u +#define LCK_META_LEN (MAX_PAGESIZE * NUM_METAS) #define LCK_BODY_OFFSET LCK_META_LEN #define LCK_BODY_LEN (LCK_MAXLEN - LCK_BODY_OFFSET) -#define LCK_META LCK_META_OFFSET, LCK_META_LEN #define LCK_BODY LCK_BODY_OFFSET, LCK_BODY_LEN #define LCK_WHOLE 0, LCK_MAXLEN @@ -345,13 +344,6 @@ mdbx_resume_threads_after_remap(mdbx_handle_array_t *array) { * i.e. free/shared/exclusive x free/shared/exclusive == 9. * Only 6 states of FSM are used, which 2 of ones are transitive. * - * The mdbx_lck_seize() moves the locking-FSM from the initial free/unlocked - * state to the "exclusive write" (and returns MDBX_RESULT_TRUE) if possible, - * or to the "used" (and returns MDBX_RESULT_FALSE). - * - * The mdbx_lck_downgrade() moves the locking-FSM from "exclusive write" - * state to the "used" (i.e. shared) state. - * * States: * ?-? = free, i.e. unlocked * S-? = used, i.e. shared lock @@ -362,6 +354,16 @@ mdbx_resume_threads_after_remap(mdbx_handle_array_t *array) { * S-E = locked (transitive state) * E-S * E-E = exclusive-write, i.e. exclusive due (re)initialization + * + * The mdbx_lck_seize() moves the locking-FSM from the initial free/unlocked + * state to the "exclusive write" (and returns MDBX_RESULT_TRUE) if possible, + * or to the "used" (and returns MDBX_RESULT_FALSE). + * + * The mdbx_lck_downgrade() moves the locking-FSM from "exclusive write" + * state to the "used" (i.e. shared) state. + * + * The mdbx_lck_upgrade() moves the locking-FSM from "used" (i.e. shared) + * state to the "exclusive write" state. 
*/ static void lck_unlock(MDBX_env *env) { @@ -397,14 +399,6 @@ static void lck_unlock(MDBX_env *env) { (void)err; SetLastError(ERROR_SUCCESS); - while (funlock(env->me_lazy_fd, LCK_META)) - ; - err = GetLastError(); - assert(err == ERROR_NOT_LOCKED || - (mdbx_RunningUnderWine() && err == ERROR_LOCK_VIOLATION)); - (void)err; - SetLastError(ERROR_SUCCESS); - while (funlock(env->me_lazy_fd, LCK_WHOLE)) ; err = GetLastError(); @@ -415,30 +409,6 @@ static void lck_unlock(MDBX_env *env) { } } -MDBX_INTERNAL_FUNC int mdbx_lck_init(MDBX_env *env, - MDBX_env *inprocess_neighbor, - int global_uniqueness_flag) { - (void)env; - (void)inprocess_neighbor; - (void)global_uniqueness_flag; - return MDBX_SUCCESS; -} - -MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor) { - (void)inprocess_neighbor; - - /* LY: should unmap before releasing the locks to avoid race condition and - * STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */ - if (env->me_map) - mdbx_munmap(&env->me_dxb_mmap); - if (env->me_lck) - mdbx_munmap(&env->me_lck_mmap); - - lck_unlock(env); - return MDBX_SUCCESS; -} - /* Seize state as 'exclusive-write' (E-E and returns MDBX_RESULT_TRUE) * or as 'used' (S-? and returns MDBX_RESULT_FALSE). * Oherwise returns an error. */ @@ -535,43 +505,91 @@ MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) { } MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) { - /* Transite from exclusive state (E-?) to used (S-?) */ + /* Transite from exclusive-write state (E-E) to used (S-?) */ assert(env->me_lazy_fd != INVALID_HANDLE_VALUE); assert(env->me_lfd != INVALID_HANDLE_VALUE); -#if 1 if (env->me_flags & MDBX_EXCLUSIVE) return MDBX_SUCCESS /* nope since files were must be opened non-shareable */ ; -#else - /* 1) must be at E-E (exclusive-write) */ - if (env->me_flags & MDBX_EXCLUSIVE) { - /* transite from E-E to E_? 
(exclusive-read) */ - if (!funlock(env->me_lfd, LCK_UPPER)) - mdbx_panic("%s(%s) failed: err %u", __func__, - "E-E(exclusive-write) >> E-?(exclusive-read)", GetLastError()); - return MDBX_SUCCESS /* 2) now at E-? (exclusive-read), done */; - } -#endif - - /* 3) now at E-E (exclusive-write), transite to ?_E (middle) */ + /* 1) now at E-E (exclusive-write), transite to ?_E (middle) */ if (!funlock(env->me_lfd, LCK_LOWER)) mdbx_panic("%s(%s) failed: err %u", __func__, "E-E(exclusive-write) >> ?-E(middle)", GetLastError()); - /* 4) now at ?-E (middle), transite to S-E (locked) */ + /* 2) now at ?-E (middle), transite to S-E (locked) */ if (!flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER)) { - int rc = GetLastError() /* 5) something went wrong, give up */; + int rc = GetLastError() /* 3) something went wrong, give up */; mdbx_error("%s, err %u", "?-E(middle) >> S-E(locked)", rc); return rc; } - /* 6) got S-E (locked), continue transition to S-? (used) */ + /* 4) got S-E (locked), continue transition to S-? (used) */ if (!funlock(env->me_lfd, LCK_UPPER)) mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> S-?(used)", GetLastError()); - return MDBX_SUCCESS /* 7) now at S-? (used), done */; + return MDBX_SUCCESS /* 5) now at S-? (used), done */; +} + +MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { + /* Transite from used state (S-?) to exclusive-write (E-E) */ + assert(env->me_lfd != INVALID_HANDLE_VALUE); + + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_SUCCESS /* nope since files were must be opened non-shareable */ + ; + + int rc; + /* 1) now on S-? 
(used), try S-E (locked) */ + mdbx_jitter4testing(false); + if (!flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_UPPER)) { + rc = GetLastError() /* 2) something went wrong, give up */; + mdbx_verbose("%s, err %u", "S-?(used) >> S-E(locked)", rc); + return rc; + } + + /* 3) now on S-E (locked), transite to ?-E (middle) */ + if (!funlock(env->me_lfd, LCK_LOWER)) + mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> ?-E(middle)", + GetLastError()); + + /* 4) now on ?-E (middle), try E-E (exclusive-write) */ + mdbx_jitter4testing(false); + if (!flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER)) { + rc = GetLastError() /* 5) something went wrong, give up */; + mdbx_verbose("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc); + return rc; + } + + return MDBX_SUCCESS /* 6) now at E-E (exclusive-write), done */; +} + +MDBX_INTERNAL_FUNC int mdbx_lck_init(MDBX_env *env, + MDBX_env *inprocess_neighbor, + int global_uniqueness_flag) { + (void)env; + (void)inprocess_neighbor; + (void)global_uniqueness_flag; + return MDBX_SUCCESS; +} + +MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env, + MDBX_env *inprocess_neighbor) { + /* LY: should unmap before releasing the locks to avoid race condition and + * STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */ + if (env->me_map) + mdbx_munmap(&env->me_dxb_mmap); + if (env->me_lck) { + const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages == 0; + mdbx_munmap(&env->me_lck_mmap); + if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE && + mdbx_lck_upgrade(env) == MDBX_SUCCESS) + /* this will fail if LCK is used/mmapped by other process(es) */ + mdbx_ftruncate(env->me_lfd, 0); + } + lck_unlock(env); + return MDBX_SUCCESS; } /*----------------------------------------------------------------------------*/ diff --git a/libs/libmdbx/src/src/man1/mdbx_chk.1 b/libs/libmdbx/src/src/man1/mdbx_chk.1 index d2d03684e2..d6292f3384 100644 --- a/libs/libmdbx/src/src/man1/mdbx_chk.1 +++ 
b/libs/libmdbx/src/src/man1/mdbx_chk.1 @@ -1,6 +1,6 @@ .\" Copyright 2015-2020 Leonid Yuriev <leo@yuriev.ru>. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_CHK 1 "2020-03-18" "MDBX 0.7.x" +.TH MDBX_CHK 1 "2020-06-05" "MDBX 0.8.x" .SH NAME mdbx_chk \- MDBX checking tool .SH SYNOPSIS diff --git a/libs/libmdbx/src/src/man1/mdbx_copy.1 b/libs/libmdbx/src/src/man1/mdbx_copy.1 index a8427bb4df..6c587abdc3 100644 --- a/libs/libmdbx/src/src/man1/mdbx_copy.1 +++ b/libs/libmdbx/src/src/man1/mdbx_copy.1 @@ -2,7 +2,7 @@ .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_COPY 1 "2020-03-18" "MDBX 0.7.x" +.TH MDBX_COPY 1 "2020-06-05" "MDBX 0.8.x" .SH NAME mdbx_copy \- MDBX environment copy tool .SH SYNOPSIS diff --git a/libs/libmdbx/src/src/man1/mdbx_dump.1 b/libs/libmdbx/src/src/man1/mdbx_dump.1 index 9985127f00..698b7cf414 100644 --- a/libs/libmdbx/src/src/man1/mdbx_dump.1 +++ b/libs/libmdbx/src/src/man1/mdbx_dump.1 @@ -2,7 +2,7 @@ .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_DUMP 1 "2020-03-18" "MDBX 0.7.x" +.TH MDBX_DUMP 1 "2020-06-05" "MDBX 0.8.x" .SH NAME mdbx_dump \- MDBX environment export tool .SH SYNOPSIS diff --git a/libs/libmdbx/src/src/man1/mdbx_load.1 b/libs/libmdbx/src/src/man1/mdbx_load.1 index b401b38715..bc3d737e17 100644 --- a/libs/libmdbx/src/src/man1/mdbx_load.1 +++ b/libs/libmdbx/src/src/man1/mdbx_load.1 @@ -2,7 +2,7 @@ .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. 
-.TH MDBX_LOAD 1 "2020-03-18" "MDBX 0.7.x" +.TH MDBX_LOAD 1 "2020-06-05" "MDBX 0.8.x" .SH NAME mdbx_load \- MDBX environment import tool .SH SYNOPSIS diff --git a/libs/libmdbx/src/src/man1/mdbx_stat.1 b/libs/libmdbx/src/src/man1/mdbx_stat.1 index b7fbeb0833..f510d4e013 100644 --- a/libs/libmdbx/src/src/man1/mdbx_stat.1 +++ b/libs/libmdbx/src/src/man1/mdbx_stat.1 @@ -2,7 +2,7 @@ .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copyright 2015,2016 Peter-Service R&D LLC <http://billing.ru/>. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_STAT 1 "2020-03-18" "MDBX 0.7.x" +.TH MDBX_STAT 1 "2020-06-05" "MDBX 0.8.x" .SH NAME mdbx_stat \- MDBX environment status tool .SH SYNOPSIS diff --git a/libs/libmdbx/src/src/mdbx_chk.c b/libs/libmdbx/src/src/mdbx_chk.c index 64c1d333bc..657a28a730 100644 --- a/libs/libmdbx/src/src/mdbx_chk.c +++ b/libs/libmdbx/src/src/mdbx_chk.c @@ -34,7 +34,7 @@ const flagbit dbflags[] = {{MDBX_DUPSORT, "dupsort"}, {MDBX_DUPFIXED, "dupfixed"}, {MDBX_REVERSEDUP, "reversedup"}, {MDBX_INTEGERDUP, "integerdup"}, - {0, NULL}}; + {0, nullptr}}; #if defined(_WIN32) || defined(_WIN64) #include "wingetopt.h" @@ -122,12 +122,12 @@ static void __printf_args(1, 2) error(const char *msg, ...) { if (!quiet) { va_list args; - fflush(NULL); + fflush(nullptr); va_start(args, msg); fputs(" ! 
", stderr); vfprintf(stderr, msg, args); va_end(args); - fflush(NULL); + fflush(nullptr); } } @@ -137,7 +137,7 @@ static int check_user_break(void) { return MDBX_SUCCESS; case 1: print(" - interrupted by signal\n"); - fflush(NULL); + fflush(nullptr); user_break = 2; } return MDBX_EINTR; @@ -148,12 +148,12 @@ static void pagemap_cleanup(void) { i < ARRAY_LENGTH(walk.dbi); ++i) { if (walk.dbi[i].name) { mdbx_free((void *)walk.dbi[i].name); - walk.dbi[i].name = NULL; + walk.dbi[i].name = nullptr; } } mdbx_free(walk.pagemap); - walk.pagemap = NULL; + walk.pagemap = nullptr; } static walk_dbi_t *pagemap_lookup_dbi(const char *dbi_name, bool silent) { @@ -177,11 +177,11 @@ static walk_dbi_t *pagemap_lookup_dbi(const char *dbi_name, bool silent) { if (verbose > 0 && !silent) { print(" - found '%s' area\n", dbi_name); - fflush(NULL); + fflush(nullptr); } if (dbi == ARRAY_END(walk.dbi)) - return NULL; + return nullptr; dbi->name = mdbx_strdup(dbi_name); return last = dbi; @@ -222,14 +222,14 @@ static void __printf_args(4, 5) } printf("\n"); if (need_fflush) - fflush(NULL); + fflush(nullptr); } } } static struct problem *problems_push(void) { struct problem *p = problems_list; - problems_list = NULL; + problems_list = nullptr; return p; } @@ -249,7 +249,7 @@ static size_t problems_pop(struct problem *list) { problems_list = p; } print("\n"); - fflush(NULL); + fflush(nullptr); } problems_list = list; @@ -520,6 +520,13 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key, return check_user_break(); } +static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { + return (a->iov_len == b->iov_len && + memcmp(a->iov_base, b->iov_base, a->iov_len) == 0) + ? 
0 + : 1; +} + static int handle_maindb(const uint64_t record_number, const MDBX_val *key, const MDBX_val *data) { char *name; @@ -566,7 +573,10 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler, } if (dbi_handle == ~0u) { - rc = mdbx_dbi_open(txn, dbi_name, 0, &dbi_handle); + rc = mdbx_dbi_open_ex( + txn, dbi_name, MDBX_ACCEDE, &dbi_handle, + (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr, + (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr); if (rc) { if (!dbi_name || rc != @@ -582,7 +592,7 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler, strcmp(only_subdb, dbi_name) != 0) { if (verbose) { print("Skip processing '%s'...\n", dbi_name); - fflush(NULL); + fflush(nullptr); } skipped_subdb++; return MDBX_SUCCESS; @@ -590,7 +600,7 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler, if (!silent && verbose) { print("Processing '%s'...\n", dbi_name ? dbi_name : "@MAIN"); - fflush(NULL); + fflush(nullptr); } rc = mdbx_dbi_flags(txn, dbi_handle, &flags); @@ -654,13 +664,18 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler, error("mdbx_cursor_open failed, error %d %s\n", rc, mdbx_strerror(rc)); return rc; } + /* if (ignore_wrong_order) { + mc->mc_flags |= C_SKIPORD; + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD; + } */ const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, flags); saved_list = problems_push(); - prev_key.iov_base = NULL; + prev_key.iov_base = nullptr; prev_key.iov_len = 0; - prev_data.iov_base = NULL; + prev_data.iov_base = nullptr; prev_data.iov_len = 0; rc = mdbx_cursor_get(mc, &key, &data, MDBX_FIRST); while (rc == MDBX_SUCCESS) { @@ -697,26 +712,26 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler, } if (!bad_key) { - int cmp = mdbx_cmp(txn, dbi_handle, &prev_key, &key); + int cmp = mdbx_cmp(txn, dbi_handle, &key, &prev_key); if (cmp == 0) { ++dups; if 
((flags & MDBX_DUPSORT) == 0) { - problem_add("entry", record_count, "duplicated entries", NULL); + problem_add("entry", record_count, "duplicated entries", nullptr); if (data.iov_len == prev_data.iov_len && memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) { - problem_add("entry", record_count, "complete duplicate", NULL); + problem_add("entry", record_count, "complete duplicate", nullptr); } } else if (!bad_data) { - cmp = mdbx_dcmp(txn, dbi_handle, &prev_data, &data); + cmp = mdbx_dcmp(txn, dbi_handle, &data, &prev_data); if (cmp == 0) { - problem_add("entry", record_count, "complete duplicate", NULL); - } else if (cmp > 0 && !ignore_wrong_order) { + problem_add("entry", record_count, "complete duplicate", nullptr); + } else if (cmp < 0 && !ignore_wrong_order) { problem_add("entry", record_count, "wrong order of multi-values", - NULL); + nullptr); } } - } else if (cmp > 0 && !ignore_wrong_order) { - problem_add("entry", record_count, "wrong order of entries", NULL); + } else if (cmp < 0 && !ignore_wrong_order) { + problem_add("entry", record_count, "wrong order of entries", nullptr); } } } else if (verbose) { @@ -757,7 +772,7 @@ bailout: " key's bytes, %" PRIu64 " data's " "bytes, %" PRIu64 " problems\n", record_count, dups, key_bytes, data_bytes, problems_count); - fflush(NULL); + fflush(nullptr); } mdbx_cursor_close(mc); @@ -765,19 +780,17 @@ bailout: } static void usage(char *prog) { - fprintf( - stderr, - "usage: %s [-V] [-v] [-n] [-q] [-c] [-w] [-d] [-i] [-s subdb] dbpath\n" - " -V\t\tprint version and exit\n" - " -v\t\tmore verbose, could be used multiple times\n" - " -n\t\tNOSUBDIR mode for open\n" - " -q\t\tbe quiet\n" - " -c\t\tforce cooperative mode (don't try exclusive)\n" - " -w\t\tlock DB for writing while checking\n" - " -d\t\tdisable page-by-page traversal of B-tree\n" - " -i\t\tignore wrong order errors (for custom comparators case)\n" - " -s subdb\tprocess a specific subdatabase only\n", - prog); + fprintf(stderr, + "usage: %s [-V] 
[-v] [-q] [-c] [-w] [-d] [-i] [-s subdb] dbpath\n" + " -V\t\tprint version and exit\n" + " -v\t\tmore verbose, could be used multiple times\n" + " -q\t\tbe quiet\n" + " -c\t\tforce cooperative mode (don't try exclusive)\n" + " -w\t\tlock DB for writing while checking\n" + " -d\t\tdisable page-by-page traversal of B-tree\n" + " -i\t\tignore wrong order errors (for custom comparators case)\n" + " -s subdb\tprocess a specific subdatabase only\n", + prog); exit(EXIT_INTERRUPTED); } @@ -943,7 +956,7 @@ int main(int argc, char *argv[]) { if (argc < 2) usage(prog); - for (int i; (i = getopt(argc, argv, "Vvqnwcdsi:")) != EOF;) { + for (int i; (i = getopt(argc, argv, "Vvqnwcdis:")) != EOF;) { switch (i) { case 'V': printf("mdbx_chk version %d.%d.%d.%d\n" @@ -1011,7 +1024,7 @@ int main(int argc, char *argv[]) { mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, (envflags & MDBX_RDONLY) ? "only" : "write"); - fflush(NULL); + fflush(nullptr); rc = mdbx_env_create(&env); if (rc) { @@ -1057,7 +1070,7 @@ int main(int argc, char *argv[]) { locked = true; } - rc = mdbx_txn_begin(env, NULL, MDBX_RDONLY, &txn); + rc = mdbx_txn_begin(env, nullptr, MDBX_RDONLY, &txn); if (rc) { error("mdbx_txn_begin() failed, error %d %s\n", rc, mdbx_strerror(rc)); goto bailout; @@ -1252,7 +1265,7 @@ int main(int argc, char *argv[]) { uint64_t empty_pages, lost_bytes; print("Traversal b-tree by txn#%" PRIaTXN "...\n", txn->mt_txnid); - fflush(NULL); + fflush(nullptr); walk.pagemap = mdbx_calloc((size_t)backed_pages, sizeof(*walk.pagemap)); if (!walk.pagemap) { rc = errno ? 
errno : MDBX_ENOMEM; @@ -1261,7 +1274,7 @@ int main(int argc, char *argv[]) { } saved_list = problems_push(); - rc = mdbx_env_pgwalk(txn, pgvisitor, NULL); + rc = mdbx_env_pgwalk(txn, pgvisitor, nullptr, ignore_wrong_order); traversal_problems = problems_pop(saved_list); if (rc) { @@ -1283,8 +1296,8 @@ int main(int argc, char *argv[]) { if (verbose) { uint64_t total_page_bytes = walk.pgcount * envstat.ms_psize; - print(" - pages: total %" PRIu64 ", unused %" PRIu64 "\n", walk.pgcount, - unused_pages); + print(" - pages: walked %" PRIu64 ", left/unused %" PRIu64 "\n", + walk.pgcount, unused_pages); if (verbose > 1) { for (walk_dbi_t *dbi = walk.dbi; dbi < ARRAY_END(walk.dbi) && dbi->name; ++dbi) { @@ -1348,12 +1361,12 @@ int main(int argc, char *argv[]) { } } else if (verbose) { print("Skipping b-tree walk...\n"); - fflush(NULL); + fflush(nullptr); } if (!verbose) print("Iterating DBIs...\n"); - problems_maindb = process_db(~0u, /* MAIN_DBI */ NULL, NULL, false); + problems_maindb = process_db(~0u, /* MAIN_DBI */ nullptr, nullptr, false); problems_freedb = process_db(FREE_DBI, "@GC", handle_freedb, false); if (verbose) { @@ -1403,7 +1416,7 @@ int main(int argc, char *argv[]) { "monopolistic or read-write mode only)\n"); } - if (!process_db(MAIN_DBI, NULL, handle_maindb, true)) { + if (!process_db(MAIN_DBI, nullptr, handle_maindb, true)) { if (!userdb_count && verbose) print(" - does not contain multiple databases\n"); } @@ -1415,7 +1428,7 @@ int main(int argc, char *argv[]) { print("Perform sync-to-disk for make steady checkpoint at txn-id #%" PRIi64 "\n", envinfo.mi_recent_txnid); - fflush(NULL); + fflush(nullptr); if (locked) { mdbx_txn_unlock(env); locked = false; @@ -1440,7 +1453,7 @@ bailout: const bool dont_sync = rc != 0 || total_problems; mdbx_env_close_ex(env, dont_sync); } - fflush(NULL); + fflush(nullptr); if (rc) { if (rc < 0) return user_break ? 
EXIT_INTERRUPTED : EXIT_FAILURE_SYS; diff --git a/libs/libmdbx/src/src/mdbx_copy.c b/libs/libmdbx/src/src/mdbx_copy.c index 81d177ab99..cde9573a15 100644 --- a/libs/libmdbx/src/src/mdbx_copy.c +++ b/libs/libmdbx/src/src/mdbx_copy.c @@ -45,11 +45,10 @@ static void signal_handler(int sig) { static void usage(const char *prog) { fprintf(stderr, - "usage: %s [-V] [-q] [-c] [-n] src_path [dest_path]\n" + "usage: %s [-V] [-q] [-c] src_path [dest_path]\n" " -V\t\tprint version and exit\n" " -q\t\tbe quiet\n" " -c\t\tenable compactification (skip unused pages)\n" - " -n\t\tNOSUBDIR mode for open\n" " src_path\tsource database\n" " dest_path\tdestination (stdout if not specified)\n", prog); diff --git a/libs/libmdbx/src/src/mdbx_dump.c b/libs/libmdbx/src/src/mdbx_dump.c index 8bb057b2df..b921a8139b 100644 --- a/libs/libmdbx/src/src/mdbx_dump.c +++ b/libs/libmdbx/src/src/mdbx_dump.c @@ -26,7 +26,8 @@ #include <ctype.h> #define PRINT 1 -static int mode; +#define GLOBAL 2 +static int mode = GLOBAL; typedef struct flagbit { int bit; @@ -39,7 +40,7 @@ flagbit dbflags[] = {{MDBX_REVERSEKEY, "reversekey"}, {MDBX_DUPFIXED, "dupfixed"}, {MDBX_INTEGERDUP, "integerdup"}, {MDBX_REVERSEDUP, "reversedup"}, - {0, NULL}}; + {0, nullptr}}; #if defined(_WIN32) || defined(_WIN64) #include "wingetopt.h" @@ -92,53 +93,105 @@ static void dumpval(MDBX_val *v) { putchar(' '); c = v->iov_base; end = c + v->iov_len; - while (c < end) { + while (c < end) dumpbyte(*c++); - } putchar('\n'); } +bool quiet = false, rescue = false; +const char *prog; +static void error(const char *func, int rc) { + fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, mdbx_strerror(rc)); +} + /* Dump in BDB-compatible format */ -static int dumpit(MDBX_txn *txn, MDBX_dbi dbi, char *name) { - MDBX_cursor *mc; - MDBX_stat ms; - MDBX_val key, data; - MDBX_envinfo info; +static int dump_sdb(MDBX_txn *txn, MDBX_dbi dbi, char *name) { unsigned int flags; - int rc, i; - - rc = mdbx_dbi_flags(txn, dbi, &flags); - if (rc) + int 
rc = mdbx_dbi_flags(txn, dbi, &flags); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_flags", rc); return rc; + } + MDBX_stat ms; rc = mdbx_dbi_stat(txn, dbi, &ms, sizeof(ms)); - if (rc) + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_stat", rc); return rc; + } + MDBX_envinfo info; rc = mdbx_env_info_ex(mdbx_txn_env(txn), txn, &info, sizeof(info)); - if (rc) + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_info_ex", rc); return rc; + } printf("VERSION=3\n"); + if (mode & GLOBAL) { + mode -= GLOBAL; + if (info.mi_geo.upper != info.mi_geo.lower) + printf("geometry=l%" PRIu64 ",c%" PRIu64 ",u%" PRIu64 ",s%" PRIu64 + ",g%" PRIu64 "\n", + info.mi_geo.lower, info.mi_geo.current, info.mi_geo.upper, + info.mi_geo.shrink, info.mi_geo.grow); + printf("mapsize=%" PRIu64 "\n", info.mi_geo.upper); + printf("maxreaders=%u\n", info.mi_maxreaders); + + mdbx_canary canary; + rc = mdbx_canary_get(txn, &canary); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_canary_get", rc); + return rc; + } + if (canary.v) + printf("canary=v%" PRIu64 ",x%" PRIu64 ",y%" PRIu64 ",z%" PRIu64 "\n", + canary.v, canary.x, canary.y, canary.z); + } printf("format=%s\n", mode & PRINT ? "print" : "bytevalue"); if (name) printf("database=%s\n", name); printf("type=btree\n"); - printf("mapsize=%" PRIu64 "\n", info.mi_geo.upper); - printf("maxreaders=%u\n", info.mi_maxreaders); - - for (i = 0; dbflags[i].bit; i++) + printf("db_pagesize=%u\n", ms.ms_psize); + /* if (ms.ms_mod_txnid) + printf("txnid=%" PRIaTXN "\n", ms.ms_mod_txnid); + else if (!name) + printf("txnid=%" PRIaTXN "\n", mdbx_txn_id(txn)); */ + + printf("duplicates=%d\n", (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_INTEGERDUP | MDBX_REVERSEDUP)) + ? 
1 + : 0); + for (int i = 0; dbflags[i].bit; i++) if (flags & dbflags[i].bit) printf("%s=1\n", dbflags[i].name); - printf("db_pagesize=%d\n", ms.ms_psize); - printf("HEADER=END\n"); + uint64_t sequence; + rc = mdbx_dbi_sequence(txn, dbi, &sequence, 0); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_sequence", rc); + return rc; + } + if (sequence) + printf("sequence=%" PRIu64 "\n", sequence); + + printf("HEADER=END\n"); /*-------------------------------------------------*/ - rc = mdbx_cursor_open(txn, dbi, &mc); - if (rc) + MDBX_cursor *cursor; + MDBX_val key, data; + rc = mdbx_cursor_open(txn, dbi, &cursor); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_cursor_open", rc); return rc; + } + if (MDBX_DEBUG > 0 && rescue) { + cursor->mc_flags |= C_SKIPORD; + if (cursor->mc_xcursor) + cursor->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD; + } - while ((rc = mdbx_cursor_get(mc, &key, &data, MDBX_NEXT)) == MDBX_SUCCESS) { + while ((rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT)) == + MDBX_SUCCESS) { if (user_break) { rc = MDBX_EINTR; break; @@ -154,13 +207,14 @@ static int dumpit(MDBX_txn *txn, MDBX_dbi dbi, char *name) { printf("DATA=END\n"); if (rc == MDBX_NOTFOUND) rc = MDBX_SUCCESS; - + if (unlikely(rc != MDBX_SUCCESS)) + error("mdbx_cursor_get", rc); return rc; } -static void usage(char *prog) { +static void usage(void) { fprintf(stderr, - "usage: %s [-V] [-q] [-f file] [-l] [-p] [-a|-s subdb] [-r] [-n] " + "usage: %s [-V] [-q] [-f file] [-l] [-p] [-a|-s subdb] [-r] " "dbpath\n" " -V\t\tprint version and exit\n" " -q\t\tbe quiet\n" @@ -170,24 +224,31 @@ static void usage(char *prog) { " -a\t\tdump main DB and all subDBs,\n" " \t\tby default dump only the main DB\n" " -s\t\tdump only the named subDB\n" - " -r\t\trescure mode (ignore errors to dump corrupted DB)\n" - " -n\t\tNOSUBDIR mode for open\n", + " -r\t\trescure mode (ignore errors to dump corrupted DB)\n", prog); exit(EXIT_FAILURE); } +static int equal_or_greater(const MDBX_val *a, const 
MDBX_val *b) { + return (a->iov_len == b->iov_len && + memcmp(a->iov_base, b->iov_base, a->iov_len) == 0) + ? 0 + : 1; +} + int main(int argc, char *argv[]) { int i, rc; MDBX_env *env; MDBX_txn *txn; MDBX_dbi dbi; - char *prog = argv[0]; + prog = argv[0]; char *envname; - char *subname = NULL; - int alldbs = 0, envflags = 0, list = 0, quiet = 0, rescue = 0; + char *subname = nullptr; + unsigned envflags = 0; + bool alldbs = false, list = false; if (argc < 2) - usage(prog); + usage(); while ((i = getopt(argc, argv, "af:lnps:Vrq")) != EOF) { switch (i) { @@ -206,16 +267,16 @@ int main(int argc, char *argv[]) { mdbx_build.options); return EXIT_SUCCESS; case 'l': - list = 1; + list = true; /*FALLTHROUGH*/; __fallthrough; case 'a': if (subname) - usage(prog); - alldbs++; + usage(); + alldbs = true; break; case 'f': - if (freopen(optarg, "w", stdout) == NULL) { + if (freopen(optarg, "w", stdout) == nullptr) { fprintf(stderr, "%s: %s: reopen: %s\n", prog, optarg, mdbx_strerror(errno)); exit(EXIT_FAILURE); @@ -229,22 +290,22 @@ int main(int argc, char *argv[]) { break; case 's': if (alldbs) - usage(prog); + usage(); subname = optarg; break; case 'q': - quiet = 1; + quiet = true; break; case 'r': - rescue = 1; + rescue = true; break; default: - usage(prog); + usage(); } } if (optind != argc - 1) - usage(prog); + usage(); #if defined(_WIN32) || defined(_WIN64) SetConsoleCtrlHandler(ConsoleBreakHandlerRoutine, true); @@ -264,78 +325,97 @@ int main(int argc, char *argv[]) { fprintf(stderr, "mdbx_dump %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname); - fflush(NULL); + fflush(nullptr); } rc = mdbx_env_create(&env); - if (rc) { - fprintf(stderr, "mdbx_env_create failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_create", rc); return EXIT_FAILURE; } if (alldbs || subname) { - mdbx_env_set_maxdbs(env, 2); + rc = mdbx_env_set_maxdbs(env, 2); + if 
(unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_set_maxdbs", rc); + goto env_close; + } } rc = mdbx_env_open( env, envname, envflags | (rescue ? MDBX_RDONLY | MDBX_EXCLUSIVE : MDBX_RDONLY), 0); - if (rc) { - fprintf(stderr, "mdbx_env_open failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_open", rc); goto env_close; } - rc = mdbx_txn_begin(env, NULL, MDBX_RDONLY, &txn); - if (rc) { - fprintf(stderr, "mdbx_txn_begin failed, error %d %s\n", rc, - mdbx_strerror(rc)); + rc = mdbx_txn_begin(env, nullptr, MDBX_RDONLY, &txn); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_begin", rc); goto env_close; } - rc = mdbx_dbi_open(txn, subname, 0, &dbi); - if (rc) { - fprintf(stderr, "mdbx_open failed, error %d %s\n", rc, mdbx_strerror(rc)); + rc = mdbx_dbi_open(txn, subname, MDBX_ACCEDE, &dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_open", rc); goto txn_abort; } if (alldbs) { - MDBX_cursor *cursor; - MDBX_val key; - int count = 0; + assert(dbi == MAIN_DBI); - rc = mdbx_cursor_open(txn, dbi, &cursor); - if (rc) { - fprintf(stderr, "mdbx_cursor_open failed, error %d %s\n", rc, - mdbx_strerror(rc)); + MDBX_cursor *cursor; + rc = mdbx_cursor_open(txn, MAIN_DBI, &cursor); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_cursor_open", rc); goto txn_abort; } - while ((rc = mdbx_cursor_get(cursor, &key, NULL, MDBX_NEXT_NODUP)) == 0) { + if (MDBX_DEBUG > 0 && rescue) { + cursor->mc_flags |= C_SKIPORD; + if (cursor->mc_xcursor) + cursor->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD; + } + + bool have_raw = false; + int count = 0; + MDBX_val key; + while (MDBX_SUCCESS == + (rc = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { if (user_break) { rc = MDBX_EINTR; break; } - char *str; - MDBX_dbi db2; + if (memchr(key.iov_base, '\0', key.iov_len)) continue; - count++; - str = mdbx_malloc(key.iov_len + 1); - memcpy(str, key.iov_base, key.iov_len); - str[key.iov_len] = '\0'; - rc = 
mdbx_dbi_open(txn, str, 0, &db2); - if (rc == MDBX_SUCCESS) { + subname = mdbx_malloc(key.iov_len + 1); + memcpy(subname, key.iov_base, key.iov_len); + subname[key.iov_len] = '\0'; + + MDBX_dbi sub_dbi; + rc = mdbx_dbi_open_ex(txn, subname, MDBX_ACCEDE, &sub_dbi, + rescue ? equal_or_greater : nullptr, + rescue ? equal_or_greater : nullptr); + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_INCOMPATIBLE) { + have_raw = true; + continue; + } + error("mdbx_dbi_open", rc); + if (!rescue) + break; + } else { + count++; if (list) { - printf("%s\n", str); - list++; + printf("%s\n", subname); } else { - rc = dumpit(txn, db2, str); - if (rc) { + rc = dump_sdb(txn, sub_dbi, subname); + if (unlikely(rc != MDBX_SUCCESS)) { if (!rescue) break; fprintf(stderr, "%s: %s: ignore %s for `%s` and continue\n", prog, - envname, mdbx_strerror(rc), str); + envname, mdbx_strerror(rc), subname); /* Here is a hack for rescue mode, don't do that: * - we should restart transaction in case error due * database corruption; @@ -345,39 +425,51 @@ int main(int argc, char *argv[]) { * mode and transaction is the same, i.e. has the same address * and so on. 
*/ rc = mdbx_txn_reset(txn); - if (rc) { - fprintf(stderr, "mdbx_txn_reset failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_reset", rc); goto env_close; } rc = mdbx_txn_renew(txn); - if (rc) { - fprintf(stderr, "mdbx_txn_renew failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_renew", rc); goto env_close; } } } - mdbx_dbi_close(env, db2); + rc = mdbx_dbi_close(env, sub_dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_close", rc); + break; + } } - mdbx_free(str); - if (rc) - continue; + mdbx_free(subname); } mdbx_cursor_close(cursor); - if (!count) { + cursor = nullptr; + + if (have_raw && (!count /* || rescue */)) + rc = dump_sdb(txn, MAIN_DBI, nullptr); + else if (!count) { fprintf(stderr, "%s: %s does not contain multiple databases\n", prog, envname); rc = MDBX_NOTFOUND; - } else if (rc == MDBX_INCOMPATIBLE) { - /* LY: the record it not a named sub-db. */ - rc = MDBX_SUCCESS; } } else { - rc = dumpit(txn, dbi, subname); + rc = dump_sdb(txn, dbi, subname); + } + + switch (rc) { + case MDBX_NOTFOUND: + rc = MDBX_SUCCESS; + case MDBX_SUCCESS: + break; + case MDBX_EINTR: + fprintf(stderr, "Interrupted by signal/user\n"); + break; + default: + if (unlikely(rc != MDBX_SUCCESS)) + error("mdbx_cursor_get", rc); } - if (rc && rc != MDBX_NOTFOUND) - fprintf(stderr, "%s: %s: %s\n", prog, envname, mdbx_strerror(rc)); mdbx_dbi_close(env, dbi); txn_abort: diff --git a/libs/libmdbx/src/src/mdbx_load.c b/libs/libmdbx/src/src/mdbx_load.c index eb39cc1174..463f020706 100644 --- a/libs/libmdbx/src/src/mdbx_load.c +++ b/libs/libmdbx/src/src/mdbx_load.c @@ -45,178 +45,305 @@ static void signal_handler(int sig) { #endif /* !WINDOWS */ -#define PRINT 1 -#define NOHDR 2 -static int mode; - -static char *subname = NULL; +static char *prog; static size_t lineno; -static int version; +static void error(const char *func, int rc) { + if (lineno) + fprintf(stderr, "%s: 
at input line %" PRIiSIZE ": %s() error %d, %s\n", + prog, lineno, func, rc, mdbx_strerror(rc)); + else + fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, + mdbx_strerror(rc)); +} -static int dbi_flags; -static char *prog; -static bool Eof; +static char *valstr(char *line, const char *item) { + const size_t len = strlen(item); + if (strncmp(line, item, len) != 0) + return nullptr; + if (line[len] != '=') { + if (line[len] > ' ') + return nullptr; + fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", + prog, lineno, item); + exit(EXIT_FAILURE); + } + char *ptr = strchr(line, '\n'); + if (ptr) + *ptr = '\0'; + return line + len + 1; +} + +static bool valnum(char *line, const char *item, uint64_t *value) { + char *str = valstr(line, item); + if (!str) + return false; + + char *end = nullptr; + *value = strtoull(str, &end, 0); + if (end && *end) { + fprintf(stderr, + "%s: line %" PRIiSIZE ": unexpected number format for '%s'\n", prog, + lineno, item); + exit(EXIT_FAILURE); + } + return true; +} +static bool valbool(char *line, const char *item, bool *value) { + uint64_t u64; + if (!valnum(line, item, &u64)) + return false; + + if (u64 > 1) { + fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected value for '%s'\n", prog, + lineno, item); + exit(EXIT_FAILURE); + } + *value = u64 != 0; + return true; +} + +/*----------------------------------------------------------------------------*/ + +static char *subname = nullptr; +static int dbi_flags; +static txnid_t txnid; +static uint64_t sequence; +static mdbx_canary canary; static MDBX_envinfo envinfo; + +#define PRINT 1 +#define NOHDR 2 +#define GLOBAL 4 +static int mode = GLOBAL; + static MDBX_val kbuf, dbuf; static MDBX_val k0buf; #define STRLENOF(s) (sizeof(s) - 1) typedef struct flagbit { - int bit; + unsigned bit; char *name; - int len; + unsigned len; } flagbit; #define S(s) s, STRLENOF(s) -flagbit dbflags[] = {{MDBX_REVERSEKEY, S("reversekey")}, - {MDBX_DUPSORT, S("dupsort")}, - 
{MDBX_INTEGERKEY, S("integerkey")}, - {MDBX_DUPFIXED, S("dupfixed")}, - {MDBX_INTEGERDUP, S("integerdup")}, - {MDBX_REVERSEDUP, S("reversedup")}, - {0, NULL, 0}}; +flagbit dbflags[] = { + {MDBX_REVERSEKEY, S("reversekey")}, {MDBX_DUPSORT, S("duplicates")}, + {MDBX_DUPSORT, S("dupsort")}, {MDBX_INTEGERKEY, S("integerkey")}, + {MDBX_DUPFIXED, S("dupfixed")}, {MDBX_INTEGERDUP, S("integerdup")}, + {MDBX_REVERSEDUP, S("reversedup")}, {0, nullptr, 0}}; + +static int readhdr(void) { + /* reset parameters */ + if (subname) { + free(subname); + subname = nullptr; + } + dbi_flags = 0; + txnid = 0; + sequence = 0; -static void readhdr(void) { - char *ptr; + while (true) { + errno = 0; + if (fgets(dbuf.iov_base, (int)dbuf.iov_len, stdin) == nullptr) + return errno ? errno : EOF; + if (user_break) + return MDBX_EINTR; - dbi_flags = 0; - while (fgets(dbuf.iov_base, (int)dbuf.iov_len, stdin) != NULL) { lineno++; + uint64_t u64; + + if (valnum(dbuf.iov_base, "VERSION", &u64)) { + if (u64 != 3) { + fprintf(stderr, + "%s: line %" PRIiSIZE ": unsupported value %" PRIu64 + " for %s\n", + prog, lineno, u64, "VERSION"); + exit(EXIT_FAILURE); + } + continue; + } - if (!strncmp(dbuf.iov_base, "db_pagesize=", STRLENOF("db_pagesize="))) { - envinfo.mi_dxb_pagesize = - atoi((char *)dbuf.iov_base + STRLENOF("db_pagesize=")); + if (valnum(dbuf.iov_base, "db_pagesize", &u64)) { + if (!(mode & GLOBAL) && envinfo.mi_dxb_pagesize != u64) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore value %" PRIu64 + " for '%s' in non-global context\n", + prog, lineno, u64, "db_pagesize"); + else if (u64 < MDBX_MIN_PAGESIZE || u64 > MDBX_MAX_PAGESIZE) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore unsupported value %" PRIu64 + " for %s\n", + prog, lineno, u64, "db_pagesize"); + else + envinfo.mi_dxb_pagesize = (uint32_t)u64; continue; } - if (!strncmp(dbuf.iov_base, "duplicates=", STRLENOF("duplicates="))) { - dbi_flags |= MDBX_DUPSORT; + char *str = valstr(dbuf.iov_base, "format"); + if (str) { + if 
(strcmp(str, "print") == 0) { + mode |= PRINT; + continue; + } + if (strcmp(str, "bytevalue") == 0) { + mode &= ~PRINT; + continue; + } + fprintf(stderr, "%s: line %" PRIiSIZE ": unsupported value '%s' for %s\n", + prog, lineno, str, "format"); + exit(EXIT_FAILURE); + } + + str = valstr(dbuf.iov_base, "database"); + if (str) { + if (*str) { + subname = mdbx_strdup(str); + if (!subname) { + perror("strdup()"); + exit(EXIT_FAILURE); + } + } continue; } - if (!strncmp(dbuf.iov_base, "VERSION=", STRLENOF("VERSION="))) { - version = atoi((char *)dbuf.iov_base + STRLENOF("VERSION=")); - if (version > 3) { - fprintf(stderr, "%s: line %" PRIiSIZE ": unsupported VERSION %d\n", - prog, lineno, version); + str = valstr(dbuf.iov_base, "type"); + if (str) { + if (strcmp(str, "btree") != 0) { + fprintf(stderr, + "%s: line %" PRIiSIZE ": unsupported value '%s' for %s\n", prog, + lineno, str, "type"); exit(EXIT_FAILURE); } continue; } - if (!strncmp(dbuf.iov_base, "HEADER=END", STRLENOF("HEADER=END"))) - return; + if (valnum(dbuf.iov_base, "mapaddr", &u64)) { + if (u64) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 + " for %s\n", + prog, lineno, u64, "mapaddr"); + continue; + } - if (!strncmp(dbuf.iov_base, "format=", STRLENOF("format="))) { - if (!strncmp((char *)dbuf.iov_base + STRLENOF("FORMAT="), "print", - STRLENOF("print"))) - mode |= PRINT; - else if (strncmp((char *)dbuf.iov_base + STRLENOF("FORMAT="), "bytevalue", - STRLENOF("bytevalue"))) { - fprintf(stderr, "%s: line %" PRIiSIZE ": unsupported FORMAT %s\n", prog, - lineno, (char *)dbuf.iov_base + STRLENOF("FORMAT=")); - exit(EXIT_FAILURE); - } + if (valnum(dbuf.iov_base, "mapsize", &u64)) { + if (!(mode & GLOBAL)) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore value %" PRIu64 + " for '%s' in non-global context\n", + prog, lineno, u64, "mapsize"); + else if (u64 < MIN_MAPSIZE || u64 > MAX_MAPSIZE64) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 
+ " for %s\n", + prog, lineno, u64, "mapsize"); + else + envinfo.mi_mapsize = (size_t)u64; continue; } - if (!strncmp(dbuf.iov_base, "database=", STRLENOF("database="))) { - ptr = memchr(dbuf.iov_base, '\n', dbuf.iov_len); - if (ptr) - *ptr = '\0'; - if (subname) - mdbx_free(subname); - subname = mdbx_strdup((char *)dbuf.iov_base + STRLENOF("database=")); + if (valnum(dbuf.iov_base, "maxreaders", &u64)) { + if (!(mode & GLOBAL)) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore value %" PRIu64 + " for '%s' in non-global context\n", + prog, lineno, u64, "maxreaders"); + else if (u64 < 1 || u64 > MDBX_READERS_LIMIT) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 + " for %s\n", + prog, lineno, u64, "maxreaders"); + else + envinfo.mi_maxreaders = (int)u64; continue; } - if (!strncmp(dbuf.iov_base, "type=", STRLENOF("type="))) { - if (strncmp((char *)dbuf.iov_base + STRLENOF("type="), "btree", - STRLENOF("btree"))) { - fprintf(stderr, "%s: line %" PRIiSIZE ": unsupported type %s\n", prog, - lineno, (char *)dbuf.iov_base + STRLENOF("type=")); - exit(EXIT_FAILURE); - } + if (valnum(dbuf.iov_base, "txnid", &u64)) { + if (u64 < MIN_TXNID || u64 > MAX_TXNID) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore unsupported value 0x%" PRIx64 + " for %s\n", + prog, lineno, u64, "txnid"); + txnid = u64; continue; } - if (!strncmp(dbuf.iov_base, "mapaddr=", STRLENOF("mapaddr="))) { - int i; - ptr = memchr(dbuf.iov_base, '\n', dbuf.iov_len); - if (ptr) - *ptr = '\0'; - void *unused; - i = sscanf((char *)dbuf.iov_base + STRLENOF("mapaddr="), "%p", &unused); - if (i != 1) { - fprintf(stderr, "%s: line %" PRIiSIZE ": invalid mapaddr %s\n", prog, - lineno, (char *)dbuf.iov_base + STRLENOF("mapaddr=")); - exit(EXIT_FAILURE); - } + if (valnum(dbuf.iov_base, "sequence", &u64)) { + sequence = u64; continue; } - if (!strncmp(dbuf.iov_base, "mapsize=", STRLENOF("mapsize="))) { - int i; - ptr = memchr(dbuf.iov_base, '\n', dbuf.iov_len); - if (ptr) - *ptr = 
'\0'; - i = sscanf((char *)dbuf.iov_base + STRLENOF("mapsize="), "%" PRIu64, - &envinfo.mi_mapsize); - if (i != 1) { - fprintf(stderr, "%s: line %" PRIiSIZE ": invalid mapsize %s\n", prog, - lineno, (char *)dbuf.iov_base + STRLENOF("mapsize=")); + str = valstr(dbuf.iov_base, "geometry"); + if (str) { + if (!(mode & GLOBAL)) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore values %s" + " for '%s' in non-global context\n", + prog, lineno, str, "geometry"); + else if (sscanf(str, + "l%" PRIu64 ",c%" PRIu64 ",u%" PRIu64 ",s%" PRIu64 + ",g%" PRIu64, + &envinfo.mi_geo.lower, &envinfo.mi_geo.current, + &envinfo.mi_geo.upper, &envinfo.mi_geo.shrink, + &envinfo.mi_geo.grow) != 5) { + fprintf(stderr, + "%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", + prog, lineno, "geometry"); exit(EXIT_FAILURE); } continue; } - if (!strncmp(dbuf.iov_base, "maxreaders=", STRLENOF("maxreaders="))) { - int i; - ptr = memchr(dbuf.iov_base, '\n', dbuf.iov_len); - if (ptr) - *ptr = '\0'; - i = sscanf((char *)dbuf.iov_base + STRLENOF("maxreaders="), "%u", - &envinfo.mi_maxreaders); - if (i != 1) { - fprintf(stderr, "%s: line %" PRIiSIZE ": invalid maxreaders %s\n", prog, - lineno, (char *)dbuf.iov_base + STRLENOF("maxreaders=")); + str = valstr(dbuf.iov_base, "canary"); + if (str) { + if (!(mode & GLOBAL)) + fprintf(stderr, + "%s: line %" PRIiSIZE ": ignore values %s" + " for '%s' in non-global context\n", + prog, lineno, str, "canary"); + else if (sscanf(str, "v%" PRIu64 ",x%" PRIu64 ",y%" PRIu64 ",z%" PRIu64, + &canary.v, &canary.x, &canary.y, &canary.z) != 4) { + fprintf(stderr, + "%s: line %" PRIiSIZE ": unexpected line format for '%s'\n", + prog, lineno, "canary"); exit(EXIT_FAILURE); } continue; } - int i; - for (i = 0; dbflags[i].bit; i++) { - if (!strncmp(dbuf.iov_base, dbflags[i].name, dbflags[i].len) && - ((char *)dbuf.iov_base)[dbflags[i].len] == '=') { - if (((char *)dbuf.iov_base)[dbflags[i].len + 1] == '1') + for (int i = 0; dbflags[i].bit; i++) { + bool value; + if 
(valbool(dbuf.iov_base, dbflags[i].name, &value)) { + if (value) dbi_flags |= dbflags[i].bit; - break; + else + dbi_flags &= ~dbflags[i].bit; + goto next; } } - if (!dbflags[i].bit) { - ptr = memchr(dbuf.iov_base, '=', dbuf.iov_len); - if (!ptr) { - fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected format\n", prog, - lineno); - exit(EXIT_FAILURE); - } else { - *ptr = '\0'; - fprintf(stderr, - "%s: line %" PRIiSIZE ": unrecognized keyword ignored: %s\n", - prog, lineno, (char *)dbuf.iov_base); - } + + str = valstr(dbuf.iov_base, "HEADER"); + if (str) { + if (strcmp(str, "END") == 0) + return MDBX_SUCCESS; } + + fprintf(stderr, + "%s: line %" PRIiSIZE ": unrecognized keyword ignored: %s\n", prog, + lineno, (char *)dbuf.iov_base); + next:; } - Eof = true; + return EOF; } -static void badend(void) { +static int badend(void) { fprintf(stderr, "%s: line %" PRIiSIZE ": unexpected end of input\n", prog, lineno); + return errno ? errno : MDBX_ENODATA; } static int unhex(unsigned char *c2) { @@ -237,29 +364,26 @@ static int readline(MDBX_val *out, MDBX_val *buf) { size_t len, l2; int c; + if (user_break) + return MDBX_EINTR; + + errno = 0; if (!(mode & NOHDR)) { c = fgetc(stdin); - if (c == EOF) { - Eof = true; - return EOF; - } + if (c == EOF) + return errno ? errno : EOF; if (c != ' ') { lineno++; - if (fgets(buf->iov_base, (int)buf->iov_len, stdin) == NULL) { - badend: - Eof = true; - badend(); - return EOF; + errno = 0; + if (fgets(buf->iov_base, (int)buf->iov_len, stdin)) { + if (c == 'D' && !strncmp(buf->iov_base, "ATA=END", STRLENOF("ATA=END"))) + return EOF; } - if (c == 'D' && !strncmp(buf->iov_base, "ATA=END", STRLENOF("ATA=END"))) - return EOF; - goto badend; + return badend(); } } - if (fgets(buf->iov_base, (int)buf->iov_len, stdin) == NULL) { - Eof = true; - return EOF; - } + if (fgets(buf->iov_base, (int)buf->iov_len, stdin) == nullptr) + return errno ? 
errno : EOF; lineno++; c1 = buf->iov_base; @@ -270,18 +394,15 @@ static int readline(MDBX_val *out, MDBX_val *buf) { while (c1[len - 1] != '\n') { buf->iov_base = mdbx_realloc(buf->iov_base, buf->iov_len * 2); if (!buf->iov_base) { - Eof = true; fprintf(stderr, "%s: line %" PRIiSIZE ": out of memory, line too long\n", prog, lineno); - return EOF; + return MDBX_ENOMEM; } c1 = buf->iov_base; c1 += l2; - if (fgets((char *)c1, (int)buf->iov_len + 1, stdin) == NULL) { - Eof = true; - badend(); - return EOF; - } + errno = 0; + if (fgets((char *)c1, (int)buf->iov_len + 1, stdin) == nullptr) + return errno ? errno : EOF; buf->iov_len *= 2; len = strlen((char *)c1); l2 += len; @@ -297,11 +418,8 @@ static int readline(MDBX_val *out, MDBX_val *buf) { if (c2[1] == '\\') { *c1++ = '\\'; } else { - if (c2 + 3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) { - Eof = true; - badend(); - return EOF; - } + if (c2 + 3 > end || !isxdigit(c2[1]) || !isxdigit(c2[2])) + return badend(); *c1++ = (char)unhex(++c2); } c2 += 2; @@ -312,17 +430,11 @@ static int readline(MDBX_val *out, MDBX_val *buf) { } } else { /* odd length not allowed */ - if (len & 1) { - Eof = true; - badend(); - return EOF; - } + if (len & 1) + return badend(); while (c2 < end) { - if (!isxdigit(*c2) || !isxdigit(c2[1])) { - Eof = true; - badend(); - return EOF; - } + if (!isxdigit(*c2) || !isxdigit(c2[1])) + return badend(); *c1++ = (char)unhex(c2); c2 += 2; } @@ -330,12 +442,12 @@ static int readline(MDBX_val *out, MDBX_val *buf) { c2 = out->iov_base = buf->iov_base; out->iov_len = c1 - c2; - return 0; + return MDBX_SUCCESS; } static void usage(void) { fprintf(stderr, - "usage: %s [-V] [-q] [-a] [-f file] [-s name] [-N] [-T] [-r] [-n] " + "usage: %s [-V] [-q] [-a] [-f file] [-s name] [-N] [-T] [-r] " "dbpath\n" " -V\t\tprint version and exit\n" " -q\t\tbe quiet\n" @@ -345,29 +457,29 @@ static void usage(void) { " -s name\tload into named subDB\n" " -N\t\tuse NOOVERWRITE on puts\n" " -T\t\tread plaintext\n" - " 
-r\t\trescure mode (ignore errors to load corrupted DB dump)\n" - " -n\t\tNOSUBDIR mode for open\n", + " -r\t\trescure mode (ignore errors to load corrupted DB dump)\n", prog); exit(EXIT_FAILURE); } -static int anyway_greater(const MDBX_val *a, const MDBX_val *b) { - (void)a; - (void)b; - return 1; +static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { + return (a->iov_len == b->iov_len && + memcmp(a->iov_base, b->iov_base, a->iov_len) == 0) + ? 0 + : 1; } int main(int argc, char *argv[]) { int i, rc; - MDBX_env *env = NULL; - MDBX_txn *txn = NULL; - MDBX_cursor *mc = NULL; + MDBX_env *env = nullptr; + MDBX_txn *txn = nullptr; + MDBX_cursor *mc = nullptr; MDBX_dbi dbi; - char *envname = NULL; + char *envname = nullptr; int envflags = MDBX_UTTERLY_NOSYNC, putflags = 0; - int append = 0; - int quiet = 0; - int rescue = 0; + bool append = false; + bool quiet = false; + bool rescue = false; MDBX_val prevk; prog = argv[0]; @@ -391,10 +503,10 @@ int main(int argc, char *argv[]) { mdbx_build.options); return EXIT_SUCCESS; case 'a': - append = 1; + append = true; break; case 'f': - if (freopen(optarg, "r", stdin) == NULL) { + if (freopen(optarg, "r", stdin) == nullptr) { fprintf(stderr, "%s: %s: open: %s\n", prog, optarg, mdbx_strerror(errno)); exit(EXIT_FAILURE); @@ -413,10 +525,10 @@ int main(int argc, char *argv[]) { mode |= NOHDR | PRINT; break; case 'q': - quiet = 1; + quiet = true; break; case 'r': - rescue = 1; + rescue = true; break; default: usage(); @@ -444,56 +556,73 @@ int main(int argc, char *argv[]) { printf("mdbx_load %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname); - fflush(NULL); + fflush(nullptr); dbuf.iov_len = 4096; dbuf.iov_base = mdbx_malloc(dbuf.iov_len); /* read first header for mapsize= */ - if (!(mode & NOHDR)) - readhdr(); + if (!(mode & NOHDR)) { + rc = readhdr(); + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == EOF) + rc = MDBX_ENODATA; + 
error("readheader", rc); + goto env_close; + } + } rc = mdbx_env_create(&env); - if (rc) { - fprintf(stderr, "mdbx_env_create failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_create", rc); return EXIT_FAILURE; } mdbx_env_set_maxdbs(env, 2); - -#ifdef MDBX_FIXEDMAP - if (info.mi_mapaddr) - envflags |= MDBX_FIXEDMAP; -#endif + if (envinfo.mi_maxreaders) { + rc = mdbx_env_set_maxreaders(env, envinfo.mi_maxreaders); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_set_maxreaders", rc); + goto env_close; + } + } if (envinfo.mi_mapsize) { - if (envinfo.mi_mapsize > INTPTR_MAX) { - fprintf(stderr, - "Database size is too large for current system (mapsize=%" PRIu64 - " is great than system-limit %zi)\n", - envinfo.mi_mapsize, INTPTR_MAX); - goto env_close; + if (envinfo.mi_geo.current) { + rc = mdbx_env_set_geometry( + env, (intptr_t)envinfo.mi_geo.lower, (intptr_t)envinfo.mi_geo.current, + (intptr_t)envinfo.mi_geo.upper, (intptr_t)envinfo.mi_geo.shrink, + (intptr_t)envinfo.mi_geo.grow, + envinfo.mi_dxb_pagesize ? (intptr_t)envinfo.mi_dxb_pagesize : -1); + } else { + if (envinfo.mi_mapsize > MAX_MAPSIZE) { + fprintf( + stderr, + "Database size is too large for current system (mapsize=%" PRIu64 + " is great than system-limit %zu)\n", + envinfo.mi_mapsize, (size_t)MAX_MAPSIZE); + goto env_close; + } + rc = mdbx_env_set_geometry( + env, (intptr_t)envinfo.mi_mapsize, (intptr_t)envinfo.mi_mapsize, + (intptr_t)envinfo.mi_mapsize, 0, 0, + envinfo.mi_dxb_pagesize ? 
(intptr_t)envinfo.mi_dxb_pagesize : -1); } - rc = mdbx_env_set_geometry(env, 0, 0, (intptr_t)envinfo.mi_mapsize, -1, -1, - -1); - if (rc) { - fprintf(stderr, "mdbx_env_set_geometry failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_set_geometry", rc); goto env_close; } } rc = mdbx_env_open(env, envname, envflags, 0664); - if (rc) { - fprintf(stderr, "mdbx_env_open failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_open", rc); goto env_close; } kbuf.iov_len = mdbx_env_get_maxvalsize_ex(env, MDBX_DUPSORT); if (kbuf.iov_len >= INTPTR_MAX / 4) { - fprintf(stderr, "mdbx_env_get_maxkeysize failed, returns %zu\n", + fprintf(stderr, "mdbx_env_get_maxkeysize() failed, returns %zu\n", kbuf.iov_len); goto env_close; } @@ -503,41 +632,83 @@ int main(int argc, char *argv[]) { k0buf.iov_base = (char *)kbuf.iov_base + kbuf.iov_len; prevk.iov_base = k0buf.iov_base; - while (!Eof) { + while (rc == MDBX_SUCCESS) { if (user_break) { rc = MDBX_EINTR; break; } - rc = mdbx_txn_begin(env, NULL, 0, &txn); - if (rc) { - fprintf(stderr, "mdbx_txn_begin failed, error %d %s\n", rc, - mdbx_strerror(rc)); + rc = mdbx_txn_begin(env, nullptr, 0, &txn); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_begin", rc); goto env_close; } + if (mode & GLOBAL) { + mode -= GLOBAL; + if (canary.v | canary.x | canary.y | canary.z) { + rc = mdbx_canary_put(txn, &canary); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_canary_put", rc); + goto txn_abort; + } + } + } + + const char *const dbi_name = subname ? subname : "@MAIN"; rc = mdbx_dbi_open_ex(txn, subname, dbi_flags | MDBX_CREATE, &dbi, - append ? anyway_greater : NULL, - append ? anyway_greater : NULL); - if (rc) { - fprintf(stderr, "mdbx_open failed, error %d %s\n", rc, mdbx_strerror(rc)); + append ? equal_or_greater : nullptr, + append ? 
equal_or_greater : nullptr); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_open_ex", rc); goto txn_abort; } + uint64_t present_sequence; + rc = mdbx_dbi_sequence(txn, dbi, &present_sequence, 0); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_sequence", rc); + goto txn_abort; + } + if (present_sequence > sequence) { + fprintf(stderr, + "present sequence for '%s' value (%" PRIu64 + ") is greated than loaded (%" PRIu64 ")\n", + dbi_name, present_sequence, sequence); + rc = MDBX_RESULT_TRUE; + goto txn_abort; + } + if (present_sequence < sequence) { + rc = mdbx_dbi_sequence(txn, dbi, nullptr, sequence - present_sequence); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_sequence", rc); + goto txn_abort; + } + } + rc = mdbx_cursor_open(txn, dbi, &mc); - if (rc) { - fprintf(stderr, "mdbx_cursor_open failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_cursor_open", rc); goto txn_abort; } + /* if (append) { + mc->mc_flags |= C_SKIPORD; + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD; + } */ int batch = 0; prevk.iov_len = 0; - while (1) { + while (rc == MDBX_SUCCESS) { MDBX_val key; rc = readline(&key, &kbuf); - if (rc) /* rc == EOF */ + if (rc != MDBX_SUCCESS) /* rc == EOF */ + break; + + if (user_break) { + rc = MDBX_EINTR; break; + } MDBX_val data; rc = readline(&data, &dbuf); @@ -566,50 +737,81 @@ int main(int argc, char *argv[]) { mdbx_strerror(rc)); continue; } - if (rc) { - fprintf(stderr, "mdbx_cursor_put failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_cursor_put", rc); goto txn_abort; } batch++; - if (batch == 100) { + + MDBX_txn_info txn_info; + rc = mdbx_txn_info(txn, &txn_info, false); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_info", rc); + goto txn_abort; + } + + if (batch == 10000 || txn_info.txn_space_dirty > MEGABYTE * 16) { + mdbx_cursor_close(mc); + mc = nullptr; rc = mdbx_txn_commit(txn); - 
if (rc) { - fprintf(stderr, "%s: line %" PRIiSIZE ": txn_commit: %s\n", prog, - lineno, mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_commit", rc); goto env_close; } - rc = mdbx_txn_begin(env, NULL, 0, &txn); - if (rc) { - fprintf(stderr, "mdbx_txn_begin failed, error %d %s\n", rc, - mdbx_strerror(rc)); + batch = 0; + + rc = mdbx_txn_begin(env, nullptr, 0, &txn); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_begin", rc); goto env_close; } rc = mdbx_cursor_open(txn, dbi, &mc); - if (rc) { - fprintf(stderr, "mdbx_cursor_open failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_cursor_open", rc); goto txn_abort; } - batch = 0; + /* if (append) { + mc->mc_flags |= C_SKIPORD; + if (mc->mc_xcursor) + mc->mc_xcursor->mx_cursor.mc_flags |= C_SKIPORD; + } */ } } + + mdbx_cursor_close(mc); + mc = nullptr; rc = mdbx_txn_commit(txn); - txn = NULL; - if (rc) { - fprintf(stderr, "%s: line %" PRIiSIZE ": txn_commit: %s\n", prog, lineno, - mdbx_strerror(rc)); + txn = nullptr; + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_commit", rc); + goto env_close; + } + rc = mdbx_dbi_close(env, dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_close", rc); goto env_close; } - mdbx_dbi_close(env, dbi); - subname = NULL; /* try read next header */ if (!(mode & NOHDR)) - readhdr(); + rc = readhdr(); + } + + switch (rc) { + case EOF: + rc = MDBX_SUCCESS; + case MDBX_SUCCESS: + break; + case MDBX_EINTR: + fprintf(stderr, "Interrupted by signal/user\n"); + break; + default: + if (unlikely(rc != MDBX_SUCCESS)) + error("readline", rc); } txn_abort: + mdbx_cursor_close(mc); mdbx_txn_abort(txn); env_close: mdbx_env_close(env); diff --git a/libs/libmdbx/src/src/mdbx_stat.c b/libs/libmdbx/src/src/mdbx_stat.c index d1a805f925..df12717aad 100644 --- a/libs/libmdbx/src/src/mdbx_stat.c +++ b/libs/libmdbx/src/src/mdbx_stat.c @@ -43,7 +43,7 @@ static void signal_handler(int sig) { #endif /* !WINDOWS 
*/ -static void prstat(MDBX_stat *ms) { +static void print_stat(MDBX_stat *ms) { printf(" Pagesize: %u\n", ms->ms_psize); printf(" Tree depth: %u\n", ms->ms_depth); printf(" Branch pages: %" PRIu64 "\n", ms->ms_branch_pages); @@ -52,17 +52,16 @@ static void prstat(MDBX_stat *ms) { printf(" Entries: %" PRIu64 "\n", ms->ms_entries); } -static void usage(char *prog) { +static void usage(const char *prog) { fprintf(stderr, - "usage: %s [-V] [-e] [-f[f[f]]] [-r[r]] [-a|-s name] [-n] dbpath\n" + "usage: %s [-V] [-e] [-f[f[f]]] [-r[r]] [-a|-s name] dbpath\n" " -V\t\tprint version and exit\n" " -e\t\tshow whole DB info\n" " -f\t\tshow GC info\n" " -r\t\tshow readers\n" " -a\t\tprint stat of main DB and all subDBs\n" " \t\t(default) print stat of only the main DB\n" - " -s name\tprint stat of only the named subDB\n" - " -n\t\tNOSUBDIR mode for open\n", + " -s name\tprint stat of only the named subDB\n", prog); exit(EXIT_FAILURE); } @@ -88,6 +87,11 @@ static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid, return user_break ? 
MDBX_RESULT_TRUE : MDBX_RESULT_FALSE; } +const char *prog; +static void error(const char *func, int rc) { + fprintf(stderr, "%s: %s() error %d %s\n", prog, func, rc, mdbx_strerror(rc)); +} + int main(int argc, char *argv[]) { int o, rc; MDBX_env *env; @@ -95,9 +99,9 @@ int main(int argc, char *argv[]) { MDBX_dbi dbi; MDBX_stat mst; MDBX_envinfo mei; - char *prog = argv[0]; + prog = argv[0]; char *envname; - char *subname = NULL; + char *subname = nullptr; int alldbs = 0, envinfo = 0, envflags = 0, freinfo = 0, rdrinfo = 0; if (argc < 2) @@ -167,41 +171,51 @@ int main(int argc, char *argv[]) { printf("mdbx_stat %s (%s, T-%s)\nRunning for %s...\n", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname); - fflush(NULL); + fflush(nullptr); rc = mdbx_env_create(&env); - if (rc) { - fprintf(stderr, "mdbx_env_create failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_create", rc); return EXIT_FAILURE; } - if (alldbs || subname) - mdbx_env_set_maxdbs(env, 4); + if (alldbs || subname) { + rc = mdbx_env_set_maxdbs(env, 2); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_set_maxdbs", rc); + goto env_close; + } + } rc = mdbx_env_open(env, envname, envflags | MDBX_RDONLY, 0664); - if (rc) { - fprintf(stderr, "mdbx_env_open failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_open", rc); goto env_close; } - rc = mdbx_txn_begin(env, NULL, MDBX_RDONLY, &txn); - if (rc) { - fprintf(stderr, "mdbx_txn_begin failed, error %d %s\n", rc, - mdbx_strerror(rc)); - goto env_close; + rc = mdbx_txn_begin(env, nullptr, MDBX_RDONLY, &txn); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_txn_begin", rc); + goto txn_abort; } if (envinfo || freinfo) { - (void)mdbx_env_info_ex(env, txn, &mei, sizeof(mei)); + rc = mdbx_env_info_ex(env, txn, &mei, sizeof(mei)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_info_ex", rc); + goto txn_abort; + } 
} else { /* LY: zap warnings from gcc */ memset(&mei, 0, sizeof(mei)); } if (envinfo) { - (void)mdbx_env_stat_ex(env, txn, &mst, sizeof(mst)); + rc = mdbx_env_stat_ex(env, txn, &mst, sizeof(mst)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_env_stat_ex", rc); + goto txn_abort; + } printf("Environment Info\n"); printf(" Pagesize: %u\n", mst.ms_psize); if (mei.mi_geo.lower != mei.mi_geo.upper) { @@ -241,11 +255,19 @@ int main(int argc, char *argv[]) { if (rdrinfo) { rc = mdbx_reader_list(env, reader_list_func, nullptr); + if (MDBX_IS_ERROR(rc)) { + error("mdbx_reader_list", rc); + goto txn_abort; + } if (rc == MDBX_RESULT_TRUE) printf("Reader Table is empty\n"); else if (rc == MDBX_SUCCESS && rdrinfo > 1) { int dead; rc = mdbx_reader_check(env, &dead); + if (MDBX_IS_ERROR(rc)) { + error("mdbx_reader_check", rc); + goto txn_abort; + } if (rc == MDBX_RESULT_TRUE) { printf(" %d stale readers cleared.\n", dead); rc = mdbx_reader_list(env, reader_list_func, nullptr); @@ -254,38 +276,31 @@ int main(int argc, char *argv[]) { } else printf(" No stale readers.\n"); } - if (MDBX_IS_ERROR(rc)) { - fprintf(stderr, "mdbx_txn_begin failed, error %d %s\n", rc, - mdbx_strerror(rc)); - goto env_close; - } if (!(subname || alldbs || freinfo)) - goto env_close; + goto txn_abort; } if (freinfo) { - MDBX_cursor *cursor; - MDBX_val key, data; - pgno_t pages = 0, *iptr; - pgno_t reclaimable = 0; - printf("Garbage Collection\n"); dbi = 0; + MDBX_cursor *cursor; rc = mdbx_cursor_open(txn, dbi, &cursor); - if (rc) { - fprintf(stderr, "mdbx_cursor_open failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_cursor_open", rc); goto txn_abort; } rc = mdbx_dbi_stat(txn, dbi, &mst, sizeof(mst)); - if (rc) { - fprintf(stderr, "mdbx_dbi_stat failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_stat", rc); goto txn_abort; } - prstat(&mst); - while ((rc = mdbx_cursor_get(cursor, &key, &data, 
MDBX_NEXT)) == - MDBX_SUCCESS) { + print_stat(&mst); + + pgno_t pages = 0, *iptr; + pgno_t reclaimable = 0; + MDBX_val key, data; + while (MDBX_SUCCESS == + (rc = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT))) { if (user_break) { rc = MDBX_EINTR; break; @@ -333,6 +348,7 @@ int main(int argc, char *argv[]) { } } mdbx_cursor_close(cursor); + cursor = nullptr; switch (rc) { case MDBX_SUCCESS: @@ -342,8 +358,7 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Interrupted by signal/user\n"); goto txn_abort; default: - fprintf(stderr, "mdbx_cursor_get failed, error %d %s\n", rc, - mdbx_strerror(rc)); + error("mdbx_cursor_get", rc); goto txn_abort; } @@ -381,59 +396,75 @@ int main(int argc, char *argv[]) { printf(" GC: %" PRIaPGNO " pages\n", pages); } - rc = mdbx_dbi_open(txn, subname, 0, &dbi); - if (rc) { - fprintf(stderr, "mdbx_open failed, error %d %s\n", rc, mdbx_strerror(rc)); + rc = mdbx_dbi_open(txn, subname, MDBX_ACCEDE, &dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_open", rc); goto txn_abort; } - rc = mdbx_dbi_stat(txn, dbi, &mst, sizeof(mst)); - if (rc) { - fprintf(stderr, "mdbx_dbi_stat failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_stat", rc); goto txn_abort; } printf("Status of %s\n", subname ? 
subname : "Main DB"); - prstat(&mst); + print_stat(&mst); if (alldbs) { MDBX_cursor *cursor; - MDBX_val key; - rc = mdbx_cursor_open(txn, dbi, &cursor); - if (rc) { - fprintf(stderr, "mdbx_cursor_open failed, error %d %s\n", rc, - mdbx_strerror(rc)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_cursor_open", rc); goto txn_abort; } - while ((rc = mdbx_cursor_get(cursor, &key, NULL, MDBX_NEXT_NODUP)) == 0) { - char *str; - MDBX_dbi db2; + + MDBX_val key; + while (MDBX_SUCCESS == + (rc = mdbx_cursor_get(cursor, &key, nullptr, MDBX_NEXT_NODUP))) { + MDBX_dbi subdbi; if (memchr(key.iov_base, '\0', key.iov_len)) continue; - str = mdbx_malloc(key.iov_len + 1); - memcpy(str, key.iov_base, key.iov_len); - str[key.iov_len] = '\0'; - rc = mdbx_dbi_open(txn, str, 0, &db2); + subname = mdbx_malloc(key.iov_len + 1); + memcpy(subname, key.iov_base, key.iov_len); + subname[key.iov_len] = '\0'; + rc = mdbx_dbi_open(txn, subname, MDBX_ACCEDE, &subdbi); if (rc == MDBX_SUCCESS) - printf("Status of %s\n", str); - mdbx_free(str); - if (rc) - continue; - rc = mdbx_dbi_stat(txn, db2, &mst, sizeof(mst)); - if (rc) { - fprintf(stderr, "mdbx_dbi_stat failed, error %d %s\n", rc, - mdbx_strerror(rc)); + printf("Status of %s\n", subname); + mdbx_free(subname); + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_INCOMPATIBLE) + continue; + error("mdbx_dbi_open", rc); + goto txn_abort; + } + + rc = mdbx_dbi_stat(txn, subdbi, &mst, sizeof(mst)); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_stat", rc); + goto txn_abort; + } + print_stat(&mst); + + rc = mdbx_dbi_close(env, subdbi); + if (unlikely(rc != MDBX_SUCCESS)) { + error("mdbx_dbi_close", rc); goto txn_abort; } - prstat(&mst); - mdbx_dbi_close(env, db2); } mdbx_cursor_close(cursor); + cursor = nullptr; } - if (rc == MDBX_NOTFOUND) - rc = MDBX_SUCCESS; + switch (rc) { + case MDBX_SUCCESS: + case MDBX_NOTFOUND: + break; + case MDBX_EINTR: + fprintf(stderr, "Interrupted by signal/user\n"); + break; + default: + if 
(unlikely(rc != MDBX_SUCCESS)) + error("mdbx_cursor_get", rc); + } mdbx_dbi_close(env, dbi); txn_abort: diff --git a/libs/libmdbx/src/src/options.h b/libs/libmdbx/src/src/options.h index 715f883bfc..f3805b1eeb 100644 --- a/libs/libmdbx/src/src/options.h +++ b/libs/libmdbx/src/src/options.h @@ -13,6 +13,11 @@ * */ +/* Support for huge write-transactions */ +#ifndef MDBX_HUGE_TRANSACTIONS +#define MDBX_HUGE_TRANSACTIONS 0 +#endif /* MDBX_HUGE_TRANSACTIONS */ + /* using fcntl(F_FULLFSYNC) with 5-10 times slowdown */ #define MDBX_OSX_WANNA_DURABILITY 0 /* using fsync() with chance of data lost on power failure */ diff --git a/libs/libmdbx/src/src/osal.c b/libs/libmdbx/src/src/osal.c index 94902527b1..e9fff57399 100644 --- a/libs/libmdbx/src/src/osal.c +++ b/libs/libmdbx/src/src/osal.c @@ -362,106 +362,98 @@ char *mdbx_strdup(const char *str) { /*----------------------------------------------------------------------------*/ -MDBX_INTERNAL_FUNC int mdbx_condmutex_init(mdbx_condmutex_t *condmutex) { +MDBX_INTERNAL_FUNC int mdbx_condpair_init(mdbx_condpair_t *condpair) { + int rc; + memset(condpair, 0, sizeof(mdbx_condpair_t)); #if defined(_WIN32) || defined(_WIN64) - int rc = MDBX_SUCCESS; - condmutex->event = NULL; - condmutex->mutex = CreateMutexW(NULL, FALSE, NULL); - if (!condmutex->mutex) - return GetLastError(); - - condmutex->event = CreateEventW(NULL, TRUE, FALSE, NULL); - if (!condmutex->event) { + if ((condpair->mutex = CreateMutexW(NULL, FALSE, NULL)) == NULL) { rc = GetLastError(); - (void)CloseHandle(condmutex->mutex); - condmutex->mutex = NULL; + goto bailout_mutex; } - return rc; -#else - memset(condmutex, 0, sizeof(mdbx_condmutex_t)); - int rc = pthread_mutex_init(&condmutex->mutex, NULL); - if (rc == 0) { - rc = pthread_cond_init(&condmutex->cond, NULL); - if (rc != 0) - (void)pthread_mutex_destroy(&condmutex->mutex); + if ((condpair->event[0] = CreateEventW(NULL, FALSE, FALSE, NULL)) == NULL) { + rc = GetLastError(); + goto bailout_event; } - return rc; 
-#endif -} + if ((condpair->event[1] = CreateEventW(NULL, FALSE, FALSE, NULL)) != NULL) + return MDBX_SUCCESS; -static bool is_allzeros(const void *ptr, size_t bytes) { - const uint8_t *u8 = ptr; - for (size_t i = 0; i < bytes; ++i) - if (u8[i] != 0) - return false; - return true; + rc = GetLastError(); + (void)CloseHandle(condpair->event[0]); +bailout_event: + (void)CloseHandle(condpair->mutex); +#else + rc = pthread_mutex_init(&condpair->mutex, NULL); + if (unlikely(rc != 0)) + goto bailout_mutex; + rc = pthread_cond_init(&condpair->cond[0], NULL); + if (unlikely(rc != 0)) + goto bailout_cond; + rc = pthread_cond_init(&condpair->cond[1], NULL); + if (likely(rc == 0)) + return MDBX_SUCCESS; + + (void)pthread_cond_destroy(&condpair->cond[0]); +bailout_cond: + (void)pthread_mutex_destroy(&condpair->mutex); +#endif +bailout_mutex: + memset(condpair, 0, sizeof(mdbx_condpair_t)); + return rc; } -MDBX_INTERNAL_FUNC int mdbx_condmutex_destroy(mdbx_condmutex_t *condmutex) { - int rc = MDBX_EINVAL; +MDBX_INTERNAL_FUNC int mdbx_condpair_destroy(mdbx_condpair_t *condpair) { #if defined(_WIN32) || defined(_WIN64) - if (condmutex->event) { - rc = CloseHandle(condmutex->event) ? MDBX_SUCCESS : GetLastError(); - if (rc == MDBX_SUCCESS) - condmutex->event = NULL; - } - if (condmutex->mutex) { - rc = CloseHandle(condmutex->mutex) ? MDBX_SUCCESS : GetLastError(); - if (rc == MDBX_SUCCESS) - condmutex->mutex = NULL; - } + int rc = CloseHandle(condpair->mutex) ? MDBX_SUCCESS : GetLastError(); + rc = CloseHandle(condpair->event[0]) ? rc : GetLastError(); + rc = CloseHandle(condpair->event[1]) ? 
rc : GetLastError(); #else - if (!is_allzeros(&condmutex->cond, sizeof(condmutex->cond))) { - rc = pthread_cond_destroy(&condmutex->cond); - if (rc == 0) - memset(&condmutex->cond, 0, sizeof(condmutex->cond)); - } - if (!is_allzeros(&condmutex->mutex, sizeof(condmutex->mutex))) { - rc = pthread_mutex_destroy(&condmutex->mutex); - if (rc == 0) - memset(&condmutex->mutex, 0, sizeof(condmutex->mutex)); - } + int err, rc = pthread_mutex_destroy(&condpair->mutex); + rc = (err = pthread_cond_destroy(&condpair->cond[0])) ? err : rc; + rc = (err = pthread_cond_destroy(&condpair->cond[1])) ? err : rc; #endif + memset(condpair, 0, sizeof(mdbx_condpair_t)); return rc; } -MDBX_INTERNAL_FUNC int mdbx_condmutex_lock(mdbx_condmutex_t *condmutex) { +MDBX_INTERNAL_FUNC int mdbx_condpair_lock(mdbx_condpair_t *condpair) { #if defined(_WIN32) || defined(_WIN64) - DWORD code = WaitForSingleObject(condmutex->mutex, INFINITE); + DWORD code = WaitForSingleObject(condpair->mutex, INFINITE); return waitstatus2errcode(code); #else - return pthread_mutex_lock(&condmutex->mutex); + return pthread_mutex_lock(&condpair->mutex); #endif } -MDBX_INTERNAL_FUNC int mdbx_condmutex_unlock(mdbx_condmutex_t *condmutex) { +MDBX_INTERNAL_FUNC int mdbx_condpair_unlock(mdbx_condpair_t *condpair) { #if defined(_WIN32) || defined(_WIN64) - return ReleaseMutex(condmutex->mutex) ? MDBX_SUCCESS : GetLastError(); + return ReleaseMutex(condpair->mutex) ? MDBX_SUCCESS : GetLastError(); #else - return pthread_mutex_unlock(&condmutex->mutex); + return pthread_mutex_unlock(&condpair->mutex); #endif } -MDBX_INTERNAL_FUNC int mdbx_condmutex_signal(mdbx_condmutex_t *condmutex) { +MDBX_INTERNAL_FUNC int mdbx_condpair_signal(mdbx_condpair_t *condpair, + bool part) { #if defined(_WIN32) || defined(_WIN64) - return SetEvent(condmutex->event) ? MDBX_SUCCESS : GetLastError(); + return SetEvent(condpair->event[part]) ? 
MDBX_SUCCESS : GetLastError(); #else - return pthread_cond_signal(&condmutex->cond); + return pthread_cond_signal(&condpair->cond[part]); #endif } -MDBX_INTERNAL_FUNC int mdbx_condmutex_wait(mdbx_condmutex_t *condmutex) { +MDBX_INTERNAL_FUNC int mdbx_condpair_wait(mdbx_condpair_t *condpair, + bool part) { #if defined(_WIN32) || defined(_WIN64) - DWORD code = - SignalObjectAndWait(condmutex->mutex, condmutex->event, INFINITE, FALSE); + DWORD code = SignalObjectAndWait(condpair->mutex, condpair->event[part], + INFINITE, FALSE); if (code == WAIT_OBJECT_0) { - code = WaitForSingleObject(condmutex->mutex, INFINITE); + code = WaitForSingleObject(condpair->mutex, INFINITE); if (code == WAIT_OBJECT_0) - return ResetEvent(condmutex->event) ? MDBX_SUCCESS : GetLastError(); + return MDBX_SUCCESS; } return waitstatus2errcode(code); #else - return pthread_cond_wait(&condmutex->cond, &condmutex->mutex); + return pthread_cond_wait(&condpair->cond[part], &condpair->mutex); #endif } diff --git a/libs/libmdbx/src/src/osal.h b/libs/libmdbx/src/src/osal.h index d9012ce768..a6188bb382 100644 --- a/libs/libmdbx/src/src/osal.h +++ b/libs/libmdbx/src/src/osal.h @@ -35,7 +35,7 @@ #define _CRT_SECURE_NO_WARNINGS #endif #if !defined(_NO_CRT_STDIO_INLINE) && MDBX_BUILD_SHARED_LIBRARY && \ - !defined(MDBX_TOOLS) + !defined(MDBX_TOOLS) && MDBX_AVOID_CRT #define _NO_CRT_STDIO_INLINE #endif #elif !defined(_POSIX_C_SOURCE) @@ -164,8 +164,8 @@ typedef unsigned mdbx_thread_key_t; #define THREAD_RESULT DWORD typedef struct { HANDLE mutex; - HANDLE event; -} mdbx_condmutex_t; + HANDLE event[2]; +} mdbx_condpair_t; typedef CRITICAL_SECTION mdbx_fastmutex_t; #if MDBX_AVOID_CRT @@ -226,8 +226,8 @@ typedef pthread_key_t mdbx_thread_key_t; #define THREAD_RESULT void * typedef struct { pthread_mutex_t mutex; - pthread_cond_t cond; -} mdbx_condmutex_t; + pthread_cond_t cond[2]; +} mdbx_condpair_t; typedef pthread_mutex_t mdbx_fastmutex_t; #define mdbx_malloc malloc #define mdbx_calloc calloc @@ -546,12 
+546,13 @@ MDBX_INTERNAL_FUNC int mdbx_memalign_alloc(size_t alignment, size_t bytes, MDBX_INTERNAL_FUNC void mdbx_memalign_free(void *ptr); #endif -MDBX_INTERNAL_FUNC int mdbx_condmutex_init(mdbx_condmutex_t *condmutex); -MDBX_INTERNAL_FUNC int mdbx_condmutex_lock(mdbx_condmutex_t *condmutex); -MDBX_INTERNAL_FUNC int mdbx_condmutex_unlock(mdbx_condmutex_t *condmutex); -MDBX_INTERNAL_FUNC int mdbx_condmutex_signal(mdbx_condmutex_t *condmutex); -MDBX_INTERNAL_FUNC int mdbx_condmutex_wait(mdbx_condmutex_t *condmutex); -MDBX_INTERNAL_FUNC int mdbx_condmutex_destroy(mdbx_condmutex_t *condmutex); +MDBX_INTERNAL_FUNC int mdbx_condpair_init(mdbx_condpair_t *condpair); +MDBX_INTERNAL_FUNC int mdbx_condpair_lock(mdbx_condpair_t *condpair); +MDBX_INTERNAL_FUNC int mdbx_condpair_unlock(mdbx_condpair_t *condpair); +MDBX_INTERNAL_FUNC int mdbx_condpair_signal(mdbx_condpair_t *condpair, + bool part); +MDBX_INTERNAL_FUNC int mdbx_condpair_wait(mdbx_condpair_t *condpair, bool part); +MDBX_INTERNAL_FUNC int mdbx_condpair_destroy(mdbx_condpair_t *condpair); MDBX_INTERNAL_FUNC int mdbx_fastmutex_init(mdbx_fastmutex_t *fastmutex); MDBX_INTERNAL_FUNC int mdbx_fastmutex_acquire(mdbx_fastmutex_t *fastmutex); diff --git a/libs/libmdbx/src/test/config.cc b/libs/libmdbx/src/test/config.cc index f7f960c579..5e1979718a 100644 --- a/libs/libmdbx/src/test/config.cc +++ b/libs/libmdbx/src/test/config.cc @@ -395,6 +395,8 @@ void dump(const char *title) { i->params.keygen.split, i->params.keygen.width - i->params.keygen.split); log_verbose("keygen.seed: %u\n", i->params.keygen.seed); + log_verbose("keygen.zerofill: %s\n", + i->params.keygen.zero_fill ? 
"Yes" : "No"); log_verbose("key: minlen %u, maxlen %u\n", i->params.keylen_min, i->params.keylen_max); log_verbose("data: minlen %u, maxlen %u\n", i->params.datalen_min, @@ -450,18 +452,14 @@ using namespace config; actor_config::actor_config(actor_testcase testcase, const actor_params ¶ms, unsigned space_id, unsigned wait4id) - : params(params) { - this->space_id = space_id; - this->actor_id = 1 + (unsigned)global::actors.size(); - this->testcase = testcase; - this->wait4id = wait4id; - signal_nops = 0; -} + : actor_config_pod(1 + unsigned(global::actors.size()), testcase, space_id, + wait4id), + params(params) {} const std::string actor_config::serialize(const char *prefix) const { simple_checksum checksum; - std::string result; + if (prefix) result.append(prefix); @@ -473,13 +471,13 @@ const std::string actor_config::serialize(const char *prefix) const { result.append(params.pathname_log); result.push_back('|'); - static_assert(std::is_pod<actor_params_pod>::value, + static_assert(std::is_trivially_copyable<actor_params_pod>::value, "actor_params_pod should by POD"); result.append(data2hex(static_cast<const actor_params_pod *>(¶ms), sizeof(actor_params_pod), checksum)); result.push_back('|'); - static_assert(std::is_pod<actor_config_pod>::value, + static_assert(std::is_trivially_copyable<actor_config_pod>::value, "actor_config_pod should by POD"); result.append(data2hex(static_cast<const actor_config_pod *>(this), sizeof(actor_config_pod), checksum)); @@ -525,7 +523,7 @@ bool actor_config::deserialize(const char *str, actor_config &config) { TRACE("<< actor_config::deserialize: slash-3\n"); return false; } - static_assert(std::is_pod<actor_params_pod>::value, + static_assert(std::is_trivially_copyable<actor_params_pod>::value, "actor_params_pod should by POD"); if (!hex2data(str, slash, static_cast<actor_params_pod *>(&config.params), sizeof(actor_params_pod), checksum)) { @@ -540,7 +538,7 @@ bool actor_config::deserialize(const char *str, actor_config &config) 
{ TRACE("<< actor_config::deserialize: slash-4\n"); return false; } - static_assert(std::is_pod<actor_config_pod>::value, + static_assert(std::is_trivially_copyable<actor_config_pod>::value, "actor_config_pod should by POD"); if (!hex2data(str, slash, static_cast<actor_config_pod *>(&config), sizeof(actor_config_pod), checksum)) { diff --git a/libs/libmdbx/src/test/config.h b/libs/libmdbx/src/test/config.h index 7efd09dd54..2ab4742a2e 100644 --- a/libs/libmdbx/src/test/config.h +++ b/libs/libmdbx/src/test/config.h @@ -124,7 +124,8 @@ inline bool parse_option_intptr(int argc, char *const argv[], int &narg, #pragma pack(push, 1) struct keygen_params_pod { - /* Параметры генератора пар key-value. + /* Параметры генератора пар key-value. Также может быть полезным описание + * алгоритма генерации в keygen.h * * Ключи и значения генерируются по задаваемым параметрам на основе "плоской" * исходной координаты. При этом, в общем случае, в процессе тестов исходная @@ -141,20 +142,20 @@ struct keygen_params_pod { * - libmdbx поддерживает два существенно различающихся вида таблиц, * "уникальные" (без дубликатов и без multi-value), и так называемые * "с дубликатами" (c multi-value). - * - Для таблиц "без дубликатов" только размер связанных к ключами значений + * - Для таблиц "без дубликатов" только размер связанных с ключами значений * (данных) оказывает влияния на работу движка, непосредственно содержимое * данных не анализируется движком и не оказывает влияния на его работу. * - Для таблиц "с дубликатами", при наличии более одного значения для * некоторого ключа, формируется дочернее btree-поддерево. Это дерево - * формируется в отдельном "кусте" страниц и обслуживается независимо - * от окружения родительского ключа. + * формируется во вложенной странице или отдельном "кусте" страниц, + * и обслуживается независимо от окружения родительского ключа. 
* - Таким образом, паттерн генерации значений имеет смысл только для * таблиц "с дубликатами" и только в контексте одного значения ключа. - * Иначе говоря, нет смысла в со-координации генерации паттернов для - * ключей и значений. Более того, генерацию значений всегда необходимо - * рассматривать в контексте связки с одним значением ключа. - * - Тем не менее, во всех случаях достаточно важным является равномерная - * всех возможных сочетаний длин ключей и данных. + * Иначе говоря, не имеет смысла взаимная координация при генерации + * значений для разных ключей. Поэтому генерацию значений следует + * рассматривать только в контексте связки с одним значением ключа. + * - Тем не менее, во всех случаях достаточно важным является равновероятное + * распределение всех возможных сочетаний длин ключей и данных. * * width: * Большинство тестов предполагают создание или итерирование некоторого @@ -166,7 +167,7 @@ struct keygen_params_pod { * степени двойки. Это ограничение можно снять, но ценой увеличения * вычислительной сложности, включая потерю простоты и прозрачности. * - * С другой стороны, не-битовый width может быть полезен: + * С другой стороны, не-n-битовый width может быть полезен: * - Позволит генерировать ключи/значения в точно задаваемом диапазоне. * Например, перебрать в псевдо-случайном порядке 10001 значение. * - Позволит поровну разделять заданное пространство (диапазон) @@ -203,7 +204,7 @@ struct keygen_params_pod { * rotate и offset: * Для проверки слияния и разделения страниц внутри движка требуются * генерация ключей/значений в виде не-смежных последовательностей, как-бы - * в виде "пунктира", который постепенно заполняет весь заданных диапазон. + * в виде "пунктира", который постепенно заполняет весь заданный диапазон. * * Параметры позволяют генерировать такой "пунктир". 
Соответственно rotate * задает циклический сдвиг вправо, а offset задает смещение, точнее говоря @@ -224,55 +225,62 @@ struct keygen_params_pod { * номера будет отрезано для генерации значения. */ - uint8_t width; - uint8_t mesh; - uint8_t rotate; - uint8_t split; - uint32_t seed; - uint64_t offset; - keygen_case keycase; + uint8_t width{0}; + uint8_t mesh{0}; + uint8_t rotate{0}; + uint8_t split{0}; + uint32_t seed{0}; + uint64_t offset{0}; + keygen_case keycase{kc_random}; + bool zero_fill{false}; }; struct actor_params_pod { - unsigned mode_flags; - unsigned table_flags; - intptr_t size_lower; - intptr_t size_now; - intptr_t size_upper; - int shrink_threshold; - int growth_step; - int pagesize; - - unsigned test_duration; - unsigned test_nops; - unsigned nrepeat; - unsigned nthreads; - - unsigned keylen_min, keylen_max; - unsigned datalen_min, datalen_max; - - unsigned batch_read; - unsigned batch_write; - - unsigned delaystart; - unsigned waitfor_nops; - unsigned inject_writefaultn; - - unsigned max_readers; - unsigned max_tables; + unsigned mode_flags{0}; + unsigned table_flags{0}; + intptr_t size_lower{0}; + intptr_t size_now{0}; + intptr_t size_upper{0}; + int shrink_threshold{0}; + int growth_step{0}; + int pagesize{0}; + + unsigned test_duration{0}; + unsigned test_nops{0}; + unsigned nrepeat{0}; + unsigned nthreads{0}; + + unsigned keylen_min{0}, keylen_max{0}; + unsigned datalen_min{0}, datalen_max{0}; + + unsigned batch_read{0}; + unsigned batch_write{0}; + + unsigned delaystart{0}; + unsigned waitfor_nops{0}; + unsigned inject_writefaultn{0}; + + unsigned max_readers{0}; + unsigned max_tables{0}; keygen_params_pod keygen; - uint8_t loglevel; - bool drop_table; - bool ignore_dbfull; - bool speculum; + uint8_t loglevel{0}; + bool drop_table{0}; + bool ignore_dbfull{0}; + bool speculum{0}; }; struct actor_config_pod { - unsigned actor_id, space_id; - actor_testcase testcase; - unsigned wait4id; - unsigned signal_nops; + unsigned actor_id{0}, space_id{0}; 
+ actor_testcase testcase{ac_none}; + unsigned wait4id{0}; + unsigned signal_nops{0}; + + actor_config_pod() = default; + actor_config_pod(unsigned actor_id, actor_testcase testcase, + unsigned space_id, unsigned wait4id) + : actor_id(actor_id), space_id(space_id), testcase(testcase), + wait4id(wait4id) {} }; #pragma pack(pop) @@ -286,8 +294,9 @@ void dump(const char *title = "config-dump: "); struct actor_params : public config::actor_params_pod { std::string pathname_log; std::string pathname_db; - void set_defaults(const std::string &tmpdir); + actor_params() = default; + void set_defaults(const std::string &tmpdir); unsigned mdbx_keylen_min() const; unsigned mdbx_keylen_max() const; unsigned mdbx_datalen_min() const; @@ -299,10 +308,11 @@ struct actor_config : public config::actor_config_pod { bool wanna_event4signalling() const { return true /* TODO ? */; } + actor_config() = default; actor_config(actor_testcase testcase, const actor_params ¶ms, unsigned space_id, unsigned wait4id); - actor_config(const char *str) { + actor_config(const char *str) : actor_config() { if (!deserialize(str, *this)) failure("Invalid internal parameter '%s'\n", str); } diff --git a/libs/libmdbx/src/test/dump-load.sh b/libs/libmdbx/src/test/dump-load.sh new file mode 100644 index 0000000000..55fa5c7f33 --- /dev/null +++ b/libs/libmdbx/src/test/dump-load.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +echo "------------------------------------------------------------------------------" + +if [ -z "$1" ]; then + echo "No mdbx-db pathname given"; + exit 2 +elif [ ! 
-e "$1" ]; then + echo "The mdbx-db '$1' don't exists"; + exit 2 +else + echo ">>>>>>>>>> $1" + RECO="$1.recovered" + rm -f dump1.txt dump2.txt "$RECO" + if ./mdbx_chk "$1"; then + echo ">>>>>>>>>> SOURCE VALID" + (./mdbx_dump -a "$1" > dump1.txt && \ + ./mdbx_load -nf dump1.txt "$RECO" && \ + ./mdbx_chk "$RECO" && \ + echo ">>>>>>>>>> DUMP/LOAD/CHK OK") || (echo ">>>>>>>>>> DUMP/LOAD/CHK FAILED"; exit 1) + REMOVE_RECO=1 + elif ./mdbx_chk -i "$1"; then + echo ">>>>>>>>>> SOURCE HAS WRONG-ORDER, TRY RECOVERY" + (./mdbx_dump -a "$1" > dump1.txt && \ + ./mdbx_load -anf dump1.txt "$RECO" && \ + ./mdbx_chk -i "$RECO" && \ + echo ">>>>>>>>>> DUMP/LOAD/CHK OK") || (echo ">>>>>>>>>> DUMP/LOAD/CHK FAILED"; exit 1) + REMOVE_RECO=0 + else + echo ">>>>>>>>>> SOURCE CORRUPTED, TRY RECOVERY" + (./mdbx_dump -ar "$1" > dump1.txt && \ + ./mdbx_load -ranf dump1.txt "$RECO" && \ + ./mdbx_chk -i "$RECO" && \ + echo ">>>>>>>>>> DUMP/LOAD/CHK OK") || (echo ">>>>>>>>>> DUMP/LOAD/CHK FAILED"; exit 1) + REMOVE_RECO=0 + fi + ./mdbx_dump -a "$RECO" > dump2.txt && diff -u dump1.txt dump2.txt && \ + rm -f dump1.txt dump2.txt && [ $REMOVE_RECO -ne 0 ] && rm -f "$RECO" + exit 0 +fi diff --git a/libs/libmdbx/src/test/hill.cc b/libs/libmdbx/src/test/hill.cc index 37f748c9f7..efc43abe2a 100644 --- a/libs/libmdbx/src/test/hill.cc +++ b/libs/libmdbx/src/test/hill.cc @@ -232,6 +232,47 @@ bool testcase_hill::run() { } } + if (txn_guard) { + MDBX_stat stat; + err = mdbx_dbi_stat(txn_guard.get(), dbi, &stat, sizeof(stat)); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_stat()", err); + + uint32_t nested_deepmask; + err = mdbx_dbi_dupsort_depthmask(txn_guard.get(), dbi, &nested_deepmask); + if (unlikely(err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE)) + failure_perror("mdbx_dbi_stat_nested_deepmask()", err); + + if (err != MDBX_SUCCESS) { + log_notice("hill: reached %d tree depth", stat.ms_depth); + } else { + std::string str; + int prev = -2, i = 0; + do { + while (!(nested_deepmask & 
1)) + ++i, nested_deepmask >>= 1; + if (prev + 1 == i) { + if (str.back() != '-') + str.push_back('-'); + prev = i; + continue; + } + if (!str.empty()) { + if (str.back() == '-') + str.append(std::to_string(prev)); + str.push_back(','); + } + str.append(std::to_string(i)); + prev = i; + } while (++i, nested_deepmask >>= 1); + if (str.back() == '-') + str.append(std::to_string(prev)); + + log_notice("hill: reached %d tree depth & %s sub-tree depth(s)", + stat.ms_depth, str.c_str()); + } + } + while (serial_count > 1) { if (unlikely(!keyvalue_maker.increment(serial_count, -2))) failure("downhill: unexpected key-space underflow"); diff --git a/libs/libmdbx/src/test/keygen.cc b/libs/libmdbx/src/test/keygen.cc index 374537dd6e..05070afe02 100644 --- a/libs/libmdbx/src/test/keygen.cc +++ b/libs/libmdbx/src/test/keygen.cc @@ -78,9 +78,11 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, assert(mapping.mesh <= mapping.width); assert(mapping.rotate <= mapping.width); assert(mapping.offset <= mask(mapping.width)); - assert(!(key_essentials.flags & - ~(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT))); - assert(!(value_essentials.flags & ~(MDBX_INTEGERDUP | MDBX_REVERSEDUP))); + assert( + !(key_essentials.flags & ~(essentials::prng_fill_flag | MDBX_INTEGERKEY | + MDBX_REVERSEKEY | MDBX_DUPSORT))); + assert(!(value_essentials.flags & + ~(essentials::prng_fill_flag | MDBX_INTEGERDUP | MDBX_REVERSEDUP))); log_trace("keygen-pair: serial %" PRIu64 ", data-age %" PRIu64, serial, value_age); @@ -213,6 +215,11 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, (uint32_t)actor.datalen_max, (uint32_t)mdbx_limits_valsize_max(actor.pagesize, key_essentials.flags)); + if (!actor.keygen.zero_fill) { + key_essentials.flags |= essentials::prng_fill_flag; + value_essentials.flags |= essentials::prng_fill_flag; + } + (void)thread_number; mapping = actor.keygen; salt = (actor.keygen.seed + actor_id) * UINT64_C(14653293970879851569); @@ 
-298,6 +305,10 @@ void __hot maker::mk_begin(const serial_t serial, const essentials ¶ms, void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, result &out) { + static_assert((essentials::prng_fill_flag & + (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERKEY | + MDBX_INTEGERDUP | MDBX_REVERSEKEY | MDBX_REVERSEDUP)) == 0, + "WTF?"); out.value.iov_base = out.bytes; if (params.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) { assert(params.maxlen == params.minlen); @@ -308,7 +319,11 @@ void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, out.u32 = (uint32_t)serial; } else if (params.flags & (MDBX_REVERSEKEY | MDBX_REVERSEDUP)) { if (out.value.iov_len > 8) { - memset(out.bytes, '\0', out.value.iov_len - 8); + if (params.flags & essentials::prng_fill_flag) { + uint64_t state = serial ^ UINT64_C(0x41803711c9b75f19); + prng_fill(state, out.bytes, out.value.iov_len - 8); + } else + memset(out.bytes, '\0', out.value.iov_len - 8); unaligned::store(out.bytes + out.value.iov_len - 8, htobe64(serial)); } else { out.u64 = htobe64(serial); @@ -317,8 +332,13 @@ void __hot maker::mk_continue(const serial_t serial, const essentials ¶ms, } } else { out.u64 = htole64(serial); - if (out.value.iov_len > 8) - memset(out.bytes + 8, '\0', out.value.iov_len - 8); + if (out.value.iov_len > 8) { + if (params.flags & essentials::prng_fill_flag) { + uint64_t state = serial ^ UINT64_C(0x923ab47b7ee6f6e4); + prng_fill(state, out.bytes + 8, out.value.iov_len - 8); + } else + memset(out.bytes + 8, '\0', out.value.iov_len - 8); + } } assert(out.value.iov_len >= params.minlen); diff --git a/libs/libmdbx/src/test/keygen.h b/libs/libmdbx/src/test/keygen.h index b5674f1ca6..c36cc1a2e7 100644 --- a/libs/libmdbx/src/test/keygen.h +++ b/libs/libmdbx/src/test/keygen.h @@ -23,9 +23,8 @@ namespace keygen { /* Под "генерацией ключей" здесь понимается генерация обоих значений для * пар key-value, т.е. не только ключей, но и ассоциированных с ними данных. 
- */ - -/* Генерацию ключей нельзя отнести к простым задачам, так как требования + * + * Генерацию ключей нельзя отнести к простым задачам, так как требования * примерно следующие: * - генерация разного количества уникальных ключей различной длины * в задаваемом диапазоне; @@ -67,7 +66,8 @@ namespace keygen { * 1) смещение (сложение) по модулю; * 2) циклический сдвиг; * 3) добавление абсолютного смещения (базы); - */ + * + * Также см. описание параметров генератора ключей и значений в config.h */ typedef uint64_t serial_t; @@ -103,13 +103,14 @@ buffer alloc(size_t limit); class maker { config::keygen_params_pod mapping; - serial_t base; - serial_t salt; + serial_t base{0}; + serial_t salt{0}; struct essentials { - uint16_t minlen; - uint16_t flags; - uint32_t maxlen; + uint16_t minlen{0}; + enum { prng_fill_flag = 1 }; + uint16_t flags{0}; + uint32_t maxlen{0}; } key_essentials, value_essentials; static void mk_begin(const serial_t serial, const essentials ¶ms, @@ -122,8 +123,6 @@ class maker { } public: - maker() { memset(this, 0, sizeof(*this)); } - void pair(serial_t serial, const buffer &key, buffer &value, serial_t value_age, const bool keylen_changeable); void setup(const config::actor_params_pod &actor, unsigned actor_id, diff --git a/libs/libmdbx/src/test/long_stochastic.sh b/libs/libmdbx/src/test/long_stochastic.sh index 58ec6d5c6e..7141ee62c5 100644 --- a/libs/libmdbx/src/test/long_stochastic.sh +++ b/libs/libmdbx/src/test/long_stochastic.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -if ! which make cc c++ tee lz4 >/dev/null; then - echo "Please install the following prerequisites: make cc c++ tee lz4" >&2 - exit 1 +if ! which make cc c++ tee lz4 banner >/dev/null; then + echo "Please install the following prerequisites: make cc c++ tee lz4 banner" >&2 + exit 1 fi set -euo pipefail @@ -19,72 +19,72 @@ UNAME="$(uname -s 2>/dev/null || echo Unknown)" # 1. clean data from prev runs and examine available RAM if [[ -v VALGRIND && ! 
-z "$VALGRIND" ]]; then - rm -f valgrind-*.log + rm -f valgrind-*.log else - VALGRIND=time + VALGRIND=time fi WANNA_MOUNT=0 case ${UNAME} in - Linux) - MAKE=make - if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then - for old_test_dir in $(ls -d /dev/shm/mdbx-test.[0-9]*); do - rm -rf $old_test_dir - done - TESTDB_DIR="/dev/shm/mdbx-test.$$" - fi - mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* - - if LC_ALL=C free | grep -q -i available; then - ram_avail_mb=$(($(LC_ALL=C free | grep -i Mem: | tr -s [:blank:] ' ' | cut -d ' ' -f 7) / 1024)) - else - ram_avail_mb=$(($(LC_ALL=C free | grep -i Mem: | tr -s [:blank:] ' ' | cut -d ' ' -f 4) / 1024)) - fi - ;; - - FreeBSD) - MAKE=gmake - if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then - for old_test_dir in $(ls -d /tmp/mdbx-test.[0-9]*); do - umount $old_test_dir && rm -r $old_test_dir - done - TESTDB_DIR="/tmp/mdbx-test.$$" - rm -rf $TESTDB_DIR && mkdir -p $TESTDB_DIR - WANNA_MOUNT=1 - else - mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* - fi - - ram_avail_mb=$(($(LC_ALL=C vmstat -s | grep -ie '[0-9] pages free$' | cut -d p -f 1) * ($(LC_ALL=C vmstat -s | grep -ie '[0-9] bytes per page$' | cut -d b -f 1) / 1024) / 1024)) - ;; - - Darwin) - MAKE=make - if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then - for vol in $(ls -d /Volumes/mdx[0-9]*[0-9]tst); do - disk=$(mount | grep $vol | cut -d ' ' -f 1) - echo "umount: volume $vol disk $disk" - hdiutil unmount $vol -force - hdiutil detach $disk - done - TESTDB_DIR="/Volumes/mdx$$tst" - WANNA_MOUNT=1 - else - mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* - fi - - pagesize=$(($(LC_ALL=C vm_stat | grep -o 'page size of [0-9]\+ bytes' | cut -d' ' -f 4) / 1024)) - freepages=$(LC_ALL=C vm_stat | grep '^Pages free:' | grep -o '[0-9]\+\.$' | cut -d'.' 
-f 1) - ram_avail_mb=$((pagesize * freepages / 1024)) - echo "pagesize ${pagesize}K, freepages ${freepages}, ram_avail_mb ${ram_avail_mb}" - - ;; - - *) - echo "FIXME: ${UNAME} not supported by this script" - exit 2 - ;; + Linux) + MAKE=make + if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then + for old_test_dir in $(ls -d /dev/shm/mdbx-test.[0-9]*); do + rm -rf $old_test_dir + done + TESTDB_DIR="/dev/shm/mdbx-test.$$" + fi + mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* + + if LC_ALL=C free | grep -q -i available; then + ram_avail_mb=$(($(LC_ALL=C free | grep -i Mem: | tr -s [:blank:] ' ' | cut -d ' ' -f 7) / 1024)) + else + ram_avail_mb=$(($(LC_ALL=C free | grep -i Mem: | tr -s [:blank:] ' ' | cut -d ' ' -f 4) / 1024)) + fi + ;; + + FreeBSD) + MAKE=gmake + if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then + for old_test_dir in $(ls -d /tmp/mdbx-test.[0-9]*); do + umount $old_test_dir && rm -r $old_test_dir + done + TESTDB_DIR="/tmp/mdbx-test.$$" + rm -rf $TESTDB_DIR && mkdir -p $TESTDB_DIR + WANNA_MOUNT=1 + else + mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* + fi + + ram_avail_mb=$(($(LC_ALL=C vmstat -s | grep -ie '[0-9] pages free$' | cut -d p -f 1) * ($(LC_ALL=C vmstat -s | grep -ie '[0-9] bytes per page$' | cut -d b -f 1) / 1024) / 1024)) + ;; + + Darwin) + MAKE=make + if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then + for vol in $(ls -d /Volumes/mdx[0-9]*[0-9]tst); do + disk=$(mount | grep $vol | cut -d ' ' -f 1) + echo "umount: volume $vol disk $disk" + hdiutil unmount $vol -force + hdiutil detach $disk + done + TESTDB_DIR="/Volumes/mdx$$tst" + WANNA_MOUNT=1 + else + mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/* + fi + + pagesize=$(($(LC_ALL=C vm_stat | grep -o 'page size of [0-9]\+ bytes' | cut -d' ' -f 4) / 1024)) + freepages=$(LC_ALL=C vm_stat | grep '^Pages free:' | grep -o '[0-9]\+\.$' | cut -d'.' 
-f 1) + ram_avail_mb=$((pagesize * freepages / 1024)) + echo "pagesize ${pagesize}K, freepages ${freepages}, ram_avail_mb ${ram_avail_mb}" + + ;; + + *) + echo "FIXME: ${UNAME} not supported by this script" + exit 2 + ;; esac ############################################################################### @@ -93,8 +93,8 @@ esac echo "=== ${ram_avail_mb}M RAM available" ram_reserve4logs_mb=1234 if [ $ram_avail_mb -lt $ram_reserve4logs_mb ]; then - echo "=== At least ${ram_reserve4logs_mb}Mb RAM required" - exit 3 + echo "=== At least ${ram_reserve4logs_mb}Mb RAM required" + exit 3 fi # @@ -116,36 +116,36 @@ fi # system immediately, as well some space is required for logs. # db_size_mb=$(((ram_avail_mb - ram_reserve4logs_mb) / 4)) -if [ $db_size_mb -gt 3072 ]; then - db_size_mb=3072 +if [ $db_size_mb -gt 17408 ]; then + db_size_mb=17408 fi echo "=== use ${db_size_mb}M for DB" ############################################################################### # 3. Create test-directory in ramfs/tmpfs, i.e. 
create/format/mount if required case ${UNAME} in - Linux) - ;; - - FreeBSD) - if [[ WANNA_MOUNT ]]; then - mount -t tmpfs tmpfs $TESTDB_DIR - fi - ;; - - Darwin) - if [[ WANNA_MOUNT ]]; then - ramdisk_size_mb=$((42 + db_size_mb * 2 + ram_reserve4logs_mb)) - number_of_sectors=$((ramdisk_size_mb * 2048)) - ramdev=$(hdiutil attach -nomount ram://${number_of_sectors}) - diskutil erasevolume ExFAT "mdx$$tst" ${ramdev} - fi - ;; - - *) - echo "FIXME: ${UNAME} not supported by this script" - exit 2 - ;; + Linux) + ;; + + FreeBSD) + if [[ WANNA_MOUNT ]]; then + mount -t tmpfs tmpfs $TESTDB_DIR + fi + ;; + + Darwin) + if [[ WANNA_MOUNT ]]; then + ramdisk_size_mb=$((42 + db_size_mb * 2 + ram_reserve4logs_mb)) + number_of_sectors=$((ramdisk_size_mb * 2048)) + ramdev=$(hdiutil attach -nomount ram://${number_of_sectors}) + diskutil erasevolume ExFAT "mdx$$tst" ${ramdev} + fi + ;; + + *) + echo "FIXME: ${UNAME} not supported by this script" + exit 2 + ;; esac ############################################################################### @@ -157,64 +157,115 @@ rm -f ${TESTDB_DIR}/* ############################################################################### # 5. 
run stochastic iterations -function rep9 { printf "%*s" $1 '' | tr ' ' '9'; } function join { local IFS="$1"; shift; echo "$*"; } function bit2option { local -n arr=$1; (( ($2&(1<<$3)) != 0 )) && echo -n '+' || echo -n '-'; echo "${arr[$3]}"; } options=(writemap coalesce lifo notls) function bits2list { - local -n arr=$1 - local i - local list=() - for ((i=0; i<${#arr[@]}; ++i)) do - list[$i]=$(bit2option $1 $2 $i) - done - join , "${list[@]}" + local -n arr=$1 + local i + local list=() + for ((i=0; i<${#arr[@]}; ++i)) do + list[$i]=$(bit2option $1 $2 $i) + done + join , "${list[@]}" } function probe { - echo "=============================================== $(date)" - echo "${caption}: $*" - rm -f ${TESTDB_DIR}/* \ - && ${VALGRIND} ./mdbx_test --ignore-dbfull --repeat=42 --pathname=${TESTDB_DIR}/long.db "$@" | lz4 > ${TESTDB_DIR}/long.log.lz4 \ - && ${VALGRIND} ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ - && ([ ! -e ${TESTDB_DIR}/long.db-copy ] || ${VALGRIND} ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ - || (echo "FAILED"; exit 1) + echo "----------------------------------------------- $(date)" + echo "${caption}: $*" + rm -f ${TESTDB_DIR}/* \ + && ${VALGRIND} ./mdbx_test ${speculum} --ignore-dbfull --repeat=3 --pathname=${TESTDB_DIR}/long.db --cleanup-after=no "$@" \ + | tee >(lz4 > ${TESTDB_DIR}/long.log.lz4) | grep -e reach -e achieve \ + && ${VALGRIND} ./mdbx_chk ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \ + && ([ ! 
-e ${TESTDB_DIR}/long.db-copy ] || ${VALGRIND} ./mdbx_chk ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \ + || (echo "FAILED"; exit 1) } #------------------------------------------------------------------------------ count=0 -for nops in $(seq 2 6); do - for ((wbatch=nops-1; wbatch > 0; --wbatch)); do - loops=$(((111 >> nops) / nops + 3)) - for ((rep=0; rep++ < loops; )); do - for ((bits=2**${#options[@]}; --bits >= 0; )); do - seed=$(($(date +%s) + RANDOM)) - caption="Probe #$((++count)) int-key,w/o-dups, repeat ${rep} of ${loops}" probe \ - --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,-data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \ - --keygen.seed=${seed} basic - caption="Probe #$((++count)) int-key,with-dups, repeat ${rep} of ${loops}" probe \ - --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \ - --keygen.seed=${seed} basic - caption="Probe #$((++count)) int-key,int-data, repeat ${rep} of ${loops}" probe \ - --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.integer --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \ - --keygen.seed=${seed} basic - caption="Probe #$((++count)) w/o-dups, repeat ${rep} of ${loops}" probe \ - --pagesize=min --size-upper=${db_size_mb}M --table=-data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \ - --keygen.seed=${seed} basic - caption="Probe #$((++count)) with-dups, repeat ${rep} of ${loops}" probe \ - --pagesize=min 
--size-upper=${db_size_mb}M --table=+data.dups --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$( rep9 $nops ) --batch.write=$( rep9 $wbatch ) --mode=$(bits2list options $bits) \ - --keygen.seed=${seed} basic - done - done - done -done +cases='?' +for nops in 10 100 1000 10000 100000 1000000 10000000 100000000 1000000000; do + echo "=======================================================================" + wbatch=$((nops / 10 + 1)) + speculum=$([ $nops -le 1000 ] && echo '--speculum' || true) + while true; do + echo "=======================================================================" + banner "$nops / $wbatch" + subcase=0 + for ((bits=2**${#options[@]}; --bits >= 0; )); do + seed=$(($(date +%s) + RANDOM)) + + split=30 + caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + + split=24 + caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of 
${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + + split=16 + caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M 
--table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + + split=4 + caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --pagesize=min --size-upper=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + 
--nops=$nops --batch.write=$wbatch --mode=$(bits2list options $bits) \ + --keygen.seed=${seed} basic + done # options + cases="${subcase}" + wbatch=$(((wbatch > 9) ? wbatch / 10 : 1)) + if [ $wbatch -eq 1 -o $((nops / wbatch)) -gt 1000 ]; then break; fi + done # batch (write-ops per txn) +done # n-ops echo "=== ALL DONE ====================== $(date)" diff --git a/libs/libmdbx/src/test/main.cc b/libs/libmdbx/src/test/main.cc index 656d7c52f9..c6b15ea29b 100644 --- a/libs/libmdbx/src/test/main.cc +++ b/libs/libmdbx/src/test/main.cc @@ -81,6 +81,7 @@ void __noreturn usage(void) { " --keygen.width=N TBD (see the source code)\n" " --keygen.mesh=N TBD (see the source code)\n" " --keygen.seed=N TBD (see the source code)\n" + " --keygen.zerofill=yes|NO TBD (see the source code)\n" " --keygen.split=N TBD (see the source code)\n" " --keygen.rotate=N TBD (see the source code)\n" " --keygen.offset=N TBD (see the source code)\n" @@ -136,6 +137,7 @@ void actor_params::set_defaults(const std::string &tmpdir) { pagesize = -1; keygen.seed = 1; + keygen.zero_fill = false; keygen.keycase = kc_random; keygen.width = (table_flags & MDBX_DUPSORT) ? 
32 : 64; keygen.mesh = keygen.width; @@ -209,7 +211,11 @@ std::string thunk_param(const actor_config &config) { void cleanup() { log_trace(">> cleanup"); - /* TODO: remove each database */ + for (const auto &db_path : global::databases) { + int err = osal_removefile(db_path); + if (err != MDBX_SUCCESS && err != MDBX_ENOFILE) + failure_perror(db_path.c_str(), err); + } log_trace("<< cleanup"); } @@ -317,19 +323,22 @@ int main(int argc, char *const argv[]) { continue; if (config::parse_option(argc, argv, narg, "keygen.width", - params.keygen.width, 1, 64)) + params.keygen.width, 8, 64)) continue; if (config::parse_option(argc, argv, narg, "keygen.mesh", - params.keygen.mesh, 1, 64)) + params.keygen.mesh, 0, 64)) continue; if (config::parse_option(argc, argv, narg, "keygen.seed", params.keygen.seed, config::no_scale)) continue; + if (config::parse_option(argc, argv, narg, "keygen.zerofill", + params.keygen.zero_fill)) + continue; if (config::parse_option(argc, argv, narg, "keygen.split", - params.keygen.split, 1, 64)) + params.keygen.split, 0, 63)) continue; if (config::parse_option(argc, argv, narg, "keygen.rotate", - params.keygen.rotate, 1, 64)) + params.keygen.rotate, 0, 63)) continue; if (config::parse_option(argc, argv, narg, "keygen.offset", params.keygen.offset, config::binary)) @@ -587,7 +596,7 @@ int main(int argc, char *const argv[]) { } log_notice("RESULT: %s\n", failed ? "Failed" : "Successful"); - if (global::config::cleanup_before) { + if (global::config::cleanup_after) { if (failed) log_verbose("skip cleanup"); else diff --git a/libs/libmdbx/src/test/nested.cc b/libs/libmdbx/src/test/nested.cc index beebd88071..85df6fa62b 100644 --- a/libs/libmdbx/src/test/nested.cc +++ b/libs/libmdbx/src/test/nested.cc @@ -15,6 +15,25 @@ #include "test.h" #include <cmath> +/* LY: тест "эмуляцией time-to-live" с вложенными транзакциями: + * - организуется "скользящее окно", которое каждую транзакцию сдвигается + * вперед вдоль числовой оси. 
+ * - по переднему краю "скользящего окна" записи добавляются в таблицу, + * а по заднему удаляются. + * - количество добавляемых/удаляемых записей псевдослучайно зависит + * от номера транзакции, но с экспоненциальным распределением. + * - размер "скользящего окна" также псевдослучайно зависит от номера + * транзакции с "отрицательным" экспоненциальным распределением + * MAX_WIDTH - exp(rnd(N)), при уменьшении окна сдвигается задний + * край и удаляются записи позади него. + * - групповое добавление данных в начало окна и групповое удаление в конце, + * преимущественно выполняются во вложенных транзакциях. + * - меньшая часть запускаемых вложенных транзакций отменяется, с последующим + * продолжением итераций с состояния предыдущиего коммита. + * + * Таким образом имитируется поведение таблицы с TTL: записи стохастически + * добавляются и удаляются, и изредка происходят массивные удаления. */ + bool testcase_nested::setup() { if (!inherited::setup()) return false; @@ -56,18 +75,6 @@ bool testcase_nested::teardown() { return inherited::teardown() && ok; } -static unsigned edge2window(uint64_t edge, unsigned window_max) { - const double rnd = u64_to_double1(bleach64(edge)); - const unsigned window = window_max - std::lrint(std::pow(window_max, rnd)); - return window; -} - -static unsigned edge2count(uint64_t edge, unsigned count_max) { - const double rnd = u64_to_double1(prng64_map1_white(edge)); - const unsigned count = std::lrint(std::pow(count_max, rnd)); - return count; -} - void testcase_nested::push_txn() { MDBX_txn *txn; unsigned flags = @@ -75,11 +82,7 @@ void testcase_nested::push_txn() { int err = mdbx_txn_begin(db_guard.get(), txn_guard.get(), flags, &txn); if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_txn_begin(nested)", err); -#if __cplusplus >= 201703L - stack.emplace(txn, serial, fifo, speculum); -#else - stack.push(std::make_tuple(scoped_txn_guard(txn), serial, fifo, speculum)); -#endif + stack.emplace(scoped_txn_guard(txn), serial, fifo, 
speculum); std::swap(txn_guard, std::get<0>(stack.top())); log_verbose("begin level#%zu txn #%" PRIu64 ", flags 0x%x, serial %" PRIu64, stack.size(), mdbx_txn_id(txn), flags, serial); @@ -150,7 +153,8 @@ bool testcase_nested::stochastic_breakable_restart_with_nested( } bool testcase_nested::trim_tail(unsigned window_width) { - if (window_width) { + if (window_width || flipcoin()) { + clear_stepbystep_passed += window_width == 0; while (fifo.size() > window_width) { uint64_t tail_serial = fifo.back().first; const unsigned tail_count = fifo.back().second; @@ -171,6 +175,7 @@ bool testcase_nested::trim_tail(unsigned window_width) { if (unlikely(!keyvalue_maker.increment(tail_serial, 1))) failure("nested: unexpected key-space overflow on the tail"); } + report(tail_count); } } else if (!fifo.empty()) { log_verbose("nested: purge state %" PRIu64 " - %" PRIu64 ", fifo-items %zu", @@ -178,7 +183,8 @@ bool testcase_nested::trim_tail(unsigned window_width) { fifo.size()); db_table_clear(dbi, txn_guard.get()); fifo.clear(); - speculum.clear(); + clear_wholetable_passed += 1; + report(1); } return true; } @@ -198,6 +204,7 @@ retry: log_notice("nested: head-insert skip due '%s'", mdbx_strerror(err)); head_count = n; stochastic_breakable_restart_with_nested(true); + dbfull_passed += 1; goto retry; } failure_perror("mdbx_put(head)", err); @@ -205,7 +212,10 @@ retry: if (unlikely(!keyvalue_maker.increment(serial, 1))) { log_notice("nested: unexpected key-space overflow"); - return false; + keyspace_overflow = true; + head_count = n; + stochastic_breakable_restart_with_nested(true); + goto retry; } } @@ -213,48 +223,19 @@ retry: } bool testcase_nested::run() { - /* LY: тест "эмуляцией time-to-live" с вложенными транзакциями: - * - организуется "скользящее окно", которое каждую транзакцию сдвигается - * вперед вдоль числовой оси. - * - по переднему краю "скользящего окна" записи добавляются в таблицу, - * а по заднему удаляются. 
- * - количество добавляемых/удаляемых записей псевдослучайно зависит - * от номера транзакции, но с экспоненциальным распределением. - * - размер "скользящего окна" также псевдослучайно зависит от номера - * транзакции с "отрицательным" экспоненциальным распределением - * MAX_WIDTH - exp(rnd(N)), при уменьшении окна сдвигается задний - * край и удаляются записи позади него. - * - групповое добавление данных в начало окна и групповое уделение в конце, - * в половине случаев выполняются во вложенных транзакциях. - * - половина запускаемых вложенных транзакций отменяется, последуюим - * повтором групповой операции. - * - * Таким образом имитируется поведение таблицы с TTL: записи стохастически - * добавляются и удаляются, но изредка происходят массивные удаления. */ - - /* LY: для параметризации используем подходящие параметры, которые не имеют - * здесь смысла в первоначальном значении. */ - const unsigned window_max_lower = 333; - const unsigned count_max_lower = 333; - - const unsigned window_max = (config.params.batch_read > window_max_lower) - ? config.params.batch_read - : window_max_lower; - const unsigned count_max = (config.params.batch_write > count_max_lower) - ? config.params.batch_write - : count_max_lower; - log_verbose("nested: using `batch_read` value %u for window_max", window_max); - log_verbose("nested: using `batch_write` value %u for count_max", count_max); - uint64_t seed = prng64_map2_white(config.params.keygen.seed) + config.actor_id; - while (should_continue()) { + clear_wholetable_passed = 0; + clear_stepbystep_passed = 0; + dbfull_passed = 0; + unsigned loops = 0; + while (true) { const uint64_t salt = prng64_white(seed) /* mdbx_txn_id(txn_guard.get()) */; const unsigned window_width = - flipcoin_x4() ? 0 : edge2window(salt, window_max); - const unsigned head_count = edge2count(salt, count_max); - log_debug("nested: step #%zu (serial %" PRIu64 + (!should_continue() || flipcoin_x4()) ? 
0 : edge2window(salt); + const unsigned head_count = edge2count(salt); + log_debug("nested: step #%" PRIu64 " (serial %" PRIu64 ", window %u, count %u) salt %" PRIu64, nops_completed, serial, window_width, head_count, salt); @@ -269,16 +250,33 @@ bool testcase_nested::run() { return false; } - if (!grow_head(head_count)) - return false; - if (!stochastic_breakable_restart_with_nested()) - log_notice("nested: skip commit/restart after head-grow"); - if (!speculum_verify()) { - log_notice("nested: bailout after head-grow"); - return false; + if (!keyspace_overflow && (should_continue() || !clear_wholetable_passed || + !clear_stepbystep_passed)) { + unsigned underutilization_x256 = + txn_underutilization_x256(txn_guard.get()); + if (dbfull_passed > underutilization_x256) { + log_notice("nested: skip head-grow to avoid one more dbfull (was %u, " + "underutilization %.2f%%)", + dbfull_passed, underutilization_x256 / 2.560); + continue; + } + if (!grow_head(head_count)) + return false; + if (!stochastic_breakable_restart_with_nested()) + log_notice("nested: skip commit/restart after head-grow"); + if (!speculum_verify()) { + log_notice("nested: bailout after head-grow"); + return false; + } + loops += 1; + } else if (fifo.empty()) { + log_notice("nested: done %u whole loops, %" PRIu64 " ops, %" PRIu64 + " items", + loops, nops_completed, serial); + break; + } else { + log_notice("nested: done, wait for empty, skip head-grow"); } - - report(1); } while (!stack.empty()) diff --git a/libs/libmdbx/src/test/osal-unix.cc b/libs/libmdbx/src/test/osal-unix.cc index dc0774063a..757c8583c0 100644 --- a/libs/libmdbx/src/test/osal-unix.cc +++ b/libs/libmdbx/src/test/osal-unix.cc @@ -517,9 +517,9 @@ std::string osal_tempdir(void) { tempdir = getenv("TEMPDIR"); if (!tempdir) tempdir = getenv("TEMP"); - if (tempdir) { + if (tempdir && *tempdir) { std::string dir(tempdir); - if (!dir.empty() && dir.at(dir.length() - 1) != '/') + if (dir.back() != '/') dir.append("/"); return dir; } diff 
--git a/libs/libmdbx/src/test/test.cc b/libs/libmdbx/src/test/test.cc index 9da2cf2fff..beeba7abab 100644 --- a/libs/libmdbx/src/test/test.cc +++ b/libs/libmdbx/src/test/test.cc @@ -172,28 +172,29 @@ void testcase::txn_begin(bool readonly, unsigned flags) { if (unlikely(rc != MDBX_SUCCESS)) failure_perror("mdbx_txn_begin()", rc); txn_guard.reset(txn); + need_speculum_assign = config.params.speculum && !readonly; log_trace("<< txn_begin(%s, 0x%04X)", readonly ? "read-only" : "read-write", flags); } int testcase::breakable_commit() { - int rc = MDBX_SUCCESS; log_trace(">> txn_commit"); assert(txn_guard); MDBX_txn *txn = txn_guard.release(); txn_inject_writefault(txn); - int err = mdbx_txn_commit(txn); - if (unlikely(err != MDBX_SUCCESS)) { - if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) { - rc = err; - err = mdbx_txn_abort(txn); - if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH && - err != MDBX_BAD_TXN)) - failure_perror("mdbx_txn_abort()", err); - } else - failure_perror("mdbx_txn_commit()", err); + int rc = mdbx_txn_commit(txn); + if (unlikely(rc != MDBX_SUCCESS) && + (rc != MDBX_MAP_FULL || !config.params.ignore_dbfull)) + failure_perror("mdbx_txn_commit()", rc); + + if (need_speculum_assign) { + need_speculum_assign = false; + if (unlikely(rc != MDBX_SUCCESS)) + speculum = speculum_commited; + else + speculum_commited = speculum; } log_trace("<< txn_commit: %s", rc ? 
"failed" : "Ok"); @@ -221,14 +222,17 @@ void testcase::txn_end(bool abort) { MDBX_txn *txn = txn_guard.release(); if (abort) { int err = mdbx_txn_abort(txn); - if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH && - err != MDBX_BAD_TXN)) + if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_txn_abort()", err); + if (need_speculum_assign) + speculum = speculum_commited; } else { txn_inject_writefault(txn); int err = mdbx_txn_commit(txn); if (unlikely(err != MDBX_SUCCESS)) failure_perror("mdbx_txn_commit()", err); + if (need_speculum_assign) + speculum_commited = speculum; } log_trace("<< txn_end(%s)", abort ? "abort" : "commit"); @@ -336,18 +340,18 @@ void testcase::report(size_t nops_done) { return; nops_completed += nops_done; - log_debug("== complete +%" PRIuPTR " iteration, total %" PRIuPTR " done", + log_debug("== complete +%" PRIuPTR " iteration, total %" PRIu64 " done", nops_done, nops_completed); kick_progress(true); if (config.signal_nops && !signalled && config.signal_nops <= nops_completed) { - log_trace(">> signal(n-ops %" PRIuPTR ")", nops_completed); + log_trace(">> signal(n-ops %" PRIu64 ")", nops_completed); if (!global::singlemode) osal_broadcast(config.actor_id); signalled = true; - log_trace("<< signal(n-ops %" PRIuPTR ")", nops_completed); + log_trace("<< signal(n-ops %" PRIu64 ")", nops_completed); } } @@ -491,6 +495,7 @@ void testcase::db_table_drop(MDBX_dbi handle) { int rc = mdbx_drop(txn_guard.get(), handle, true); if (unlikely(rc != MDBX_SUCCESS)) failure_perror("mdbx_drop(delete=true)", rc); + speculum.clear(); log_trace("<< testcase::db_table_drop"); } else { log_trace("<< testcase::db_table_drop: not needed"); @@ -502,6 +507,7 @@ void testcase::db_table_clear(MDBX_dbi handle, MDBX_txn *txn) { int rc = mdbx_drop(txn ? 
txn : txn_guard.get(), handle, false); if (unlikely(rc != MDBX_SUCCESS)) failure_perror("mdbx_drop(delete=false)", rc); + speculum.clear(); log_trace("<< testcase::db_table_clear"); } diff --git a/libs/libmdbx/src/test/test.h b/libs/libmdbx/src/test/test.h index 8837cb598b..8ecf5c46fe 100644 --- a/libs/libmdbx/src/test/test.h +++ b/libs/libmdbx/src/test/test.h @@ -104,7 +104,9 @@ protected: using data_view = std::string; #endif static inline data_view S(const MDBX_val &v) { - return data_view(static_cast<const char *>(v.iov_base), v.iov_len); + return (v.iov_base && v.iov_len) + ? data_view(static_cast<const char *>(v.iov_base), v.iov_len) + : data_view(); } static inline data_view S(const keygen::buffer &b) { return S(b->value); } @@ -131,18 +133,22 @@ protected: return cmp < 0; } }; + + // for simplify the set<pair<key,value>> + // is used instead of multimap<key,value> using SET = std::set<Item, ItemCompare>; const actor_config &config; const mdbx_pid_t pid; - MDBX_dbi dbi; + MDBX_dbi dbi{0}; scoped_db_guard db_guard; scoped_txn_guard txn_guard; scoped_cursor_guard cursor_guard; - bool signalled; + bool signalled{false}; + bool need_speculum_assign{false}; - size_t nops_completed; + uint64_t nops_completed{0}; chrono::time start_timestamp; keygen::buffer key; keygen::buffer data; @@ -152,7 +158,7 @@ protected: mdbx_canary canary; } last; - SET speculum; + SET speculum{ItemCompare(this)}, speculum_commited{ItemCompare(this)}; bool speculum_verify(); int insert(const keygen::buffer &akey, const keygen::buffer &adata, unsigned flags); @@ -211,8 +217,7 @@ protected: public: testcase(const actor_config &config, const mdbx_pid_t pid) - : config(config), pid(pid), signalled(false), nops_completed(0), - speculum(ItemCompare(this)) { + : config(config), pid(pid) { start_timestamp.reset(); memset(&last, 0, sizeof(last)); } @@ -223,20 +228,10 @@ public: virtual ~testcase() {} }; -class testcase_ttl : public testcase { -public: - testcase_ttl(const actor_config &config, 
const mdbx_pid_t pid) - : testcase(config, pid) {} - bool run() override; -}; - class testcase_hill : public testcase { - using inherited = testcase; - SET speculum_commited; - public: testcase_hill(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid), speculum_commited(ItemCompare(this)) {} + : testcase(config, pid) {} bool run() override; }; @@ -286,11 +281,33 @@ public: bool run() override; }; -class testcase_nested : public testcase { +class testcase_ttl : public testcase { using inherited = testcase; + +protected: + struct { + unsigned max_window_size{0}; + unsigned max_step_size{0}; + } sliding; + unsigned edge2window(uint64_t edge); + unsigned edge2count(uint64_t edge); + +public: + testcase_ttl(const actor_config &config, const mdbx_pid_t pid) + : inherited(config, pid) {} + bool setup() override; + bool run() override; +}; + +class testcase_nested : public testcase_ttl { + using inherited = testcase_ttl; using FIFO = std::deque<std::pair<uint64_t, unsigned>>; - uint64_t serial; + uint64_t serial{0}; + unsigned clear_wholetable_passed{0}; + unsigned clear_stepbystep_passed{0}; + unsigned dbfull_passed{0}; + bool keyspace_overflow{false}; FIFO fifo; std::stack<std::tuple<scoped_txn_guard, uint64_t, FIFO, SET>> stack; @@ -306,7 +323,7 @@ class testcase_nested : public testcase { public: testcase_nested(const actor_config &config, const mdbx_pid_t pid) - : testcase(config, pid) {} + : inherited(config, pid) {} bool setup() override; bool run() override; bool teardown() override; diff --git a/libs/libmdbx/src/test/ttl.cc b/libs/libmdbx/src/test/ttl.cc index 92e99b82f6..e3927d9cd4 100644 --- a/libs/libmdbx/src/test/ttl.cc +++ b/libs/libmdbx/src/test/ttl.cc @@ -16,18 +16,99 @@ #include <cmath> #include <deque> -static unsigned edge2window(uint64_t edge, unsigned window_max) { - const double rnd = u64_to_double1(bleach64(edge)); - const unsigned window = window_max - std::lrint(std::pow(window_max, rnd)); - return window; -} +/* LY: тест 
"эмуляцией time-to-live": + * - организуется "скользящее окно", которое двигается вперед вдоль + * числовой оси каждую транзакцию. + * - по переднему краю "скользящего окна" записи добавляются в таблицу, + * а по заднему удаляются. + * - количество добавляемых/удаляемых записей псевдослучайно зависит + * от номера транзакции, но с экспоненциальным распределением. + * - размер "скользящего окна" также псевдослучайно зависит от номера + * транзакции с "отрицательным" экспоненциальным распределением + * MAX_WIDTH - exp(rnd(N)), при уменьшении окна сдвигается задний + * край и удаляются записи позади него. + * + * Таким образом имитируется поведение таблицы с TTL: записи стохастически + * добавляются и удаляются, но изредка происходит массивное удаление. + */ -static unsigned edge2count(uint64_t edge, unsigned count_max) { +unsigned testcase_ttl::edge2count(uint64_t edge) { const double rnd = u64_to_double1(prng64_map1_white(edge)); - const unsigned count = std::lrint(std::pow(count_max, rnd)); + const unsigned count = std::lrint(std::pow(sliding.max_step_size, rnd)); + // average value: (X - 1) / ln(X), where X = sliding.max_step_size return count; } +unsigned testcase_ttl::edge2window(uint64_t edge) { + const double rnd = u64_to_double1(bleach64(edge)); + const unsigned size = sliding.max_window_size - + std::lrint(std::pow(sliding.max_window_size, rnd)); + // average value: Y - (Y - 1) / ln(Y), where Y = sliding.max_window_size + return size; +} + +static inline double estimate(const double x, const double y) { + /* среднее кол-во операций N = X' * Y', где X' и Y' средние значения + * размера окна и кол-ва добавляемых за один шаг записей: + * X' = (X - 1) / ln(X), где X = sliding.max_step_size + * Y' = Y - (Y - 1) / ln(Y), где Y = sliding.max_window_size */ + return (x - 1) / std::log(x) * (y - (y - 1) / std::log(y)); +} + +bool testcase_ttl::setup() { + const unsigned window_top_lower = + 7 /* нижний предел для верхней границы диапазона, в котором будет + 
стохастически колебаться размер окна */ + ; + const unsigned count_top_lower = + 7 /* нижний предел для верхней границы диапазона, в котором будет + стохастически колебаться кол-во записей добавляемых на одном шаге */ + ; + + /* для параметризации используем подходящие параметры, + * которые не имеют здесь смысла в первоначальном значении. */ + const double ratio = + double(config.params.batch_read ? config.params.batch_read : 1) / + double(config.params.batch_write ? config.params.batch_write : 1); + + /* проще найти двоичным поиском (вариация метода Ньютона) */ + double hi = config.params.test_nops, lo = 1; + double x = std::sqrt(hi + lo) / ratio; + while (hi > lo) { + const double n = estimate(x, x * ratio); + if (n > config.params.test_nops) + hi = x - 1; + else + lo = x + 1; + x = (hi + lo) / 2; + } + + sliding.max_step_size = std::lrint(x); + if (sliding.max_step_size < count_top_lower) + sliding.max_step_size = count_top_lower; + sliding.max_window_size = std::lrint(x * ratio); + if (sliding.max_window_size < window_top_lower) + sliding.max_window_size = window_top_lower; + + while (estimate(sliding.max_step_size, sliding.max_window_size) > + config.params.test_nops * 2.0) { + if (ratio * sliding.max_step_size > sliding.max_window_size) { + if (sliding.max_step_size < count_top_lower) + break; + sliding.max_step_size = sliding.max_step_size * 7 / 8; + } else { + if (sliding.max_window_size < window_top_lower) + break; + sliding.max_window_size = sliding.max_window_size * 7 / 8; + } + } + + log_verbose("come up window_max %u from `batch_read`", + sliding.max_window_size); + log_verbose("come up step_max %u from `batch_write`", sliding.max_step_size); + return inherited::setup(); +} + bool testcase_ttl::run() { int err = db_open__begin__table_create_open_clean(dbi); if (unlikely(err != MDBX_SUCCESS)) { @@ -35,36 +116,6 @@ bool testcase_ttl::run() { return false; } - /* LY: тест "эмуляцией time-to-live": - * - организуется "скользящее окно", которое двигается 
вперед вдоль - * числовой оси каждую транзакцию. - * - по переднему краю "скользящего окна" записи добавляются в таблицу, - * а по заднему удаляются. - * - количество добавляемых/удаляемых записей псевдослучайно зависит - * от номера транзакции, но с экспоненциальным распределением. - * - размер "скользящего окна" также псевдослучайно зависит от номера - * транзакции с "отрицательным" экспоненциальным распределением - * MAX_WIDTH - exp(rnd(N)), при уменьшении окна сдвигается задний - * край и удаляются записи позади него. - * - * Таким образом имитируется поведение таблицы с TTL: записи стохастически - * добавляются и удаляются, но изредка происходят массивные удаления. - */ - - /* LY: для параметризации используем подходящие параметры, которые не имеют - * здесь смысла в первоначальном значении. */ - const unsigned window_max_lower = 333; - const unsigned count_max_lower = 333; - - const unsigned window_max = (config.params.batch_read > window_max_lower) - ? config.params.batch_read - : window_max_lower; - const unsigned count_max = (config.params.batch_write > count_max_lower) - ? config.params.batch_write - : count_max_lower; - log_verbose("ttl: using `batch_read` value %u for window_max", window_max); - log_verbose("ttl: using `batch_write` value %u for count_max", count_max); - uint64_t seed = prng64_map2_white(config.params.keygen.seed) + config.actor_id; keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */); @@ -77,17 +128,23 @@ bool testcase_ttl::run() { std::deque<std::pair<uint64_t, unsigned>> fifo; uint64_t serial = 0; bool rc = false; - while (should_continue()) { + unsigned clear_wholetable_passed = 0; + unsigned clear_stepbystep_passed = 0; + unsigned dbfull_passed = 0; + unsigned loops = 0; + bool keyspace_overflow = false; + while (true) { const uint64_t salt = prng64_white(seed) /* mdbx_txn_id(txn_guard.get()) */; const unsigned window_width = - flipcoin_x4() ? 
0 : edge2window(salt, window_max); - unsigned head_count = edge2count(salt, count_max); - log_debug("ttl: step #%zu (serial %" PRIu64 + (!should_continue() || flipcoin_x4()) ? 0 : edge2window(salt); + unsigned head_count = edge2count(salt); + log_debug("ttl: step #%" PRIu64 " (serial %" PRIu64 ", window %u, count %u) salt %" PRIu64, nops_completed, serial, window_width, head_count, salt); - if (window_width) { + if (window_width || flipcoin()) { + clear_stepbystep_passed += window_width == 0; while (fifo.size() > window_width) { uint64_t tail_serial = fifo.back().first; const unsigned tail_count = fifo.back().second; @@ -97,7 +154,7 @@ bool testcase_ttl::run() { for (unsigned n = 0; n < tail_count; ++n) { log_trace("ttl: remove-tail %" PRIu64, tail_serial); generate_pair(tail_serial); - err = mdbx_del(txn_guard.get(), dbi, &key->value, &data->value); + err = remove(key, data); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) { log_notice("ttl: tail-bailout due '%s'", mdbx_strerror(err)); @@ -108,11 +165,14 @@ bool testcase_ttl::run() { if (unlikely(!keyvalue_maker.increment(tail_serial, 1))) failure("ttl: unexpected key-space overflow on the tail"); } + report(tail_count); } } else { log_trace("ttl: purge state"); db_table_clear(dbi); fifo.clear(); + clear_wholetable_passed += 1; + report(1); } err = breakable_restart(); @@ -120,38 +180,68 @@ bool testcase_ttl::run() { log_notice("ttl: bailout at commit due '%s'", mdbx_strerror(err)); break; } - fifo.push_front(std::make_pair(serial, head_count)); - retry: - for (unsigned n = 0; n < head_count; ++n) { - log_trace("ttl: insert-head %" PRIu64, serial); - generate_pair(serial); - err = mdbx_put(txn_guard.get(), dbi, &key->value, &data->value, - insert_flags); - if (unlikely(err != MDBX_SUCCESS)) { - if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) { - log_notice("ttl: head-insert skip due '%s'", mdbx_strerror(err)); + if (!speculum_verify()) { + log_notice("ttl: 
bailout after tail-trim"); + return false; + } + + if (!keyspace_overflow && (should_continue() || !clear_wholetable_passed || + !clear_stepbystep_passed)) { + unsigned underutilization_x256 = + txn_underutilization_x256(txn_guard.get()); + if (dbfull_passed > underutilization_x256) { + log_notice("ttl: skip head-grow to avoid one more dbfull (was %u, " + "underutilization %.2f%%)", + dbfull_passed, underutilization_x256 / 2.560); + continue; + } + fifo.push_front(std::make_pair(serial, head_count)); + retry: + for (unsigned n = 0; n < head_count; ++n) { + log_trace("ttl: insert-head %" PRIu64, serial); + generate_pair(serial); + err = insert(key, data, insert_flags); + if (unlikely(err != MDBX_SUCCESS)) { + if ((err == MDBX_TXN_FULL || err == MDBX_MAP_FULL) && + config.params.ignore_dbfull) { + log_notice("ttl: head-insert skip due '%s'", mdbx_strerror(err)); + txn_restart(true, false); + serial = fifo.front().first; + fifo.front().second = head_count = n; + dbfull_passed += 1; + goto retry; + } + failure_perror("mdbx_put(head)", err); + } + + if (unlikely(!keyvalue_maker.increment(serial, 1))) { + log_notice("ttl: unexpected key-space overflow"); + keyspace_overflow = true; txn_restart(true, false); serial = fifo.front().first; fifo.front().second = head_count = n; goto retry; } - failure_perror("mdbx_put(head)", err); } - - if (unlikely(!keyvalue_maker.increment(serial, 1))) { - log_notice("ttl: unexpected key-space overflow"); - goto bailout; + err = breakable_restart(); + if (unlikely(err != MDBX_SUCCESS)) { + log_notice("ttl: head-commit skip due '%s'", mdbx_strerror(err)); + serial = fifo.front().first; + fifo.pop_front(); } + if (!speculum_verify()) { + log_notice("ttl: bailout after head-grow"); + return false; + } + loops += 1; + } else if (fifo.empty()) { + log_notice("ttl: done %u whole loops, %" PRIu64 " ops, %" PRIu64 " items", + loops, nops_completed, serial); + rc = true; + break; + } else { + log_notice("ttl: done, wait for empty, skip head-grow"); 
} - err = breakable_restart(); - if (unlikely(err != MDBX_SUCCESS)) { - log_notice("ttl: head-commit skip due '%s'", mdbx_strerror(err)); - serial = fifo.front().first; - fifo.pop_front(); - } - - report(1); - rc = true; } bailout: diff --git a/libs/libmdbx/src/test/utils.cc b/libs/libmdbx/src/test/utils.cc index 596e66e3f5..051671ff87 100644 --- a/libs/libmdbx/src/test/utils.cc +++ b/libs/libmdbx/src/test/utils.cc @@ -265,22 +265,24 @@ uint32_t prng32(uint64_t &state) { } void prng_fill(uint64_t &state, void *ptr, size_t bytes) { + uint32_t u32 = prng32(state); + while (bytes >= 4) { - *((uint32_t *)ptr) = prng32(state); + memcpy(ptr, &u32, 4); ptr = (uint32_t *)ptr + 1; bytes -= 4; + u32 = prng32(state); } switch (bytes & 3) { - case 3: { - uint32_t u32 = prng32(state); + case 3: memcpy(ptr, &u32, 3); - } break; + break; case 2: - *((uint16_t *)ptr) = (uint16_t)prng32(state); + memcpy(ptr, &u32, 2); break; case 1: - *((uint8_t *)ptr) = (uint8_t)prng32(state); + memcpy(ptr, &u32, 1); break; case 0: break; diff --git a/libs/libmdbx/src/test/utils.h b/libs/libmdbx/src/test/utils.h index a5061c1444..9e6d4627aa 100644 --- a/libs/libmdbx/src/test/utils.h +++ b/libs/libmdbx/src/test/utils.h @@ -243,9 +243,9 @@ static __inline void cpu_relax() { //----------------------------------------------------------------------------- struct simple_checksum { - uint64_t value; + uint64_t value{0}; - simple_checksum() : value(0) {} + simple_checksum() = default; void push(const uint32_t &data) { value += data * UINT64_C(9386433910765580089) + 1; diff --git a/libs/libmdbx/src/test/valgrind_suppress.txt b/libs/libmdbx/src/test/valgrind_suppress.txt index 98309ceb4f..e1e152051d 100644 --- a/libs/libmdbx/src/test/valgrind_suppress.txt +++ b/libs/libmdbx/src/test/valgrind_suppress.txt @@ -12,8 +12,12 @@ msync(start) fun:msync ... 
- fun:mdbx_env_sync_ex + fun:mdbx_env_sync_internal } + +# modern Valgrind don't support the `vector[...]` pattern +# for((i=0;i<64;++i)); do echo -e "{\n pwrite-page-flush-$i\n Memcheck:Param\n pwritev(vector[$i])\n fun:pwritev\n ...\n fun:mdbx_page_flush\n}"; done >> valgrind_suppress.txt + { pwrite-page-flush Memcheck:Param @@ -22,3 +26,515 @@ ... fun:mdbx_page_flush } +{ + pwrite-page-flush-0 + Memcheck:Param + pwritev(vector[0]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-1 + Memcheck:Param + pwritev(vector[1]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-2 + Memcheck:Param + pwritev(vector[2]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-3 + Memcheck:Param + pwritev(vector[3]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-4 + Memcheck:Param + pwritev(vector[4]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-5 + Memcheck:Param + pwritev(vector[5]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-6 + Memcheck:Param + pwritev(vector[6]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-7 + Memcheck:Param + pwritev(vector[7]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-8 + Memcheck:Param + pwritev(vector[8]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-9 + Memcheck:Param + pwritev(vector[9]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-10 + Memcheck:Param + pwritev(vector[10]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-11 + Memcheck:Param + pwritev(vector[11]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-12 + Memcheck:Param + pwritev(vector[12]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-13 + Memcheck:Param + pwritev(vector[13]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-14 + Memcheck:Param + pwritev(vector[14]) + fun:pwritev + ... 
+ fun:mdbx_page_flush +} +{ + pwrite-page-flush-15 + Memcheck:Param + pwritev(vector[15]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-16 + Memcheck:Param + pwritev(vector[16]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-17 + Memcheck:Param + pwritev(vector[17]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-18 + Memcheck:Param + pwritev(vector[18]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-19 + Memcheck:Param + pwritev(vector[19]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-20 + Memcheck:Param + pwritev(vector[20]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-21 + Memcheck:Param + pwritev(vector[21]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-22 + Memcheck:Param + pwritev(vector[22]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-23 + Memcheck:Param + pwritev(vector[23]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-24 + Memcheck:Param + pwritev(vector[24]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-25 + Memcheck:Param + pwritev(vector[25]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-26 + Memcheck:Param + pwritev(vector[26]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-27 + Memcheck:Param + pwritev(vector[27]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-28 + Memcheck:Param + pwritev(vector[28]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-29 + Memcheck:Param + pwritev(vector[29]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-30 + Memcheck:Param + pwritev(vector[30]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-31 + Memcheck:Param + pwritev(vector[31]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-32 + Memcheck:Param + pwritev(vector[32]) + fun:pwritev + ... 
+ fun:mdbx_page_flush +} +{ + pwrite-page-flush-33 + Memcheck:Param + pwritev(vector[33]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-34 + Memcheck:Param + pwritev(vector[34]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-35 + Memcheck:Param + pwritev(vector[35]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-36 + Memcheck:Param + pwritev(vector[36]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-37 + Memcheck:Param + pwritev(vector[37]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-38 + Memcheck:Param + pwritev(vector[38]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-39 + Memcheck:Param + pwritev(vector[39]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-40 + Memcheck:Param + pwritev(vector[40]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-41 + Memcheck:Param + pwritev(vector[41]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-42 + Memcheck:Param + pwritev(vector[42]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-43 + Memcheck:Param + pwritev(vector[43]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-44 + Memcheck:Param + pwritev(vector[44]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-45 + Memcheck:Param + pwritev(vector[45]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-46 + Memcheck:Param + pwritev(vector[46]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-47 + Memcheck:Param + pwritev(vector[47]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-48 + Memcheck:Param + pwritev(vector[48]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-49 + Memcheck:Param + pwritev(vector[49]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-50 + Memcheck:Param + pwritev(vector[50]) + fun:pwritev + ... 
+ fun:mdbx_page_flush +} +{ + pwrite-page-flush-51 + Memcheck:Param + pwritev(vector[51]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-52 + Memcheck:Param + pwritev(vector[52]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-53 + Memcheck:Param + pwritev(vector[53]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-54 + Memcheck:Param + pwritev(vector[54]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-55 + Memcheck:Param + pwritev(vector[55]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-56 + Memcheck:Param + pwritev(vector[56]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-57 + Memcheck:Param + pwritev(vector[57]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-58 + Memcheck:Param + pwritev(vector[58]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-59 + Memcheck:Param + pwritev(vector[59]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-60 + Memcheck:Param + pwritev(vector[60]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-61 + Memcheck:Param + pwritev(vector[61]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-62 + Memcheck:Param + pwritev(vector[62]) + fun:pwritev + ... + fun:mdbx_page_flush +} +{ + pwrite-page-flush-63 + Memcheck:Param + pwritev(vector[63]) + fun:pwritev + ... + fun:mdbx_page_flush +} diff --git a/libs/libmdbx/src/version.c b/libs/libmdbx/src/version.c index d6b952761e..53bb45848d 100644 --- a/libs/libmdbx/src/version.c +++ b/libs/libmdbx/src/version.c @@ -4,7 +4,7 @@ #include "src/internals.h" #if MDBX_VERSION_MAJOR != 0 || \ - MDBX_VERSION_MINOR != 7 + MDBX_VERSION_MINOR != 8 #error "API version mismatch! Had `git fetch --tags` done?" 
#endif @@ -24,11 +24,11 @@ __dll_export #endif const mdbx_version_info mdbx_version = { 0, - 7, - 0, - 1981, - {"2020-04-25T11:12:23+03:00", "5c78012e38f306d9601e1f43109c8aecbacb2e14", "ca8fa31c3fe1b4d92278d7a54364f6fb73dbae04", - "v0.7.0-39-gca8fa31"}, + 8, + 1, + 1, + {"2020-06-14T13:32:46+03:00", "968e4f98dd38e2a4e5cdf12c8203aa7b7abb149e", "0c5496d4d50ccedf8a182c01a76a8fbc7430d469", + "v0.8.1-1-g0c5496d"}, sourcery}; __dll_export |