summaryrefslogtreecommitdiff
path: root/libs/libmdbx
diff options
context:
space:
mode:
authorGeorge Hazan <ghazan@miranda.im>2020-01-07 22:56:32 +0300
committerGeorge Hazan <ghazan@miranda.im>2020-01-07 22:56:39 +0300
commit7524acda0a8778572262429f27b359d7ee91fc90 (patch)
tree6443228a439b83c5bc6d15923a9ccd91fa4718e3 /libs/libmdbx
parent62c842df372abe0349f025ac4cbbc8426526ac8b (diff)
libmdbx: update to 0.5.0
Diffstat (limited to 'libs/libmdbx')
-rw-r--r--libs/libmdbx/src/CMakeLists.txt1
-rw-r--r--libs/libmdbx/src/README.md411
-rw-r--r--libs/libmdbx/src/appveyor.yml2
-rw-r--r--libs/libmdbx/src/mdbx.h185
-rw-r--r--libs/libmdbx/src/src/alloy.c5
-rw-r--r--libs/libmdbx/src/src/elements/config.h.in1
-rw-r--r--libs/libmdbx/src/src/elements/core.c1062
-rw-r--r--libs/libmdbx/src/src/elements/internals.h16
-rw-r--r--libs/libmdbx/src/src/elements/lck-posix.c170
-rw-r--r--libs/libmdbx/src/src/elements/lck-windows.c63
-rw-r--r--libs/libmdbx/src/src/elements/osal.c234
-rw-r--r--libs/libmdbx/src/src/elements/osal.h18
-rw-r--r--libs/libmdbx/src/src/elements/version.c36
-rw-r--r--libs/libmdbx/src/src/tools/mdbx_chk.c69
-rw-r--r--libs/libmdbx/src/src/tools/mdbx_stat.c36
-rw-r--r--libs/libmdbx/src/test/CMakeLists.txt36
-rw-r--r--libs/libmdbx/src/test/append.cc5
-rw-r--r--libs/libmdbx/src/test/base.h9
-rw-r--r--libs/libmdbx/src/test/cases.cc13
-rw-r--r--libs/libmdbx/src/test/chrono.cc7
-rw-r--r--libs/libmdbx/src/test/chrono.h3
-rw-r--r--libs/libmdbx/src/test/config.cc159
-rw-r--r--libs/libmdbx/src/test/config.h14
-rw-r--r--libs/libmdbx/src/test/copy.cc2
-rw-r--r--libs/libmdbx/src/test/dead.cc2
-rw-r--r--libs/libmdbx/src/test/hill.cc124
-rw-r--r--libs/libmdbx/src/test/jitter.cc42
-rw-r--r--libs/libmdbx/src/test/keygen.cc105
-rw-r--r--libs/libmdbx/src/test/keygen.h21
-rw-r--r--libs/libmdbx/src/test/log.cc177
-rw-r--r--libs/libmdbx/src/test/log.h45
-rw-r--r--libs/libmdbx/src/test/long_stochastic.sh98
-rw-r--r--libs/libmdbx/src/test/main.cc193
-rw-r--r--libs/libmdbx/src/test/osal-unix.cc434
-rw-r--r--libs/libmdbx/src/test/osal-windows.cc92
-rw-r--r--libs/libmdbx/src/test/osal.h4
-rw-r--r--libs/libmdbx/src/test/pcrf/CMakeLists.txt4
-rw-r--r--libs/libmdbx/src/test/pcrf/pcrf_test.c83
-rw-r--r--libs/libmdbx/src/test/test.cc236
-rw-r--r--libs/libmdbx/src/test/test.h149
-rw-r--r--libs/libmdbx/src/test/test.vcxproj209
-rw-r--r--libs/libmdbx/src/test/try.cc2
-rw-r--r--libs/libmdbx/src/test/ttl.cc41
-rw-r--r--libs/libmdbx/src/test/utils.cc5
-rw-r--r--libs/libmdbx/src/test/utils.h13
45 files changed, 2841 insertions, 1795 deletions
diff --git a/libs/libmdbx/src/CMakeLists.txt b/libs/libmdbx/src/CMakeLists.txt
index 46f827ebc2..3fb33d3f31 100644
--- a/libs/libmdbx/src/CMakeLists.txt
+++ b/libs/libmdbx/src/CMakeLists.txt
@@ -326,6 +326,7 @@ mark_as_advanced(MDBX_LOCKING)
add_mdbx_option(MDBX_TRUST_RTC "Does a system have battery-backed Real-Time Clock or just a fake." AUTO)
mark_as_advanced(MDBX_TRUST_RTC)
option(MDBX_ENABLE_TESTS "Build MDBX tests." ${BUILD_TESTING})
+option(MDBX_FORCE_ASSERTIONS "Force enable assertion checking." OFF)
################################################################################
################################################################################
diff --git a/libs/libmdbx/src/README.md b/libs/libmdbx/src/README.md
index 5c32049f79..e578f893ed 100644
--- a/libs/libmdbx/src/README.md
+++ b/libs/libmdbx/src/README.md
@@ -1,45 +1,71 @@
-### The [repository now only mirrored on the Github](https://abf.io/erthink/libmdbx) due to illegal discriminatory restrictions for Russian Crimea and for sovereign crimeans.
-<!-- Required extensions: pymdownx.betterem, pymdownx.tilde, pymdownx.emoji, pymdownx.tasklist, pymdownx.superfences -->
------
-
libmdbx
-======================================
+=======
_libmdbx_ is an extremely fast, compact, powerful, embedded
-transactional [key-value
-store](https://en.wikipedia.org/wiki/Key-value_database)
-database, with permissive [OpenLDAP Public License](LICENSE).
-_libmdbx_ has a specific set of properties and capabilities,
-focused on creating unique lightweight solutions with
-extraordinary performance.
-
-The next version is under active non-public development and will be
+transactional [key-value store](https://en.wikipedia.org/wiki/Key-value_database)
+database, with [permissive license](LICENSE).
+_MDBX_ has a specific set of properties and capabilities,
+focused on creating unique lightweight solutions with extraordinary performance.
+
+1. Allows **swarm of multi-threaded processes to [ACID](https://en.wikipedia.org/wiki/ACID)ly read and update** several key-value [maps](https://en.wikipedia.org/wiki/Associative_array) and [multimaps](https://en.wikipedia.org/wiki/Multimap) in a locally-shared database.
+
+2. Provides **extraordinary performance**, minimal overhead through [Memory-Mapping](https://en.wikipedia.org/wiki/Memory-mapped_file) and `Olog(N)` operations costs by virtue of [B+ tree](https://en.wikipedia.org/wiki/B%2B_tree).
+
+3. Requires **no maintenance and no crash recovery** since doesn't use [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging), but that might be a caveat for some workloads.
+
+4. **Compact and friendly for full embedding**. Only 25KLOC of `C11`, 64K x86 binary code,
+no internal threads nor processes, but implements a simplified variant of the
+[Berkeley DB](https://en.wikipedia.org/wiki/Berkeley_DB) and
+[dbm](https://en.wikipedia.org/wiki/DBM_(computing)) API.
+
+5. Enforces [serializability](https://en.wikipedia.org/wiki/Serializability) for
+writers just by single
+[mutex](https://en.wikipedia.org/wiki/Mutual_exclusion) and affords
+[wait-free](https://en.wikipedia.org/wiki/Non-blocking_algorithm#Wait-freedom)
+for parallel readers without atomic/interlocked operations, while
+**writing and reading transactions do not block each other**.
+
+6. **Guarantees data integrity** after crash unless this was explicitly
+neglected in favour of write performance.
+
+7. Supports Linux, Windows, MacOS, FreeBSD, DragonFly, Solaris,
+OpenSolaris, OpenIndiana, NetBSD, OpenBSD and other systems compliant with
+**POSIX.1-2008**.
+
+Historically, _MDBX_ is deeply revised and extended descendant of amazing
+[Lightning Memory-Mapped Database](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database).
+_MDBX_ inherits all benefits from _LMDB_, but resolves some issues and adds set of improvements.
+
+The next version is under active non-public development from scratch and will be
released as **_MithrilDB_** and `libmithrildb` for libraries & packages.
Admittedly mythical [Mithril](https://en.wikipedia.org/wiki/Mithril) is
resembling silver but being stronger and lighter than steel. Therefore
_MithrilDB_ is rightly relevant name.
-> _MithrilDB_ will be radically different from _libmdbx_ by the new
-> database format and API based on C++17, as well as the [Apache 2.0
-> License](https://www.apache.org/licenses/LICENSE-2.0). The goal of this
-> revolution is to provide a clearer and robust API, add more features and
-> new valuable properties of database.
-
-*The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо.*
+ > _MithrilDB_ will be radically different from _libmdbx_ by the new
+ > database format and API based on C++17, as well as the [Apache 2.0
+ > License](https://www.apache.org/licenses/LICENSE-2.0). The goal of this
+ > revolution is to provide a clearer and robust API, add more features and
+ > new valuable properties of database.
[![Build Status](https://travis-ci.org/leo-yuriev/libmdbx.svg?branch=master)](https://travis-ci.org/leo-yuriev/libmdbx)
[![Build status](https://ci.appveyor.com/api/projects/status/ue94mlopn50dqiqg/branch/master?svg=true)](https://ci.appveyor.com/project/leo-yuriev/libmdbx/branch/master)
[![Coverity Scan Status](https://scan.coverity.com/projects/12915/badge.svg)](https://scan.coverity.com/projects/reopen-libmdbx)
+*The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет хорошо.*
+
+-----
+
## Table of Contents
- [Overview](#overview)
+ - [Features](#features)
+ - [Limitations](#limitations)
+ - [Caveats & Gotchas](#caveats--gotchas)
- [Comparison with other databases](#comparison-with-other-databases)
+ - [Improvements beyond LMDB](#improvements-beyond-lmdb)
- [History & Acknowledgments](#history)
-- [Description](#description)
- - [Key features](#key-features)
- - [Improvements over LMDB](#improvements-over-lmdb)
- - [Gotchas](#gotchas)
- [Usage](#usage)
- [Building](#building)
+ - [API description](#api-description)
- [Bindings](#bindings)
- [Performance comparison](#performance-comparison)
- [Integral performance](#integral-performance)
@@ -49,202 +75,179 @@ _MithrilDB_ is rightly relevant name.
- [Async-write mode](#async-write-mode)
- [Cost comparison](#cost-comparison)
------
+# Overview
-## Overview
+## Features
-_libmdbx_ is revised and extended descendant of amazing [Lightning
-Memory-Mapped
-Database](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database).
-_libmdbx_ inherits all features and characteristics from
-[LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database),
-but resolves some issues and adds several features.
+- Key-value data model, keys are always sorted.
- - _libmdbx_ guarantee data integrity after crash unless this was explicitly
-neglected in favour of write performance.
+- Fully [ACID](https://en.wikipedia.org/wiki/ACID)-compliant, through to
+[MVCC](https://en.wikipedia.org/wiki/Multiversion_concurrency_control)
+and [CoW](https://en.wikipedia.org/wiki/Copy-on-write).
- - _libmdbx_ allows multiple processes to read and update several key-value
-tables concurrently, while being
-[ACID](https://en.wikipedia.org/wiki/ACID)-compliant, with minimal
-overhead and Olog(N) operation cost.
+- Multiple key-value sub-databases within a single datafile.
- - _libmdbx_ enforce
-[serializability](https://en.wikipedia.org/wiki/Serializability) for
-writers by single
-[mutex](https://en.wikipedia.org/wiki/Mutual_exclusion) and affords
-[wait-free](https://en.wikipedia.org/wiki/Non-blocking_algorithm#Wait-freedom)
-for parallel readers without atomic/interlocked operations, while
-writing and reading transactions do not block each other.
+- Range lookups, including range query estimation.
- - _libmdbx_ uses [B+Trees](https://en.wikipedia.org/wiki/B%2B_tree) and
-[Memory-Mapping](https://en.wikipedia.org/wiki/Memory-mapped_file),
-doesn't use [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging)
-which might be a caveat for some workloads.
+- Efficient support for short fixed length keys, including native 32/64-bit integers.
- - _libmdbx_ implements a simplified variant of the [Berkeley
-DB](https://en.wikipedia.org/wiki/Berkeley_DB) and/or
-[dbm](https://en.wikipedia.org/wiki/DBM_(computing)) API.
+- Ultra-efficient support for [multimaps](https://en.wikipedia.org/wiki/Multimap). Multi-values sorted, searchable and iterable. Keys stored without duplication.
- - _libmdbx_ supports Linux, Windows, MacOS, FreeBSD, DragonFly, Solaris,
-OpenSolaris, OpenIndiana, NetBSD, OpenBSD and other systems compliant with
-POSIX.1-2008.
+- Data is [memory-mapped](https://en.wikipedia.org/wiki/Memory-mapped_file) and accessible directly/zero-copy. Traversal of database records is extremely-fast.
-### Comparison with other databases
-For now please refer to [chapter of "BoltDB comparison with other
-databases"](https://github.com/coreos/bbolt#comparison-with-other-databases)
-which is also (mostly) applicable to _libmdbx_.
+- Transactions for readers and writers, ones do not block others.
-### History
-At first the development was carried out within the
-[ReOpenLDAP](https://github.com/leo-yuriev/ReOpenLDAP) project. About a
-year later _libmdbx_ was separated into standalone project, which was
-[presented at Highload++ 2015
-conference](http://www.highload.ru/2015/abstracts/1831.html).
+- Writes are strongly serialized. No transaction conflicts nor deadlocks.
-Since 2017 _libmdbx_ is used in [Fast Positive Tables](https://github.com/leo-yuriev/libfpta),
-and development is funded by [Positive Technologies](https://www.ptsecurity.com).
+- Readers are [non-blocking](https://en.wikipedia.org/wiki/Non-blocking_algorithm), notwithstanding [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation).
-### Acknowledgments
-Howard Chu <hyc@openldap.org> is the author of LMDB, from which
-originated the MDBX in 2015.
+- Nested write transactions.
-Martin Hedenfalk <martin@bzero.se> is the author of `btree.c` code, which
-was used for begin development of LMDB.
+- Reads scale linearly across CPUs.
------
+- Continuous zero-overhead database compactification.
-Description
-===========
+- Automatic on-the-fly database size adjustment.
-## Key features
+- Customizable database page size.
-1. Key-value pairs are stored in ordered map(s), keys are always sorted,
-range lookups are supported.
+- `Olog(N)` cost of lookup, insert, update, and delete operations by virtue of [B+ tree characteristics](https://en.wikipedia.org/wiki/B%2B_tree#Characteristics).
-2. Data is [memory-mapped](https://en.wikipedia.org/wiki/Memory-mapped_file)
-into each worker DB process, and could be accessed zero-copy from transactions.
+- Online hot backup.
-3. Transactions are
-[ACID](https://en.wikipedia.org/wiki/ACID)-compliant, through to
-[MVCC](https://en.wikipedia.org/wiki/Multiversion_concurrency_control)
-and [CoW](https://en.wikipedia.org/wiki/Copy-on-write). Writes are
-strongly serialized and aren't blocked by reads, transactions can't
-conflict with each other. Reads are guaranteed to get only commited data
-([relaxing serializability](https://en.wikipedia.org/wiki/Serializability#Relaxing_serializability)).
+- Append operation for efficient bulk insertion of pre-sorted data.
+
+- No [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) nor any
+transaction journal. No crash recovery needed. No maintenance is required.
-4. Read transactions are
-[non-blocking](https://en.wikipedia.org/wiki/Non-blocking_algorithm),
-don't use [atomic operations](https://en.wikipedia.org/wiki/Linearizability#High-level_atomic_operations).
-Readers don't block each other and aren't blocked by writers. Read
-performance scales linearly with CPU core count.
- > Nonetheless, "connect to DB" (starting the first read transaction in a thread) and
- > "disconnect from DB" (closing DB or thread termination) requires a lock
- > acquisition to register/unregister at the "readers table".
+- No internal cache and/or memory management, all done by basic OS services.
-5. Keys with multiple values are stored efficiently without key
-duplication, sorted by value, including integers (valuable for
-secondary indexes).
+## Limitations
-6. Efficient operation on short fixed length keys,
-including 32/64-bit integer types.
+- **Page size**: a power of 2, maximum `65536` bytes, default `4096` bytes.
+- **Key size**: minimum 0, maximum ≈¼ pagesize (`1300` bytes for default 4K pagesize, `21780` bytes for 64K pagesize).
+- **Value size**: minimum 0, maximum `2146435072` (`0x7FF00000`) bytes for maps, ≈¼ pagesize for multimaps (`1348` bytes default 4K pagesize, `21828` bytes for 64K pagesize).
+- **Write transaction size**: up to `4194301` (`0x3FFFFD`) pages (16 [GiB](https://en.wikipedia.org/wiki/Gibibyte) for default 4K pagesize, 256 [GiB](https://en.wikipedia.org/wiki/Gibibyte) for 64K pagesize).
+- **Database size**: up to `2147483648` pages (8 [TiB](https://en.wikipedia.org/wiki/Tebibyte) for default 4K pagesize, 128 [TiB](https://en.wikipedia.org/wiki/Tebibyte) for 64K pagesize).
+- **Maximum sub-databases**: `32765`.
-7. [WAF](https://en.wikipedia.org/wiki/Write_amplification) (Write
-Amplification Factor) и RAF (Read Amplification Factor) are Olog(N).
+## Caveats & Gotchas
-8. No [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) and
-transaction journal. In case of a crash no recovery needed. No need for
-regular maintenance. Backups can be made on the fly on working DB
-without freezing writers.
+1. There cannot be more than one writer at a time, i.e. no more than one write transaction at a time.
-9. No additional memory management, all done by basic OS services.
+2. MDBX is based on [B+ tree](https://en.wikipedia.org/wiki/B%2B_tree), so access to database pages is mostly random.
+Thus SSDs provide a significant performance boost over spinning disks for large databases.
+3. MDBX uses [shadow paging](https://en.wikipedia.org/wiki/Shadow_paging) instead of [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging). Thus syncing data to disk might be bottleneck for write intensive workload.
-## Improvements over LMDB
+4. MDBX uses [copy-on-write](https://en.wikipedia.org/wiki/Copy-on-write) for [snapshot isolation](https://en.wikipedia.org/wiki/Snapshot_isolation) during updates, but read transactions prevent recycling of old retired/freed pages, since they still read them. Thus altering of data during a parallel
+long-lived read operation will increase the process work set, may exhaust entire free database space,
+the database can grow quickly, and result in performance degradation.
+Try to avoid long running read transactions.
-_libmdbx_ is superior to _legendary [LMDB](https://symas.com/lmdb/)_ in
+5. MDBX is extraordinarily fast and provides minimal overhead for data access,
+so you should reconsider using brute-force techniques and double-check your code.
+On the one hand, in the case of MDBX, a simple linear search may be more profitable than complex indexes.
+On the other hand, if you do something suboptimally, you may notice the detriment only on sufficiently large data.
+
+### Comparison with other databases
+For now please refer to [chapter of "BoltDB comparison with other
+databases"](https://github.com/coreos/bbolt#comparison-with-other-databases)
+which is also (mostly) applicable to _libmdbx_.
+
+Improvements beyond LMDB
+========================
+
+_libmdbx_ is superior to legendary _[LMDB](https://symas.com/lmdb/)_ in
terms of features and reliability, not inferior in performance. In
-comparison to LMDB, _libmdbx_ make things "just work" perfectly and
+comparison to _LMDB_, _libmdbx_ make things "just work" perfectly and
out-of-the-box, not silently and catastrophically break down. The list
below is pruned down to the improvements most notable and obvious from
the user's point of view.
-1. Automatic on-the-fly database size control by preset parameters, both
-reduction and increment.
+### Added Features:
+
+1. Keys could be more than 2 times longer than _LMDB_.
+ > For DB with default page size _libmdbx_ support keys up to 1300 bytes
+ > and up to 21780 bytes for 64K page size. _LMDB_ allows key size up to
+ > 511 bytes and may silently lose data with large values.
+
+2. Up to 20% faster than _LMDB_ in [CRUD](https://en.wikipedia.org/wiki/Create,_read,_update_and_delete) benchmarks.
+ > Benchmarks of the in-[tmpfs](https://en.wikipedia.org/wiki/Tmpfs) scenarios,
+ > that test the speed of the engine itself, showed that _libmdbx_ is 10-20% faster than _LMDB_.
+ > These and other results could be easily reproduced with [ioArena](https://github.com/pmwkaa/ioarena) just by `make bench-quartet`,
+ > including comparisons with [RocksDB](https://en.wikipedia.org/wiki/RocksDB)
+ > and [WiredTiger](https://en.wikipedia.org/wiki/WiredTiger).
+
+3. Automatic on-the-fly database size adjustment, both increment and reduction.
> _libmdbx_ manage the database size according to parameters specified
> by `mdbx_env_set_geometry()` function,
> ones include the growth step and the truncation threshold.
-2. Automatic continuous zero-overhead database compactification.
- > _libmdbx_ logically move as possible a freed pages
- > at end of allocation area into unallocated space,
- > and then release such space if a lot of.
+4. Automatic continuous zero-overhead database compactification.
+ > During each commit _libmdbx_ merges suitable freeing pages into unallocated area
+ > at the end of file, and then truncates unused space when there is enough of it.
+
+5. The same database format for 32- and 64-bit builds.
+ > _libmdbx_ database format depends only on the [endianness](https://en.wikipedia.org/wiki/Endianness) but not on the [bitness](https://en.wiktionary.org/wiki/bitness).
-3. LIFO policy for recycling a Garbage Collection items. On systems with a disk
-write-back cache, this can significantly increase write performance, up to
-several times in a best case scenario.
- > LIFO means that for reuse pages will be taken which became unused the lastest.
+6. LIFO policy for Garbage Collection recycling. This can significantly increase write performance due to write-back disk cache, up to several times in a best case scenario.
+ > LIFO means that the pages which became unused most recently will be taken for reuse.
> Therefore the loop of database pages circulation becomes as short as possible.
- > In other words, the number of pages, that are overwritten in memory
- > and on disk during a series of write transactions, will be as small as possible.
- > Thus creates ideal conditions for the efficient operation of the disk write-back cache.
+ > In other words, the set of pages, that are (over)written in memory and on disk during a series of write transactions, will be as small as possible.
+ > Thus creates ideal conditions for the battery-backed or flash-backed disk cache efficiency.
-4. Fast estimation of range query result volume, i.e. how many items can
+7. Fast estimation of range query result volume, i.e. how many items can
be found between a `KEY1` and a `KEY2`. This is prerequisite for build
and/or optimize query execution plans.
- > _libmdbx_ performs a rough estimate based only on b-tree pages that
- > are common for the both stacks of cursors that were set to corresponing
- > keys.
+ > _libmdbx_ performs a rough estimate based on common B-tree pages of the paths from root to corresponding keys.
-5. `mdbx_chk` tool for database integrity check.
+8. `mdbx_chk` tool for database integrity check.
-6. Guarantee of database integrity even in asynchronous unordered write-to-disk mode.
- > _libmdbx_ propose additional trade-off by implementing append-like manner for updates
- > in `NOSYNC` and `MAPASYNC` modes, that avoid database corruption after a system crash
- > contrary to LMDB. Nevertheless, the `MDBX_UTTERLY_NOSYNC` mode available to match LMDB behaviour,
- > and for a special use-cases.
+9. Automated steady sync-to-disk upon several thresholds and/or timeout via cheap polling.
+
+10. Sequence generation and three persistent 64-bit markers.
-7. Automated steady flush to disk upon volume of changes and/or by
-timeout via cheap polling.
+11. Callback for lack-of-space condition of database that allows you to control and/or resolve such situations.
-8. Sequence generation and three cheap persistent 64-bit markers with ACID.
+12. Support for opening database in the exclusive mode, including on a network share.
-9. Support for keys and values of zero length, including multi-values
-(aka sorted duplicates).
+### Added Abilities:
-10. The handler of lack-of-space condition with a callback,
-that allow you to control and resolve such situations.
+1. Zero-length for keys and values.
-11. Support for opening a database in the exclusive mode, including on a network share.
+2. Ability to determine whether the particular data is on a dirty page
+or not, that allows to avoid copy-out before updates.
-12. Extended transaction info, including dirty and leftover space info
-for a write transaction, reading lag and hold over space for read
-transactions.
+3. Ability to determine whether the cursor is pointed to a key-value
+pair, to the first, to the last, or not set to anything.
-13. Extended whole-database info (aka environment) and reader enumeration.
+4. Extended information of whole-database, sub-databases, transactions, readers enumeration.
+ > _libmdbx_ provides a lot of information, including dirty and leftover pages
+ > for a write transaction, reading lag and holdover space for read transactions.
-14. Extended update or delete, _at once_ with getting previous value
-and addressing the particular item from multi-value with the same key.
+5. Extended update and delete operations.
+ > _libmdbx_ allows ones _at once_ with getting previous value
+ > and addressing the particular item from multi-value with the same key.
-15. Support for explicitly updating the existing record, not insertion a new one.
+### Other fixes and specifics:
-16. All cursors are uniformly, can be reused and should be closed explicitly,
-regardless ones were opened within write or read transaction.
+1. Fixed more than 10 significant errors, in particular: page leaks, wrong sub-database statistics, segfault in several conditions, suboptimal page merge strategy, updating an existing record with a change in data size (including for multimap), etc.
-17. Correct update of current record with `MDBX_CURRENT` flag when size
-of key or data was changed, including sorted duplicated.
+2. All cursors can be reused and should be closed explicitly, regardless ones were opened within write or read transaction.
-18. Opening database handles is spared from race conditions and
+3. Opening database handles are spared from race conditions and
pre-opening is not needed.
-19. Ability to determine whether the particular data is on a dirty page
-or not, that allows to avoid copy-out before updates.
-
-20. Ability to determine whether the cursor is pointed to a key-value
-pair, to the first, to the last, or not set to anything.
+4. Returning `MDBX_EMULTIVAL` error in case of ambiguous update or delete.
-21. Returning `MDBX_EMULTIVAL` error in case of ambiguous update or delete.
+5. Guarantee of database integrity even in asynchronous unordered write-to-disk mode.
+ > _libmdbx_ propose additional trade-off by implementing append-like manner for updates
+ > in `MDBX_SAFE_NOSYNC` and `MDBX_WRITEMAP|MDBX_MAPASYNC` modes, that avoid database corruption after a system crash
+ > contrary to LMDB. Nevertheless, the `MDBX_UTTERLY_NOSYNC` mode available to match LMDB behaviour,
+ > and for a special use-cases.
-22. On **MacOS** the `fcntl(F_FULLFSYNC)` syscall is used _by
+6. On **MacOS** the `fcntl(F_FULLFSYNC)` syscall is used _by
default_ to synchronize data with the disk, as this is [the only way to
guarantee data
durability](https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fsync.2.html)
@@ -254,59 +257,29 @@ compared to LMDB, where the `fsync()` syscall is used. Therefore,
_libmdbx_ allows you to override this behavior by defining the
`MDBX_OSX_SPEED_INSTEADOF_DURABILITY=1` option while build the library.
-23. On **Windows** the `LockFileEx()` syscall is used for locking, since
+7. On **Windows** the `LockFileEx()` syscall is used for locking, since
it allows place the database on network drives, and provides protection
against incompetent user actions (aka
[poka-yoke](https://en.wikipedia.org/wiki/Poka-yoke)). Therefore
_libmdbx_ may be a little lag in performance tests from LMDB where a
named mutexes are used.
+### History
+At first the development was carried out within the
+[ReOpenLDAP](https://github.com/leo-yuriev/ReOpenLDAP) project. About a
+year later _libmdbx_ was separated into standalone project, which was
+[presented at Highload++ 2015
+conference](http://www.highload.ru/2015/abstracts/1831.html).
+
+Since 2017 _libmdbx_ is used in [Fast Positive Tables](https://github.com/leo-yuriev/libfpta),
+and development is funded by [Positive Technologies](https://www.ptsecurity.com).
-## Gotchas
-
-1. There cannot be more than one writer at a time.
- > On the other hand, this allows serialize an updates and eliminate any
- > possibility of conflicts, deadlocks or logical errors.
-
-2. No [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) means
-relatively big [WAF](https://en.wikipedia.org/wiki/Write_amplification)
-(Write Amplification Factor). Because of this syncing data to disk might
-be quite resource intensive and be main performance bottleneck during
-intensive write workload.
- > As compromise _libmdbx_ allows several modes of lazy and/or periodic
- > syncing, including `MAPASYNC` mode, which modificate data in memory and
- > asynchronously syncs data to disk, moment to sync is picked by OS.
- >
- > Although this should be used with care, synchronous transactions in a DB
- > with transaction journal will require 2 IOPS minimum (probably 3-4 in
- > practice) because of filesystem overhead, overhead depends on
- > filesystem, not on record count or record size. In _libmdbx_ IOPS count
- > will grow logarithmically depending on record count in DB (height of B+
- > tree) and will require at least 2 IOPS per transaction too.
-
-3. [CoW](https://en.wikipedia.org/wiki/Copy-on-write) for
-[MVCC](https://en.wikipedia.org/wiki/Multiversion_concurrency_control)
-is done on memory page level with
-[B+trees](https://ru.wikipedia.org/wiki/B-%D0%B4%D0%B5%D1%80%D0%B5%D0%B2%D0%BE).
-Therefore altering data requires to copy about Olog(N) memory pages,
-which uses [memory bandwidth](https://en.wikipedia.org/wiki/Memory_bandwidth) and is main
-performance bottleneck in `MDBX_MAPASYNC` mode.
- > This is unavoidable, but isn't that bad. Syncing data to disk requires
- > much more similar operations which will be done by OS, therefore this is
- > noticeable only if data sync to persistent storage is fully disabled.
- > _libmdbx_ allows to safely save data to persistent storage with minimal
- > performance overhead. If there is no need to save data to persistent
- > storage then it's much more preferable to use `std::map`.
-
-4. Massive altering of data during a parallel long read operation will
-increase the process work set, may exhaust entire free database space and
-result in subsequent write performance degradation.
- > _libmdbx_ mostly solve this issue by lack-of-space callback and `MDBX_LIFORECLAIM` mode.
- > See [`mdbx.h`](mdbx.h) with API description for details.
- > The "next" version of libmdbx (MithrilDB) will completely solve this.
-
-5. There are no built-in checksums or digests to verify database integrity.
- > The "next" version of _libmdbx_ (MithrilDB) will solve this issue employing [Merkle Tree](https://en.wikipedia.org/wiki/Merkle_tree).
+### Acknowledgments
+Howard Chu <hyc@openldap.org> is the author of LMDB, from which
+originated the MDBX in 2015.
+
+Martin Hedenfalk <martin@bzero.se> is the author of `btree.c` code, which
+was used for begin development of LMDB.
--------------------------------------------------------------------------------
@@ -427,20 +400,23 @@ will need to install the current (not outdated) version of
recommend that you install [Homebrew](https://brew.sh/) and then execute
`brew install bash`.
+## API description
+For more information and API description see the [mdbx.h](mdbx.h) header.
+
## Bindings
| Runtime | GitHub | Author |
| -------- | ------ | ------ |
| Java | [mdbxjni](https://github.com/castortech/mdbxjni) | [Castor Technologies](https://castortech.com/) |
| .NET | [mdbx.NET](https://github.com/wangjia184/mdbx.NET) | [Jerry Wang](https://github.com/wangjia184) |
-
+ | Rust | [mdbx-rs](https://github.com/Kerollmops/mdbx-rs) | [Clément Renault](https://github.com/Kerollmops) |
--------------------------------------------------------------------------------
Performance comparison
======================
-All benchmarks were done by [IOArena](https://github.com/pmwkaa/ioarena)
+All benchmarks were done in 2015 by [IOArena](https://github.com/pmwkaa/ioarena)
and multiple [scripts](https://github.com/pmwkaa/ioarena/tree/HL%2B%2B2015)
runs on Lenovo Carbon-2 laptop, i7-4600U 2.1 GHz, 8 Gb RAM,
SSD SAMSUNG MZNTD512HAGL-000L1 (DXT23L0Q) 512 Gb.
@@ -557,7 +533,7 @@ and after full run the database contains 10,000 small key-value records.
Summary of used resources during lazy-write mode benchmarks:
- - Read and write IOPS;
+ - Read and write IOPs;
- Sum of user CPU time and sys CPU time;
@@ -566,7 +542,7 @@ Summary of used resources during lazy-write mode benchmarks:
compactification, etc).
_ForestDB_ is excluded because benchmark showed it's resource
-consumption for each resource (CPU, IOPS) much higher than other engines
+consumption for each resource (CPU, IOPs) much higher than other engines
which prevents to meaningfully compare it with them.
All benchmark data is gathered by
@@ -577,16 +553,5 @@ syscall and by scanning data directory.
--------------------------------------------------------------------------------
-```
-$ objdump -f -h -j .text libmdbx.so
-
-libmdbx.so: file format elf64-x86-64
-architecture: i386:x86-64, flags 0x00000150:
-HAS_SYMS, DYNAMIC, D_PAGED
-start address 0x0000000000003710
-
-Sections:
-Idx Name Size VMA LMA File off Algn
- 11 .text 00015eff 0000000000003710 0000000000003710 00003710 2**4
- CONTENTS, ALLOC, LOAD, READONLY, CODE
-```
+### The [repository now only mirrored on the Github](https://abf.io/erthink/libmdbx) due to illegal discriminatory restrictions for Russian Crimea and for sovereign crimeans.
+<!-- Required extensions: pymdownx.betterem, pymdownx.tilde, pymdownx.emoji, pymdownx.tasklist, pymdownx.superfences -->
diff --git a/libs/libmdbx/src/appveyor.yml b/libs/libmdbx/src/appveyor.yml
index f42224d582..0cf8f8633f 100644
--- a/libs/libmdbx/src/appveyor.yml
+++ b/libs/libmdbx/src/appveyor.yml
@@ -1,4 +1,4 @@
-version: 0.4.0.{build}
+version: 0.5.0.{build}
environment:
matrix:
diff --git a/libs/libmdbx/src/mdbx.h b/libs/libmdbx/src/mdbx.h
index dcbe608b29..a522501a65 100644
--- a/libs/libmdbx/src/mdbx.h
+++ b/libs/libmdbx/src/mdbx.h
@@ -1,4 +1,4 @@
-/**** BRIEFLY ******************************************************************
+/**** BRIEFLY ******************************************************************
*
* libmdbx is superior to LMDB (https://bit.ly/26ts7tL) in terms of features
* and reliability, not inferior in performance. In comparison to LMDB, libmdbx
@@ -55,10 +55,10 @@
* transaction logs or append-only data writes, MDBX requires no maintenance
* during operation. Both write-ahead loggers and append-only databases require
* periodic checkpointing and/or compaction of their log or database files
- * otherwise they grow without bound. MDBX tracks free pages within the database
- * and re-uses them for new write operations, so the database size does not grow
- * without bound in normal use. It is worth noting that the "next" version
- * libmdbx (MithrilDB) will solve this problem.
+ * otherwise they grow without bound. MDBX tracks retired/freed pages within the
+ * database and re-uses them for new write operations, so the database size does
+ * not grow without bound in normal use. It is worth noting that the "next"
+ * version libmdbx (MithrilDB) will solve this problem.
*
* The memory map can be used as a read-only or read-write map. It is read-only
* by default as this provides total immunity to corruption. Using read-write
@@ -403,17 +403,16 @@
* the lock was restored - we have to wait until such a process releases the
* database, and so on.
*
- * - Avoid long-lived transactions, especially in the scenarios with a high
- * rate of write transactions. Read transactions prevent reuse of pages
- * freed by newer write transactions, thus the database can grow quickly.
- * Write transactions prevent other write transactions, since writes are
- * serialized.
+ * - Avoid long-lived read transactions, especially in the scenarios with a
+ * high rate of write transactions. Long-lived read transactions prevent
+ * recycling pages retired/freed by newer write transactions, thus the
+ * database can grow quickly.
*
* Understanding the problem of long-lived read transactions requires some
* explanation, but can be difficult for quick perception. So is is
* reasonable to simplify this as follows:
* 1. Garbage collection problem exists in all databases one way or
- * another, e.g. VACUUM in PostgreSQL. But in _libmdbx_ it's even more
+ * another, e.g. VACUUM in PostgreSQL. But in MDBX it's even more
* discernible because of high transaction rate and intentional
* internals simplification in favor of performance.
*
@@ -644,7 +643,7 @@ typedef pthread_t mdbx_tid_t;
/*----------------------------------------------------------------------------*/
#define MDBX_VERSION_MAJOR 0
-#define MDBX_VERSION_MINOR 4
+#define MDBX_VERSION_MINOR 5
#ifndef LIBMDBX_API
#if defined(LIBMDBX_EXPORTS)
@@ -826,7 +825,9 @@ typedef struct iovec MDBX_val;
#define MDBX_DBG_ASSERT 1 /* Enable assertion checks */
#define MDBX_DBG_AUDIT 2 /* Enable pages usage audit at commit transactions */
#define MDBX_DBG_JITTER 4 /* Enable small random delays in critical points */
-#define MDBX_DBG_DUMP 8 /* Include or not database(s) in coredump files */
+#define MDBX_DBG_DUMP /* Include or not meta-pages in coredump files, MAY \
+ affect performance in MDBX_WRITEMAP mode */ \
+ 8
#define MDBX_DBG_LEGACY_MULTIOPEN 16 /* Enable multi-opening environment(s) */
/* A debug-logger callback function,
@@ -1076,11 +1077,11 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
* conditions for the efficient operation of the disk write-back cache.
*
* MDBX_LIFORECLAIM is compatible with all no-sync flags (i.e. MDBX_NOMETASYNC,
- * MDBX_NOSYNC, MDBX_UTTERLY_NOSYNC, MDBX_MAPASYNC), but gives no noticeable
- * impact in combination with MDB_NOSYNC and MDX_MAPASYNC. Because MDBX will
- * not reused paged from the last "steady" MVCC-snapshot and later, i.e. the
- * loop length of database pages circulation will be mostly defined by frequency
- * of calling mdbx_env_sync() rather than LIFO and FIFO difference.
+ * MDBX_SAFE_NOSYNC, MDBX_UTTERLY_NOSYNC, MDBX_MAPASYNC), but gives no
+ * noticeable impact in combination with MDBX_SAFE_NOSYNC. Because MDBX will
+ * reuse pages only before the last "steady" MVCC-snapshot, i.e. the loop
+ * length of database pages circulation will be mostly defined by frequency of
+ * calling mdbx_env_sync() rather than LIFO and FIFO difference.
*
* This flag may be changed at any time using mdbx_env_set_flags(). */
#define MDBX_LIFORECLAIM 0x4000000u
@@ -1089,12 +1090,12 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
#define MDBX_PAGEPERTURB 0x8000000u
/**** SYNC MODES ***************************************************************
- * (!!!) Using any combination of MDBX_NOSYNC, MDBX_NOMETASYNC, MDBX_MAPASYNC
- * and especially MDBX_UTTERLY_NOSYNC is always a deal to reduce durability
- * for gain write performance. You must know exactly what you are doing and
- * what risks you are taking!
+ * (!!!) Using any combination of MDBX_SAFE_NOSYNC, MDBX_NOMETASYNC,
+ * MDBX_MAPASYNC and especially MDBX_UTTERLY_NOSYNC is always a deal to reduce
+ * durability for gain write performance. You must know exactly what you are
+ * doing and what risks you are taking!
*
- * NOTE for LMDB users: MDBX_NOSYNC is NOT similar to LMDB_NOSYNC, but
+ * NOTE for LMDB users: MDBX_SAFE_NOSYNC is NOT similar to LMDB_NOSYNC, but
* MDBX_UTTERLY_NOSYNC is exactly match LMDB_NOSYNC.
* See details below.
*
@@ -1184,24 +1185,25 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
* for particular write transaction.
*
*
- * MDBX_NOSYNC = don't sync anything but keep previous steady commits.
+ * MDBX_SAFE_NOSYNC = don't sync anything but keep previous steady commits.
*
- * Like MDBX_UTTERLY_NOSYNC the MDBX_NOSYNC flag similarly disable flush
- * system buffers to disk when committing a transaction. But there is a
- * huge difference in how are recycled the MVCC snapshots corresponding
- * to previous "steady" transactions (see below).
+ * Like MDBX_UTTERLY_NOSYNC the MDBX_SAFE_NOSYNC flag similarly disable
+ * flush system buffers to disk when committing a transaction. But there
+ * is a huge difference in how are recycled the MVCC snapshots
+ * corresponding to previous "steady" transactions (see below).
*
- * Depending on the platform and hardware, with MDBX_NOSYNC you may get
- * a multiple increase of write performance, even 10 times or more.
- * NOTE that (MDBX_NOSYNC | MDBX_WRITEMAP) leaves the system with no hint
- * for when to write transactions to disk. Therefore the (MDBX_MAPASYNC |
- * MDBX_WRITEMAP) may be preferable, but without MDBX_NOSYNC because
- * the (MDBX_MAPASYNC | MDBX_NOSYNC) actually gives MDBX_UTTERLY_NOSYNC.
+ * Depending on the platform and hardware, with MDBX_SAFE_NOSYNC you may
+ * get a multiple increase of write performance, even 10 times or more.
+ * NOTE that (MDBX_SAFE_NOSYNC | MDBX_WRITEMAP) leaves the system with no
+ * hint for when to write transactions to disk. Therefore the
+ * (MDBX_MAPASYNC | MDBX_WRITEMAP) may be preferable, but without
+ * MDBX_SAFE_NOSYNC because the (MDBX_MAPASYNC | MDBX_SAFE_NOSYNC) actually
+ * gives MDBX_UTTERLY_NOSYNC.
*
- * In contrast to MDBX_UTTERLY_NOSYNC mode, with MDBX_NOSYNC flag MDBX will
- * keeps untouched pages within B-tree of the last transaction "steady"
- * which was synced to disk completely. This has big implications for both
- * data durability and (unfortunately) performance:
+ * In contrast to MDBX_UTTERLY_NOSYNC mode, with MDBX_SAFE_NOSYNC flag MDBX
+ * will keep untouched pages within B-tree of the last transaction
+ * "steady" which was synced to disk completely. This has big implications
+ * for both data durability and (unfortunately) performance:
* - a system crash can't corrupt the database, but you will lose the
* last transactions; because MDBX will rollback to last steady commit
* since it kept explicitly.
@@ -1214,22 +1216,22 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
* insufficient space and before increasing the size of the file on
* disk.
*
- * In other words, with MDBX_NOSYNC flag MDBX insures you from the whole
- * database corruption, at the cost increasing database size and/or number
- * of disk IOPS. So, MDBX_NOSYNC flag could be used with mdbx_env_synv()
- * as alternatively for batch committing or nested transaction (in some
- * cases). As well, auto-sync feature exposed by mdbx_env_set_syncbytes()
- * and mdbx_env_set_syncperiod() functions could be very usefull with
- * MDBX_NOSYNC flag.
- *
- * The number and volume of of disk IOPS with MDBX_NOSYNC flag will
- * exactly the as without any no-sync flags. However, you should expect
- * a larger process's work set (https://bit.ly/2kA2tFX) and significantly
- * worse a locality of reference (https://bit.ly/2mbYq2J), due to the
- * more intensive allocation of previously unused pages and increase the
- * size of the database.
- *
- * MDBX_NOSYNC flag may be changed at any time using
+ * In other words, with MDBX_SAFE_NOSYNC flag MDBX insures you from the
+ * whole database corruption, at the cost increasing database size and/or
+ * number of disk IOPS. So, MDBX_SAFE_NOSYNC flag could be used with
+ * mdbx_env_sync() as alternatively for batch committing or nested
+ * transaction (in some cases). As well, auto-sync feature exposed by
+ * mdbx_env_set_syncbytes() and mdbx_env_set_syncperiod() functions could
+ * be very useful with MDBX_SAFE_NOSYNC flag.
+ *
+ * The number and volume of disk IOPS with MDBX_SAFE_NOSYNC flag will be
+ * exactly as without any no-sync flags. However, you should expect a
+ * larger process's work set (https://bit.ly/2kA2tFX) and significantly
+ * worse a locality of reference (https://bit.ly/2mbYq2J), due to the more
+ * intensive allocation of previously unused pages and increase the size of
+ * the database.
+ *
+ * MDBX_SAFE_NOSYNC flag may be changed at any time using
* mdbx_env_set_flags() or by passing to mdbx_txn_begin() for particular
* write transaction.
*
@@ -1237,12 +1239,13 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
* MDBX_MAPASYNC = use asynchronous msync when MDBX_WRITEMAP is used.
*
* MDBX_MAPASYNC meaningful and give effect only in conjunction
- * with MDBX_WRITEMAP or MDBX_NOSYNC:
- * - with MDBX_NOSYNC actually gives MDBX_UTTERLY_NOSYNC, which
+ * with MDBX_WRITEMAP or MDBX_SAFE_NOSYNC:
+ * - with MDBX_SAFE_NOSYNC actually gives MDBX_UTTERLY_NOSYNC, which
* wipe previous steady commits for reuse pages as described above.
- * - with MDBX_WRITEMAP but without MDBX_NOSYNC instructs MDBX to use
+ * - with MDBX_WRITEMAP but without MDBX_SAFE_NOSYNC instructs MDBX to use
* asynchronous mmap-flushes to disk as described below.
- * - with both MDBX_WRITEMAP and MDBX_NOSYNC you get the both effects.
+ * - with both MDBX_WRITEMAP and MDBX_SAFE_NOSYNC you get the both
+ * effects.
*
* Asynchronous mmap-flushes means that actually all writes will scheduled
* and performed by operation system on it own manner, i.e. unordered.
@@ -1250,10 +1253,10 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
* data to disk, but no more.
*
* With MDBX_MAPASYNC flag, but without MDBX_UTTERLY_NOSYNC (i.e. without
- * OR'ing with MDBX_NOSYNC) MDBX will keeps untouched pages within B-tree
- * of the last transaction "steady" which was synced to disk completely.
- * So, this makes exactly the same "long-lived" impact and the same
- * consequences as described above for MDBX_NOSYNC flag.
+ * OR'ing with MDBX_SAFE_NOSYNC) MDBX will keep untouched pages within
+ * B-tree of the last transaction "steady" which was synced to disk
+ * completely. So, this makes exactly the same "long-lived" impact and the
+ * same consequences as described above for MDBX_SAFE_NOSYNC flag.
*
* Depending on the platform and hardware, with combination of
* MDBX_WRITEMAP and MDBX_MAPASYNC you may get a multiple increase of write
@@ -1271,18 +1274,18 @@ LIBMDBX_API const char *mdbx_dump_val(const MDBX_val *key, char *const buf,
*
* (!) don't combine this flag with MDBX_MAPASYNC
* since you will got MDBX_UTTERLY_NOSYNC in that way (see below) */
-#define MDBX_NOSYNC 0x10000u
+#define MDBX_SAFE_NOSYNC 0x10000u
/* Use asynchronous msync when MDBX_WRITEMAP is used,
* see description in the "SYNC MODES" section above.
*
- * (!) don't combine this flag with MDBX_NOSYNC
+ * (!) don't combine this flag with MDBX_SAFE_NOSYNC
* since you will got MDBX_UTTERLY_NOSYNC in that way (see below) */
#define MDBX_MAPASYNC 0x100000u
/* Don't sync anything and wipe previous steady commits,
* see description in the "SYNC MODES" section above. */
-#define MDBX_UTTERLY_NOSYNC (MDBX_NOSYNC | MDBX_MAPASYNC)
+#define MDBX_UTTERLY_NOSYNC (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)
/**** DATABASE FLAGS **********************************************************/
/* Use reverse string keys */
@@ -1519,14 +1522,14 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* MDBX_NORDAHEAD, MDBX_NOMEMINIT, MDBX_COALESCE, MDBX_LIFORECLAIM.
* See "ENVIRONMENT FLAGS" section above.
*
- * - MDBX_NOMETASYNC, MDBX_NOSYNC, MDBX_UTTERLY_NOSYNC, MDBX_MAPASYNC.
+ * - MDBX_NOMETASYNC, MDBX_SAFE_NOSYNC, MDBX_UTTERLY_NOSYNC, MDBX_MAPASYNC.
* See "SYNC MODES" section above.
*
* NOTE: MDB_NOLOCK flag don't supported by MDBX,
* try use MDBX_EXCLUSIVE as a replacement.
*
* NOTE: MDBX don't allow to mix processes with different MDBX_WRITEMAP,
- * MDBX_NOSYNC, MDBX_NOMETASYNC, MDBX_MAPASYNC flags onthe same
+ * MDBX_SAFE_NOSYNC, MDBX_NOMETASYNC, MDBX_MAPASYNC flags on the same
* environment. In such case MDBX_INCOMPATIBLE will be returned.
*
* If the database is already exist and parameters specified early by
@@ -1552,7 +1555,7 @@ LIBMDBX_API int mdbx_env_create(MDBX_env **penv);
* more than once.
* - MDBX_INCOMPATIBLE = Environment is already opened by another process,
* but with different set of MDBX_WRITEMAP,
- * MDBX_NOSYNC, MDBX_NOMETASYNC, MDBX_MAPASYNC
+ * MDBX_SAFE_NOSYNC, MDBX_NOMETASYNC, MDBX_MAPASYNC
* flags.
* Or if the database is already exist and
* parameters specified early by
@@ -1727,7 +1730,7 @@ __deprecated LIBMDBX_API int mdbx_env_info(MDBX_env *env, MDBX_envinfo *info,
/* Flush the environment data buffers to disk.
*
* Unless the environment was opened with no-sync flags (MDBX_NOMETASYNC,
- * MDBX_NOSYNC, MDBX_UTTERLY_NOSYNC and MDBX_MAPASYNC), then data is always
+ * MDBX_SAFE_NOSYNC, MDBX_UTTERLY_NOSYNC and MDBX_MAPASYNC), then data is always
* written an flushed to disk when mdbx_txn_commit() is called. Otherwise
* mdbx_env_sync() may be called to manually write and flush unsynced data to
* disk.
@@ -1763,14 +1766,15 @@ LIBMDBX_API int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock);
LIBMDBX_API int mdbx_env_sync(MDBX_env *env);
LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env);
-/* Sets threshold to force flush the data buffers to disk, even of MDBX_NOSYNC,
- * MDBX_NOMETASYNC and MDBX_MAPASYNC flags in the environment. The threshold
- * value affects all processes which operates with given environment until the
- * last process close environment or a new value will be settled.
+/* Sets threshold to force flush the data buffers to disk, even of
+ * MDBX_SAFE_NOSYNC, MDBX_NOMETASYNC and MDBX_MAPASYNC flags in the environment.
+ * The threshold value affects all processes which operates with given
+ * environment until the last process close environment or a new value will be
+ * settled.
*
* Data is always written to disk when mdbx_txn_commit() is called, but the
* operating system may keep it buffered. MDBX always flushes the OS buffers
- * upon commit as well, unless the environment was opened with MDBX_NOSYNC,
+ * upon commit as well, unless the environment was opened with MDBX_SAFE_NOSYNC,
* MDBX_MAPASYNC or in part MDBX_NOMETASYNC.
*
* The default is 0, than mean no any threshold checked, and no additional
@@ -1784,14 +1788,14 @@ LIBMDBX_API int mdbx_env_sync_poll(MDBX_env *env);
LIBMDBX_API int mdbx_env_set_syncbytes(MDBX_env *env, size_t threshold);
/* Sets relative period since the last unsteay commit to force flush the data
- * buffers to disk, even of MDBX_NOSYNC, MDBX_NOMETASYNC and MDBX_MAPASYNC flags
- * in the environment. The relative period value affects all processes which
- * operates with given environment until the last process close environment or a
- * new value will be settled.
+ * buffers to disk, even of MDBX_SAFE_NOSYNC, MDBX_NOMETASYNC and MDBX_MAPASYNC
+ * flags in the environment. The relative period value affects all processes
+ * which operates with given environment until the last process close
+ * environment or a new value will be settled.
*
* Data is always written to disk when mdbx_txn_commit() is called, but the
* operating system may keep it buffered. MDBX always flushes the OS buffers
- * upon commit as well, unless the environment was opened with MDBX_NOSYNC,
+ * upon commit as well, unless the environment was opened with MDBX_SAFE_NOSYNC,
* MDBX_MAPASYNC or in part MDBX_NOMETASYNC.
*
* Settled period don't checked asynchronously, but only by the
@@ -2223,9 +2227,9 @@ LIBMDBX_API void *mdbx_env_get_userctx(MDBX_env *env);
* - MDBX_TRYTXN
* Do not block when starting a write transaction.
*
- * - MDBX_NOSYNC, MDBX_NOMETASYNC or MDBX_MAPASYNC
+ * - MDBX_SAFE_NOSYNC, MDBX_NOMETASYNC or MDBX_MAPASYNC
* Do not sync data to disk corresponding to MDBX_NOMETASYNC
- * or MDBX_NOSYNC description (see abobe).
+ * or MDBX_SAFE_NOSYNC description (see above).
*
* [out] txn Address where the new MDBX_txn handle will be stored
*
@@ -2299,7 +2303,7 @@ typedef struct MDBX_txn_info {
* [in] txn A transaction handle returned by mdbx_txn_begin().
* [out] stat The address of an MDBX_txn_info structure
* where the information will be copied.
- * [in[ scan_rlt The boolean flag controls the scan of the read lock table to
+ * [in] scan_rlt The boolean flag controls the scan of the read lock table to
* provide complete information. Such scan is relatively
* expensive and you can avoid it if corresponding fields are
* not needed (see description of MDBX_txn_info above).
@@ -3517,16 +3521,10 @@ LIBMDBX_API int mdbx_get_attr(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
#endif /* MDBX_NEXENTA_ATTRS */
/*******************************************************************************
- * LY: temporary workaround for Elbrus's memcmp() bug. */
-#ifndef __GLIBC_PREREQ
-#if defined(__GLIBC__) && defined(__GLIBC_MINOR__)
-#define __GLIBC_PREREQ(maj, min) \
- ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min))
-#else
-#define __GLIBC_PREREQ(maj, min) (0)
-#endif
-#endif /* __GLIBC_PREREQ */
-#if defined(__e2k__) && !__GLIBC_PREREQ(2, 24)
+ * Workaround for mmaped-lookahead-cross-page-boundary bug
+ * in obsolete versions of Elbrus's libc and kernels. */
+#if defined(__e2k__) && defined(MDBX_E2K_MLHCPB_WORKAROUND) && \
+ MDBX_E2K_MLHCPB_WORKAROUND
LIBMDBX_API int mdbx_e2k_memcmp_bug_workaround(const void *s1, const void *s2,
size_t n);
LIBMDBX_API int mdbx_e2k_strcmp_bug_workaround(const char *s1, const char *s2);
@@ -3549,8 +3547,7 @@ LIBMDBX_API size_t mdbx_e2k_strnlen_bug_workaround(const char *s,
#define strlen mdbx_e2k_strlen_bug_workaround
#undef strnlen
#define strnlen mdbx_e2k_strnlen_bug_workaround
-
-#endif /* Elbrus's memcmp() bug. */
+#endif /* MDBX_E2K_MLHCPB_WORKAROUND */
#ifdef __cplusplus
}
diff --git a/libs/libmdbx/src/src/alloy.c b/libs/libmdbx/src/src/alloy.c
index 98f3aac3a9..c40446732b 100644
--- a/libs/libmdbx/src/src/alloy.c
+++ b/libs/libmdbx/src/src/alloy.c
@@ -11,9 +11,8 @@
* top-level directory of the distribution or, alternatively, at
* <http://www.OpenLDAP.org/license.html>. */
-/* Amalgamated build */
-#define MDBX_ALLOY 1
-#include "elements/internals.h" /* must be included fisrt */
+#define MDBX_ALLOY 1 /* amalgamated build */
+#include "elements/internals.h" /* must be included first */
#include "elements/core.c"
#include "elements/osal.c"
diff --git a/libs/libmdbx/src/src/elements/config.h.in b/libs/libmdbx/src/src/elements/config.h.in
index 1079899d9a..b8776d98a2 100644
--- a/libs/libmdbx/src/src/elements/config.h.in
+++ b/libs/libmdbx/src/src/elements/config.h.in
@@ -12,6 +12,7 @@
#cmakedefine ENABLE_GPROF
#cmakedefine ENABLE_GCOV
#cmakedefine ENABLE_ASAN
+#cmakedefine MDBX_FORCE_ASSERTIONS
/* Common */
#cmakedefine01 MDBX_TXN_CHECKOWNER
diff --git a/libs/libmdbx/src/src/elements/core.c b/libs/libmdbx/src/src/elements/core.c
index b45db5178b..b5a27ddccf 100644
--- a/libs/libmdbx/src/src/elements/core.c
+++ b/libs/libmdbx/src/src/elements/core.c
@@ -40,12 +40,12 @@
/*------------------------------------------------------------------------------
* Internal inlines */
-static __pure_function __inline bool is_powerof2(size_t x) {
+static __pure_function __always_inline bool is_powerof2(size_t x) {
return (x & (x - 1)) == 0;
}
-static __pure_function __inline size_t roundup_powerof2(size_t value,
- size_t granularity) {
+static __pure_function __always_inline size_t
+roundup_powerof2(size_t value, size_t granularity) {
assert(is_powerof2(granularity));
return (value + granularity - 1) & ~(granularity - 1);
}
@@ -70,22 +70,26 @@ static __pure_function unsigned log2n(size_t value) {
/*------------------------------------------------------------------------------
* Unaligned access */
-static __pure_function __maybe_unused __inline unsigned
+static __pure_function __maybe_unused __always_inline unsigned
field_alignment(unsigned alignment_baseline, size_t field_offset) {
unsigned merge = alignment_baseline | (unsigned)field_offset;
return merge & -(int)merge;
}
/* read-thunk for UB-sanitizer */
-static __pure_function __inline uint8_t peek_u8(const uint8_t *ptr) {
+static __pure_function __always_inline uint8_t
+peek_u8(const uint8_t *const __restrict ptr) {
return *ptr;
}
/* write-thunk for UB-sanitizer */
-static __inline void poke_u8(uint8_t *ptr, const uint8_t v) { *ptr = v; }
+static __always_inline void poke_u8(uint8_t *const __restrict ptr,
+ const uint8_t v) {
+ *ptr = v;
+}
-static __pure_function __inline uint16_t
-unaligned_peek_u16(const unsigned expected_alignment, const void *ptr) {
+static __pure_function __always_inline uint16_t
+unaligned_peek_u16(const unsigned expected_alignment, const void *const ptr) {
assert((uintptr_t)ptr % expected_alignment == 0);
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint16_t)) == 0)
return *(const uint16_t *)ptr;
@@ -96,8 +100,9 @@ unaligned_peek_u16(const unsigned expected_alignment, const void *ptr) {
}
}
-static __inline void unaligned_poke_u16(const unsigned expected_alignment,
- void *ptr, const uint16_t v) {
+static __always_inline void
+unaligned_poke_u16(const unsigned expected_alignment,
+ void *const __restrict ptr, const uint16_t v) {
assert((uintptr_t)ptr % expected_alignment == 0);
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
*(uint16_t *)ptr = v;
@@ -105,8 +110,8 @@ static __inline void unaligned_poke_u16(const unsigned expected_alignment,
memcpy(ptr, &v, sizeof(v));
}
-static __pure_function __inline uint32_t
-unaligned_peek_u32(const unsigned expected_alignment, const void *ptr) {
+static __pure_function __always_inline uint32_t unaligned_peek_u32(
+ const unsigned expected_alignment, const void *const __restrict ptr) {
assert((uintptr_t)ptr % expected_alignment == 0);
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint32_t)) == 0)
return *(const uint32_t *)ptr;
@@ -123,8 +128,9 @@ unaligned_peek_u32(const unsigned expected_alignment, const void *ptr) {
}
}
-static __inline void unaligned_poke_u32(const unsigned expected_alignment,
- void *ptr, const uint32_t v) {
+static __always_inline void
+unaligned_poke_u32(const unsigned expected_alignment,
+ void *const __restrict ptr, const uint32_t v) {
assert((uintptr_t)ptr % expected_alignment == 0);
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
*(uint32_t *)ptr = v;
@@ -136,8 +142,8 @@ static __inline void unaligned_poke_u32(const unsigned expected_alignment,
memcpy(ptr, &v, sizeof(v));
}
-static __pure_function __inline uint64_t
-unaligned_peek_u64(const unsigned expected_alignment, const void *ptr) {
+static __pure_function __always_inline uint64_t unaligned_peek_u64(
+ const unsigned expected_alignment, const void *const __restrict ptr) {
assert((uintptr_t)ptr % expected_alignment == 0);
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint64_t)) == 0)
return *(const uint64_t *)ptr;
@@ -154,8 +160,9 @@ unaligned_peek_u64(const unsigned expected_alignment, const void *ptr) {
}
}
-static __inline void unaligned_poke_u64(const unsigned expected_alignment,
- void *ptr, const uint64_t v) {
+static __always_inline void
+unaligned_poke_u64(const unsigned expected_alignment,
+ void *const __restrict ptr, const uint64_t v) {
assert((uintptr_t)ptr % expected_alignment == 0);
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
*(uint64_t *)ptr = v;
@@ -188,7 +195,8 @@ static __inline void unaligned_poke_u64(const unsigned expected_alignment,
unaligned_poke_u64(1, (char *)(ptr) + offsetof(struct, field), value)
/* Get the page number pointed to by a branch node */
-static __pure_function __inline pgno_t node_pgno(const MDBX_node *node) {
+static __pure_function __always_inline pgno_t
+node_pgno(const MDBX_node *const __restrict node) {
pgno_t pgno = UNALIGNED_PEEK_32(node, MDBX_node, mn_pgno32);
if (sizeof(pgno) > 4)
pgno |= ((uint64_t)UNALIGNED_PEEK_8(node, MDBX_node, mn_extra)) << 32;
@@ -196,7 +204,8 @@ static __pure_function __inline pgno_t node_pgno(const MDBX_node *node) {
}
/* Set the page number in a branch node */
-static __inline void node_set_pgno(MDBX_node *node, pgno_t pgno) {
+static __always_inline void node_set_pgno(MDBX_node *const __restrict node,
+ pgno_t pgno) {
assert(pgno >= MIN_PAGENO && pgno <= MAX_PAGENO);
UNALIGNED_POKE_32(node, MDBX_node, mn_pgno32, (uint32_t)pgno);
@@ -206,32 +215,38 @@ static __inline void node_set_pgno(MDBX_node *node, pgno_t pgno) {
}
/* Get the size of the data in a leaf node */
-static __pure_function __inline size_t node_ds(const MDBX_node *node) {
+static __pure_function __always_inline size_t
+node_ds(const MDBX_node *const __restrict node) {
return UNALIGNED_PEEK_32(node, MDBX_node, mn_dsize);
}
/* Set the size of the data for a leaf node */
-static __inline void node_set_ds(MDBX_node *node, size_t size) {
+static __always_inline void node_set_ds(MDBX_node *const __restrict node,
+ size_t size) {
assert(size < INT_MAX);
UNALIGNED_POKE_32(node, MDBX_node, mn_dsize, (uint32_t)size);
}
/* The size of a key in a node */
-static __pure_function __inline size_t node_ks(const MDBX_node *node) {
+static __pure_function __always_inline size_t
+node_ks(const MDBX_node *const __restrict node) {
return UNALIGNED_PEEK_16(node, MDBX_node, mn_ksize);
}
/* Set the size of the key for a leaf node */
-static __inline void node_set_ks(MDBX_node *node, size_t size) {
+static __always_inline void node_set_ks(MDBX_node *const __restrict node,
+ size_t size) {
assert(size < INT16_MAX);
UNALIGNED_POKE_16(node, MDBX_node, mn_ksize, (uint16_t)size);
}
-static __pure_function __inline uint8_t node_flags(const MDBX_node *node) {
+static __pure_function __always_inline uint8_t
+node_flags(const MDBX_node *const __restrict node) {
return UNALIGNED_PEEK_8(node, MDBX_node, mn_flags);
}
-static __inline void node_set_flags(MDBX_node *node, uint8_t flags) {
+static __always_inline void node_set_flags(MDBX_node *const __restrict node,
+ uint8_t flags) {
UNALIGNED_POKE_8(node, MDBX_node, mn_flags, flags);
}
@@ -239,27 +254,30 @@ static __inline void node_set_flags(MDBX_node *node, uint8_t flags) {
#define NODESIZE offsetof(MDBX_node, mn_data)
/* Address of the key for the node */
-static __pure_function __inline void *node_key(const MDBX_node *node) {
+static __pure_function __always_inline void *
+node_key(const MDBX_node *const __restrict node) {
return (char *)node + NODESIZE;
}
/* Address of the data for a node */
-static __pure_function __inline void *node_data(const MDBX_node *node) {
+static __pure_function __always_inline void *
+node_data(const MDBX_node *const __restrict node) {
return (char *)node_key(node) + node_ks(node);
}
/* Size of a node in a leaf page with a given key and data.
* This is node header plus key plus data size. */
-static __pure_function __inline size_t node_size_len(const size_t key_len,
- const size_t value_len) {
+static __pure_function __always_inline size_t
+node_size_len(const size_t key_len, const size_t value_len) {
return NODESIZE + EVEN(key_len + value_len);
}
-static __pure_function __inline size_t node_size(const MDBX_val *key,
- const MDBX_val *value) {
+static __pure_function __always_inline size_t node_size(const MDBX_val *key,
+ const MDBX_val *value) {
return node_size_len(key ? key->iov_len : 0, value ? value->iov_len : 0);
}
-static __pure_function __inline pgno_t peek_pgno(const void *ptr) {
+static __pure_function __always_inline pgno_t
+peek_pgno(const void *const __restrict ptr) {
if (sizeof(pgno_t) == sizeof(uint32_t))
return (pgno_t)unaligned_peek_u32(1, ptr);
else if (sizeof(pgno_t) == sizeof(uint64_t))
@@ -271,7 +289,8 @@ static __pure_function __inline pgno_t peek_pgno(const void *ptr) {
}
}
-static __inline void poke_pgno(void *ptr, const pgno_t pgno) {
+static __always_inline void poke_pgno(void *const __restrict ptr,
+ const pgno_t pgno) {
if (sizeof(pgno) == sizeof(uint32_t))
unaligned_poke_u32(1, ptr, pgno);
else if (sizeof(pgno) == sizeof(uint64_t))
@@ -280,8 +299,8 @@ static __inline void poke_pgno(void *ptr, const pgno_t pgno) {
memcpy(ptr, &pgno, sizeof(pgno));
}
-static __pure_function __inline pgno_t
-node_largedata_pgno(const MDBX_node *node) {
+static __pure_function __always_inline pgno_t
+node_largedata_pgno(const MDBX_node *const __restrict node) {
assert(node_flags(node) & F_BIGDATA);
return peek_pgno(node_data(node));
}
@@ -399,8 +418,9 @@ __cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, unsigned flags) {
* size will only include the key and not the data. Sizes are always
* rounded up to an even number of bytes, to guarantee 2-byte alignment
* of the MDBX_node headers. */
-static __pure_function __inline size_t
-leaf_size(const MDBX_env *env, const MDBX_val *key, const MDBX_val *data) {
+static __pure_function __always_inline size_t leaf_size(const MDBX_env *env,
+ const MDBX_val *key,
+ const MDBX_val *data) {
size_t node_bytes = node_size(key, data);
/* NOTE: The actual limit is LEAF_NODEMAX(env->me_psize), but it reasonable to
* use env->me_branch_nodemax (which is 3 times less) as the treshold because:
@@ -442,8 +462,8 @@ leaf_size(const MDBX_env *env, const MDBX_val *key, const MDBX_val *data) {
* [in] key The key for the node.
*
* Returns The number of bytes needed to store the node. */
-static __pure_function __inline size_t branch_size(const MDBX_env *env,
- const MDBX_val *key) {
+static __pure_function __always_inline size_t branch_size(const MDBX_env *env,
+ const MDBX_val *key) {
/* Size of a node in a branch page with a given key.
* This is just the node header plus the key, there is no data. */
size_t node_bytes = node_size(key, nullptr);
@@ -460,69 +480,72 @@ static __pure_function __inline size_t branch_size(const MDBX_env *env,
/*----------------------------------------------------------------------------*/
-static __pure_function __inline size_t pgno2bytes(const MDBX_env *env,
- pgno_t pgno) {
+static __pure_function __always_inline size_t pgno2bytes(const MDBX_env *env,
+ pgno_t pgno) {
mdbx_assert(env, (1u << env->me_psize2log) == env->me_psize);
return ((size_t)pgno) << env->me_psize2log;
}
-static __pure_function __inline MDBX_page *pgno2page(const MDBX_env *env,
- pgno_t pgno) {
+static __pure_function __always_inline MDBX_page *pgno2page(const MDBX_env *env,
+ pgno_t pgno) {
return (MDBX_page *)(env->me_map + pgno2bytes(env, pgno));
}
-static __pure_function __inline pgno_t bytes2pgno(const MDBX_env *env,
- size_t bytes) {
+static __pure_function __always_inline pgno_t bytes2pgno(const MDBX_env *env,
+ size_t bytes) {
mdbx_assert(env, (env->me_psize >> env->me_psize2log) == 1);
return (pgno_t)(bytes >> env->me_psize2log);
}
-static __pure_function __inline size_t pgno_align2os_bytes(const MDBX_env *env,
- pgno_t pgno) {
+static __pure_function size_t pgno_align2os_bytes(const MDBX_env *env,
+ pgno_t pgno) {
return roundup_powerof2(pgno2bytes(env, pgno), env->me_os_psize);
}
-static __pure_function __inline pgno_t pgno_align2os_pgno(const MDBX_env *env,
- pgno_t pgno) {
+static __pure_function pgno_t pgno_align2os_pgno(const MDBX_env *env,
+ pgno_t pgno) {
return bytes2pgno(env, pgno_align2os_bytes(env, pgno));
}
-static __pure_function __inline size_t bytes_align2os_bytes(const MDBX_env *env,
- size_t bytes) {
+static __pure_function size_t bytes_align2os_bytes(const MDBX_env *env,
+ size_t bytes) {
return roundup_powerof2(roundup_powerof2(bytes, env->me_psize),
env->me_os_psize);
}
/* Address of first usable data byte in a page, after the header */
-static __pure_function __inline void *page_data(const MDBX_page *mp) {
+static __pure_function __always_inline void *page_data(const MDBX_page *mp) {
return (char *)mp + PAGEHDRSZ;
}
-static __pure_function __inline const MDBX_page *data_page(const void *data) {
+static __pure_function __always_inline const MDBX_page *
+data_page(const void *data) {
return container_of(data, MDBX_page, mp_ptrs);
}
-static __pure_function __inline MDBX_meta *page_meta(MDBX_page *mp) {
+static __pure_function __always_inline MDBX_meta *page_meta(MDBX_page *mp) {
return (MDBX_meta *)page_data(mp);
}
/* Number of nodes on a page */
-static __pure_function __inline unsigned page_numkeys(const MDBX_page *mp) {
+static __pure_function __always_inline unsigned
+page_numkeys(const MDBX_page *mp) {
return mp->mp_lower >> 1;
}
/* The amount of space remaining in the page */
-static __pure_function __inline unsigned page_room(const MDBX_page *mp) {
+static __pure_function __always_inline unsigned page_room(const MDBX_page *mp) {
return mp->mp_upper - mp->mp_lower;
}
-static __pure_function __inline unsigned page_space(const MDBX_env *env) {
+static __pure_function __always_inline unsigned
+page_space(const MDBX_env *env) {
STATIC_ASSERT(PAGEHDRSZ % 2 == 0);
return env->me_psize - PAGEHDRSZ;
}
-static __pure_function __inline unsigned page_used(const MDBX_env *env,
- const MDBX_page *mp) {
+static __pure_function __always_inline unsigned page_used(const MDBX_env *env,
+ const MDBX_page *mp) {
return page_space(env) - page_room(mp);
}
@@ -540,14 +563,14 @@ page_fill_enough(const MDBX_page *mp, unsigned spaceleft_threshold,
}
/* The number of overflow pages needed to store the given size. */
-static __pure_function __inline pgno_t number_of_ovpages(const MDBX_env *env,
- size_t bytes) {
+static __pure_function __always_inline pgno_t
+number_of_ovpages(const MDBX_env *env, size_t bytes) {
return bytes2pgno(env, PAGEHDRSZ - 1 + bytes) + 1;
}
/* Address of node i in page p */
-static __pure_function __inline MDBX_node *page_node(const MDBX_page *mp,
- unsigned i) {
+static __pure_function __always_inline MDBX_node *page_node(const MDBX_page *mp,
+ unsigned i) {
assert((mp->mp_flags & (P_LEAF2 | P_OVERFLOW | P_META)) == 0);
assert(page_numkeys(mp) > (unsigned)(i));
assert(mp->mp_ptrs[i] % 2 == 0);
@@ -557,7 +580,7 @@ static __pure_function __inline MDBX_node *page_node(const MDBX_page *mp,
/* The address of a key in a LEAF2 page.
* LEAF2 pages are used for MDBX_DUPFIXED sorted-duplicate sub-DBs.
* There are no node headers, keys are stored contiguously. */
-static __pure_function __inline void *
+static __pure_function __always_inline void *
page_leaf2key(const MDBX_page *mp, unsigned i, size_t keysize) {
assert(mp->mp_leaf2_ksize == keysize);
(void)keysize;
@@ -565,22 +588,23 @@ page_leaf2key(const MDBX_page *mp, unsigned i, size_t keysize) {
}
/* Set the node's key into keyptr. */
-static __inline void get_key(const MDBX_node *node, MDBX_val *keyptr) {
+static __always_inline void get_key(const MDBX_node *node, MDBX_val *keyptr) {
keyptr->iov_len = node_ks(node);
keyptr->iov_base = node_key(node);
}
/* Set the node's key into keyptr, if requested. */
-static __inline void get_key_optional(const MDBX_node *node,
- MDBX_val *keyptr /* __may_null */) {
+static __always_inline void
+get_key_optional(const MDBX_node *node, MDBX_val *keyptr /* __may_null */) {
if (keyptr)
get_key(node, keyptr);
}
/*------------------------------------------------------------------------------
- * LY: temporary workaround for Elbrus's memcmp() bug. */
-
-#if defined(__e2k__) && !__GLIBC_PREREQ(2, 24)
+ * Workaround for mmaped-lookahead-cross-page-boundary bug
+ * in obsolete versions of Elbrus's libc and kernels. */
+#if defined(__e2k__) && defined(MDBX_E2K_MLHCPB_WORKAROUND) && \
+ MDBX_E2K_MLHCPB_WORKAROUND
int __hot mdbx_e2k_memcmp_bug_workaround(const void *s1, const void *s2,
size_t n) {
if (unlikely(n > 42
@@ -688,12 +712,12 @@ size_t __hot mdbx_e2k_strnlen_bug_workaround(const char *s, size_t maxlen) {
}
return n;
}
-#endif /* Elbrus's memcmp() bug. */
+#endif /* MDBX_E2K_MLHCPB_WORKAROUND */
/*------------------------------------------------------------------------------
* safe read/write volatile 64-bit fields on 32-bit architectures. */
-static __inline void atomic_yield(void) {
+static __always_inline void atomic_yield(void) {
#if defined(_WIN32) || defined(_WIN64)
YieldProcessor();
#elif defined(__x86_64__) || defined(__i386__) || defined(__e2k__)
@@ -723,8 +747,8 @@ static __inline void atomic_yield(void) {
}
#if MDBX_64BIT_CAS
-static __inline bool atomic_cas64(volatile uint64_t *p, uint64_t c,
- uint64_t v) {
+static __always_inline bool atomic_cas64(volatile uint64_t *p, uint64_t c,
+ uint64_t v) {
#if defined(ATOMIC_VAR_INIT) || defined(ATOMIC_LLONG_LOCK_FREE)
STATIC_ASSERT(sizeof(long long) >= sizeof(uint64_t));
STATIC_ASSERT(atomic_is_lock_free(p));
@@ -742,8 +766,8 @@ static __inline bool atomic_cas64(volatile uint64_t *p, uint64_t c,
}
#endif /* MDBX_64BIT_CAS */
-static __inline bool atomic_cas32(volatile uint32_t *p, uint32_t c,
- uint32_t v) {
+static __always_inline bool atomic_cas32(volatile uint32_t *p, uint32_t c,
+ uint32_t v) {
#if defined(ATOMIC_VAR_INIT) || defined(ATOMIC_INT_LOCK_FREE)
STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t));
STATIC_ASSERT(atomic_is_lock_free(p));
@@ -760,7 +784,7 @@ static __inline bool atomic_cas32(volatile uint32_t *p, uint32_t c,
#endif
}
-static __inline uint32_t atomic_add32(volatile uint32_t *p, uint32_t v) {
+static __always_inline uint32_t atomic_add32(volatile uint32_t *p, uint32_t v) {
#if defined(ATOMIC_VAR_INIT) || defined(ATOMIC_INT_LOCK_FREE)
STATIC_ASSERT(sizeof(int) >= sizeof(uint32_t));
STATIC_ASSERT(atomic_is_lock_free(p));
@@ -779,7 +803,7 @@ static __inline uint32_t atomic_add32(volatile uint32_t *p, uint32_t v) {
#define atomic_sub32(p, v) atomic_add32(p, 0 - (v))
-static __maybe_unused __inline bool safe64_is_valid(uint64_t v) {
+static __maybe_unused __always_inline bool safe64_is_valid(uint64_t v) {
#if MDBX_WORDBITS >= 64
return v < SAFE64_INVALID_THRESHOLD;
#else
@@ -787,7 +811,7 @@ static __maybe_unused __inline bool safe64_is_valid(uint64_t v) {
#endif /* MDBX_WORDBITS */
}
-static __maybe_unused __inline bool
+static __maybe_unused __always_inline bool
safe64_is_valid_ptr(const mdbx_safe64_t *ptr) {
mdbx_compiler_barrier();
#if MDBX_64BIT_ATOMIC
@@ -797,7 +821,7 @@ safe64_is_valid_ptr(const mdbx_safe64_t *ptr) {
#endif /* MDBX_64BIT_ATOMIC */
}
-static __inline uint64_t safe64_txnid_next(uint64_t txnid) {
+static __always_inline uint64_t safe64_txnid_next(uint64_t txnid) {
txnid += MDBX_TXNID_STEP;
#if !MDBX_64BIT_CAS
/* avoid overflow of low-part in safe64_reset() */
@@ -806,7 +830,8 @@ static __inline uint64_t safe64_txnid_next(uint64_t txnid) {
return txnid;
}
-static __inline void safe64_reset(mdbx_safe64_t *ptr, bool single_writer) {
+static __always_inline void safe64_reset(mdbx_safe64_t *ptr,
+ bool single_writer) {
mdbx_compiler_barrier();
#if !MDBX_64BIT_CAS
if (!single_writer) {
@@ -824,14 +849,15 @@ static __inline void safe64_reset(mdbx_safe64_t *ptr, bool single_writer) {
ptr->atomic = UINT64_MAX;
#else
/* atomically make value >= SAFE64_INVALID_THRESHOLD */
- ptr->high = ptr->low = UINT32_MAX;
+ ptr->high = UINT32_MAX;
#endif /* MDBX_64BIT_ATOMIC */
assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true);
}
-static __inline bool safe64_reset_compare(mdbx_safe64_t *ptr, txnid_t compare) {
+static __always_inline bool safe64_reset_compare(mdbx_safe64_t *ptr,
+ txnid_t compare) {
mdbx_compiler_barrier();
/* LY: This function is used to reset `mr_txnid` from OOM-kick in case of
* the asynchronous cancellation of a read transaction. Therefore,
@@ -859,7 +885,7 @@ static __inline bool safe64_reset_compare(mdbx_safe64_t *ptr, txnid_t compare) {
return rc;
}
-static __inline void safe64_write(mdbx_safe64_t *ptr, const uint64_t v) {
+static __always_inline void safe64_write(mdbx_safe64_t *ptr, const uint64_t v) {
mdbx_compiler_barrier();
assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
#if MDBX_64BIT_ATOMIC
@@ -867,7 +893,7 @@ static __inline void safe64_write(mdbx_safe64_t *ptr, const uint64_t v) {
#else /* MDBX_64BIT_ATOMIC */
/* update low-part but still value >= SAFE64_INVALID_THRESHOLD */
ptr->low = (uint32_t)v;
- // assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
+ assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true);
/* update high-part from SAFE64_INVALID_THRESHOLD to actual value */
@@ -900,7 +926,8 @@ static __always_inline uint64_t safe64_read(const mdbx_safe64_t *ptr) {
return v;
}
-static __inline void safe64_update(mdbx_safe64_t *ptr, const uint64_t v) {
+static __always_inline void safe64_update(mdbx_safe64_t *ptr,
+ const uint64_t v) {
safe64_reset(ptr, true);
safe64_write(ptr, v);
}
@@ -1604,7 +1631,7 @@ static int lcklist_detach_locked(MDBX_env *env) {
/*----------------------------------------------------------------------------*/
-static __inline size_t pnl2bytes(const size_t size) {
+static __always_inline size_t pnl2bytes(const size_t size) {
assert(size > 0 && size <= MDBX_PNL_MAX * 2);
size_t bytes = roundup_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD +
sizeof(pgno_t) * (size + 2),
@@ -1613,7 +1640,7 @@ static __inline size_t pnl2bytes(const size_t size) {
return bytes;
}
-static __inline pgno_t bytes2pnl(const size_t bytes) {
+static __always_inline pgno_t bytes2pnl(const size_t bytes) {
size_t size = bytes / sizeof(pgno_t);
assert(size > 2 && size <= MDBX_PNL_MAX * 2);
return (pgno_t)size - 2;
@@ -1688,8 +1715,8 @@ static int mdbx_pnl_reserve(MDBX_PNL *ppl, const size_t wanna) {
}
/* Make room for num additional elements in an PNL */
-static __inline int __must_check_result mdbx_pnl_need(MDBX_PNL *ppl,
- size_t num) {
+static __always_inline int __must_check_result mdbx_pnl_need(MDBX_PNL *ppl,
+ size_t num) {
assert(MDBX_PNL_SIZE(*ppl) <= MDBX_PNL_MAX &&
MDBX_PNL_ALLOCLEN(*ppl) >= MDBX_PNL_SIZE(*ppl));
assert(num <= MDBX_PNL_MAX);
@@ -1699,7 +1726,7 @@ static __inline int __must_check_result mdbx_pnl_need(MDBX_PNL *ppl,
: mdbx_pnl_reserve(ppl, wanna);
}
-static __inline void mdbx_pnl_xappend(MDBX_PNL pl, pgno_t pgno) {
+static __always_inline void mdbx_pnl_xappend(MDBX_PNL pl, pgno_t pgno) {
assert(MDBX_PNL_SIZE(pl) < MDBX_PNL_ALLOCLEN(pl));
if (mdbx_audit_enabled()) {
for (unsigned i = MDBX_PNL_SIZE(pl); i > 0; --i)
@@ -1789,7 +1816,7 @@ static __hot int __must_check_result mdbx_pnl_insert_range(MDBX_PNL *ppl,
return MDBX_SUCCESS;
}
-static bool __hot mdbx_pnl_check(const MDBX_PNL pl, const pgno_t limit) {
+static bool mdbx_pnl_check(const MDBX_PNL pl, const pgno_t limit) {
assert(limit >= MIN_PAGENO && limit <= MAX_PAGENO + 1);
if (likely(MDBX_PNL_SIZE(pl))) {
assert(MDBX_PNL_LEAST(pl) >= MIN_PAGENO);
@@ -1812,8 +1839,8 @@ static bool __hot mdbx_pnl_check(const MDBX_PNL pl, const pgno_t limit) {
return true;
}
-static __inline bool mdbx_pnl_check4assert(const MDBX_PNL pl,
- const pgno_t limit) {
+static __always_inline bool mdbx_pnl_check4assert(const MDBX_PNL pl,
+ const pgno_t limit) {
if (unlikely(pl == nullptr))
return true;
assert(MDBX_PNL_ALLOCLEN(pl) >= MDBX_PNL_SIZE(pl));
@@ -1871,7 +1898,7 @@ static __hot unsigned mdbx_pnl_exist(MDBX_PNL pnl, pgno_t id) {
/*----------------------------------------------------------------------------*/
-static __inline size_t txl2bytes(const size_t size) {
+static __always_inline size_t txl2bytes(const size_t size) {
assert(size > 0 && size <= MDBX_TXL_MAX * 2);
size_t bytes = roundup_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD +
sizeof(txnid_t) * (size + 2),
@@ -1880,7 +1907,7 @@ static __inline size_t txl2bytes(const size_t size) {
return bytes;
}
-static __inline size_t bytes2txl(const size_t bytes) {
+static __always_inline size_t bytes2txl(const size_t bytes) {
size_t size = bytes / sizeof(txnid_t);
assert(size > 2 && size <= MDBX_TXL_MAX * 2);
return size - 2;
@@ -1933,8 +1960,8 @@ static int mdbx_txl_reserve(MDBX_TXL *ptl, const size_t wanna) {
return MDBX_ENOMEM;
}
-static __inline int __must_check_result mdbx_txl_need(MDBX_TXL *ptl,
- size_t num) {
+static __always_inline int __must_check_result mdbx_txl_need(MDBX_TXL *ptl,
+ size_t num) {
assert(MDBX_PNL_SIZE(*ptl) <= MDBX_TXL_MAX &&
MDBX_PNL_ALLOCLEN(*ptl) >= MDBX_PNL_SIZE(*ptl));
assert(num <= MDBX_PNL_MAX);
@@ -1944,7 +1971,7 @@ static __inline int __must_check_result mdbx_txl_need(MDBX_TXL *ptl,
: mdbx_txl_reserve(ptl, wanna);
}
-static __inline void mdbx_txl_xappend(MDBX_TXL tl, txnid_t id) {
+static __always_inline void mdbx_txl_xappend(MDBX_TXL tl, txnid_t id) {
assert(MDBX_PNL_SIZE(tl) < MDBX_PNL_ALLOCLEN(tl));
MDBX_PNL_SIZE(tl) += 1;
MDBX_PNL_LAST(tl) = id;
@@ -1970,7 +1997,7 @@ static int __must_check_result mdbx_txl_append(MDBX_TXL *ptl, txnid_t id) {
#define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno)
SORT_IMPL(dp_sort, MDBX_DP, DP_SORT_CMP)
-static __inline MDBX_DPL mdbx_dpl_sort(MDBX_DPL dl) {
+static __always_inline MDBX_DPL mdbx_dpl_sort(MDBX_DPL dl) {
assert(dl->length <= MDBX_DPL_TXNFULL);
assert(dl->sorted <= dl->length);
if (dl->sorted != dl->length) {
@@ -2043,7 +2070,7 @@ static unsigned __hot mdbx_dpl_search(MDBX_DPL dl, pgno_t pgno) {
return (unsigned)(it - dl);
}
-static __inline MDBX_page *mdbx_dpl_find(MDBX_DPL dl, pgno_t pgno) {
+static __always_inline MDBX_page *mdbx_dpl_find(MDBX_DPL dl, pgno_t pgno) {
const unsigned i = mdbx_dpl_search(dl, pgno);
assert((int)i > 0);
return (i <= dl->length && dl[i].pgno == pgno) ? dl[i].ptr : nullptr;
@@ -2065,9 +2092,8 @@ static __hot MDBX_page *mdbx_dpl_remove(MDBX_DPL dl, pgno_t prno) {
return mp;
}
-static __inline int __must_check_result mdbx_dpl_append(MDBX_DPL dl,
- pgno_t pgno,
- MDBX_page *page) {
+static __always_inline int __must_check_result
+mdbx_dpl_append(MDBX_DPL dl, pgno_t pgno, MDBX_page *page) {
assert(dl->length <= MDBX_DPL_TXNFULL);
if (mdbx_audit_enabled()) {
for (unsigned i = dl->length; i > 0; --i) {
@@ -2090,7 +2116,7 @@ static __inline int __must_check_result mdbx_dpl_append(MDBX_DPL dl,
return MDBX_SUCCESS;
}
-static __inline void mdbx_dpl_clear(MDBX_DPL dl) {
+static __always_inline void mdbx_dpl_clear(MDBX_DPL dl) {
dl->sorted = dl->length = 0;
}
@@ -2196,7 +2222,7 @@ static int __must_check_result mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp);
static int __must_check_result mdbx_audit_ex(MDBX_txn *txn,
unsigned retired_stored,
bool dont_filter_gc);
-static __maybe_unused __inline int __must_check_result
+static __maybe_unused __always_inline int __must_check_result
mdbx_audit(MDBX_txn *txn) {
return mdbx_audit_ex(txn, 0, (txn->mt_flags & MDBX_RDONLY) != 0);
}
@@ -2664,7 +2690,7 @@ static void mdbx_dlist_free(MDBX_txn *txn) {
mdbx_dpl_clear(dl);
}
-static __inline MDBX_db *mdbx_outer_db(MDBX_cursor *mc) {
+static __always_inline MDBX_db *mdbx_outer_db(MDBX_cursor *mc) {
mdbx_cassert(mc, (mc->mc_flags & C_SUB) != 0);
MDBX_xcursor *mx = container_of(mc->mc_db, MDBX_xcursor, mx_db);
MDBX_cursor_couple *couple = container_of(mx, MDBX_cursor_couple, inner);
@@ -2823,7 +2849,7 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
most -= 1;
}
const unsigned refunded = txn->mt_next_pgno - most;
- mdbx_verbose("refund-sorted %u pages %" PRIaPGNO " -> %" PRIaPGNO,
+ mdbx_verbose("refund-suitable %u pages %" PRIaPGNO " -> %" PRIaPGNO,
refunded, most, txn->mt_next_pgno);
txn->tw.loose_count -= refunded;
txn->tw.dirtyroom += refunded;
@@ -2863,7 +2889,7 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
while (dl->length && dl[dl->length].pgno == txn->mt_next_pgno - 1 &&
dl[dl->length].ptr->mp_flags == (P_LOOSE | P_DIRTY)) {
MDBX_page *dp = dl[dl->length].ptr;
- mdbx_verbose("refund-unsorted page %" PRIaPGNO, dp->mp_pgno);
+ mdbx_verbose("refund-sorted page %" PRIaPGNO, dp->mp_pgno);
mdbx_tassert(txn, dp->mp_pgno == dl[dl->length].pgno);
dl->length -= 1;
}
@@ -2932,10 +2958,10 @@ static __cold void mdbx_kill_page(MDBX_env *env, MDBX_page *mp, pgno_t pgno,
mdbx_assert(env, pgno >= NUM_METAS && npages);
if (IS_DIRTY(mp) || (env->me_flags & MDBX_WRITEMAP)) {
const size_t bytes = pgno2bytes(env, npages);
- memset(mp, 0, bytes);
+ memset(mp, -1, bytes);
mp->mp_pgno = pgno;
if ((env->me_flags & MDBX_WRITEMAP) == 0)
- mdbx_pwrite(env->me_fd, mp, bytes, pgno2bytes(env, pgno));
+ mdbx_pwrite(env->me_lazy_fd, mp, bytes, pgno2bytes(env, pgno));
} else {
struct iovec iov[MDBX_COMMIT_PAGES];
iov[0].iov_len = env->me_psize;
@@ -2945,13 +2971,13 @@ static __cold void mdbx_kill_page(MDBX_env *env, MDBX_page *mp, pgno_t pgno,
while (--npages) {
iov[n] = iov[0];
if (++n == MDBX_COMMIT_PAGES) {
- mdbx_pwritev(env->me_fd, iov, MDBX_COMMIT_PAGES, iov_off,
+ mdbx_pwritev(env->me_lazy_fd, iov, MDBX_COMMIT_PAGES, iov_off,
pgno2bytes(env, MDBX_COMMIT_PAGES));
iov_off += pgno2bytes(env, MDBX_COMMIT_PAGES);
n = 0;
}
}
- mdbx_pwritev(env->me_fd, iov, n, iov_off, pgno2bytes(env, n));
+ mdbx_pwritev(env->me_lazy_fd, iov, n, iov_off, pgno2bytes(env, n));
}
}
@@ -2966,10 +2992,11 @@ static __cold void mdbx_kill_page(MDBX_env *env, MDBX_page *mp, pgno_t pgno,
* If the page wasn't dirtied in this txn, just add it
* to this txn's free list. */
-static __hot int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
+static int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
const pgno_t pgno = mp->mp_pgno;
+ mp->mp_txnid = INVALID_TXNID;
if (txn->mt_parent) {
mdbx_tassert(txn, (txn->mt_env->me_flags & MDBX_WRITEMAP) == 0);
mdbx_tassert(txn, mp != pgno2page(txn->mt_env, pgno));
@@ -3046,7 +3073,7 @@ static __hot int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
return MDBX_SUCCESS;
}
-static __hot int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) {
+static int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) {
const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
const pgno_t pgno = mp->mp_pgno;
MDBX_txn *const txn = mc->mc_txn;
@@ -3095,8 +3122,8 @@ static __hot int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) {
return rc;
}
-static __must_check_result __inline int mdbx_retire_pgno(MDBX_cursor *mc,
- const pgno_t pgno) {
+static __must_check_result __always_inline int
+mdbx_retire_pgno(MDBX_cursor *mc, const pgno_t pgno) {
MDBX_page *mp;
int rc = mdbx_page_get(mc, pgno, &mp, NULL);
if (likely(rc == MDBX_SUCCESS))
@@ -3296,7 +3323,7 @@ bailout:
/*----------------------------------------------------------------------------*/
-static __inline bool meta_bootid_match(const MDBX_meta *meta) {
+static __always_inline bool meta_bootid_match(const MDBX_meta *meta) {
return meta->mm_bootid.x == bootid.x && meta->mm_bootid.y == bootid.y &&
(bootid.x | bootid.y) != 0;
}
@@ -3458,15 +3485,15 @@ mdbx_meta_mostrecent(const enum meta_choise_mode mode, const MDBX_env *env) {
return head;
}
-static __hot MDBX_meta *mdbx_meta_steady(const MDBX_env *env) {
+static MDBX_meta *mdbx_meta_steady(const MDBX_env *env) {
return mdbx_meta_mostrecent(prefer_steady, env);
}
-static __hot MDBX_meta *mdbx_meta_head(const MDBX_env *env) {
+static MDBX_meta *mdbx_meta_head(const MDBX_env *env) {
return mdbx_meta_mostrecent(prefer_last, env);
}
-static __hot txnid_t mdbx_recent_committed_txnid(const MDBX_env *env) {
+static txnid_t mdbx_recent_committed_txnid(const MDBX_env *env) {
while (true) {
const MDBX_meta *head = mdbx_meta_head(env);
const txnid_t recent = mdbx_meta_txnid_fluid(env, head);
@@ -3477,7 +3504,7 @@ static __hot txnid_t mdbx_recent_committed_txnid(const MDBX_env *env) {
}
}
-static __hot txnid_t mdbx_recent_steady_txnid(const MDBX_env *env) {
+static txnid_t mdbx_recent_steady_txnid(const MDBX_env *env) {
while (true) {
const MDBX_meta *head = mdbx_meta_steady(env);
const txnid_t recent = mdbx_meta_txnid_fluid(env, head);
@@ -3571,6 +3598,8 @@ static __cold pgno_t mdbx_find_largest(MDBX_env *env, pgno_t largest) {
/* Add a page to the txn's dirty list */
static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
+ mp->mp_txnid = INVALID_TXNID;
+ mp->mp_flags |= P_DIRTY;
const int rc = mdbx_dpl_append(txn->tw.dirtylist, mp->mp_pgno, mp);
if (unlikely(rc != MDBX_SUCCESS)) {
txn->mt_flags |= MDBX_TXN_ERROR;
@@ -3584,7 +3613,7 @@ static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
}
#if !(defined(_WIN32) || defined(_WIN64))
-static __inline __maybe_unused int ignore_enosys(int err) {
+static __always_inline __maybe_unused int ignore_enosys(int err) {
#ifdef ENOSYS
if (err == ENOSYS)
return MDBX_RESULT_TRUE;
@@ -3619,7 +3648,7 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
bytes2pgno(env, offset), bytes2pgno(env, offset + length));
#if defined(F_RDAHEAD)
- if (unlikely(fcntl(env->me_fd, F_RDAHEAD, enable) == -1))
+ if (unlikely(fcntl(env->me_lazy_fd, F_RDAHEAD, enable) == -1))
return errno;
#endif /* F_RDAHEAD */
@@ -3629,7 +3658,7 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
hint.ra_offset = offset;
hint.ra_count = length;
(void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl(
- env->me_fd, F_RDADVISE, &hint);
+ env->me_lazy_fd, F_RDADVISE, &hint);
#endif /* F_RDADVISE */
#if defined(MADV_WILLNEED)
int err = madvise(env->me_map + offset, length, MADV_WILLNEED)
@@ -3651,7 +3680,7 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
}
#elif defined(POSIX_FADV_WILLNEED)
int err = ignore_enosys(
- posix_fadvise(env->me_fd, offset, length, POSIX_FADV_WILLNEED));
+ posix_fadvise(env->me_lazy_fd, offset, length, POSIX_FADV_WILLNEED));
if (unlikely(MDBX_IS_ERROR(err)))
return err;
#endif /* MADV_WILLNEED */
@@ -3669,7 +3698,7 @@ static int __cold mdbx_set_readahead(MDBX_env *env, const size_t offset,
return err;
#elif defined(POSIX_FADV_RANDOM)
int err = ignore_enosys(
- posix_fadvise(env->me_fd, offset, length, POSIX_FADV_RANDOM));
+ posix_fadvise(env->me_lazy_fd, offset, length, POSIX_FADV_RANDOM));
if (unlikely(MDBX_IS_ERROR(err)))
return err;
#endif /* MADV_RANDOM */
@@ -3767,8 +3796,9 @@ static __cold int mdbx_mapresize(MDBX_env *env, const pgno_t used_pgno,
POSIX_MADV_DONTNEED));
#elif defined(POSIX_FADV_DONTNEED)
if (rc == MDBX_RESULT_TRUE)
- rc = ignore_enosys(posix_fadvise(
- env->me_fd, size_bytes, prev_size - size_bytes, POSIX_FADV_DONTNEED));
+ rc = ignore_enosys(posix_fadvise(env->me_lazy_fd, size_bytes,
+ prev_size - size_bytes,
+ POSIX_FADV_DONTNEED));
#endif /* MADV_DONTNEED */
if (unlikely(MDBX_IS_ERROR(rc)))
goto bailout;
@@ -3866,7 +3896,7 @@ static int mdbx_meta_unsteady(MDBX_env *env, const txnid_t last_steady,
if (env->me_flags & MDBX_WRITEMAP)
meta->mm_datasync_sign = wipe;
else
- return mdbx_pwrite(env->me_fd, &wipe, sizeof(meta->mm_datasync_sign),
+ return mdbx_pwrite(env->me_lazy_fd, &wipe, sizeof(meta->mm_datasync_sign),
(uint8_t *)&meta->mm_datasync_sign - env->me_map);
}
return MDBX_SUCCESS;
@@ -3885,20 +3915,27 @@ __cold static int mdbx_wipe_steady(MDBX_env *env, const txnid_t last_steady) {
if (env->me_flags & MDBX_WRITEMAP) {
mdbx_flush_incoherent_cpu_writeback();
- return mdbx_msync(&env->me_dxb_mmap, 0, pgno2bytes(env, NUM_METAS), false);
- }
-
-#if defined(__linux__) || defined(__gnu_linux__)
- if (sync_file_range(env->me_fd, 0, pgno2bytes(env, NUM_METAS),
- SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER))
- err = errno;
+ err = mdbx_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS),
+ false);
+ if (unlikely(err != MDBX_SUCCESS))
+ return err;
+ } else {
+#if (defined(__linux__) || defined(__gnu_linux__)) && !defined(MDBX_SAFE4QEMU)
+ if (sync_file_range(env->me_lazy_fd, 0, pgno2bytes(env, NUM_METAS),
+ SYNC_FILE_RANGE_WRITE | SYNC_FILE_RANGE_WAIT_AFTER))
+ err = errno;
#else
- err = mdbx_filesync(env->me_fd, MDBX_SYNC_DATA);
+ err = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA);
#endif
- if (unlikely(err != MDBX_SUCCESS))
- return err;
- mdbx_flush_incoherent_mmap(env->me_map, pgno2bytes(env, NUM_METAS),
- env->me_os_psize);
+ if (unlikely(err != MDBX_SUCCESS))
+ return err;
+ mdbx_flush_incoherent_mmap(env->me_map, pgno2bytes(env, NUM_METAS),
+ env->me_os_psize);
+ }
+
+ if (likely(env->me_lck))
+ /* force oldest refresh */
+ env->me_lck->mti_readers_refresh_flag = true;
return MDBX_SUCCESS;
}
@@ -3925,8 +3962,8 @@ __cold static int mdbx_wipe_steady(MDBX_env *env, const txnid_t last_steady) {
#define MDBX_ALLOC_NEW 4
#define MDBX_ALLOC_ALL (MDBX_ALLOC_CACHE | MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
-static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
- MDBX_page **const mp, int flags) {
+__hot static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
+ MDBX_page **const mp, int flags) {
int rc;
MDBX_txn *txn = mc->mc_txn;
MDBX_env *env = txn->mt_env;
@@ -3963,6 +4000,8 @@ static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
mdbx_ensure(env, np->mp_pgno >= NUM_METAS);
VALGRIND_MAKE_MEM_UNDEFINED(page_data(np), page_space(txn->mt_env));
ASAN_UNPOISON_MEMORY_REGION(page_data(np), page_space(txn->mt_env));
+ np->mp_flags = P_DIRTY;
+ np->mp_txnid = INVALID_TXNID;
*mp = np;
return MDBX_SUCCESS;
}
@@ -4034,7 +4073,7 @@ skip_cache:
goto fail;
if (flags & MDBX_LIFORECLAIM) {
/* Begin from oldest reader if any */
- if (oldest > 2) {
+ if (oldest > MIN_TXNID) {
last = oldest - 1;
op = MDBX_SET_RANGE;
}
@@ -4241,15 +4280,17 @@ skip_cache:
rc = MDBX_RESULT_TRUE;
const pgno_t autosync_threshold = *env->me_autosync_threshold;
const uint64_t autosync_period = *env->me_autosync_period;
- /* wipe the last steady-point if:
+ /* wipe the last steady-point if one of:
* - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified
- * otherwise, make a new steady-point if:
+ * - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted
+ * otherwise, make a new steady-point if one of:
* - auto-sync threshold is specified and reached;
- * - OR upper limit of database size is reached;
- * - OR database is full (with the current file size)
+ * - upper limit of database size is reached;
+ * - database is full (with the current file size)
* AND auto-sync threshold is NOT specified */
if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
- (autosync_threshold | autosync_period) == 0) {
+ ((autosync_threshold | autosync_period) == 0 ||
+ next >= steady->mm_geo.now)) {
/* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
* without any auto-sync threshold(s). */
rc = mdbx_wipe_steady(env, oldest);
@@ -4312,9 +4353,10 @@ skip_cache:
goto done;
}
- mdbx_warning("unable growth datafile to %" PRIaPGNO
- " pages (+%" PRIaPGNO "), errcode %d",
- aligned, aligned - txn->mt_end_pgno, rc);
+ mdbx_error("unable growth datafile to %" PRIaPGNO " pages (+%" PRIaPGNO
+ "), errcode %d",
+ aligned, aligned - txn->mt_end_pgno, rc);
+ rc = (rc == MDBX_RESULT_TRUE) ? MDBX_MAP_FULL : rc;
} else {
mdbx_debug("gc-alloc: next %u > upper %u", next, txn->mt_geo.upper);
}
@@ -4416,9 +4458,8 @@ __hot static void mdbx_page_copy(MDBX_page *dst, MDBX_page *src,
* [in] mp the page being referenced. It must not be dirty.
* [out] ret the writable page, if any.
* ret is unchanged if mp wasn't spilled. */
-__hot static int __must_check_result mdbx_page_unspill(MDBX_txn *txn,
- MDBX_page *mp,
- MDBX_page **ret) {
+static int __must_check_result mdbx_page_unspill(MDBX_txn *txn, MDBX_page *mp,
+ MDBX_page **ret) {
MDBX_env *env = txn->mt_env;
pgno_t pgno = mp->mp_pgno, pn = pgno << 1;
@@ -4453,10 +4494,8 @@ __hot static int __must_check_result mdbx_page_unspill(MDBX_txn *txn,
* page remains spilled until child commits */
int rc = mdbx_page_dirty(txn, np);
- if (likely(rc == MDBX_SUCCESS)) {
- np->mp_flags |= P_DIRTY;
+ if (likely(rc == MDBX_SUCCESS))
*ret = np;
- }
return rc;
}
return MDBX_SUCCESS;
@@ -4540,6 +4579,7 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) {
mdbx_page_copy(np, mp, txn->mt_env->me_psize);
np->mp_pgno = pgno;
+ np->mp_txnid = INVALID_TXNID;
np->mp_flags |= P_DIRTY;
done:
@@ -4620,7 +4660,7 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) {
if (outside_txn) {
if (unsynced_pages > /* FIXME: define threshold */ 16 &&
- (flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) == 0) {
+ (flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0) {
mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
const size_t usedbytes = pgno_align2os_bytes(env, head->mm_geo.next);
@@ -4629,7 +4669,7 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) {
/* LY: pre-sync without holding lock to reduce latency for writer(s) */
int err = (flags & MDBX_WRITEMAP)
? mdbx_msync(&env->me_dxb_mmap, 0, usedbytes, false)
- : mdbx_filesync(env->me_fd, MDBX_SYNC_DATA);
+ : mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA);
if (unlikely(err != MDBX_SUCCESS))
return err;
@@ -4647,7 +4687,7 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, int force, int nonblock) {
}
if (!META_IS_STEADY(head) ||
- ((flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) == 0 && unsynced_pages)) {
+ ((flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0 && unsynced_pages)) {
mdbx_debug("meta-head %" PRIaPGNO ", %s, sync_pending %" PRIaPGNO,
data_page(head)->mp_pgno, mdbx_durable_str(head),
unsynced_pages);
@@ -4668,10 +4708,11 @@ fastpath:
if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_NOMETASYNC) != 0) {
const txnid_t head_txnid = mdbx_recent_committed_txnid(env);
if (*env->me_meta_sync_txnid != (uint32_t)head_txnid) {
- rc = (flags & MDBX_WRITEMAP)
- ? mdbx_msync(&env->me_dxb_mmap, 0, pgno2bytes(env, NUM_METAS),
- false)
- : mdbx_filesync(env->me_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
+ rc =
+ (flags & MDBX_WRITEMAP)
+ ? mdbx_msync(&env->me_dxb_mmap, 0,
+ pgno_align2os_bytes(env, NUM_METAS), false)
+ : mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
if (likely(rc == MDBX_SUCCESS))
*env->me_meta_sync_txnid = (uint32_t)head_txnid;
}
@@ -4843,7 +4884,7 @@ static void mdbx_txn_valgrind(MDBX_env *env, MDBX_txn *txn) {
mdbx_txn_unlock(env);
}
}
-#endif /* MDBX_USE_VALGRIND */
+#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
/* Common code for mdbx_txn_begin() and mdbx_txn_renew(). */
static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
@@ -4873,7 +4914,8 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
mdbx_assert(env, (flags & ~(MDBX_TXN_BEGIN_FLAGS | MDBX_TXN_SPILLS |
MDBX_WRITEMAP)) == 0);
if (flags & MDBX_RDONLY) {
- txn->mt_flags = MDBX_RDONLY | (env->me_flags & MDBX_NOTLS);
+ txn->mt_flags =
+ MDBX_RDONLY | (env->me_flags & (MDBX_NOTLS | MDBX_WRITEMAP));
MDBX_reader *r = txn->to.reader;
STATIC_ASSERT(sizeof(size_t) == sizeof(r->mr_tid));
if (likely(env->me_flags & MDBX_ENV_TXKEY)) {
@@ -5004,6 +5046,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */
mdbx_ensure(env, txn->mt_txnid >=
/* paranoia is appropriate here */ *env->me_oldest);
+ txn->mt_numdbs = env->me_numdbs;
} else {
/* Not yet touching txn == env->me_txn0, it may be active */
mdbx_jitter4testing(false);
@@ -5046,7 +5089,8 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
if (txn->tw.lifo_reclaimed)
MDBX_PNL_SIZE(txn->tw.lifo_reclaimed) = 0;
env->me_txn = txn;
- memcpy(txn->mt_dbiseqs, env->me_dbiseqs, env->me_maxdbs * sizeof(unsigned));
+ txn->mt_numdbs = env->me_numdbs;
+ memcpy(txn->mt_dbiseqs, env->me_dbiseqs, txn->mt_numdbs * sizeof(unsigned));
/* Copy the DB info and flags */
memcpy(txn->mt_dbs, meta->mm_dbs, CORE_DBS * sizeof(MDBX_db));
/* Moved to here to avoid a data race in read TXNs */
@@ -5055,7 +5099,6 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
}
/* Setup db info */
- txn->mt_numdbs = env->me_numdbs;
mdbx_compiler_barrier();
for (unsigned i = CORE_DBS; i < txn->mt_numdbs; i++) {
unsigned x = env->me_dbflags[i];
@@ -5092,9 +5135,13 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
txn->mt_flags |= MDBX_SHRINK_ALLOWED;
mdbx_srwlock_AcquireShared(&env->me_remap_guard);
}
-#endif
+#endif /* Windows */
} else {
env->me_dxb_mmap.current = size;
+#if defined(_WIN32) || defined(_WIN64)
+ env->me_dxb_mmap.filesize =
+ (env->me_dxb_mmap.filesize < size) ? size : env->me_dxb_mmap.filesize;
+#endif /* Windows */
}
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
mdbx_txn_valgrind(env, txn);
@@ -5318,9 +5365,10 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
if (txn != env->me_txn0)
mdbx_free(txn);
} else {
- mdbx_assert(env, (txn->mt_flags &
- ~(MDBX_RDONLY | MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED |
- MDBX_NOMETASYNC | MDBX_NOSYNC | MDBX_MAPASYNC)) == 0);
+ mdbx_assert(env,
+ (txn->mt_flags & ~(MDBX_NOTLS | MDBX_RDONLY | MDBX_WRITEMAP |
+ MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC |
+ MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) == 0);
txn->mt_signature = MDBX_MT_SIGNATURE;
*ret = txn;
mdbx_debug("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO
@@ -5811,43 +5859,44 @@ static __cold int mdbx_audit_ex(MDBX_txn *txn, unsigned retired_stored,
return MDBX_PROBLEM;
}
-static __inline unsigned backlog_size(MDBX_txn *txn) {
+static __always_inline unsigned backlog_size(MDBX_txn *txn) {
return MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) + txn->tw.loose_count;
}
-static __inline unsigned gctree_backlog(MDBX_txn *txn) {
- return /* for split upto root page */ txn->mt_dbs[FREE_DBI].md_depth +
- /* for rebalance */ 2 + /* for grow */ 1;
-}
-
/* LY: Prepare a backlog of pages to modify GC itself,
* while reclaiming is prohibited. It should be enough to prevent search
* in mdbx_page_alloc() during deletion, when the GC tree is unbalanced. */
static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *gc_cursor,
const size_t pnl_bytes) {
- const unsigned linear = number_of_ovpages(
- txn->mt_env,
- pnl_bytes ? pnl_bytes : MDBX_PNL_SIZEOF(txn->tw.retired_pages));
- const unsigned backlog = linear + gctree_backlog(txn);
-
- if (likely(
- linear == 1 &&
- backlog_size(txn) >
- (pnl_bytes
- ? backlog
- : backlog + /* for COW */ txn->mt_dbs[FREE_DBI].md_depth)))
+ const unsigned linear4list = number_of_ovpages(txn->mt_env, pnl_bytes);
+ const unsigned backlog4cow = txn->mt_dbs[FREE_DBI].md_depth;
+ const unsigned backlog4rebalance = backlog4cow + 1;
+
+ if (likely(linear4list == 1 &&
+ backlog_size(txn) > (pnl_bytes
+ ? backlog4rebalance
+ : (backlog4cow + backlog4rebalance))))
return MDBX_SUCCESS;
- gc_cursor->mc_flags &= ~C_RECLAIMING;
+ mdbx_trace(">> pnl_bytes %zu, backlog %u, 4list %u, 4cow %u, 4rebalance %u",
+ pnl_bytes, backlog_size(txn), linear4list, backlog4cow,
+ backlog4rebalance);
+ gc_cursor->mc_flags &= ~C_RECLAIMING;
int err = mdbx_cursor_touch(gc_cursor);
- if (err == MDBX_SUCCESS && linear > 1)
- err = mdbx_page_alloc(gc_cursor, linear, nullptr, MDBX_ALLOC_ALL);
+ mdbx_trace("== after-touch, backlog %u, err %d", backlog_size(txn), err);
+
+ if (linear4list > 1 && err == MDBX_SUCCESS) {
+ err = mdbx_page_alloc(gc_cursor, linear4list, nullptr,
+ MDBX_ALLOC_GC | MDBX_ALLOC_CACHE);
+ mdbx_trace("== after-4linear, backlog %u, err %d", backlog_size(txn), err);
+ }
- while (err == MDBX_SUCCESS && backlog_size(txn) < backlog)
+ while (backlog_size(txn) < backlog4cow + linear4list && err == MDBX_SUCCESS)
err = mdbx_page_alloc(gc_cursor, 1, NULL, MDBX_ALLOC_GC);
gc_cursor->mc_flags |= C_RECLAIMING;
+ mdbx_trace("<< backlog %u, err %d", backlog_size(txn), err);
return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
}
@@ -5897,6 +5946,10 @@ retry:
goto bailout;
}
+ rc = mdbx_prep_backlog(txn, &mc, MDBX_PNL_SIZEOF(txn->tw.retired_pages));
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto bailout;
+
unsigned settled = 0, cleaned_gc_slot = 0, reused_gc_slot = 0,
filled_gc_slot = ~0u;
txnid_t cleaned_gc_id = 0, gc_rid = txn->tw.last_reclaimed;
@@ -6104,8 +6157,11 @@ retry:
mdbx_debug_extra_print(" %" PRIaPGNO, txn->tw.retired_pages[i]);
mdbx_debug_extra_print("%s", "\n");
}
- if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist)))
+ if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) {
+ mdbx_trace("%s.reclaimed-list changed %u -> %u, retry", dbg_prefix_mode,
+ amount, (unsigned)MDBX_PNL_SIZE(txn->tw.reclaimed_pglist));
goto retry /* rare case, but avoids GC fragmentation and one loop. */;
+ }
continue;
}
@@ -6521,7 +6577,7 @@ static int mdbx_flush_iov(MDBX_txn *const txn, struct iovec *iov,
unsigned iov_items, size_t iov_off,
size_t iov_bytes) {
MDBX_env *const env = txn->mt_env;
- int rc = mdbx_pwritev(env->me_fd, iov, iov_items, iov_off, iov_bytes);
+ int rc = mdbx_pwritev(env->me_lazy_fd, iov, iov_items, iov_off, iov_bytes);
if (unlikely(rc != MDBX_SUCCESS)) {
mdbx_error("Write error: %s", mdbx_strerror(rc));
txn->mt_flags |= MDBX_TXN_ERROR;
@@ -6538,7 +6594,7 @@ static int mdbx_flush_iov(MDBX_txn *const txn, struct iovec *iov,
* [in] txn the transaction that's being committed
* [in] keep number of initial pages in dirtylist to keep dirty.
* Returns 0 on success, non-zero on failure. */
-static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
+__hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
struct iovec iov[MDBX_COMMIT_PAGES];
const MDBX_DPL dl = (keep || txn->tw.loose_count > 1)
? mdbx_dpl_sort(txn->tw.dirtylist)
@@ -6557,11 +6613,15 @@ static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
mdbx_tassert(txn, dp->mp_flags & P_DIRTY);
/* Don't flush this page yet */
- if (dp->mp_flags & (P_LOOSE | P_KEEP)) {
+ if (dp->mp_flags & P_KEEP) {
dp->mp_flags &= ~P_KEEP;
dl[++w] = dl[r];
continue;
}
+ if (dp->mp_flags & P_LOOSE) {
+ dl[++w] = dl[r];
+ continue;
+ }
const unsigned npages = IS_OVERFLOW(dp) ? dp->mp_pages : 1;
flush_begin = (flush_begin < dp->mp_pgno) ? flush_begin : dp->mp_pgno;
@@ -6569,7 +6629,7 @@ static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
(flush_end > dp->mp_pgno + npages) ? flush_end : dp->mp_pgno + npages;
*env->me_unsynced_pages += npages;
dp->mp_flags &= ~P_DIRTY;
- dp->mp_validator = 0 /* TODO */;
+ dp->mp_txnid = txn->mt_txnid;
if ((env->me_flags & MDBX_WRITEMAP) == 0) {
const size_t size = pgno2bytes(env, npages);
@@ -6658,8 +6718,8 @@ static __cold bool mdbx_txn_import_dbi(MDBX_txn *txn, MDBX_dbi dbi) {
}
/* Check txn and dbi arguments to a function */
-static __inline bool TXN_DBI_EXIST(MDBX_txn *txn, MDBX_dbi dbi,
- unsigned validity) {
+static __always_inline bool mdbx_txn_dbi_exists(MDBX_txn *txn, MDBX_dbi dbi,
+ unsigned validity) {
if (likely(dbi < txn->mt_numdbs && (txn->mt_dbflags[dbi] & validity)))
return true;
@@ -6957,6 +7017,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
(txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) {
for (int i = txn->mt_numdbs; --i >= 0;)
mdbx_tassert(txn, (txn->mt_dbflags[i] & DB_DIRTY) == 0);
+ rc = MDBX_SUCCESS;
goto done;
}
@@ -7029,8 +7090,6 @@ int mdbx_txn_commit(MDBX_txn *txn) {
goto fail;
}
- if (likely(env->me_lck))
- env->me_lck->mti_readers_refresh_flag = false;
end_mode = MDBX_END_COMMITTED | MDBX_END_UPDATE | MDBX_END_EOTDONE;
done:
@@ -7128,7 +7187,7 @@ static int __cold mdbx_validate_meta(MDBX_env *env, MDBX_meta *const meta,
const uint64_t used_bytes = meta->mm_geo.next * (uint64_t)meta->mm_psize;
if (used_bytes > *filesize) {
/* Here could be a race with DB-shrinking performed by other process */
- int err = mdbx_filesize(env->me_fd, filesize);
+ int err = mdbx_filesize(env->me_lazy_fd, filesize);
if (unlikely(err != MDBX_SUCCESS))
return err;
if (used_bytes > *filesize) {
@@ -7230,7 +7289,7 @@ static int __cold mdbx_validate_meta(MDBX_env *env, MDBX_meta *const meta,
static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *dest,
uint64_t *filesize,
const int lck_exclusive) {
- int rc = mdbx_filesize(env->me_fd, filesize);
+ int rc = mdbx_filesize(env->me_lazy_fd, filesize);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@@ -7256,7 +7315,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *dest,
while (1) {
mdbx_trace("reading meta[%d]: offset %u, bytes %u, retry-left %u",
meta_number, offset, MIN_PAGESIZE, retryleft);
- int err = mdbx_pread(env->me_fd, buffer, MIN_PAGESIZE, offset);
+ int err = mdbx_pread(env->me_lazy_fd, buffer, MIN_PAGESIZE, offset);
if (err != MDBX_SUCCESS) {
if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 &&
*filesize == 0 && (env->me_flags & MDBX_RDONLY) == 0)
@@ -7269,7 +7328,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *dest,
}
char again[MIN_PAGESIZE];
- err = mdbx_pread(env->me_fd, again, MIN_PAGESIZE, offset);
+ err = mdbx_pread(env->me_lazy_fd, again, MIN_PAGESIZE, offset);
if (err != MDBX_SUCCESS) {
mdbx_error("read meta[%u,%u]: %i, %s", offset, MIN_PAGESIZE, err,
mdbx_strerror(err));
@@ -7385,7 +7444,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
mdbx_assert(env, (env->me_flags & (MDBX_RDONLY | MDBX_FATAL_ERROR)) == 0);
mdbx_assert(env, pending->mm_geo.next <= pending->mm_geo.now);
- if (flags & (MDBX_NOSYNC | MDBX_MAPASYNC)) {
+ if (flags & (MDBX_SAFE_NOSYNC | MDBX_MAPASYNC)) {
/* Check auto-sync conditions */
const pgno_t autosync_threshold = *env->me_autosync_threshold;
const uint64_t autosync_period = *env->me_autosync_period;
@@ -7411,23 +7470,31 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
ASAN_POISON_MEMORY_REGION(env->me_map + pgno2bytes(env, largest_pgno),
pgno2bytes(env, edge - largest_pgno));
}
-#endif /* MDBX_USE_VALGRIND */
+#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
#if defined(MADV_DONTNEED)
- const size_t largest_aligned2os_bytes =
- pgno_align2os_bytes(env, largest_pgno);
- const pgno_t largest_aligned2os_pgno =
- bytes2pgno(env, largest_aligned2os_bytes);
+ const size_t largest_bytes = pgno2bytes(env, largest_pgno);
+ const size_t madvise_gap = (largest_bytes < 65536 * 256)
+ ? 65536
+ : (largest_bytes > MEGABYTE * 4 * 256)
+ ? MEGABYTE * 4
+ : largest_bytes >> 8;
+ const size_t discard_edge_bytes = bytes_align2os_bytes(
+ env,
+ (MDBX_RDONLY & (env->me_lck ? env->me_lck->mti_envmode : env->me_flags))
+ ? largest_bytes
+ : largest_bytes + madvise_gap);
+ const pgno_t discard_edge_pgno = bytes2pgno(env, discard_edge_bytes);
const pgno_t prev_discarded_pgno = *env->me_discarded_tail;
if (prev_discarded_pgno >
- largest_aligned2os_pgno +
- /* 1M threshold to avoid unreasonable madvise() call */
- bytes2pgno(env, MEGABYTE)) {
+ discard_edge_pgno +
+ /* threshold to avoid unreasonably frequent madvise() calls */
+ bytes2pgno(env, madvise_gap)) {
mdbx_notice("open-MADV_%s %u..%u", "DONTNEED", *env->me_discarded_tail,
largest_pgno);
- *env->me_discarded_tail = largest_aligned2os_pgno;
+ *env->me_discarded_tail = discard_edge_pgno;
const size_t prev_discarded_bytes =
pgno2bytes(env, prev_discarded_pgno) & ~(env->me_os_psize - 1);
- mdbx_ensure(env, prev_discarded_bytes > largest_aligned2os_bytes);
+ mdbx_ensure(env, prev_discarded_bytes > discard_edge_bytes);
int advise = MADV_DONTNEED;
#if defined(MADV_FREE) && \
0 /* MADV_FREE works for only anonymous vma at the moment */
@@ -7435,8 +7502,8 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
mdbx_linux_kernel_version > 0x04050000)
advise = MADV_FREE;
#endif /* MADV_FREE */
- int err = madvise(env->me_map + largest_aligned2os_bytes,
- prev_discarded_bytes - largest_aligned2os_bytes, advise)
+ int err = madvise(env->me_map + discard_edge_bytes,
+ prev_discarded_bytes - discard_edge_bytes, advise)
? ignore_enosys(errno)
: MDBX_SUCCESS;
if (unlikely(MDBX_IS_ERROR(err)))
@@ -7477,25 +7544,29 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
/* LY: step#1 - sync previously written/updated data-pages */
int rc = *env->me_unsynced_pages ? MDBX_RESULT_TRUE /* carry non-steady */
: MDBX_RESULT_FALSE /* carry steady */;
- if (rc != MDBX_RESULT_FALSE && (flags & MDBX_NOSYNC) == 0) {
+ if (rc != MDBX_RESULT_FALSE && (flags & MDBX_SAFE_NOSYNC) == 0) {
mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
MDBX_meta *const recent_steady_meta = mdbx_meta_steady(env);
if (flags & MDBX_WRITEMAP) {
- const size_t usedbytes = pgno_align2os_bytes(env, pending->mm_geo.next);
- rc = mdbx_msync(&env->me_dxb_mmap, 0, usedbytes, flags & MDBX_MAPASYNC);
- if (unlikely(rc != MDBX_SUCCESS))
- goto fail;
+ const size_t begin = pgno2bytes(env, NUM_METAS) & ~(env->me_os_psize - 1);
+ const size_t end = pgno_align2os_bytes(env, pending->mm_geo.next);
+ if (end > begin) {
+ rc = mdbx_msync(&env->me_dxb_mmap, begin, end - begin,
+ flags & MDBX_MAPASYNC);
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto fail;
+ }
rc = MDBX_RESULT_TRUE /* carry non-steady */;
if ((flags & MDBX_MAPASYNC) == 0) {
if (unlikely(pending->mm_geo.next > recent_steady_meta->mm_geo.now)) {
- rc = mdbx_filesync(env->me_fd, MDBX_SYNC_SIZE);
+ rc = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_SIZE);
if (unlikely(rc != MDBX_SUCCESS))
goto fail;
}
rc = MDBX_RESULT_FALSE /* carry steady */;
}
} else {
- rc = mdbx_filesync(env->me_fd,
+ rc = mdbx_filesync(env->me_lazy_fd,
(pending->mm_geo.next > recent_steady_meta->mm_geo.now)
? MDBX_SYNC_DATA | MDBX_SYNC_SIZE
: MDBX_SYNC_DATA);
@@ -7571,7 +7642,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
mdbx_assert(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0);
mdbx_ensure(env, target == head || mdbx_meta_txnid_stable(env, target) <
pending->mm_txnid_a.inconsistent);
- if (env->me_flags & MDBX_WRITEMAP) {
+ if (flags & MDBX_WRITEMAP) {
mdbx_jitter4testing(true);
if (likely(target != head)) {
/* LY: 'invalidate' the meta. */
@@ -7614,40 +7685,50 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
target->mm_datasync_sign = pending->mm_datasync_sign;
mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true);
+ if ((flags & MDBX_SAFE_NOSYNC) == 0) {
+ /* sync meta-pages */
+ const bool weak = (flags & (MDBX_MAPASYNC | MDBX_NOMETASYNC)) != 0;
+ rc = mdbx_msync(&env->me_dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS),
+ weak);
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto fail;
+ if (!weak) {
+#if defined(__APPLE__) && \
+ MDBX_OSX_SPEED_INSTEADOF_DURABILITY == MDBX_OSX_WANNA_DURABILITY
+ rc = likely(fcntl(env->me_lazy_fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS
+ : errno;
+ if (unlikely(rc != MDBX_SUCCESS))
+ goto fail;
+#endif /* MacOS */
+ *env->me_meta_sync_txnid = pending->mm_txnid_a.low;
+ }
+ }
} else {
- rc = mdbx_pwrite(env->me_fd, pending, sizeof(MDBX_meta),
+ const MDBX_meta undo_meta = *target;
+ const mdbx_filehandle_t fd = (env->me_dsync_fd != INVALID_HANDLE_VALUE)
+ ? env->me_dsync_fd
+ : env->me_lazy_fd;
+ rc = mdbx_pwrite(fd, pending, sizeof(MDBX_meta),
(uint8_t *)target - env->me_map);
if (unlikely(rc != MDBX_SUCCESS)) {
undo:
mdbx_debug("%s", "write failed, disk error?");
/* On a failure, the pagecache still contains the new data.
* Try write some old data back, to prevent it from being used. */
- mdbx_pwrite(env->me_fd, (void *)target, sizeof(MDBX_meta),
+ mdbx_pwrite(fd, &undo_meta, sizeof(MDBX_meta),
(uint8_t *)target - env->me_map);
goto fail;
}
mdbx_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize);
- }
-
- /* LY: step#3 - sync meta-pages. */
- mdbx_assert(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0);
- if ((flags & (MDBX_NOSYNC | MDBX_NOMETASYNC)) == 0) {
- mdbx_assert(env, ((flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
- if (flags & MDBX_WRITEMAP) {
- const size_t offset = (uint8_t *)data_page(head) - env->me_dxb_mmap.dxb;
- const size_t paged_offset = offset & ~(env->me_os_psize - 1);
- const size_t paged_length = roundup_powerof2(
- env->me_psize + offset - paged_offset, env->me_os_psize);
- rc = mdbx_msync(&env->me_dxb_mmap, paged_offset, paged_length,
- flags & MDBX_MAPASYNC);
- if (unlikely(rc != MDBX_SUCCESS))
- goto fail;
- } else {
- rc = mdbx_filesync(env->me_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
- if (rc != MDBX_SUCCESS)
- goto undo;
+ if ((flags & (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC)) == 0) {
+ /* sync meta-pages */
+ if (fd == env->me_lazy_fd) {
+ rc = mdbx_filesync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
+ if (rc != MDBX_SUCCESS)
+ goto undo;
+ }
+ *env->me_meta_sync_txnid = pending->mm_txnid_a.low;
}
- *env->me_meta_sync_txnid = (uint32_t)pending->mm_txnid_a.inconsistent;
}
/* LY: shrink datafile if needed */
@@ -7660,6 +7741,10 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
goto fail;
}
+ if (likely(env->me_lck))
+ /* toggle oldest refresh */
+ env->me_lck->mti_readers_refresh_flag = false;
+
return MDBX_SUCCESS;
fail:
@@ -7691,12 +7776,10 @@ static void __cold mdbx_setup_pagesize(MDBX_env *env, const size_t pagesize) {
mdbx_ensure(env, branch_nodemax > 42 && branch_nodemax < (int)UINT16_MAX &&
branch_nodemax % 2 == 0);
env->me_branch_nodemax = (unsigned)branch_nodemax;
- env->me_maxkey_nd = (uint16_t)mdbx_limits_keysize_max(env->me_psize, 0);
- env->me_maxkey_ds =
- (uint16_t)mdbx_limits_keysize_max(env->me_psize, MDBX_DUPSORT);
- env->me_maxval_nd = (unsigned)mdbx_limits_valsize_max(env->me_psize, 0);
- env->me_maxval_ds =
- (unsigned)mdbx_limits_valsize_max(env->me_psize, MDBX_DUPSORT);
+ env->me_maxkey_nd = (uint16_t)mdbx_limits_keysize_max(pagesize, 0);
+ env->me_maxkey_ds = (uint16_t)mdbx_limits_keysize_max(pagesize, MDBX_DUPSORT);
+ env->me_maxval_nd = (unsigned)mdbx_limits_valsize_max(pagesize, 0);
+ env->me_maxval_ds = (unsigned)mdbx_limits_valsize_max(pagesize, MDBX_DUPSORT);
mdbx_ensure(env, env->me_maxkey_nd ==
env->me_branch_nodemax - NODESIZE - sizeof(pgno_t));
mdbx_ensure(env, env->me_maxkey_ds ==
@@ -7714,7 +7797,8 @@ int __cold mdbx_env_create(MDBX_env **penv) {
env->me_maxreaders = DEFAULT_READERS;
env->me_maxdbs = env->me_numdbs = CORE_DBS;
- env->me_fd = INVALID_HANDLE_VALUE;
+ env->me_lazy_fd = INVALID_HANDLE_VALUE;
+ env->me_dsync_fd = INVALID_HANDLE_VALUE;
env->me_lfd = INVALID_HANDLE_VALUE;
env->me_pid = mdbx_getpid();
@@ -7906,22 +7990,24 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
goto bailout;
}
- size_lower = roundup_powerof2(size_lower, env->me_os_psize);
- size_upper = roundup_powerof2(size_upper, env->me_os_psize);
- size_now = roundup_powerof2(size_now, env->me_os_psize);
+ const size_t unit =
+ (env->me_os_psize > (size_t)pagesize) ? env->me_os_psize : pagesize;
+ size_lower = roundup_powerof2(size_lower, unit);
+ size_upper = roundup_powerof2(size_upper, unit);
+ size_now = roundup_powerof2(size_now, unit);
/* LY: pick a size_upper value that is:
- * - a multiple of the system page size
+ * - a multiple of the page size
* - without violating MAX_MAPSIZE and MAX_PAGENO */
while (unlikely((size_t)size_upper > MAX_MAPSIZE ||
(uint64_t)size_upper / pagesize > MAX_PAGENO)) {
- if ((size_t)size_upper < env->me_os_psize + MIN_MAPSIZE ||
- (size_t)size_upper < env->me_os_psize * (MIN_PAGENO + 1)) {
+ if ((size_t)size_upper < unit + MIN_MAPSIZE ||
+ (size_t)size_upper < (size_t)pagesize * (MIN_PAGENO + 1)) {
/* paranoia in case of overflow with improbable values */
rc = MDBX_EINVAL;
goto bailout;
}
- size_upper -= env->me_os_psize;
+ size_upper -= unit;
if ((size_t)size_upper < (size_t)size_lower)
size_lower = size_upper;
}
@@ -7943,13 +8029,13 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
}
if (growth_step == 0 && shrink_threshold > 0)
growth_step = 1;
- growth_step = roundup_powerof2(growth_step, env->me_os_psize);
+ growth_step = roundup_powerof2(growth_step, unit);
if (bytes2pgno(env, growth_step) > UINT16_MAX)
growth_step = pgno2bytes(env, UINT16_MAX);
if (shrink_threshold < 0)
shrink_threshold = growth_step + growth_step;
- shrink_threshold = roundup_powerof2(shrink_threshold, env->me_os_psize);
+ shrink_threshold = roundup_powerof2(shrink_threshold, unit);
if (bytes2pgno(env, shrink_threshold) > UINT16_MAX)
shrink_threshold = pgno2bytes(env, UINT16_MAX);
@@ -8162,12 +8248,12 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
return MDBX_ENOMEM;
meta = *mdbx_init_metas(env, buffer);
- err = mdbx_pwrite(env->me_fd, buffer, env->me_psize * NUM_METAS, 0);
+ err = mdbx_pwrite(env->me_lazy_fd, buffer, env->me_psize * NUM_METAS, 0);
mdbx_free(buffer);
if (unlikely(err != MDBX_SUCCESS))
return err;
- err = mdbx_ftruncate(env->me_fd, filesize_before = env->me_dbgeo.now);
+ err = mdbx_ftruncate(env->me_lazy_fd, filesize_before = env->me_dbgeo.now);
if (unlikely(err != MDBX_SUCCESS))
return err;
@@ -8309,27 +8395,28 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
if (unlikely(err != MDBX_SUCCESS))
return err;
-#if defined(MADV_DODUMP) && defined(MADV_DONTDUMP)
- const size_t meta_length_aligned2os = pgno_align2os_bytes(env, NUM_METAS);
- err = madvise(env->me_map, meta_length_aligned2os, MADV_DODUMP)
+#if defined(MADV_DONTDUMP)
+ err = madvise(env->me_map, env->me_dxb_mmap.limit, MADV_DONTDUMP)
? ignore_enosys(errno)
: MDBX_SUCCESS;
if (unlikely(MDBX_IS_ERROR(err)))
return err;
- err = madvise(env->me_map + meta_length_aligned2os,
- env->me_dxb_mmap.current - meta_length_aligned2os,
- (mdbx_runtime_flags & MDBX_DBG_DUMP) ? MADV_DODUMP
- : MADV_DONTDUMP)
- ? ignore_enosys(errno)
- : MDBX_SUCCESS;
- if (unlikely(MDBX_IS_ERROR(err)))
- return err;
-#endif
+#endif /* MADV_DONTDUMP */
+#if defined(MADV_DODUMP)
+ if (mdbx_runtime_flags & MDBX_DBG_DUMP) {
+ const size_t meta_length_aligned2os = pgno_align2os_bytes(env, NUM_METAS);
+ err = madvise(env->me_map, meta_length_aligned2os, MADV_DODUMP)
+ ? ignore_enosys(errno)
+ : MDBX_SUCCESS;
+ if (unlikely(MDBX_IS_ERROR(err)))
+ return err;
+ }
+#endif /* MADV_DODUMP */
#ifdef MDBX_USE_VALGRIND
env->me_valgrind_handle =
VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx");
-#endif
+#endif /* MDBX_USE_VALGRIND */
mdbx_assert(env, used_bytes >= pgno2bytes(env, NUM_METAS) &&
used_bytes <= env->me_dxb_mmap.limit);
@@ -8339,7 +8426,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
ASAN_POISON_MEMORY_REGION(env->me_map + used_bytes,
env->me_dxb_mmap.limit - used_bytes);
env->me_poison_edge = bytes2pgno(env, env->me_dxb_mmap.limit);
-#endif /* MDBX_USE_VALGRIND */
+#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
/* NOTE: AddressSanitizer (at least GCC 7.x, 8.x) could generate
* false-positive alarm here. I have no other explanation for this
@@ -8425,7 +8512,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
MDBX_meta rollback = *head;
mdbx_meta_set_txnid(env, &rollback, undo_txnid);
rollback.mm_datasync_sign = MDBX_DATASIGN_WEAK;
- err = mdbx_pwrite(env->me_fd, &rollback, sizeof(MDBX_meta),
+ err = mdbx_pwrite(env->me_lazy_fd, &rollback, sizeof(MDBX_meta),
(uint8_t *)head - (uint8_t *)env->me_map);
}
if (err) {
@@ -8536,7 +8623,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
return err;
#elif defined(POSIX_FADV_DONTNEED)
err = ignore_enosys(posix_fadvise(
- env->me_fd, used_aligned2os_bytes,
+ env->me_lazy_fd, used_aligned2os_bytes,
env->me_dxb_mmap.current - used_aligned2os_bytes, POSIX_FADV_DONTNEED));
if (unlikely(MDBX_IS_ERROR(err)))
return err;
@@ -8558,11 +8645,10 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
/* Open and/or initialize the lock region for the environment. */
static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
mode_t mode) {
- mdbx_assert(env, env->me_fd != INVALID_HANDLE_VALUE);
+ mdbx_assert(env, env->me_lazy_fd != INVALID_HANDLE_VALUE);
mdbx_assert(env, env->me_lfd == INVALID_HANDLE_VALUE);
- int err = mdbx_openfile(lck_pathname, O_RDWR | O_CREAT, mode, &env->me_lfd,
- (env->me_flags & MDBX_EXCLUSIVE) ? true : false);
+ int err = mdbx_openfile(MDBX_OPEN_LCK, env, lck_pathname, &env->me_lfd, mode);
if (err != MDBX_SUCCESS) {
if (!(err == MDBX_ENOFILE && (env->me_flags & MDBX_EXCLUSIVE)) &&
!((err == MDBX_EROFS || err == MDBX_EACCESS || err == MDBX_EPERM) &&
@@ -8570,7 +8656,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
return err;
/* ensure the file system is read-only */
- err = mdbx_check_fs_rdonly(env->me_fd, lck_pathname, err);
+ err = mdbx_check_fs_rdonly(env->me_lazy_fd, lck_pathname, err);
if (err != MDBX_SUCCESS)
return err;
@@ -8862,7 +8948,7 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) {
* at runtime. Changing other flags requires closing the
* environment and re-opening it with the new flags. */
#define CHANGEABLE \
- (MDBX_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC | MDBX_NOMEMINIT | \
+ (MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_MAPASYNC | MDBX_NOMEMINIT | \
MDBX_COALESCE | MDBX_PAGEPERTURB | MDBX_ACCEDE)
#define CHANGELESS \
(MDBX_NOSUBDIR | MDBX_RDONLY | MDBX_WRITEMAP | MDBX_NOTLS | MDBX_NORDAHEAD | \
@@ -8872,9 +8958,9 @@ __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) {
#error "Persistent DB flags & env flags overlap, but both go in mm_flags"
#endif
-int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
+int __cold mdbx_env_open(MDBX_env *env, const char *pathname, unsigned flags,
mode_t mode) {
- if (unlikely(!env || !path))
+ if (unlikely(!env || !pathname))
return MDBX_EINVAL;
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
@@ -8883,11 +8969,11 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
if (flags & ~(CHANGEABLE | CHANGELESS))
return MDBX_EINVAL;
- if (env->me_fd != INVALID_HANDLE_VALUE ||
+ if (env->me_lazy_fd != INVALID_HANDLE_VALUE ||
(env->me_flags & MDBX_ENV_ACTIVE) != 0)
return MDBX_EPERM;
- size_t len_full, len = strlen(path);
+ size_t len_full, len = strlen(pathname);
if (flags & MDBX_NOSUBDIR) {
len_full = len + sizeof(MDBX_LOCK_SUFFIX) + len + 1;
} else {
@@ -8900,12 +8986,12 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
char *dxb_pathname;
if (flags & MDBX_NOSUBDIR) {
dxb_pathname = lck_pathname + len + sizeof(MDBX_LOCK_SUFFIX);
- sprintf(lck_pathname, "%s" MDBX_LOCK_SUFFIX, path);
- strcpy(dxb_pathname, path);
+ sprintf(lck_pathname, "%s" MDBX_LOCK_SUFFIX, pathname);
+ strcpy(dxb_pathname, pathname);
} else {
dxb_pathname = lck_pathname + len + sizeof(MDBX_LOCKNAME);
- sprintf(lck_pathname, "%s" MDBX_LOCKNAME, path);
- sprintf(dxb_pathname, "%s" MDBX_DATANAME, path);
+ sprintf(lck_pathname, "%s" MDBX_LOCKNAME, pathname);
+ sprintf(dxb_pathname, "%s" MDBX_DATANAME, pathname);
}
int rc = MDBX_SUCCESS;
@@ -8913,24 +8999,25 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
if (flags & MDBX_RDONLY) {
/* LY: silently ignore irrelevant flags when
* we're only getting read access */
- flags &= ~(MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NOSYNC | MDBX_NOMETASYNC |
- MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_NOMEMINIT | MDBX_ACCEDE);
+ flags &=
+ ~(MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC |
+ MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_NOMEMINIT | MDBX_ACCEDE);
} else {
-#ifdef __OpenBSD__
- /* Temporary `workaround` for OpenBSD kernel's bug.
+#if MDBX_MMAP_INCOHERENT_FILE_WRITE
+ /* Temporary `workaround` for OpenBSD kernel's flaw.
* See https://github.com/leo-yuriev/libmdbx/issues/67 */
if ((flags & MDBX_WRITEMAP) == 0) {
if (flags & MDBX_ACCEDE)
flags |= MDBX_WRITEMAP;
else {
mdbx_debug_log(MDBX_LOG_ERROR, __func__, __LINE__,
- "OpenBSD requires MDBX_WRITEMAP because of an internal "
- "bug(s) in a file/buffer/page cache.\n");
+ "System (i.e. OpenBSD) requires MDBX_WRITEMAP because "
+ "of an internal flaw(s) in a file/buffer/page cache.\n");
rc = 42 /* ENOPROTOOPT */;
goto bailout;
}
}
-#endif /* __OpenBSD__ */
+#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
env->me_dirtylist = mdbx_calloc(MDBX_DPL_TXNFULL + 1, sizeof(MDBX_DP));
if (!env->me_dirtylist)
rc = MDBX_ENOMEM;
@@ -8941,7 +9028,7 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
if (rc)
goto bailout;
- env->me_path = mdbx_strdup(path);
+ env->me_path = mdbx_strdup(pathname);
env->me_dbxs = mdbx_calloc(env->me_maxdbs, sizeof(MDBX_dbx));
env->me_dbflags = mdbx_calloc(env->me_maxdbs, sizeof(env->me_dbflags[0]));
env->me_dbiseqs = mdbx_calloc(env->me_maxdbs, sizeof(env->me_dbiseqs[0]));
@@ -8952,40 +9039,50 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
env->me_dbxs[FREE_DBI].md_cmp =
mdbx_cmp_int_align4; /* aligned MDBX_INTEGERKEY */
- int oflags;
- if (F_ISSET(flags, MDBX_RDONLY))
- oflags = O_RDONLY;
- else if (mode != 0) {
- if ((flags & MDBX_NOSUBDIR) == 0) {
+ if ((flags & (MDBX_RDONLY | MDBX_NOSUBDIR)) == 0 && mode != 0) {
#if defined(_WIN32) || defined(_WIN64)
- if (!CreateDirectoryA(path, nullptr)) {
- rc = GetLastError();
- if (rc != ERROR_ALREADY_EXISTS)
- goto bailout;
- }
+ const size_t wlen = mbstowcs(nullptr, pathname, INT_MAX);
+ if (wlen < 1 || wlen > /* MAX_PATH */ INT16_MAX)
+ return ERROR_INVALID_NAME;
+ wchar_t *const pathnameW = _alloca((wlen + 1) * sizeof(wchar_t));
+ if (wlen != mbstowcs(pathnameW, pathname, wlen + 1)) {
+ rc = ERROR_INVALID_NAME;
+ goto bailout;
+ }
+ if (!CreateDirectoryW(pathnameW, nullptr)) {
+ rc = GetLastError();
+ if (rc != ERROR_ALREADY_EXISTS)
+ goto bailout;
+ }
#else
- const mode_t dir_mode =
- (/* inherit read/write permissions for group and others */ mode &
- (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) |
- /* always add read/write/search for owner */ S_IRWXU |
- ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) |
- ((mode & S_IROTH) ? /* +search if readable by others */ S_IXOTH : 0);
- if (mkdir(path, dir_mode)) {
- rc = errno;
- if (rc != EEXIST)
- goto bailout;
- }
-#endif
+ const mode_t dir_mode =
+ (/* inherit read/write permissions for group and others */ mode &
+ (S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) |
+ /* always add read/write/search for owner */ S_IRWXU |
+ ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) |
+ ((mode & S_IROTH) ? /* +search if readable by others */ S_IXOTH : 0);
+ if (mkdir(pathname, dir_mode)) {
+ rc = errno;
+ if (rc != EEXIST)
+ goto bailout;
}
- oflags = O_RDWR | O_CREAT;
- } else
- oflags = O_RDWR;
+#endif
+ }
- rc = mdbx_openfile(dxb_pathname, oflags, mode, &env->me_fd,
- (env->me_flags & MDBX_EXCLUSIVE) ? true : false);
+ rc = mdbx_openfile(F_ISSET(flags, MDBX_RDONLY) ? MDBX_OPEN_DXB_READ
+ : MDBX_OPEN_DXB_LAZY,
+ env, dxb_pathname, &env->me_lazy_fd, mode);
if (rc != MDBX_SUCCESS)
goto bailout;
+ mdbx_assert(env, env->me_dsync_fd == INVALID_HANDLE_VALUE);
+ if ((flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC)) == 0) {
+ rc = mdbx_openfile(MDBX_OPEN_DXB_DSYNC, env, dxb_pathname,
+ &env->me_dsync_fd, 0);
+ mdbx_ensure(env, (rc != MDBX_SUCCESS) ==
+ (env->me_dsync_fd == INVALID_HANDLE_VALUE));
+ }
+
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
env->me_sysv_ipc.key = ftok(dxb_pathname, 42);
if (env->me_sysv_ipc.key == -1) {
@@ -8997,13 +9094,13 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
#if !(defined(_WIN32) || defined(_WIN64))
if (mode == 0) {
struct stat st;
- if (fstat(env->me_fd, &st)) {
+ if (fstat(env->me_lazy_fd, &st)) {
rc = errno;
goto bailout;
}
mode = st.st_mode;
}
-#endif
+#endif /* !Windows */
const int lck_rc = mdbx_setup_lck(env, lck_pathname, mode);
if (MDBX_IS_ERROR(lck_rc)) {
@@ -9011,7 +9108,8 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
goto bailout;
}
- const unsigned rigorous_flags = MDBX_WRITEMAP | MDBX_NOSYNC | MDBX_MAPASYNC;
+ const unsigned rigorous_flags =
+ MDBX_WRITEMAP | MDBX_SAFE_NOSYNC | MDBX_MAPASYNC;
const unsigned mode_flags = rigorous_flags | MDBX_NOMETASYNC |
MDBX_LIFORECLAIM | MDBX_COALESCE | MDBX_NORDAHEAD;
@@ -9072,27 +9170,32 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
}
if ((flags & MDBX_RDONLY) == 0) {
- rc = MDBX_ENOMEM;
- MDBX_txn *txn;
- int tsize = sizeof(MDBX_txn),
- size =
- tsize + env->me_maxdbs * (sizeof(MDBX_db) + sizeof(MDBX_cursor *) +
- sizeof(unsigned) + 1);
- if ((env->me_pbuf = mdbx_calloc(
- 1 /* page buffer */ + 1 /* page killer bufer */, env->me_psize)) &&
- (txn = mdbx_calloc(1, size))) {
- txn->mt_dbs = (MDBX_db *)((char *)txn + tsize);
- txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
- txn->mt_dbiseqs = (unsigned *)(txn->mt_cursors + env->me_maxdbs);
- txn->mt_dbflags = (uint8_t *)(txn->mt_dbiseqs + env->me_maxdbs);
- txn->mt_env = env;
- txn->mt_dbxs = env->me_dbxs;
- txn->mt_flags = MDBX_TXN_FINISHED;
- env->me_txn0 = txn;
- txn->tw.retired_pages = mdbx_pnl_alloc(MDBX_PNL_INITIAL);
- txn->tw.reclaimed_pglist = mdbx_pnl_alloc(MDBX_PNL_INITIAL);
- if (txn->tw.retired_pages && txn->tw.reclaimed_pglist)
- rc = MDBX_SUCCESS;
+ const size_t tsize = sizeof(MDBX_txn),
+ size = tsize + env->me_maxdbs *
+ (sizeof(MDBX_db) + sizeof(MDBX_cursor *) +
+ sizeof(unsigned) + 1);
+ rc = mdbx_memalign_alloc(
+ env->me_os_psize,
+ env->me_psize * (1 /* page buffer */ + 1 /* page killer bufer */),
+ &env->me_pbuf);
+ if (rc == MDBX_SUCCESS) {
+ memset(env->me_pbuf, -1, env->me_psize * 2);
+ MDBX_txn *txn = mdbx_calloc(1, size);
+ if (txn) {
+ txn->mt_dbs = (MDBX_db *)((char *)txn + tsize);
+ txn->mt_cursors = (MDBX_cursor **)(txn->mt_dbs + env->me_maxdbs);
+ txn->mt_dbiseqs = (unsigned *)(txn->mt_cursors + env->me_maxdbs);
+ txn->mt_dbflags = (uint8_t *)(txn->mt_dbiseqs + env->me_maxdbs);
+ txn->mt_env = env;
+ txn->mt_dbxs = env->me_dbxs;
+ txn->mt_flags = MDBX_TXN_FINISHED;
+ env->me_txn0 = txn;
+ txn->tw.retired_pages = mdbx_pnl_alloc(MDBX_PNL_INITIAL);
+ txn->tw.reclaimed_pglist = mdbx_pnl_alloc(MDBX_PNL_INITIAL);
+ if (!txn->tw.retired_pages || !txn->tw.reclaimed_pglist)
+ rc = MDBX_ENOMEM;
+ } else
+ rc = MDBX_ENOMEM;
}
}
@@ -9116,6 +9219,9 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
#endif
bailout:
+#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
+ mdbx_txn_valgrind(env, nullptr);
+#endif
if (rc) {
rc = mdbx_env_close0(env) ? MDBX_PANIC : rc;
env->me_flags = saved_me_flags | MDBX_FATAL_ERROR;
@@ -9153,9 +9259,15 @@ static int __cold mdbx_env_close0(MDBX_env *env) {
env->me_valgrind_handle = -1;
#endif
}
- if (env->me_fd != INVALID_HANDLE_VALUE) {
- (void)mdbx_closefile(env->me_fd);
- env->me_fd = INVALID_HANDLE_VALUE;
+
+ if (env->me_dsync_fd != INVALID_HANDLE_VALUE) {
+ (void)mdbx_closefile(env->me_dsync_fd);
+ env->me_dsync_fd = INVALID_HANDLE_VALUE;
+ }
+
+ if (env->me_lazy_fd != INVALID_HANDLE_VALUE) {
+ (void)mdbx_closefile(env->me_lazy_fd);
+ env->me_lazy_fd = INVALID_HANDLE_VALUE;
}
if (env->me_lck)
@@ -9167,11 +9279,11 @@ static int __cold mdbx_env_close0(MDBX_env *env) {
}
if (env->me_dbxs) {
- for (unsigned i = env->me_maxdbs; --i >= CORE_DBS;)
+ for (unsigned i = env->me_numdbs; --i >= CORE_DBS;)
mdbx_free(env->me_dbxs[i].md_name.iov_base);
mdbx_free(env->me_dbxs);
}
- mdbx_free(env->me_pbuf);
+ mdbx_memalign_free(env->me_pbuf);
mdbx_free(env->me_dbiseqs);
mdbx_free(env->me_dbflags);
mdbx_free(env->me_path);
@@ -9219,7 +9331,7 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) {
rc = (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
#else
struct stat st;
- if (unlikely(fstat(env->me_fd, &st)))
+ if (unlikely(fstat(env->me_lazy_fd, &st)))
rc = errno;
else if (st.st_nlink > 0 /* don't sync deleted files */) {
rc = mdbx_env_sync_ex(env, true, true);
@@ -9499,14 +9611,14 @@ __hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
MDBX_txn *txn = mc->mc_txn;
if (unlikely(pgno >= txn->mt_next_pgno)) {
mdbx_debug("page %" PRIaPGNO " not found", pgno);
- txn->mt_flags |= MDBX_TXN_ERROR;
- return MDBX_PAGE_NOTFOUND;
+ goto corrupted;
}
- MDBX_env *env = txn->mt_env;
+ MDBX_env *const env = txn->mt_env;
MDBX_page *p = NULL;
int level;
- if ((txn->mt_flags & (MDBX_RDONLY | MDBX_WRITEMAP)) == 0) {
+ mdbx_assert(env, ((txn->mt_flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
+ if (unlikely((txn->mt_flags & (MDBX_RDONLY | MDBX_WRITEMAP)) == 0)) {
level = 1;
do {
/* Spilled pages were dirtied in this txn and flushed
@@ -9514,35 +9626,45 @@ __hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
* back in from the map (but don't unspill it here,
* leave that unless page_touch happens again). */
if (txn->tw.spill_pages && mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1))
- goto mapped;
+ goto spilled;
p = mdbx_dpl_find(txn->tw.dirtylist, pgno);
if (p)
- goto done;
+ goto dirty;
level++;
} while ((txn = txn->mt_parent) != NULL);
}
level = 0;
-mapped:
+spilled:
p = pgno2page(env, pgno);
-done:
- txn = nullptr /* avoid future use */;
+dirty:
if (unlikely(p->mp_pgno != pgno)) {
mdbx_error("mismatch pgno %" PRIaPGNO " (actual) != %" PRIaPGNO
" (expected)",
p->mp_pgno, pgno);
- return MDBX_CORRUPTED;
+ goto corrupted;
+ }
+
+ if (unlikely((p->mp_flags & (P_LOOSE | P_SUBP | P_META | P_DIRTY)) != 0 ||
+ p->mp_txnid > mc->mc_txn->mt_txnid)) {
+ if (unlikely((mc->mc_txn->mt_flags & MDBX_RDONLY) != 0 ||
+ (p->mp_flags & (P_LOOSE | P_SUBP | P_META | P_DIRTY)) !=
+ P_DIRTY)) {
+ mdbx_error("invalid page's flags (0x%x) or txnid %" PRIaTXN
+ " > (actual) %" PRIaTXN " (expected)",
+ p->mp_flags, p->mp_txnid, mc->mc_txn->mt_txnid);
+ goto corrupted;
+ }
}
- if (unlikely(p->mp_upper < p->mp_lower || ((p->mp_lower | p->mp_upper) & 1) ||
- PAGEHDRSZ + p->mp_upper > env->me_psize) &&
- !IS_OVERFLOW(p)) {
+ if (unlikely(!IS_OVERFLOW(p) && (p->mp_upper < p->mp_lower ||
+ ((p->mp_lower | p->mp_upper) & 1) != 0 ||
+ PAGEHDRSZ + p->mp_upper > env->me_psize))) {
mdbx_error("invalid page lower(%u)/upper(%u), pg-limit %u", p->mp_lower,
p->mp_upper, page_space(env));
- return MDBX_CORRUPTED;
+ goto corrupted;
}
- /* TODO: more checks here, including p->mp_validator */
if (mdbx_audit_enabled()) {
int err = mdbx_page_check(env, p, true);
@@ -9554,6 +9676,10 @@ done:
if (lvl)
*lvl = level;
return MDBX_SUCCESS;
+
+corrupted:
+ txn->mt_flags |= MDBX_TXN_ERROR;
+ return MDBX_CORRUPTED;
}
/* Finish mdbx_page_search() / mdbx_page_search_lowest().
@@ -9760,8 +9886,8 @@ __hot static int mdbx_page_search(MDBX_cursor *mc, MDBX_val *key, int flags) {
* [out] data Updated to point to the node's data.
*
* Returns 0 on success, non-zero on failure. */
-static __inline int mdbx_node_read(MDBX_cursor *mc, MDBX_node *node,
- MDBX_val *data) {
+static __always_inline int mdbx_node_read(MDBX_cursor *mc, MDBX_node *node,
+ MDBX_val *data) {
data->iov_len = node_ds(node);
data->iov_base = node_data(node);
if (unlikely(F_ISSET(node_flags(node), F_BIGDATA))) {
@@ -9789,7 +9915,7 @@ int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) {
if (unlikely(!key || !data))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
MDBX_cursor_couple cx;
@@ -9813,7 +9939,7 @@ int mdbx_get_nearest(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key,
if (unlikely(!key || !data))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED))
@@ -9853,7 +9979,7 @@ int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
if (unlikely(!key || !data))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
MDBX_cursor_couple cx;
@@ -10133,8 +10259,8 @@ static int mdbx_cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
}
/* Set the cursor on a specific data item. */
-__hot static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
- MDBX_cursor_op op, int *exactp) {
+static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
+ MDBX_cursor_op op, int *exactp) {
int rc;
MDBX_page *mp;
MDBX_node *node = NULL;
@@ -10813,6 +10939,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
insert_key = insert_data = (rc != MDBX_SUCCESS);
uint16_t fp_flags = P_LEAF | P_DIRTY;
MDBX_page *fp = env->me_pbuf;
+ fp->mp_txnid = INVALID_TXNID;
if (insert_key) {
/* The key does not exist */
mdbx_debug("inserting key at index %i", mc->mc_ki[mc->mc_top]);
@@ -10831,7 +10958,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
char *ptr;
unsigned ksize = mc->mc_db->md_xsize;
- if (key->iov_len != ksize)
+ if (unlikely(key->iov_len != ksize))
return MDBX_BAD_VALSIZE;
ptr = page_leaf2key(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize);
memcpy(ptr, key->iov_base, ksize);
@@ -10893,13 +11020,14 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
/* LY: add configurable threshold to keep reserve space */
dpages) {
if (!IS_DIRTY(omp) && (level || (env->me_flags & MDBX_WRITEMAP))) {
- rc = mdbx_page_unspill(mc->mc_txn, omp, &omp);
- if (unlikely(rc))
- return rc;
+ rc2 = mdbx_page_unspill(mc->mc_txn, omp, &omp);
+ if (unlikely(rc2))
+ return rc2;
level = 0; /* dirty in this txn or clean */
}
/* Is it dirty? */
if (IS_DIRTY(omp)) {
+ mdbx_cassert(mc, omp->mp_txnid > SAFE64_INVALID_THRESHOLD);
/* yes, overwrite it. Note in this case we don't
* bother to try shrinking the page if the new data
* is smaller than the overflow threshold. */
@@ -11031,6 +11159,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
case MDBX_CURRENT | MDBX_NODUPDATA:
case MDBX_CURRENT:
fp->mp_flags |= P_DIRTY;
+ fp->mp_txnid = INVALID_TXNID;
fp->mp_pgno = mp->mp_pgno;
mc->mc_xcursor->mx_cursor.mc_pg[0] = fp;
flags |= F_DUPDATA;
@@ -11060,8 +11189,8 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
nested_dupdb.md_entries = page_numkeys(fp);
xdata.iov_len = sizeof(nested_dupdb);
xdata.iov_base = &nested_dupdb;
- if ((rc = mdbx_page_alloc(mc, 1, &mp, MDBX_ALLOC_ALL)))
- return rc;
+ if ((rc2 = mdbx_page_alloc(mc, 1, &mp, MDBX_ALLOC_ALL)))
+ return rc2;
mc->mc_db->md_leaf_pages += 1;
mdbx_cassert(mc, env->me_psize > olddata.iov_len);
offset = env->me_psize - (unsigned)olddata.iov_len;
@@ -11072,6 +11201,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
}
if (mp != fp) {
mp->mp_flags = fp_flags | P_DIRTY;
+ mp->mp_txnid = INVALID_TXNID;
mp->mp_leaf2_ksize = fp->mp_leaf2_ksize;
mp->mp_lower = fp->mp_lower;
mdbx_cassert(mc, fp->mp_upper + offset <= UINT16_MAX);
@@ -11419,6 +11549,7 @@ static int mdbx_page_new(MDBX_cursor *mc, unsigned flags, unsigned num,
mdbx_debug("allocated new page #%" PRIaPGNO ", size %u", np->mp_pgno,
mc->mc_txn->mt_env->me_psize);
np->mp_flags = (uint16_t)(flags | P_DIRTY);
+ np->mp_txnid = INVALID_TXNID;
np->mp_lower = 0;
np->mp_upper = (indx_t)(mc->mc_txn->mt_env->me_psize - PAGEHDRSZ);
@@ -11865,7 +11996,7 @@ int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) {
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_VALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_VALID)))
return MDBX_EINVAL;
if (unlikely(dbi == FREE_DBI && !F_ISSET(txn->mt_flags, MDBX_RDONLY)))
@@ -11907,7 +12038,7 @@ int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) {
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (unlikely(!TXN_DBI_EXIST(txn, mc->mc_dbi, DB_VALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, mc->mc_dbi, DB_VALID)))
return MDBX_EINVAL;
if (unlikely(mc->mc_backup))
@@ -13239,7 +13370,7 @@ int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) {
if (unlikely(!key))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
if (unlikely(txn->mt_flags & (MDBX_RDONLY | MDBX_TXN_BLOCKED)))
@@ -13445,6 +13576,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey,
}
copy->mp_pgno = mp->mp_pgno;
copy->mp_flags = mp->mp_flags;
+ copy->mp_txnid = INVALID_TXNID;
copy->mp_lower = 0;
copy->mp_upper = (indx_t)page_space(env);
@@ -13780,7 +13912,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data,
if (unlikely(!key || !data))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE |
@@ -13850,22 +13982,26 @@ static THREAD_RESULT __cold THREAD_CALL mdbx_env_copythr(void *arg) {
mdbx_copy *my = arg;
uint8_t *ptr;
int toggle = 0;
- int rc;
mdbx_condmutex_lock(&my->mc_condmutex);
while (!my->mc_error) {
- while (!my->mc_new)
- mdbx_condmutex_wait(&my->mc_condmutex);
+ while (!my->mc_new && !my->mc_error) {
+ int err = mdbx_condmutex_wait(&my->mc_condmutex);
+ if (err != MDBX_SUCCESS) {
+ my->mc_error = err;
+ goto bailout;
+ }
+ }
if (my->mc_new == 0 + MDBX_EOF) /* 0 buffers, just EOF */
break;
size_t wsize = my->mc_wlen[toggle];
ptr = my->mc_wbuf[toggle];
again:
if (wsize > 0 && !my->mc_error) {
- rc = mdbx_write(my->mc_fd, ptr, wsize);
- if (rc != MDBX_SUCCESS) {
- my->mc_error = rc;
- break;
+ int err = mdbx_write(my->mc_fd, ptr, wsize);
+ if (err != MDBX_SUCCESS) {
+ my->mc_error = err;
+ goto bailout;
}
}
@@ -13882,6 +14018,7 @@ static THREAD_RESULT __cold THREAD_CALL mdbx_env_copythr(void *arg) {
my->mc_new--;
mdbx_condmutex_signal(&my->mc_condmutex);
}
+bailout:
mdbx_condmutex_unlock(&my->mc_condmutex);
return (THREAD_RESULT)0;
}
@@ -13894,8 +14031,11 @@ static int __cold mdbx_env_cthr_toggle(mdbx_copy *my, int adjust) {
mdbx_condmutex_lock(&my->mc_condmutex);
my->mc_new += (short)adjust;
mdbx_condmutex_signal(&my->mc_condmutex);
- while (my->mc_new & 2) /* both buffers in use */
- mdbx_condmutex_wait(&my->mc_condmutex);
+ while (!my->mc_error && (my->mc_new & 2) /* both buffers in use */) {
+ int err = mdbx_condmutex_wait(&my->mc_condmutex);
+ if (err != MDBX_SUCCESS)
+ my->mc_error = err;
+ }
mdbx_condmutex_unlock(&my->mc_condmutex);
my->mc_toggle ^= (adjust & 1);
@@ -13979,6 +14119,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
memcpy(mo, omp, my->mc_env->me_psize);
mo->mp_pgno = my->mc_next_pgno;
+ mo->mp_txnid = MIN_TXNID;
my->mc_next_pgno += omp->mp_pages;
my->mc_wlen[toggle] += my->mc_env->me_psize;
if (omp->mp_pages > 1) {
@@ -14043,6 +14184,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
}
mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
mdbx_page_copy(mo, mp, my->mc_env->me_psize);
+ mo->mp_txnid = MIN_TXNID;
mo->mp_pgno = my->mc_next_pgno++;
my->mc_wlen[toggle] += my->mc_env->me_psize;
if (mc.mc_top) {
@@ -14263,10 +14405,10 @@ static int __cold mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn,
buffer + roundup_powerof2(meta_bytes, env->me_os_psize);
for (size_t offset = meta_bytes; rc == MDBX_SUCCESS && offset < used_size;) {
if (dest_is_pipe) {
-#if defined(__linux__) || defined(__gnu_linux__)
+#if defined(__linux__) || defined(__gnu_linux__) && !defined(MDBX_SAFE4QEMU)
off_t in_offset = offset;
const intptr_t written =
- sendfile(fd, env->me_fd, &in_offset, used_size - offset);
+ sendfile(fd, env->me_lazy_fd, &in_offset, used_size - offset);
if (unlikely(written <= 0)) {
rc = written ? errno : MDBX_ENODATA;
break;
@@ -14275,10 +14417,10 @@ static int __cold mdbx_env_copy_asis(MDBX_env *env, MDBX_txn *read_txn,
continue;
#endif
} else {
-#if __GLIBC_PREREQ(2, 27) && defined(_GNU_SOURCE)
+#if __GLIBC_PREREQ(2, 27) && defined(_GNU_SOURCE) && !defined(MDBX_SAFE4QEMU)
off_t in_offset = offset, out_offset = offset;
ssize_t bytes_copied = copy_file_range(
- env->me_fd, &in_offset, fd, &out_offset, used_size - offset, 0);
+ env->me_lazy_fd, &in_offset, fd, &out_offset, used_size - offset, 0);
if (unlikely(bytes_copied <= 0)) {
rc = bytes_copied ? errno : MDBX_ENODATA;
break;
@@ -14392,49 +14534,28 @@ int __cold mdbx_env_copy(MDBX_env *env, const char *dest_path, unsigned flags) {
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
return MDBX_EBADSIGN;
- char *dxb_pathname;
- mdbx_filehandle_t newfd = INVALID_HANDLE_VALUE;
-
- if (env->me_flags & MDBX_NOSUBDIR) {
- dxb_pathname = (char *)dest_path;
- } else {
- size_t len = strlen(dest_path);
- len += sizeof(MDBX_DATANAME);
- dxb_pathname = mdbx_malloc(len);
- if (!dxb_pathname)
- return MDBX_ENOMEM;
- sprintf(dxb_pathname, "%s" MDBX_DATANAME, dest_path);
- }
-
/* The destination path must exist, but the destination file must not.
* We don't want the OS to cache the writes, since the source data is
* already in the OS cache. */
- int rc = mdbx_openfile(dxb_pathname, O_WRONLY | O_CREAT | O_EXCL, 0640,
- &newfd, true);
- if (rc == MDBX_SUCCESS) {
- if (env->me_psize >= env->me_os_psize) {
-#ifdef F_NOCACHE /* __APPLE__ */
- (void)fcntl(newfd, F_NOCACHE, 1);
-#elif defined(O_DIRECT) && defined(F_GETFL)
- /* Set O_DIRECT if the file system supports it */
- if ((rc = fcntl(newfd, F_GETFL)) != -1)
- (void)fcntl(newfd, F_SETFL, rc | O_DIRECT);
+ mdbx_filehandle_t newfd;
+ int rc = mdbx_openfile(MDBX_OPEN_COPY, env, dest_path, &newfd,
+#if defined(_WIN32) || defined(_WIN64)
+ (mode_t)-1
+#else
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP
#endif
- }
+ );
+ if (rc == MDBX_SUCCESS)
rc = mdbx_env_copy2fd(env, newfd, flags);
- }
if (newfd != INVALID_HANDLE_VALUE) {
int err = mdbx_closefile(newfd);
if (rc == MDBX_SUCCESS && err != rc)
rc = err;
if (rc != MDBX_SUCCESS)
- (void)mdbx_removefile(dxb_pathname);
+ (void)mdbx_removefile(dest_path);
}
- if (dxb_pathname != dest_path)
- mdbx_free(dxb_pathname);
-
return rc;
}
@@ -14529,7 +14650,7 @@ int __cold mdbx_env_get_fd(MDBX_env *env, mdbx_filehandle_t *arg) {
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
return MDBX_EBADSIGN;
- *arg = env->me_fd;
+ *arg = env->me_lazy_fd;
return MDBX_SUCCESS;
}
@@ -14977,7 +15098,7 @@ int __cold mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest,
if (unlikely(!dest))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_VALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_VALID)))
return MDBX_EINVAL;
const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid);
@@ -14997,8 +15118,9 @@ int __cold mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest,
}
static int mdbx_dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) {
- if (unlikely(dbi < CORE_DBS || dbi >= env->me_maxdbs))
- return MDBX_EINVAL;
+ mdbx_assert(env, dbi >= CORE_DBS);
+ if (unlikely(dbi >= env->me_numdbs))
+ return MDBX_BAD_DBI;
char *ptr = env->me_dbxs[dbi].md_name.iov_base;
/* If there was no name, this was already closed */
@@ -15041,7 +15163,7 @@ int mdbx_dbi_flags_ex(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags,
if (unlikely(!flags || !state))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_VALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_VALID)))
return MDBX_EINVAL;
*flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS;
@@ -15155,7 +15277,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del) {
if (unlikely(1 < (unsigned)del))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
if (unlikely(TXN_DBI_CHANGED(txn, dbi)))
@@ -15166,7 +15288,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del) {
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) {
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID))) {
rc = MDBX_EINVAL;
goto bailout;
}
@@ -15223,7 +15345,7 @@ int mdbx_set_compare(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cmp_func *cmp) {
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
txn->mt_dbxs[dbi].md_cmp = cmp;
@@ -15235,7 +15357,7 @@ int mdbx_set_dupsort(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cmp_func *cmp) {
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
txn->mt_dbxs[dbi].md_dcmp = cmp;
@@ -15475,36 +15597,6 @@ int __cold mdbx_setup_debug(int loglevel, int flags, MDBX_debug_func *logger) {
flags &= MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER |
MDBX_DBG_DUMP | MDBX_DBG_LEGACY_MULTIOPEN;
#endif
-#if defined(__linux__) || defined(__gnu_linux__)
- if ((mdbx_runtime_flags ^ flags) & MDBX_DBG_DUMP) {
- /* http://man7.org/linux/man-pages/man5/core.5.html */
- const unsigned long dump_bits =
- 1 << 3 /* Dump file-backed shared mappings */
- | 1 << 6 /* Dump shared huge pages */
- | 1 << 8 /* Dump shared DAX pages */;
- const int core_filter_fd =
- open("/proc/self/coredump_filter", O_TRUNC | O_RDWR);
- if (core_filter_fd != -1) {
- char buf[32];
- intptr_t bytes = pread(core_filter_fd, buf, sizeof(buf), 0);
- if (bytes > 0 && (size_t)bytes < sizeof(buf)) {
- buf[bytes] = 0;
- const unsigned long present_mask = strtoul(buf, NULL, 16);
- const unsigned long wanna_mask = (flags & MDBX_DBG_DUMP)
- ? present_mask | dump_bits
- : present_mask & ~dump_bits;
- if (wanna_mask != present_mask) {
- bytes = snprintf(buf, sizeof(buf), "0x%lx\n", wanna_mask);
- if (bytes > 0 && (size_t)bytes < sizeof(buf)) {
- bytes = pwrite(core_filter_fd, buf, bytes, 0);
- (void)bytes;
- }
- }
- }
- close(core_filter_fd);
- }
- }
-#endif /* Linux */
mdbx_runtime_flags = (uint8_t)flags;
}
@@ -16204,9 +16296,8 @@ __hot static ptrdiff_t estimate(const MDBX_db *db,
}
}
-__hot int mdbx_estimate_distance(const MDBX_cursor *first,
- const MDBX_cursor *last,
- ptrdiff_t *distance_items) {
+int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last,
+ ptrdiff_t *distance_items) {
if (unlikely(first == NULL || last == NULL || distance_items == NULL))
return MDBX_EINVAL;
@@ -16310,7 +16401,7 @@ int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key,
if (unlikely(begin_key == MDBX_EPSILON && end_key == MDBX_EPSILON))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
MDBX_cursor_couple begin;
@@ -16482,7 +16573,7 @@ int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *new_data,
if (unlikely(new_data == NULL && !(flags & MDBX_CURRENT)))
return MDBX_EINVAL;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_RESERVE |
@@ -16677,7 +16768,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result,
if (unlikely(rc != MDBX_SUCCESS))
return rc;
- if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID)))
+ if (unlikely(!mdbx_txn_dbi_exists(txn, dbi, DB_USRVALID)))
return MDBX_EINVAL;
if (unlikely(TXN_DBI_CHANGED(txn, dbi)))
@@ -17024,6 +17115,9 @@ __dll_export
#ifdef MDBX_USE_VALGRIND
" MDBX_USE_VALGRIND=YES"
#endif /* MDBX_USE_VALGRIND */
+#ifdef MDBX_FORCE_ASSERTIONS
+ " MDBX_FORCE_ASSERTIONS=YES"
+#endif /* MDBX_FORCE_ASSERTIONS */
#ifdef _GNU_SOURCE
" _GNU_SOURCE=YES"
#else
diff --git a/libs/libmdbx/src/src/elements/internals.h b/libs/libmdbx/src/src/elements/internals.h
index 6a39460ab2..6c6a9d4ac6 100644
--- a/libs/libmdbx/src/src/elements/internals.h
+++ b/libs/libmdbx/src/src/elements/internals.h
@@ -211,6 +211,7 @@ typedef uint32_t pgno_t;
typedef uint64_t txnid_t;
#define PRIaTXN PRIi64
#define MIN_TXNID UINT64_C(1)
+#define INVALID_TXNID UINT64_MAX
/* LY: for testing non-atomic 64-bit txnid on 32-bit arches.
* #define MDBX_TXNID_STEP (UINT32_MAX / 3) */
#ifndef MDBX_TXNID_STEP
@@ -256,7 +257,7 @@ typedef union mdbx_safe64 {
typedef struct MDBX_db {
uint16_t md_flags; /* see mdbx_dbi_open */
uint16_t md_depth; /* depth of this tree */
- uint32_t md_xsize; /* also ksize for LEAF2 pages */
+ uint32_t md_xsize; /* key-size for MDBX_DUPFIXED (LEAF2 pages) */
pgno_t md_root; /* the root page of this tree */
pgno_t md_branch_pages; /* number of internal pages */
pgno_t md_leaf_pages; /* number of leaf pages */
@@ -347,8 +348,7 @@ typedef struct MDBX_meta {
typedef struct MDBX_page {
union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
- uint64_t mp_validator; /* checksum of page content or a txnid during
- * which the page has been updated */
+ uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
};
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */
@@ -703,7 +703,8 @@ struct MDBX_txn {
/* Transaction Flags */
/* mdbx_txn_begin() flags */
#define MDBX_TXN_BEGIN_FLAGS \
- (MDBX_NOMETASYNC | MDBX_NOSYNC | MDBX_MAPASYNC | MDBX_RDONLY | MDBX_TRYTXN)
+ (MDBX_NOMETASYNC | MDBX_SAFE_NOSYNC | MDBX_MAPASYNC | MDBX_RDONLY | \
+ MDBX_TRYTXN)
/* internal txn flags */
#define MDBX_TXN_FINISHED 0x01 /* txn is finished or never began */
#define MDBX_TXN_ERROR 0x02 /* txn is unusable after an error */
@@ -864,7 +865,8 @@ struct MDBX_env {
size_t me_signature;
mdbx_mmap_t me_dxb_mmap; /* The main data file */
#define me_map me_dxb_mmap.dxb
-#define me_fd me_dxb_mmap.fd
+#define me_lazy_fd me_dxb_mmap.fd
+ mdbx_filehandle_t me_dsync_fd;
mdbx_mmap_t me_lck_mmap; /* The lock file */
#define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck
@@ -947,7 +949,7 @@ struct MDBX_env {
#endif
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
pgno_t me_poison_edge;
-#endif
+#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
MDBX_env *me_lcklist_next;
/* struct me_dbgeo used for accepting db-geo params from user for the new
@@ -1008,7 +1010,7 @@ MDBX_INTERNAL_FUNC void mdbx_panic(const char *fmt, ...) __printf_args(1, 2);
#define mdbx_audit_enabled() (0)
-#if !defined(NDEBUG) || defined(MDBX_FORCE_ASSERT)
+#if !defined(NDEBUG) || defined(MDBX_FORCE_ASSERTIONS)
#define mdbx_assert_enabled() (1)
#else
#define mdbx_assert_enabled() (0)
diff --git a/libs/libmdbx/src/src/elements/lck-posix.c b/libs/libmdbx/src/src/elements/lck-posix.c
index ce2c0c3f23..84652d7adf 100644
--- a/libs/libmdbx/src/src/elements/lck-posix.c
+++ b/libs/libmdbx/src/src/elements/lck-posix.c
@@ -220,8 +220,67 @@ MDBX_INTERNAL_FUNC int mdbx_ipclock_destroy(mdbx_ipclock_t *ipc) {
}
#endif /* MDBX_LOCKING > MDBX_LOCKING_SYSV */
+static int check_fstat(MDBX_env *env) {
+ struct stat st;
+ /* fstat()-based sanity check of the DXB file (me_lazy_fd), then the LCK file (me_lfd). */
+ int rc = MDBX_SUCCESS; /* becomes MDBX_RESULT_TRUE if a file is too short, or an error code */
+ if (fstat(env->me_lazy_fd, &st)) {
+ rc = errno;
+ mdbx_error("fstat(%s), err %d", "DXB", rc);
+ return rc;
+ }
+ /* Only a regular file that still has at least one link is acceptable. */
+ if (!S_ISREG(st.st_mode) || st.st_nlink < 1) {
+#ifdef EBADFD
+ rc = EBADFD;
+#else
+ rc = EPERM;
+#endif
+ mdbx_error("%s %s, err %d", "DXB",
+ (st.st_nlink < 1) ? "file was removed" : "not a regular file",
+ rc);
+ return rc;
+ }
+ /* A too-short DXB file is only flagged (MDBX_RESULT_TRUE); the LCK checks below still run. */
+ if (st.st_size < (off_t)(MDBX_MIN_PAGESIZE * NUM_METAS)) {
+ mdbx_verbose("dxb-file is too short (%u), exclusive-lock needed",
+ (unsigned)st.st_size);
+ rc = MDBX_RESULT_TRUE;
+ }
+
+ //----------------------------------------------------------------------------
+ /* Same sequence for the LCK file; note st is reused and an error here replaces rc. */
+ if (fstat(env->me_lfd, &st)) {
+ rc = errno;
+ mdbx_error("fstat(%s), err %d", "LCK", rc);
+ return rc;
+ }
+ /* Same regular-file / link-count requirement as for the DXB file. */
+ if (!S_ISREG(st.st_mode) || st.st_nlink < 1) {
+#ifdef EBADFD
+ rc = EBADFD;
+#else
+ rc = EPERM;
+#endif
+ mdbx_error("%s %s, err %d", "LCK",
+ (st.st_nlink < 1) ? "file was removed" : "not a regular file",
+ rc);
+ return rc;
+ }
+
+ /* Checking file size for detect the situation when we got the shared lock
+ * immediately after mdbx_lck_destroy(). */
+ if (st.st_size < (off_t)(sizeof(MDBX_lockinfo) + sizeof(MDBX_reader))) {
+ mdbx_verbose("lck-file is too short (%u), exclusive-lock needed",
+ (unsigned)st.st_size);
+ rc = MDBX_RESULT_TRUE;
+ }
+ /* MDBX_SUCCESS, or MDBX_RESULT_TRUE when either size check above fired. */
+ return rc;
+}
+
MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
- assert(env->me_fd != INVALID_HANDLE_VALUE);
+ assert(env->me_lazy_fd != INVALID_HANDLE_VALUE);
if (unlikely(mdbx_getpid() != env->me_pid))
return MDBX_PANIC;
#if MDBX_USE_OFDLOCKS
@@ -229,43 +288,68 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
choice_fcntl();
#endif /* MDBX_USE_OFDLOCKS */
- int rc;
+ int rc = MDBX_SUCCESS;
if (env->me_lfd == INVALID_HANDLE_VALUE) {
/* LY: without-lck mode (e.g. exclusive or on read-only filesystem) */
rc =
- lck_op(env->me_fd, op_setlk,
+ lck_op(env->me_lazy_fd, op_setlk,
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX);
if (rc != MDBX_SUCCESS) {
- mdbx_error("%s(%s) failed: errcode %u", __func__, "without-lck", rc);
+ mdbx_error("%s, err %u", "without-lck", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */;
}
+#if defined(_POSIX_PRIORITY_SCHEDULING) && _POSIX_PRIORITY_SCHEDULING > 0
+ sched_yield();
+#endif
+
+retry:
+ if (rc == MDBX_RESULT_TRUE) {
+ rc = lck_op(env->me_lfd, op_setlk, F_UNLCK, 0, 1);
+ if (rc != MDBX_SUCCESS) {
+ mdbx_error("%s, err %u", "unlock-before-retry", rc);
+ mdbx_assert(env, MDBX_IS_ERROR(rc));
+ return rc;
+ }
+ }
-retry_exclusive:
/* Firstly try to get exclusive locking. */
rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1);
if (rc == MDBX_SUCCESS) {
+ rc = check_fstat(env);
+ if (MDBX_IS_ERROR(rc))
+ return rc;
+
continue_dxb_exclusive:
rc =
- lck_op(env->me_fd, op_setlk,
+ lck_op(env->me_lazy_fd, op_setlk,
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX);
if (rc == MDBX_SUCCESS)
return MDBX_RESULT_TRUE /* Done: return with exclusive locking. */;
+ int err = check_fstat(env);
+ if (MDBX_IS_ERROR(err))
+ return err;
+
/* the cause may be a collision with POSIX's file-lock recovery. */
if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
rc == EDEADLK)) {
- mdbx_error("%s(%s) failed: errcode %u", __func__, "dxb-exclusive", rc);
+ mdbx_error("%s, err %u", "dxb-exclusive", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
/* Fallback to lck-shared */
+ } else if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY ||
+ rc == EWOULDBLOCK || rc == EDEADLK)) {
+ mdbx_error("%s, err %u", "try-exclusive", rc);
+ mdbx_assert(env, MDBX_IS_ERROR(rc));
+ return rc;
}
- /* Here could be one of two::
+ /* Here could be one of two:
* - mdbx_lck_destroy() from the another process was hold the lock
* during a destruction.
* - either mdbx_lck_seize() from the another process was got the exclusive
@@ -277,11 +361,19 @@ retry_exclusive:
* competing process doesn't call lck_downgrade(). */
rc = lck_op(env->me_lfd, op_setlkw, F_RDLCK, 0, 1);
if (rc != MDBX_SUCCESS) {
- mdbx_error("%s(%s) failed: errcode %u", __func__, "try-shared", rc);
+ mdbx_error("%s, err %u", "try-shared", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
+ rc = check_fstat(env);
+ if (rc == MDBX_RESULT_TRUE)
+ goto retry;
+ if (rc != MDBX_SUCCESS) {
+ mdbx_error("%s, err %u", "lck_fstat", rc);
+ return rc;
+ }
+
/* got shared, retry exclusive */
rc = lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, 1);
if (rc == MDBX_SUCCESS)
@@ -289,39 +381,17 @@ retry_exclusive:
if (!(rc == EAGAIN || rc == EACCES || rc == EBUSY || rc == EWOULDBLOCK ||
rc == EDEADLK)) {
- mdbx_error("%s(%s) failed: errcode %u", __func__, "try-exclusive", rc);
+ mdbx_error("%s, err %u", "try-exclusive", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
- /* Checking file size for detect the situation when we got the shared lock
- * immediately after mdbx_lck_destroy(). */
- struct stat st;
- if (fstat(env->me_lfd, &st)) {
- rc = errno;
- mdbx_error("%s(%s) failed: errcode %u", __func__, "check-filesize", rc);
- mdbx_assert(env, MDBX_IS_ERROR(rc));
- return rc;
- }
- if (st.st_size < (unsigned)(sizeof(MDBX_lockinfo) + sizeof(MDBX_reader))) {
- mdbx_verbose("lck-file is too short (%u), retry exclusive-lock",
- (unsigned)st.st_size);
- rc = lck_op(env->me_lfd, op_setlk, F_UNLCK, 0, 1);
- if (rc != MDBX_SUCCESS) {
- mdbx_error("%s(%s) failed: errcode %u", __func__, "retry-exclusive", rc);
- mdbx_assert(env, MDBX_IS_ERROR(rc));
- return rc;
- }
- goto retry_exclusive;
- }
-
/* Lock against another process operating in without-lck or exclusive mode. */
rc =
- lck_op(env->me_fd, op_setlk,
+ lck_op(env->me_lazy_fd, op_setlk,
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, env->me_pid, 1);
if (rc != MDBX_SUCCESS) {
- mdbx_error("%s(%s) failed: errcode %u", __func__,
- "lock-against-without-lck", rc);
+ mdbx_error("%s, err %u", "lock-against-without-lck", rc);
mdbx_assert(env, MDBX_IS_ERROR(rc));
return rc;
}
@@ -337,15 +407,15 @@ MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
int rc = MDBX_SUCCESS;
if ((env->me_flags & MDBX_EXCLUSIVE) == 0) {
- rc = lck_op(env->me_fd, op_setlk, F_UNLCK, 0, env->me_pid);
+ rc = lck_op(env->me_lazy_fd, op_setlk, F_UNLCK, 0, env->me_pid);
if (rc == MDBX_SUCCESS)
- rc = lck_op(env->me_fd, op_setlk, F_UNLCK, env->me_pid + 1,
+ rc = lck_op(env->me_lazy_fd, op_setlk, F_UNLCK, env->me_pid + 1,
OFF_T_MAX - env->me_pid - 1);
}
if (rc == MDBX_SUCCESS)
rc = lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1);
if (unlikely(rc != 0)) {
- mdbx_error("%s(%s) failed: errcode %u", __func__, "lck", rc);
+ mdbx_error("%s, err %u", "lck", rc);
assert(MDBX_IS_ERROR(rc));
}
return rc;
@@ -357,15 +427,18 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
return MDBX_PANIC;
int rc = MDBX_SUCCESS;
+ struct stat lck_info;
if (env->me_lfd != INVALID_HANDLE_VALUE && !inprocess_neighbor &&
env->me_lck &&
/* try get exclusive access */
lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 &&
- lck_op(env->me_fd, op_setlk,
+ /* if LCK was not removed */
+ fstat(env->me_lfd, &lck_info) == 0 && lck_info.st_nlink > 0 &&
+ lck_op(env->me_lazy_fd, op_setlk,
(env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0,
OFF_T_MAX) == 0) {
- mdbx_verbose("%s: got exclusive, drown locks", __func__);
+ mdbx_verbose("%p got exclusive, drown locks", (void *)env);
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
if (env->me_sysv_ipc.semid != -1)
rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? errno : 0;
@@ -392,14 +465,19 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
* locks should be released here explicitly with properly order. */
/* close dxb and restore lock */
- if (env->me_fd != INVALID_HANDLE_VALUE) {
- if (unlikely(close(env->me_fd) != 0) && rc == MDBX_SUCCESS)
+ if (env->me_dsync_fd != INVALID_HANDLE_VALUE) {
+ if (unlikely(close(env->me_dsync_fd) != 0) && rc == MDBX_SUCCESS)
+ rc = errno;
+ env->me_dsync_fd = INVALID_HANDLE_VALUE;
+ }
+ if (env->me_lazy_fd != INVALID_HANDLE_VALUE) {
+ if (unlikely(close(env->me_lazy_fd) != 0) && rc == MDBX_SUCCESS)
rc = errno;
- env->me_fd = INVALID_HANDLE_VALUE;
+ env->me_lazy_fd = INVALID_HANDLE_VALUE;
if (op_setlk == F_SETLK && inprocess_neighbor && rc == MDBX_SUCCESS) {
/* restore file-lock */
rc = lck_op(
- inprocess_neighbor->me_fd, F_SETLKW,
+ inprocess_neighbor->me_lazy_fd, F_SETLKW,
(inprocess_neighbor->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK,
(inprocess_neighbor->me_flags & MDBX_EXCLUSIVE)
? 0
@@ -439,7 +517,7 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_init(MDBX_env *env,
int semid = -1;
if (global_uniqueness_flag) {
struct stat st;
- if (fstat(env->me_fd, &st))
+ if (fstat(env->me_lazy_fd, &st))
return errno;
sysv_retry_create:
semid = semget(env->me_sysv_ipc.key, 2,
@@ -705,7 +783,7 @@ MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_rlock);
mdbx_trace("<< rc %d", rc);
if (unlikely(rc != MDBX_SUCCESS))
- mdbx_panic("%s() failed: errcode %d\n", __func__, rc);
+ mdbx_panic("%s() failed: err %d\n", __func__, rc);
mdbx_jitter4testing(true);
}
@@ -722,6 +800,6 @@ void mdbx_txn_unlock(MDBX_env *env) {
int rc = mdbx_ipclock_unlock(env, env->me_wlock);
mdbx_trace("<< rc %d", rc);
if (unlikely(rc != MDBX_SUCCESS))
- mdbx_panic("%s() failed: errcode %d\n", __func__, rc);
+ mdbx_panic("%s() failed: err %d\n", __func__, rc);
mdbx_jitter4testing(true);
}
diff --git a/libs/libmdbx/src/src/elements/lck-windows.c b/libs/libmdbx/src/src/elements/lck-windows.c
index 5d74bb8ccd..56d77f5ebf 100644
--- a/libs/libmdbx/src/src/elements/lck-windows.c
+++ b/libs/libmdbx/src/src/elements/lck-windows.c
@@ -151,7 +151,7 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
}
if ((env->me_flags & MDBX_EXCLUSIVE) ||
- flock(env->me_fd,
+ flock(env->me_lazy_fd,
dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT)
: (LCK_EXCLUSIVE | LCK_WAITFOR),
LCK_BODY))
@@ -162,11 +162,12 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
}
void mdbx_txn_unlock(MDBX_env *env) {
- int rc =
- (env->me_flags & MDBX_EXCLUSIVE) ? TRUE : funlock(env->me_fd, LCK_BODY);
+ int rc = (env->me_flags & MDBX_EXCLUSIVE) /* exclusive mode: mdbx_txn_lock() skips flock(), so funlock() is skipped here too */
+ ? TRUE /* nothing to release counts as success */
+ : funlock(env->me_lazy_fd, LCK_BODY); /* release the LCK_BODY range held on me_lazy_fd */
LeaveCriticalSection(&env->me_windowsbug_lock);
if (!rc)
- mdbx_panic("%s failed: errcode %u", __func__, GetLastError());
+ mdbx_panic("%s failed: err %u", __func__, GetLastError()); /* a failed unlock is escalated to mdbx_panic() with the Win32 error code */
}
/*----------------------------------------------------------------------------*/
@@ -200,7 +201,7 @@ MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
/* transite from S-E (locked) to S-? (used), e.g. unlock upper-part */
if ((env->me_flags & MDBX_EXCLUSIVE) == 0 &&
!funlock(env->me_lfd, LCK_UPPER))
- mdbx_panic("%s failed: errcode %u", __func__, GetLastError());
+ mdbx_panic("%s failed: err %u", __func__, GetLastError());
}
mdbx_srwlock_ReleaseShared(&env->me_remap_guard);
}
@@ -385,24 +386,24 @@ static void lck_unlock(MDBX_env *env) {
SetLastError(ERROR_SUCCESS);
}
- if (env->me_fd != INVALID_HANDLE_VALUE) {
+ if (env->me_lazy_fd != INVALID_HANDLE_VALUE) {
/* explicitly unlock to avoid latency for other processes (windows kernel
* releases such locks via deferred queues) */
- while (funlock(env->me_fd, LCK_BODY))
+ while (funlock(env->me_lazy_fd, LCK_BODY))
;
rc = GetLastError();
assert(rc == ERROR_NOT_LOCKED);
(void)rc;
SetLastError(ERROR_SUCCESS);
- while (funlock(env->me_fd, LCK_META))
+ while (funlock(env->me_lazy_fd, LCK_META))
;
rc = GetLastError();
assert(rc == ERROR_NOT_LOCKED);
(void)rc;
SetLastError(ERROR_SUCCESS);
- while (funlock(env->me_fd, LCK_WHOLE))
+ while (funlock(env->me_lazy_fd, LCK_WHOLE))
;
rc = GetLastError();
assert(rc == ERROR_NOT_LOCKED);
@@ -446,8 +447,7 @@ static int internal_seize_lck(HANDLE lfd) {
mdbx_jitter4testing(false);
if (!flock(lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) {
rc = GetLastError() /* 2) something went wrong, give up */;
- mdbx_error("%s(%s) failed: errcode %u", __func__,
- "?-?(free) >> ?-E(middle)", rc);
+ mdbx_error("%s, err %u", "?-?(free) >> ?-E(middle)", rc);
return rc;
}
@@ -462,8 +462,8 @@ static int internal_seize_lck(HANDLE lfd) {
if (rc != ERROR_SHARING_VIOLATION && rc != ERROR_LOCK_VIOLATION) {
/* 6) something went wrong, give up */
if (!funlock(lfd, LCK_UPPER))
- mdbx_panic("%s(%s) failed: errcode %u", __func__,
- "?-E(middle) >> ?-?(free)", GetLastError());
+ mdbx_panic("%s(%s) failed: err %u", __func__, "?-E(middle) >> ?-?(free)",
+ GetLastError());
return rc;
}
@@ -474,13 +474,12 @@ static int internal_seize_lck(HANDLE lfd) {
mdbx_jitter4testing(false);
if (rc != MDBX_RESULT_FALSE)
- mdbx_error("%s(%s) failed: errcode %u", __func__,
- "?-E(middle) >> S-E(locked)", rc);
+ mdbx_error("%s, err %u", "?-E(middle) >> S-E(locked)", rc);
/* 8) now on S-E (locked) or still on ?-E (middle),
* transite to S-? (used) or ?-? (free) */
if (!funlock(lfd, LCK_UPPER))
- mdbx_panic("%s(%s) failed: errcode %u", __func__,
+ mdbx_panic("%s(%s) failed: err %u", __func__,
"X-E(locked/middle) >> X-?(used/free)", GetLastError());
/* 9) now on S-? (used, DONE) or ?-? (free, FAILURE) */
@@ -490,7 +489,7 @@ static int internal_seize_lck(HANDLE lfd) {
MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
int rc;
- assert(env->me_fd != INVALID_HANDLE_VALUE);
+ assert(env->me_lazy_fd != INVALID_HANDLE_VALUE);
if (env->me_flags & MDBX_EXCLUSIVE)
return MDBX_RESULT_TRUE /* nope since files were must be opened
non-shareable */
@@ -499,9 +498,9 @@ MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
if (env->me_lfd == INVALID_HANDLE_VALUE) {
/* LY: without-lck mode (e.g. on read-only filesystem) */
mdbx_jitter4testing(false);
- if (!flock(env->me_fd, LCK_SHARED | LCK_DONTWAIT, LCK_WHOLE)) {
+ if (!flock(env->me_lazy_fd, LCK_SHARED | LCK_DONTWAIT, LCK_WHOLE)) {
rc = GetLastError();
- mdbx_error("%s(%s) failed: errcode %u", __func__, "without-lck", rc);
+ mdbx_error("%s, err %u", "without-lck", rc);
return rc;
}
return MDBX_RESULT_FALSE;
@@ -516,16 +515,15 @@ MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
* - we need an exclusive lock for do so;
* - we can't lock meta-pages, otherwise other process could get an error
* while opening db in valid (non-conflict) mode. */
- if (!flock(env->me_fd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_BODY)) {
+ if (!flock(env->me_lazy_fd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_BODY)) {
rc = GetLastError();
- mdbx_error("%s(%s) failed: errcode %u", __func__,
- "lock-against-without-lck", rc);
+ mdbx_error("%s, err %u", "lock-against-without-lck", rc);
mdbx_jitter4testing(false);
lck_unlock(env);
} else {
mdbx_jitter4testing(false);
- if (!funlock(env->me_fd, LCK_BODY))
- mdbx_panic("%s(%s) failed: errcode %u", __func__,
+ if (!funlock(env->me_lazy_fd, LCK_BODY))
+ mdbx_panic("%s(%s) failed: err %u", __func__,
"unlock-against-without-lck", GetLastError());
}
}
@@ -535,7 +533,7 @@ MDBX_INTERNAL_FUNC int mdbx_lck_seize(MDBX_env *env) {
MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
/* Transite from exclusive state (E-?) to used (S-?) */
- assert(env->me_fd != INVALID_HANDLE_VALUE);
+ assert(env->me_lazy_fd != INVALID_HANDLE_VALUE);
assert(env->me_lfd != INVALID_HANDLE_VALUE);
#if 1
@@ -547,7 +545,7 @@ MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
if (env->me_flags & MDBX_EXCLUSIVE) {
/* transite from E-E to E_? (exclusive-read) */
if (!funlock(env->me_lfd, LCK_UPPER))
- mdbx_panic("%s(%s) failed: errcode %u", __func__,
+ mdbx_panic("%s(%s) failed: err %u", __func__,
"E-E(exclusive-write) >> E-?(exclusive-read)", GetLastError());
return MDBX_SUCCESS /* 2) now at E-? (exclusive-read), done */;
}
@@ -555,21 +553,20 @@ MDBX_INTERNAL_FUNC int mdbx_lck_downgrade(MDBX_env *env) {
/* 3) now at E-E (exclusive-write), transite to ?_E (middle) */
if (!funlock(env->me_lfd, LCK_LOWER))
- mdbx_panic("%s(%s) failed: errcode %u", __func__,
+ mdbx_panic("%s(%s) failed: err %u", __func__,
"E-E(exclusive-write) >> ?-E(middle)", GetLastError());
/* 4) now at ?-E (middle), transite to S-E (locked) */
if (!flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER)) {
int rc = GetLastError() /* 5) something went wrong, give up */;
- mdbx_error("%s(%s) failed: errcode %u", __func__,
- "?-E(middle) >> S-E(locked)", rc);
+ mdbx_error("%s, err %u", "?-E(middle) >> S-E(locked)", rc);
return rc;
}
/* 6) got S-E (locked), continue transition to S-? (used) */
if (!funlock(env->me_lfd, LCK_UPPER))
- mdbx_panic("%s(%s) failed: errcode %u", __func__,
- "S-E(locked) >> S-?(used)", GetLastError());
+ mdbx_panic("%s(%s) failed: err %u", __func__, "S-E(locked) >> S-?(used)",
+ GetLastError());
return MDBX_SUCCESS /* 7) now at S-? (used), done */;
}
@@ -613,6 +610,10 @@ MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, uint32_t pid) {
case WAIT_OBJECT_0:
/* process just exited */
return MDBX_RESULT_FALSE;
+ case ERROR_ACCESS_DENIED:
+ /* The ERROR_ACCESS_DENIED would be returned for CSRSS-processes, etc.
+ * assume pid exists */
+ return MDBX_RESULT_TRUE;
case WAIT_TIMEOUT:
/* pid running */
return MDBX_RESULT_TRUE;
diff --git a/libs/libmdbx/src/src/elements/osal.c b/libs/libmdbx/src/src/elements/osal.c
index ae99fa05bb..118e57a191 100644
--- a/libs/libmdbx/src/src/elements/osal.c
+++ b/libs/libmdbx/src/src/elements/osal.c
@@ -371,11 +371,11 @@ MDBX_INTERNAL_FUNC int mdbx_condmutex_init(mdbx_condmutex_t *condmutex) {
#if defined(_WIN32) || defined(_WIN64)
int rc = MDBX_SUCCESS;
condmutex->event = NULL;
- condmutex->mutex = CreateMutex(NULL, FALSE, NULL);
+ condmutex->mutex = CreateMutexW(NULL, FALSE, NULL);
if (!condmutex->mutex)
return GetLastError();
- condmutex->event = CreateEvent(NULL, FALSE, FALSE, NULL);
+ condmutex->event = CreateEventW(NULL, TRUE, FALSE, NULL);
if (!condmutex->event) {
rc = GetLastError();
(void)CloseHandle(condmutex->mutex);
@@ -459,8 +459,11 @@ MDBX_INTERNAL_FUNC int mdbx_condmutex_wait(mdbx_condmutex_t *condmutex) {
#if defined(_WIN32) || defined(_WIN64)
DWORD code =
SignalObjectAndWait(condmutex->mutex, condmutex->event, INFINITE, FALSE);
- if (code == WAIT_OBJECT_0)
+ if (code == WAIT_OBJECT_0) {
code = WaitForSingleObject(condmutex->mutex, INFINITE);
+ if (code == WAIT_OBJECT_0)
+ return ResetEvent(condmutex->event) ? MDBX_SUCCESS : GetLastError();
+ }
return waitstatus2errcode(code);
#else
return pthread_cond_wait(&condmutex->cond, &condmutex->mutex);
@@ -509,108 +512,157 @@ MDBX_INTERNAL_FUNC int mdbx_fastmutex_release(mdbx_fastmutex_t *fastmutex) {
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname) {
#if defined(_WIN32) || defined(_WIN64)
- return DeleteFileA(pathname) ? MDBX_SUCCESS : GetLastError();
+ const size_t wlen = mbstowcs(nullptr, pathname, INT_MAX); /* first pass with a null destination only measures the wide-char length */
+ if (wlen < 1 || wlen > /* MAX_PATH */ INT16_MAX) /* rejects empty names and also the (size_t)-1 conversion-error result */
+ return ERROR_INVALID_NAME;
+ wchar_t *const pathnameW = _alloca((wlen + 1) * sizeof(wchar_t)); /* stack buffer sized for wlen characters plus the terminator */
+ if (wlen != mbstowcs(pathnameW, pathname, wlen + 1)) /* second pass performs the conversion; a different length is treated as an invalid name */
+ return ERROR_INVALID_NAME;
+ return DeleteFileW(pathnameW) ? MDBX_SUCCESS : GetLastError(); /* wide-char API replaces DeleteFileA; returns MDBX_SUCCESS or the Win32 error code */
#else
return unlink(pathname) ? errno : MDBX_SUCCESS;
#endif
}
-MDBX_INTERNAL_FUNC int mdbx_openfile(const char *pathname, int flags,
- mode_t mode, mdbx_filehandle_t *fd,
- bool exclusive) {
+MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose, /* Opens pathname with access/creation flags derived from purpose; stores the handle in *fd and returns MDBX_SUCCESS or an OS error code (errno / GetLastError value). */
+ const MDBX_env *env, const char *pathname, /* env supplies me_flags (MDBX_EXCLUSIVE) and the me_psize / me_os_psize pair */
+ mdbx_filehandle_t *fd, /* out: set to INVALID_HANDLE_VALUE on entry and on the Windows failure paths */
+ mode_t unix_mode_bits) { /* passed to open() as the mode; a non-zero value also requests creation (O_CREAT / OPEN_ALWAYS) */
*fd = INVALID_HANDLE_VALUE;
+
#if defined(_WIN32) || defined(_WIN64)
- (void)mode;
- size_t wlen = mbstowcs(nullptr, pathname, INT_MAX);
+ const size_t wlen = mbstowcs(nullptr, pathname, INT_MAX); /* first pass with a null destination only measures the wide-char length */
if (wlen < 1 || wlen > /* MAX_PATH */ INT16_MAX)
return ERROR_INVALID_NAME;
wchar_t *const pathnameW = _alloca((wlen + 1) * sizeof(wchar_t));
if (wlen != mbstowcs(pathnameW, pathname, wlen + 1))
return ERROR_INVALID_NAME;
- DWORD DesiredAccess, ShareMode;
- DWORD FlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
- switch (flags & (O_RDONLY | O_WRONLY | O_RDWR)) {
+ DWORD CreationDisposition = unix_mode_bits ? OPEN_ALWAYS : OPEN_EXISTING; /* default; several purposes below override it */
+ DWORD FlagsAndAttributes =
+ FILE_FLAG_POSIX_SEMANTICS | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED; /* common to every purpose */
+ DWORD DesiredAccess = FILE_READ_ATTRIBUTES; /* base access; each purpose ORs in what it needs */
+ DWORD ShareMode = (env->me_flags & MDBX_EXCLUSIVE) /* exclusive environments start with no sharing at all */
+ ? 0
+ : (FILE_SHARE_READ | FILE_SHARE_WRITE);
+
+ switch (purpose) { /* the purpose replaces the former O_* flag decoding */
default:
return ERROR_INVALID_PARAMETER;
- case O_RDONLY:
- DesiredAccess = GENERIC_READ;
- ShareMode =
- exclusive ? FILE_SHARE_READ : (FILE_SHARE_READ | FILE_SHARE_WRITE);
+ case MDBX_OPEN_LCK: /* always OPEN_ALWAYS regardless of unix_mode_bits; read-write */
+ CreationDisposition = OPEN_ALWAYS;
+ DesiredAccess |= GENERIC_READ | GENERIC_WRITE;
+ FlagsAndAttributes |= FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_TEMPORARY; /* file is marked hidden and temporary */
break;
- case O_WRONLY: /* assume for MDBX_env_copy() and friends output */
- DesiredAccess = GENERIC_WRITE;
- ShareMode = 0;
- FlagsAndAttributes |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
+ case MDBX_OPEN_DXB_READ: /* read-only; the file must already exist */
+ CreationDisposition = OPEN_EXISTING;
+ DesiredAccess |= GENERIC_READ;
+ ShareMode |= FILE_SHARE_READ; /* read sharing is granted even when MDBX_EXCLUSIVE zeroed ShareMode above */
break;
- case O_RDWR:
- DesiredAccess = GENERIC_READ | GENERIC_WRITE;
- ShareMode = exclusive ? 0 : (FILE_SHARE_READ | FILE_SHARE_WRITE);
+ case MDBX_OPEN_DXB_LAZY: /* read-write; creation is governed by unix_mode_bits via the default disposition */
+ DesiredAccess |= GENERIC_READ | GENERIC_WRITE;
break;
- }
-
- DWORD CreationDisposition;
- switch (flags & (O_EXCL | O_CREAT)) {
- default:
- return ERROR_INVALID_PARAMETER;
- case 0:
+ case MDBX_OPEN_DXB_DSYNC: /* write-only, write-through; the file must already exist */
CreationDisposition = OPEN_EXISTING;
+ DesiredAccess |= GENERIC_WRITE;
+ FlagsAndAttributes |= FILE_FLAG_WRITE_THROUGH;
break;
- case O_EXCL | O_CREAT:
+ case MDBX_OPEN_COPY: /* target must not exist yet (CREATE_NEW); unshared, write-only */
CreationDisposition = CREATE_NEW;
- FlagsAndAttributes |= FILE_ATTRIBUTE_NOT_CONTENT_INDEXED;
- break;
- case O_CREAT:
- CreationDisposition = OPEN_ALWAYS;
- FlagsAndAttributes |= FILE_ATTRIBUTE_NOT_CONTENT_INDEXED;
+ ShareMode = 0;
+ DesiredAccess |= GENERIC_WRITE;
+ FlagsAndAttributes |=
+ (env->me_psize < env->me_os_psize) ? 0 : FILE_FLAG_NO_BUFFERING; /* unbuffered I/O only when the DB page is not smaller than the OS page */
break;
}
*fd = CreateFileW(pathnameW, DesiredAccess, ShareMode, NULL,
CreationDisposition, FlagsAndAttributes, NULL);
-
if (*fd == INVALID_HANDLE_VALUE)
return GetLastError();
- if ((flags & O_CREAT) && GetLastError() != ERROR_ALREADY_EXISTS) {
- /* set FILE_ATTRIBUTE_NOT_CONTENT_INDEXED for new file */
- DWORD FileAttributes = GetFileAttributesA(pathname);
- if (FileAttributes == INVALID_FILE_ATTRIBUTES ||
- !SetFileAttributesA(pathname, FileAttributes |
- FILE_ATTRIBUTE_NOT_CONTENT_INDEXED)) {
- int rc = GetLastError();
- CloseHandle(*fd);
- *fd = INVALID_HANDLE_VALUE;
- return rc;
- }
+
+ BY_HANDLE_FILE_INFORMATION info; /* receives the attributes the opened file actually has */
+ if (!GetFileInformationByHandle(*fd, &info)) {
+ int err = GetLastError(); /* captured before CloseHandle() runs */
+ CloseHandle(*fd);
+ *fd = INVALID_HANDLE_VALUE; /* never hand a closed handle back to the caller */
+ return err;
}
+ const DWORD AttributesDiff = /* requested-vs-actual mismatch, limited to the four attribute bits masked below */
+ (info.dwFileAttributes ^ FlagsAndAttributes) &
+ (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED |
+ FILE_ATTRIBUTE_TEMPORARY | FILE_ATTRIBUTE_COMPRESSED);
+ if (AttributesDiff)
+ (void)SetFileAttributesW(pathnameW, info.dwFileAttributes ^ AttributesDiff); /* flips only the mismatching bits; best effort, the result is ignored */
+
#else
- (void)exclusive;
+ int flags = unix_mode_bits ? O_CREAT : 0; /* non-zero mode bits request creation */
+ switch (purpose) {
+ default:
+ return EINVAL;
+ case MDBX_OPEN_LCK:
+ flags |= O_RDWR;
+ break;
+ case MDBX_OPEN_DXB_READ:
+ flags = O_RDONLY; /* plain assignment (not |=) discards any O_CREAT set above */
+ break;
+ case MDBX_OPEN_DXB_LAZY:
+ flags |= O_RDWR;
+ break;
+ case MDBX_OPEN_COPY:
+ flags = O_CREAT | O_WRONLY | O_EXCL; /* target must be a brand-new file, independent of unix_mode_bits */
+ break;
+ case MDBX_OPEN_DXB_DSYNC:
+ flags |= O_WRONLY; /* plus the first available of O_DSYNC / O_SYNC / O_FSYNC, if any is defined */
+#if defined(O_DSYNC)
+ flags |= O_DSYNC;
+#elif defined(O_SYNC)
+ flags |= O_SYNC;
+#elif defined(O_FSYNC)
+ flags |= O_FSYNC;
+#endif
+ break;
+ }
+
+ const bool direct_nocache_for_copy = /* cache bypass is requested only for copy targets whose DB page is not smaller than the OS page */
+ env->me_psize >= env->me_os_psize && purpose == MDBX_OPEN_COPY;
+ if (direct_nocache_for_copy) {
+#if defined(O_DIRECT)
+ flags |= O_DIRECT;
+#endif /* O_DIRECT */
+#if defined(O_NOCACHE)
+ flags |= O_NOCACHE;
+#endif /* O_NOCACHE */
+ }
+
#ifdef O_CLOEXEC
flags |= O_CLOEXEC;
#endif /* O_CLOEXEC */
- *fd = open(pathname, flags, mode);
+
+ *fd = open(pathname, flags, unix_mode_bits);
+#if defined(O_DIRECT)
+ if (*fd < 0 && (flags & O_DIRECT) && /* O_DIRECT was rejected: retry once without it */
+ (errno == EINVAL || errno == EAFNOSUPPORT)) {
+ flags &= ~(O_DIRECT | O_EXCL); /* NOTE(review): O_EXCL is dropped as well, presumably because the failed attempt may already have created the file - confirm */
+ *fd = open(pathname, flags, unix_mode_bits);
+ }
+#endif /* O_DIRECT */
if (*fd < 0)
return errno;
#if defined(FD_CLOEXEC) && !defined(O_CLOEXEC)
- int fd_flags = fcntl(*fd, F_GETFD);
+ const int fd_flags = fcntl(*fd, F_GETFD); /* fallback for platforms without O_CLOEXEC: set close-on-exec after the open */
if (fd_flags != -1)
(void)fcntl(*fd, F_SETFD, fd_flags | FD_CLOEXEC);
#endif /* FD_CLOEXEC && !O_CLOEXEC */
- if ((flags & (O_RDONLY | O_WRONLY | O_RDWR)) == O_WRONLY) {
- /* assume for MDBX_env_copy() and friends output */
-#if defined(O_DIRECT)
- int fd_flags = fcntl(*fd, F_GETFD);
- if (fd_flags != -1)
- (void)fcntl(*fd, F_SETFL, fd_flags | O_DIRECT);
-#endif /* O_DIRECT */
-#if defined(F_NOCACHE)
+ if (direct_nocache_for_copy) { /* fcntl(F_NOCACHE) is used only where no O_NOCACHE open flag exists; its result is ignored */
+#if defined(F_NOCACHE) && !defined(O_NOCACHE)
(void)fcntl(*fd, F_NOCACHE, 1);
#endif /* F_NOCACHE */
}
-#endif
+#endif
return MDBX_SUCCESS;
}
@@ -914,11 +966,6 @@ MDBX_INTERNAL_FUNC int mdbx_msync(mdbx_mmap_t *map, size_t offset,
#endif /* Linux */
const int mode = async ? MS_ASYNC : MS_SYNC;
int rc = (msync(ptr, length, mode) == 0) ? MDBX_SUCCESS : errno;
-#if defined(__APPLE__) && \
- MDBX_OSX_SPEED_INSTEADOF_DURABILITY == MDBX_OSX_WANNA_DURABILITY
- if (rc == MDBX_SUCCESS && mode == MS_SYNC)
- rc = likely(fcntl(map->fd, F_FULLFSYNC) != -1) ? MDBX_SUCCESS : errno;
-#endif /* MacOS */
return rc;
#endif
}
@@ -1334,10 +1381,14 @@ MDBX_INTERNAL_FUNC int mdbx_mmap(const int flags, mdbx_mmap_t *map,
#endif
+ VALGRIND_MAKE_MEM_DEFINED(map->address, map->current);
+ ASAN_UNPOISON_MEMORY_REGION(map->address, map->current);
return MDBX_SUCCESS;
}
MDBX_INTERNAL_FUNC int mdbx_munmap(mdbx_mmap_t *map) {
+ VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
+ ASAN_POISON_MEMORY_REGION(map->address, map->current);
#if defined(_WIN32) || defined(_WIN64)
if (map->section)
NtClose(map->section);
@@ -1369,20 +1420,22 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
/* growth rw-section */
SectionSize.QuadPart = size;
status = NtExtendSection(map->section, &SectionSize);
- if (NT_SUCCESS(status)) {
- map->current = size;
- if (map->filesize < size)
- map->filesize = size;
- }
- return ntstatus2errcode(status);
+ if (!NT_SUCCESS(status))
+ return ntstatus2errcode(status);
+ map->current = size;
+ if (map->filesize < size)
+ map->filesize = size;
+ return MDBX_SUCCESS;
}
if (limit > map->limit) {
- /* check ability of address space for growth before umnap */
+ /* check ability of address space for growth before unmap */
PVOID BaseAddress = (PBYTE)map->address + map->limit;
SIZE_T RegionSize = limit - map->limit;
status = NtAllocateVirtualMemory(GetCurrentProcess(), &BaseAddress, 0,
&RegionSize, MEM_RESERVE, PAGE_NOACCESS);
+ if (status == /* STATUS_CONFLICTING_ADDRESSES */ 0xC0000018)
+ return MDBX_RESULT_TRUE;
if (!NT_SUCCESS(status))
return ntstatus2errcode(status);
@@ -1411,9 +1464,13 @@ MDBX_INTERNAL_FUNC int mdbx_mresize(int flags, mdbx_mmap_t *map, size_t size,
bailout:
map->address = NULL;
map->current = map->limit = 0;
- if (ReservedAddress)
- (void)NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress,
- &ReservedSize, MEM_RELEASE);
+ if (ReservedAddress) {
+ ReservedSize = 0;
+ status = NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress,
+ &ReservedSize, MEM_RELEASE);
+ assert(NT_SUCCESS(status));
+ (void)status;
+ }
return err;
}
@@ -1464,6 +1521,7 @@ retry_file_and_section:
if (ReservedAddress) {
/* release reserved address space */
+ ReservedSize = 0;
status = NtFreeVirtualMemory(GetCurrentProcess(), &ReservedAddress,
&ReservedSize, MEM_RELEASE);
ReservedAddress = NULL;
@@ -1528,15 +1586,17 @@ retry_mapview:;
}
if (limit != map->limit) {
-#if defined(_GNU_SOURCE) && (defined(__linux__) || defined(__gnu_linux__))
- void *ptr = mremap(map->address, map->limit, limit,
- /* LY: in case changing the mapping size calling code
- must guarantees the absence of competing threads,
- and a willingness to another base address */
- MREMAP_MAYMOVE);
+#if defined(MREMAP_MAYMOVE)
+ void *ptr = mremap(map->address, map->limit, limit, 0);
if (ptr == MAP_FAILED) {
rc = errno;
- return (rc == EAGAIN || rc == ENOMEM) ? MDBX_RESULT_TRUE : rc;
+ switch (rc) {
+ case EAGAIN:
+ case ENOMEM:
+ case EFAULT /* MADV_DODUMP / MADV_DONTDUMP are mixed for mmap-range */:
+ rc = MDBX_RESULT_TRUE;
+ }
+ return rc;
}
map->address = ptr;
map->limit = limit;
@@ -1544,15 +1604,17 @@ retry_mapview:;
#ifdef MADV_DONTFORK
if (unlikely(madvise(map->address, map->limit, MADV_DONTFORK) != 0))
return errno;
-#endif
+#endif /* MADV_DONTFORK */
#ifdef MADV_NOHUGEPAGE
(void)madvise(map->address, map->limit, MADV_NOHUGEPAGE);
-#endif
+#endif /* MADV_NOHUGEPAGE */
-#else
+#else /* MREMAP_MAYMOVE */
+ /* TODO: Perhaps here it is worth to implement suspend/resume threads
+ * and perform unmap/map as like for Windows. */
rc = MDBX_RESULT_TRUE;
-#endif /* _GNU_SOURCE && __linux__ */
+#endif /* !MREMAP_MAYMOVE */
}
#endif
return rc;
diff --git a/libs/libmdbx/src/src/elements/osal.h b/libs/libmdbx/src/src/elements/osal.h
index 815aae185f..c590ee51c9 100644
--- a/libs/libmdbx/src/src/elements/osal.h
+++ b/libs/libmdbx/src/src/elements/osal.h
@@ -566,13 +566,23 @@ enum mdbx_syncmode_bits {
};
MDBX_INTERNAL_FUNC int mdbx_filesync(mdbx_filehandle_t fd,
- enum mdbx_syncmode_bits mode_bits);
+ const enum mdbx_syncmode_bits mode_bits);
MDBX_INTERNAL_FUNC int mdbx_ftruncate(mdbx_filehandle_t fd, uint64_t length);
MDBX_INTERNAL_FUNC int mdbx_fseek(mdbx_filehandle_t fd, uint64_t pos);
MDBX_INTERNAL_FUNC int mdbx_filesize(mdbx_filehandle_t fd, uint64_t *length);
-MDBX_INTERNAL_FUNC int mdbx_openfile(const char *pathname, int flags,
- mode_t mode, mdbx_filehandle_t *fd,
- bool exclusive);
+
+enum mdbx_openfile_purpose { /* tells mdbx_openfile() which access mode and open flags to derive */
+ MDBX_OPEN_DXB_READ = 0, /* read-only: O_RDONLY / GENERIC_READ, never creates the file */
+ MDBX_OPEN_DXB_LAZY = 1, /* read-write: O_RDWR / GENERIC_READ | GENERIC_WRITE, created only when mode bits are non-zero */
+ MDBX_OPEN_DXB_DSYNC = 2, /* write-only with synchronous writes: O_DSYNC (or O_SYNC / O_FSYNC) / FILE_FLAG_WRITE_THROUGH */
+ MDBX_OPEN_LCK = 3, /* read-write; on Windows always OPEN_ALWAYS and marked hidden + temporary */
+ MDBX_OPEN_COPY = 4 /* write-only and must be a new file: O_CREAT | O_EXCL / CREATE_NEW, optionally unbuffered */
+};
+
+MDBX_INTERNAL_FUNC int mdbx_openfile(const enum mdbx_openfile_purpose purpose, /* replaces the former (flags, exclusive) pair of the old prototype */
+ const MDBX_env *env, const char *pathname, /* environment handle and the path of the file to open */
+ mdbx_filehandle_t *fd, /* out: receives the opened handle */
+ mode_t unix_mode_bits); /* renamed from the old `mode` parameter */
MDBX_INTERNAL_FUNC int mdbx_closefile(mdbx_filehandle_t fd);
MDBX_INTERNAL_FUNC int mdbx_removefile(const char *pathname);
MDBX_INTERNAL_FUNC int mdbx_is_pipe(mdbx_filehandle_t fd);
diff --git a/libs/libmdbx/src/src/elements/version.c b/libs/libmdbx/src/src/elements/version.c
index 741fe7b541..58234230c4 100644
--- a/libs/libmdbx/src/src/elements/version.c
+++ b/libs/libmdbx/src/src/elements/version.c
@@ -4,7 +4,7 @@
#include "internals.h"
#if MDBX_VERSION_MAJOR != 0 || \
- MDBX_VERSION_MINOR != 4
+ MDBX_VERSION_MINOR != 5
#error "API version mismatch! Had `git fetch --tags` done?"
#endif
@@ -12,35 +12,35 @@ static const char sourcery[] = STRINGIFY(MDBX_BUILD_SOURCERY);
__dll_export
#ifdef __attribute_used__
-__attribute_used__
+ __attribute_used__
#elif defined(__GNUC__) || __has_attribute(__used__)
-__attribute__((__used__))
+ __attribute__((__used__))
#endif
#ifdef __attribute_externally_visible__
-__attribute_externally_visible__
+ __attribute_externally_visible__
#elif (defined(__GNUC__) && !defined(__clang__)) || \
__has_attribute(__externally_visible__)
-__attribute__((__externally_visible__))
+ __attribute__((__externally_visible__))
#endif
-const mdbx_version_info mdbx_version = {
- 0,
- 4,
- 0,
- 1692,
- {"2019-12-05T01:43:57+03:00", "456d64170249e6108e90da02648de8c7707e2ed0", "3dccbb25a7710d4eff5fb136182b7bb0abfc4914",
- "v0.4.0-12-g3dccbb2"},
- sourcery };
+ const mdbx_version_info mdbx_version = { /* exported version record; field names are declared with mdbx_version_info elsewhere */
+ 0, /* major - agrees with the MDBX_VERSION_MAJOR != 0 check at the top of this file */
+ 5, /* minor - agrees with the MDBX_VERSION_MINOR != 5 check at the top of this file */
+ 0, /* NOTE(review): presumably the release/patch number, cf. "v0.5.0" below - confirm */
+ 1879, /* NOTE(review): looks like a revision counter - confirm against mdbx_version_info */
+ {"2020-01-07T22:30:21+03:00", "f982437718fa106bde7985a976752c08e95599a8", "3ffcc1722de83ce4cd5a37772d3b41e8aea02042", /* NOTE(review): presumably a timestamp followed by two git object hashes - confirm the field order */
+ "v0.5.0-10-g3ffcc17"}, /* NOTE(review): looks like `git describe` output - confirm */
+ sourcery}; /* the file-local sourcery string (STRINGIFY(MDBX_BUILD_SOURCERY)) */
__dll_export
#ifdef __attribute_used__
-__attribute_used__
+ __attribute_used__
#elif defined(__GNUC__) || __has_attribute(__used__)
-__attribute__((__used__))
+ __attribute__((__used__))
#endif
#ifdef __attribute_externally_visible__
-__attribute_externally_visible__
+ __attribute_externally_visible__
#elif (defined(__GNUC__) && !defined(__clang__)) || \
__has_attribute(__externally_visible__)
-__attribute__((__externally_visible__))
+ __attribute__((__externally_visible__))
#endif
-const char *const mdbx_sourcery_anchor = sourcery;
+ const char *const mdbx_sourcery_anchor = sourcery; /* exported constant pointer to the file-local sourcery string */
diff --git a/libs/libmdbx/src/src/tools/mdbx_chk.c b/libs/libmdbx/src/src/tools/mdbx_chk.c
index ce1a4a0efd..61fb74c7e5 100644
--- a/libs/libmdbx/src/src/tools/mdbx_chk.c
+++ b/libs/libmdbx/src/src/tools/mdbx_chk.c
@@ -93,7 +93,7 @@ size_t userdb_count, skipped_subdb;
uint64_t total_unused_bytes, reclaimable_pages, gc_pages, alloc_pages,
unused_pages, backed_pages;
unsigned verbose;
-bool ignore_wrong_order, quiet;
+bool ignore_wrong_order, quiet, dont_traversal;
const char *only_subdb;
struct problem {
@@ -626,22 +626,23 @@ static int process_db(MDBX_dbi dbi_handle, char *dbi_name, visitor *handler,
error("too many DBIs or out of memory\n");
return MDBX_ENOMEM;
}
- const uint64_t subtotal_pages =
- ms.ms_branch_pages + ms.ms_leaf_pages + ms.ms_overflow_pages;
- if (subtotal_pages != dbi->pages.total)
- error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", "subtotal",
- subtotal_pages, dbi->pages.total);
- if (ms.ms_branch_pages != dbi->pages.branch)
- error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", "branch",
- ms.ms_branch_pages, dbi->pages.branch);
- const uint64_t allleaf_pages = dbi->pages.leaf + dbi->pages.leaf_dupfixed;
- if (ms.ms_leaf_pages != allleaf_pages)
- error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", "all-leaf",
- ms.ms_leaf_pages, allleaf_pages);
- if (ms.ms_overflow_pages != dbi->pages.large_volume)
- error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n",
- "large/overlow", ms.ms_overflow_pages, dbi->pages.large_volume);
-
+ if (!dont_traversal) {
+ const uint64_t subtotal_pages =
+ ms.ms_branch_pages + ms.ms_leaf_pages + ms.ms_overflow_pages;
+ if (subtotal_pages != dbi->pages.total)
+ error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n",
+ "subtotal", subtotal_pages, dbi->pages.total);
+ if (ms.ms_branch_pages != dbi->pages.branch)
+ error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", "branch",
+ ms.ms_branch_pages, dbi->pages.branch);
+ const uint64_t allleaf_pages = dbi->pages.leaf + dbi->pages.leaf_dupfixed;
+ if (ms.ms_leaf_pages != allleaf_pages)
+ error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n",
+ "all-leaf", ms.ms_leaf_pages, allleaf_pages);
+ if (ms.ms_overflow_pages != dbi->pages.large_volume)
+ error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n",
+ "large/overlow", ms.ms_overflow_pages, dbi->pages.large_volume);
+ }
rc = mdbx_cursor_open(txn, dbi_handle, &mc);
if (rc) {
error("mdbx_cursor_open failed, error %d %s\n", rc, mdbx_strerror(rc));
@@ -758,18 +759,19 @@ bailout:
}
static void usage(char *prog) {
- fprintf(stderr,
- "usage: %s [-V] [-v] [-n] [-q] [-c] [-w] [-d] [-i] [-s subdb] dbpath\n"
- " -V\t\tprint version and exit\n"
- " -v\t\tmore verbose, could be used multiple times\n"
- " -n\t\tNOSUBDIR mode for open\n"
- " -q\t\tbe quiet\n"
- " -c\t\tforce cooperative mode (don't try exclusive)\n"
- " -w\t\tlock DB for writing while checking\n"
- " -d\t\tdisable page-by-page traversal of B-tree\n"
- " -i\t\tignore wrong order errors (for custom comparators case)\n"
- " -s subdb\tprocess a specific subdatabase only\n",
- prog);
+ fprintf(
+ stderr, /* the help text goes to stderr rather than stdout */
+ "usage: %s [-V] [-v] [-n] [-q] [-c] [-w] [-d] [-i] [-s subdb] dbpath\n"
+ " -V\t\tprint version and exit\n"
+ " -v\t\tmore verbose, could be used multiple times\n"
+ " -n\t\tNOSUBDIR mode for open\n"
+ " -q\t\tbe quiet\n"
+ " -c\t\tforce cooperative mode (don't try exclusive)\n"
+ " -w\t\tlock DB for writing while checking\n"
+ " -d\t\tdisable page-by-page traversal of B-tree\n"
+ " -i\t\tignore wrong order errors (for custom comparators case)\n"
+ " -s subdb\tprocess a specific subdatabase only\n",
+ prog); /* substituted for the single %s in the "usage:" line */
exit(EXIT_INTERRUPTED);
}
@@ -912,7 +914,6 @@ int main(int argc, char *argv[]) {
char *prog = argv[0];
char *envname;
int problems_maindb = 0, problems_freedb = 0, problems_meta = 0;
- bool dont_traversal = false;
bool locked = false;
double elapsed;
@@ -1171,6 +1172,14 @@ int main(int argc, char *argv[]) {
}
printf(", %" PRIu64 " pages\n",
envinfo.mi_geo.current / envinfo.mi_dxb_pagesize);
+#if defined(_WIN32) || defined(_WIN64)
+ if (envinfo.mi_geo.shrink && envinfo.mi_geo.current != envinfo.mi_geo.upper)
+ print(" WARNING: Due Windows system limitations a "
+ "file couldn't\n be truncated while database "
+ "is opened. So, the size of\n database file "
+ "may by large than the database itself,\n "
+ "until it will be closed or reopened in read-write mode.\n");
+#endif
print(" - transactions: recent %" PRIu64 ", latter reader %" PRIu64
", lag %" PRIi64 "\n",
envinfo.mi_recent_txnid, envinfo.mi_latter_reader_txnid,
diff --git a/libs/libmdbx/src/src/tools/mdbx_stat.c b/libs/libmdbx/src/src/tools/mdbx_stat.c
index bd70eb7006..461f569afb 100644
--- a/libs/libmdbx/src/src/tools/mdbx_stat.c
+++ b/libs/libmdbx/src/src/tools/mdbx_stat.c
@@ -72,7 +72,7 @@ static int reader_list_func(void *ctx, int num, int slot, mdbx_pid_t pid,
size_t bytes_used, size_t bytes_retained) {
(void)ctx;
if (num == 1)
- printf("Reader Table Status\n"
+ printf("Reader Table\n"
" #\tslot\t%6s %*s %20s %10s %13s %13s\n",
"pid", (int)sizeof(size_t) * 2, "thread", "txnid", "lag", "used",
"retained");
@@ -212,21 +212,28 @@ int main(int argc, char *argv[]) {
mei.mi_geo.shrink, mei.mi_geo.lower / mst.ms_psize,
mei.mi_geo.upper / mst.ms_psize, mei.mi_geo.grow / mst.ms_psize,
mei.mi_geo.shrink / mst.ms_psize);
+ printf(" Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n",
+ mei.mi_mapsize, mei.mi_mapsize / mst.ms_psize);
printf(" Current datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n",
mei.mi_geo.current, mei.mi_geo.current / mst.ms_psize);
+#if defined(_WIN32) || defined(_WIN64)
+ if (mei.mi_geo.shrink && mei.mi_geo.current != mei.mi_geo.upper)
+ printf(" WARNING: Due Windows system limitations a "
+ "file couldn't\n be truncated while database "
+ "is opened. So, the size of\n database file "
+ "may by large than the database itself,\n "
+ "until it will be closed or reopened in read-write mode.\n");
+#endif
} else {
printf(" Fixed datafile: %" PRIu64 " bytes, %" PRIu64 " pages\n",
mei.mi_geo.current, mei.mi_geo.current / mst.ms_psize);
}
- printf(" Current mapsize: %" PRIu64 " bytes, %" PRIu64 " pages \n",
- mei.mi_mapsize, mei.mi_mapsize / mst.ms_psize);
- printf(" Number of pages used: %" PRIu64 "\n", mei.mi_last_pgno + 1);
printf(" Last transaction ID: %" PRIu64 "\n", mei.mi_recent_txnid);
- printf(" Tail transaction ID: %" PRIu64 " (%" PRIi64 ")\n",
+ printf(" Latter reader transaction ID: %" PRIu64 " (%" PRIi64 ")\n",
mei.mi_latter_reader_txnid,
mei.mi_latter_reader_txnid - mei.mi_recent_txnid);
printf(" Max readers: %u\n", mei.mi_maxreaders);
- printf(" Number of readers used: %u\n", mei.mi_numreaders);
+ printf(" Number of reader slots uses: %u\n", mei.mi_numreaders);
} else {
/* LY: zap warnings from gcc */
memset(&mst, 0, sizeof(mst));
@@ -262,7 +269,7 @@ int main(int argc, char *argv[]) {
pgno_t pages = 0, *iptr;
pgno_t reclaimable = 0;
- printf("Freelist Status\n");
+ printf("Garbage Collection\n");
dbi = 0;
rc = mdbx_cursor_open(txn, dbi, &cursor);
if (rc) {
@@ -343,20 +350,23 @@ int main(int argc, char *argv[]) {
if (envinfo) {
uint64_t value = mei.mi_mapsize / mst.ms_psize;
double percent = value / 100.0;
- printf("Page Allocation Info\n");
- printf(" Max pages: %" PRIu64 " 100%%\n", value);
+ printf("Page Usage\n");
+ printf(" Total: %" PRIu64 " 100%%\n", value);
+
+ value = mei.mi_geo.current / mst.ms_psize;
+ printf(" Backed: %" PRIu64 " %.1f%%\n", value, value / percent);
value = mei.mi_last_pgno + 1;
- printf(" Pages used: %" PRIu64 " %.1f%%\n", value, value / percent);
+ printf(" Allocated: %" PRIu64 " %.1f%%\n", value, value / percent);
value = mei.mi_mapsize / mst.ms_psize - (mei.mi_last_pgno + 1);
printf(" Remained: %" PRIu64 " %.1f%%\n", value, value / percent);
value = mei.mi_last_pgno + 1 - pages;
- printf(" Used now: %" PRIu64 " %.1f%%\n", value, value / percent);
+ printf(" Used: %" PRIu64 " %.1f%%\n", value, value / percent);
value = pages;
- printf(" Unallocated: %" PRIu64 " %.1f%%\n", value, value / percent);
+ printf(" GC: %" PRIu64 " %.1f%%\n", value, value / percent);
value = pages - reclaimable;
printf(" Detained: %" PRIu64 " %.1f%%\n", value, value / percent);
@@ -368,7 +378,7 @@ int main(int argc, char *argv[]) {
mei.mi_mapsize / mst.ms_psize - (mei.mi_last_pgno + 1) + reclaimable;
printf(" Available: %" PRIu64 " %.1f%%\n", value, value / percent);
} else
- printf(" Free pages: %" PRIaPGNO "\n", pages);
+ printf(" GC: %" PRIaPGNO " pages\n", pages);
}
rc = mdbx_dbi_open(txn, subname, 0, &dbi);
diff --git a/libs/libmdbx/src/test/CMakeLists.txt b/libs/libmdbx/src/test/CMakeLists.txt
index bb4abd5d6d..0cc22182c9 100644
--- a/libs/libmdbx/src/test/CMakeLists.txt
+++ b/libs/libmdbx/src/test/CMakeLists.txt
@@ -1,10 +1,10 @@
-set(TARGET mdbx_test)
-project(${TARGET})
+if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+ set(TEST_OSAL windows)
+else()
+ set(TEST_OSAL unix)
+endif()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-declarations")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-cast-qual")
-
-add_executable(${TARGET}
+add_executable(mdbx_test
base.h
cases.cc
chrono.cc
@@ -21,7 +21,7 @@ add_executable(${TARGET}
log.h
main.cc
osal.h
- osal-unix.cc
+ osal-${TEST_OSAL}.cc
test.cc
test.h
try.cc
@@ -29,9 +29,25 @@ add_executable(${TARGET}
utils.h
append.cc
ttl.cc
+ nested.cc
)
-target_link_libraries(${TARGET}
- mdbx
- )
+set_target_properties(mdbx_test PROPERTIES
+ INTERPROCEDURAL_OPTIMIZATION $<BOOL:${INTERPROCEDURAL_OPTIMIZATION}>
+ CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON)
+
+if(CC_HAS_FASTMATH)
+ target_compile_options(mdbx_test PRIVATE "-ffast-math")
+endif()
+if(CC_HAS_VISIBILITY AND (LTO_ENABLED OR INTERPROCEDURAL_OPTIMIZATION))
+ set_target_properties(mdbx_test PROPERTIES LINK_FLAGS "-fvisibility=hidden")
+endif()
+
+target_link_libraries(mdbx_test mdbx ${LIB_MATH} ${CMAKE_THREAD_LIBS_INIT})
+if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+ target_link_libraries(mdbx_test winmm.lib)
+endif()
+if(UNIX AND NOT SUBPROJECT)
+ add_subdirectory(pcrf)
+endif()
diff --git a/libs/libmdbx/src/test/append.cc b/libs/libmdbx/src/test/append.cc
index 273f68b810..a30351dcde 100644
--- a/libs/libmdbx/src/test/append.cc
+++ b/libs/libmdbx/src/test/append.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -15,7 +15,6 @@
#include "test.h"
bool testcase_append::run() {
- MDBX_dbi dbi;
int err = db_open__begin__table_create_open_clean(dbi);
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("append: bailout-prepare due '%s'", mdbx_strerror(err));
@@ -54,7 +53,7 @@ bool testcase_append::run() {
}
log_trace("append: append-a %" PRIu64, serial);
- generate_pair(serial, key, data);
+ generate_pair(serial);
int cmp = inserted_number ? mdbx_cmp(txn_guard.get(), dbi, &key->value,
&last_key->value)
: 1;
diff --git a/libs/libmdbx/src/test/base.h b/libs/libmdbx/src/test/base.h
index 5ca134a81d..05dfd1c4e3 100644
--- a/libs/libmdbx/src/test/base.h
+++ b/libs/libmdbx/src/test/base.h
@@ -80,14 +80,19 @@
#include <unordered_set>
#include <vector>
+#define MDBX_INTERNAL_FUNC
+#define MDBX_INTERNAL_VAR extern
+#define MDBX_TOOLS /* Avoid using internal mdbx_assert() */
#include "../mdbx.h"
-#include "../src/defs.h"
-#include "../src/osal.h"
+#include "../src/elements/defs.h"
+#include "../src/elements/osal.h"
#if !defined(__thread) && (defined(_MSC_VER) || defined(__DMC__))
#define __thread __declspec(thread)
#endif /* __thread */
+#include "../src/elements/options.h"
+
#ifdef _MSC_VER
#pragma warning(pop)
#pragma warning(disable : 4201) /* nonstandard extension used : \
diff --git a/libs/libmdbx/src/test/cases.cc b/libs/libmdbx/src/test/cases.cc
index a98834a457..b01169004b 100644
--- a/libs/libmdbx/src/test/cases.cc
+++ b/libs/libmdbx/src/test/cases.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -17,7 +17,6 @@
void configure_actor(unsigned &last_space_id, const actor_testcase testcase,
const char *space_id_cstr, const actor_params &params) {
unsigned wait4id = 0;
-
if (params.waitfor_nops) {
for (auto i = global::actors.rbegin(); i != global::actors.rend(); ++i) {
if (i->is_waitable(params.waitfor_nops)) {
@@ -61,15 +60,15 @@ void testcase_setup(const char *casename, actor_params &params,
unsigned &last_space_id) {
if (strcmp(casename, "basic") == 0) {
log_notice(">>> testcase_setup(%s)", casename);
- configure_actor(last_space_id, ac_jitter, nullptr, params);
- configure_actor(last_space_id, ac_hill, nullptr, params);
- configure_actor(last_space_id, ac_ttl, nullptr, params);
- configure_actor(last_space_id, ac_jitter, nullptr, params);
+ configure_actor(last_space_id, ac_nested, nullptr, params);
configure_actor(last_space_id, ac_hill, nullptr, params);
configure_actor(last_space_id, ac_ttl, nullptr, params);
- configure_actor(last_space_id, ac_try, nullptr, params);
configure_actor(last_space_id, ac_copy, nullptr, params);
configure_actor(last_space_id, ac_append, nullptr, params);
+ configure_actor(last_space_id, ac_jitter, nullptr, params);
+ configure_actor(last_space_id, ac_try, nullptr, params);
+ configure_actor(last_space_id, ac_jitter, nullptr, params);
+ configure_actor(last_space_id, ac_try, nullptr, params);
log_notice("<<< testcase_setup(%s): done", casename);
} else {
failure("unknown testcase `%s`", casename);
diff --git a/libs/libmdbx/src/test/chrono.cc b/libs/libmdbx/src/test/chrono.cc
index 38cb321a81..315d379088 100644
--- a/libs/libmdbx/src/test/chrono.cc
+++ b/libs/libmdbx/src/test/chrono.cc
@@ -16,7 +16,10 @@
namespace chrono {
+#ifndef NSEC_PER_SEC
#define NSEC_PER_SEC 1000000000u
+#endif /* NSEC_PER_SEC */
+
uint32_t ns2fractional(uint32_t ns) {
assert(ns < NSEC_PER_SEC);
/* LY: здесь и далее используется "длинное деление", которое
@@ -30,7 +33,9 @@ uint32_t fractional2ns(uint32_t fractional) {
return (fractional * (uint64_t)NSEC_PER_SEC) >> 32;
}
+#ifndef USEC_PER_SEC
#define USEC_PER_SEC 1000000u
+#endif /* USEC_PER_SEC */
uint32_t us2fractional(uint32_t us) {
assert(us < USEC_PER_SEC);
return ((uint64_t)us << 32) / USEC_PER_SEC;
@@ -40,7 +45,9 @@ uint32_t fractional2us(uint32_t fractional) {
return (fractional * (uint64_t)USEC_PER_SEC) >> 32;
}
+#ifndef MSEC_PER_SEC
#define MSEC_PER_SEC 1000u
+#endif /* MSEC_PER_SEC */
uint32_t ms2fractional(uint32_t ms) {
assert(ms < MSEC_PER_SEC);
return ((uint64_t)ms << 32) / MSEC_PER_SEC;
diff --git a/libs/libmdbx/src/test/chrono.h b/libs/libmdbx/src/test/chrono.h
index 11675195ac..07cdef66cf 100644
--- a/libs/libmdbx/src/test/chrono.h
+++ b/libs/libmdbx/src/test/chrono.h
@@ -15,7 +15,6 @@
#pragma once
#include "base.h"
-#include "log.h"
#include "utils.h"
namespace chrono {
@@ -24,7 +23,7 @@ namespace chrono {
typedef union time {
uint64_t fixedpoint;
- struct {
+ __anonymous_struct_extension__ struct {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
uint32_t fractional;
union {
diff --git a/libs/libmdbx/src/test/config.cc b/libs/libmdbx/src/test/config.cc
index dd150e9a91..eedfd7b2d4 100644
--- a/libs/libmdbx/src/test/config.cc
+++ b/libs/libmdbx/src/test/config.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -294,14 +294,21 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option,
//-----------------------------------------------------------------------------
-const struct option_verb mode_bits[] = {
- {"rdonly", MDBX_RDONLY}, {"mapasync", MDBX_MAPASYNC},
- {"utterly", MDBX_UTTERLY_NOSYNC}, {"nosubdir", MDBX_NOSUBDIR},
- {"nosync", MDBX_NOSYNC}, {"nometasync", MDBX_NOMETASYNC},
- {"writemap", MDBX_WRITEMAP}, {"notls", MDBX_NOTLS},
- {"nordahead", MDBX_NORDAHEAD}, {"nomeminit", MDBX_NOMEMINIT},
- {"coalesce", MDBX_COALESCE}, {"lifo", MDBX_LIFORECLAIM},
- {"perturb", MDBX_PAGEPERTURB}, {nullptr, 0}};
+const struct option_verb mode_bits[] = {{"rdonly", MDBX_RDONLY},
+ {"mapasync", MDBX_MAPASYNC},
+ {"nosync-utterly", MDBX_UTTERLY_NOSYNC},
+ {"nosubdir", MDBX_NOSUBDIR},
+ {"nosync-safe", MDBX_SAFE_NOSYNC},
+ {"nometasync", MDBX_NOMETASYNC},
+ {"writemap", MDBX_WRITEMAP},
+ {"notls", MDBX_NOTLS},
+ {"nordahead", MDBX_NORDAHEAD},
+ {"nomeminit", MDBX_NOMEMINIT},
+ {"coalesce", MDBX_COALESCE},
+ {"lifo", MDBX_LIFORECLAIM},
+ {"perturb", MDBX_PAGEPERTURB},
+ {"accede", MDBX_ACCEDE},
+ {nullptr, 0}};
const struct option_verb table_bits[] = {
{"key.reverse", MDBX_REVERSEKEY},
@@ -314,7 +321,7 @@ const struct option_verb table_bits[] = {
static void dump_verbs(const char *caption, size_t bits,
const struct option_verb *verbs) {
- log_info("%s: 0x%" PRIx64 " = ", caption, (uint64_t)bits);
+ log_verbose("%s: 0x%" PRIx64 " = ", caption, (uint64_t)bits);
const char *comma = "";
while (verbs->mask && bits) {
@@ -330,7 +337,7 @@ static void dump_verbs(const char *caption, size_t bits,
}
static void dump_duration(const char *caption, unsigned duration) {
- log_info("%s: ", caption);
+ log_verbose("%s: ", caption);
if (duration) {
if (duration > 24 * 3600)
logging::feed("%u_", duration / (24 * 3600));
@@ -347,84 +354,92 @@ void dump(const char *title) {
logging::local_suffix indent(title);
for (auto i = global::actors.begin(); i != global::actors.end(); ++i) {
- log_info("#%u, testcase %s, space_id/table %u\n", i->actor_id,
- testcase2str(i->testcase), i->space_id);
+ log_verbose("#%u, testcase %s, space_id/table %u\n", i->actor_id,
+ testcase2str(i->testcase), i->space_id);
indent.push();
if (i->params.loglevel) {
- log_info("log: level %u, %s\n", i->params.loglevel,
- i->params.pathname_log.empty() ? "console"
- : i->params.pathname_log.c_str());
+ log_verbose("log: level %u, %s\n", i->params.loglevel,
+ i->params.pathname_log.empty()
+ ? "console"
+ : i->params.pathname_log.c_str());
}
- log_info("database: %s, size %" PRIuPTR "[%" PRIiPTR "..%" PRIiPTR
- ", %i %i, %i]\n",
- i->params.pathname_db.c_str(), i->params.size_now,
- i->params.size_lower, i->params.size_upper,
- i->params.shrink_threshold, i->params.growth_step,
- i->params.pagesize);
+ log_verbose("database: %s, size %" PRIuPTR "[%" PRIiPTR "..%" PRIiPTR
+ ", %i %i, %i]\n",
+ i->params.pathname_db.c_str(), i->params.size_now,
+ i->params.size_lower, i->params.size_upper,
+ i->params.shrink_threshold, i->params.growth_step,
+ i->params.pagesize);
dump_verbs("mode", i->params.mode_flags, mode_bits);
dump_verbs("table", i->params.table_flags, table_bits);
if (i->params.test_nops)
- log_info("iterations/records %u\n", i->params.test_nops);
+ log_verbose("iterations/records %u\n", i->params.test_nops);
else
dump_duration("duration", i->params.test_duration);
if (i->params.nrepeat)
- log_info("repeat %u\n", i->params.nrepeat);
+ log_verbose("repeat %u\n", i->params.nrepeat);
else
- log_info("repeat ETERNALLY\n");
+ log_verbose("repeat ETERNALLY\n");
- log_info("threads %u\n", i->params.nthreads);
+ log_verbose("threads %u\n", i->params.nthreads);
- log_info(
+ log_verbose(
"keygen.params: case %s, width %u, mesh %u, rotate %u, offset %" PRIu64
", split %u/%u\n",
keygencase2str(i->params.keygen.keycase), i->params.keygen.width,
i->params.keygen.mesh, i->params.keygen.rotate, i->params.keygen.offset,
i->params.keygen.split,
i->params.keygen.width - i->params.keygen.split);
- log_info("keygen.seed: %u\n", i->params.keygen.seed);
- log_info("key: minlen %u, maxlen %u\n", i->params.keylen_min,
- i->params.keylen_max);
- log_info("data: minlen %u, maxlen %u\n", i->params.datalen_min,
- i->params.datalen_max);
+ log_verbose("keygen.seed: %u\n", i->params.keygen.seed);
+ log_verbose("key: minlen %u, maxlen %u\n", i->params.keylen_min,
+ i->params.keylen_max);
+ log_verbose("data: minlen %u, maxlen %u\n", i->params.datalen_min,
+ i->params.datalen_max);
- log_info("batch: read %u, write %u\n", i->params.batch_read,
- i->params.batch_write);
+ log_verbose("batch: read %u, write %u\n", i->params.batch_read,
+ i->params.batch_write);
if (i->params.waitfor_nops)
- log_info("wait: actor %u for %u ops\n", i->wait4id,
- i->params.waitfor_nops);
+ log_verbose("wait: actor %u for %u ops\n", i->wait4id,
+ i->params.waitfor_nops);
else if (i->params.delaystart)
dump_duration("delay", i->params.delaystart);
else
- log_info("no-delay\n");
+ log_verbose("no-delay\n");
if (i->params.inject_writefaultn)
- log_info("inject-writefault on %u ops\n", i->params.inject_writefaultn);
+ log_verbose("inject-writefault on %u ops\n",
+ i->params.inject_writefaultn);
else
- log_info("no-inject-writefault\n");
+ log_verbose("no-inject-writefault\n");
- log_info("limits: readers %u, tables %u\n", i->params.max_readers,
- i->params.max_tables);
+ log_verbose("limits: readers %u, tables %u, txn-bytes %zu\n",
+ i->params.max_readers, i->params.max_tables,
+ mdbx_limits_txnsize_max(i->params.pagesize));
+
+ log_verbose("drop table: %s\n", i->params.drop_table ? "Yes" : "No");
+ log_verbose("ignore MDBX_MAP_FULL error: %s\n",
+ i->params.ignore_dbfull ? "Yes" : "No");
+ log_verbose("verifying by speculum: %s\n",
+ i->params.speculum ? "Yes" : "No");
- log_info("drop table: %s\n", i->params.drop_table ? "Yes" : "No");
- log_info("ignore MDBX_MAP_FULL error: %s\n",
- i->params.ignore_dbfull ? "Yes" : "No");
indent.pop();
}
dump_duration("timeout", global::config::timeout_duration_seconds);
- log_info("cleanup: before %s, after %s\n",
- global::config::cleanup_before ? "Yes" : "No",
- global::config::cleanup_after ? "Yes" : "No");
-
- log_info("failfast: %s\n", global::config::failfast ? "Yes" : "No");
- log_info("progress indicator: %s\n",
- global::config::progress_indicator ? "Yes" : "No");
+ log_verbose("cleanup: before %s, after %s\n",
+ global::config::cleanup_before ? "Yes" : "No",
+ global::config::cleanup_after ? "Yes" : "No");
+
+ log_verbose("failfast: %s\n", global::config::failfast ? "Yes" : "No");
+ log_verbose("progress indicator: %s\n",
+ global::config::progress_indicator ? "Yes" : "No");
+ log_verbose("console mode: %s\n",
+ global::config::console_mode ? "Yes" : "No");
}
} /* namespace config */
@@ -452,26 +467,31 @@ const std::string actor_config::serialize(const char *prefix) const {
checksum.push(params.pathname_db);
result.append(params.pathname_db);
- result.append("|");
+ result.push_back('|');
checksum.push(params.pathname_log);
result.append(params.pathname_log);
- result.append("|");
+ result.push_back('|');
static_assert(std::is_pod<actor_params_pod>::value,
"actor_params_pod should by POD");
result.append(data2hex(static_cast<const actor_params_pod *>(&params),
sizeof(actor_params_pod), checksum));
- result.append("|");
+ result.push_back('|');
static_assert(std::is_pod<actor_config_pod>::value,
"actor_config_pod should by POD");
result.append(data2hex(static_cast<const actor_config_pod *>(this),
sizeof(actor_config_pod), checksum));
- result.append("|");
+ result.push_back('|');
+ result.push_back(global::config::progress_indicator ? 'Y' : 'N');
+ checksum.push(global::config::progress_indicator);
+ result.push_back(global::config::console_mode ? 'Y' : 'N');
+ checksum.push(global::config::console_mode);
+ result.push_back('|');
result.append(osal_serialize(checksum));
- result.append("|");
+ result.push_back('|');
result.append(std::to_string(checksum.value));
return result;
@@ -535,6 +555,20 @@ bool actor_config::deserialize(const char *str, actor_config &config) {
TRACE("<< actor_config::deserialize: slash-5\n");
return false;
}
+ if ((str[0] == 'Y' || str[0] == 'N') && (str[1] == 'Y' || str[1] == 'N')) {
+ global::config::progress_indicator = str[0] == 'Y';
+ checksum.push(global::config::progress_indicator);
+ global::config::console_mode = str[1] == 'Y';
+ checksum.push(global::config::console_mode);
+ str = slash + 1;
+
+ slash = strchr(str, '|');
+ if (!slash) {
+ TRACE("<< actor_config::deserialize: slash-6\n");
+ return false;
+ }
+ }
+
if (!config.osal_deserialize(str, slash, checksum)) {
TRACE("<< actor_config::deserialize: osal\n");
return false;
@@ -556,10 +590,7 @@ unsigned actor_params::mdbx_keylen_min() const {
}
unsigned actor_params::mdbx_keylen_max() const {
- return (table_flags & MDBX_INTEGERKEY)
- ? 8
- : std::min((unsigned)mdbx_limits_keysize_max(pagesize),
- (unsigned)UINT16_MAX);
+ return (unsigned)mdbx_limits_keysize_max(pagesize, table_flags);
}
unsigned actor_params::mdbx_datalen_min() const {
@@ -567,10 +598,6 @@ unsigned actor_params::mdbx_datalen_min() const {
}
unsigned actor_params::mdbx_datalen_max() const {
- return (table_flags & MDBX_INTEGERDUP)
- ? 8
- : std::min((table_flags & MDBX_DUPSORT)
- ? (unsigned)mdbx_limits_keysize_max(pagesize)
- : (unsigned)MDBX_MAXDATASIZE,
- (unsigned)UINT16_MAX);
+ return std::min((unsigned)UINT16_MAX,
+ (unsigned)mdbx_limits_valsize_max(pagesize, table_flags));
}
diff --git a/libs/libmdbx/src/test/config.h b/libs/libmdbx/src/test/config.h
index 89889d8eb6..9e083f3f22 100644
--- a/libs/libmdbx/src/test/config.h
+++ b/libs/libmdbx/src/test/config.h
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -29,12 +29,13 @@ enum actor_testcase {
ac_try,
ac_copy,
ac_append,
- ac_ttl
+ ac_ttl,
+ ac_nested
};
enum actor_status {
as_unknown,
- as_debuging,
+ as_debugging,
as_running,
as_successful,
as_killed,
@@ -123,8 +124,6 @@ inline bool parse_option_intptr(int argc, char *const argv[], int &narg,
#pragma pack(push, 1)
struct keygen_params_pod {
- keygen_case keycase;
-
/* Параметры генератора пар key-value.
*
* Ключи и значения генерируются по задаваемым параметрам на основе "плоской"
@@ -231,11 +230,10 @@ struct keygen_params_pod {
uint8_t split;
uint32_t seed;
uint64_t offset;
+ keygen_case keycase;
};
struct actor_params_pod {
- unsigned loglevel;
-
unsigned mode_flags;
unsigned table_flags;
intptr_t size_lower;
@@ -264,8 +262,10 @@ struct actor_params_pod {
unsigned max_tables;
keygen_params_pod keygen;
+ uint8_t loglevel;
bool drop_table;
bool ignore_dbfull;
+ bool speculum;
};
struct actor_config_pod {
diff --git a/libs/libmdbx/src/test/copy.cc b/libs/libmdbx/src/test/copy.cc
index e239d41e34..ff53153e1a 100644
--- a/libs/libmdbx/src/test/copy.cc
+++ b/libs/libmdbx/src/test/copy.cc
@@ -1,4 +1,4 @@
-#include "test.h"
+#include "test.h"
void testcase_copy::copy_db(const bool with_compaction) {
int err = osal_removefile(copy_pathname);
diff --git a/libs/libmdbx/src/test/dead.cc b/libs/libmdbx/src/test/dead.cc
index a1a8b5f9de..8f83bbeb2e 100644
--- a/libs/libmdbx/src/test/dead.cc
+++ b/libs/libmdbx/src/test/dead.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
diff --git a/libs/libmdbx/src/test/hill.cc b/libs/libmdbx/src/test/hill.cc
index 1b03ddf0fc..9d9890951a 100644
--- a/libs/libmdbx/src/test/hill.cc
+++ b/libs/libmdbx/src/test/hill.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -15,12 +15,13 @@
#include "test.h"
bool testcase_hill::run() {
- MDBX_dbi dbi;
int err = db_open__begin__table_create_open_clean(dbi);
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("hill: bailout-prepare due '%s'", mdbx_strerror(err));
- return true;
+ return false;
}
+ speculum.clear();
+ speculum_commited.clear();
/* LY: тест "холмиком":
* - сначала наполняем таблицу циклическими CRUD-манипуляциями,
@@ -63,6 +64,7 @@ bool testcase_hill::run() {
uint64_t commited_serial = serial_count;
unsigned txn_nops = 0;
+ bool rc = false;
while (should_continue()) {
const keygen::serial_t a_serial = serial_count;
if (unlikely(!keyvalue_maker.increment(serial_count, 1))) {
@@ -78,53 +80,74 @@ bool testcase_hill::run() {
log_trace("uphill: insert-a (age %" PRIu64 ") %" PRIu64, age_shift,
a_serial);
generate_pair(a_serial, a_key, a_data_1, age_shift);
- err = mdbx_put(txn_guard.get(), dbi, &a_key->value, &a_data_1->value,
- insert_flags);
+
+ err = insert(a_key, a_data_1, insert_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("uphill: bailout at insert-a due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial_count = commited_serial;
+ speculum = speculum_commited;
break;
}
failure_perror("mdbx_put(insert-a.1)", err);
}
+ if (!speculum_verify()) {
+ log_notice("uphill: bailout after insert-a, before commit");
+ goto bailout;
+ }
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("uphill: bailout at commit due '%s'", mdbx_strerror(err));
serial_count = commited_serial;
+ speculum = speculum_commited;
break;
}
+ speculum_commited = speculum;
commited_serial = a_serial;
txn_nops = 0;
+ if (!speculum_verify()) {
+ log_notice("uphill: bailout after insert-a, after commit");
+ goto bailout;
+ }
}
// создаем вторую запись из пары
log_trace("uphill: insert-b %" PRIu64, b_serial);
generate_pair(b_serial, b_key, b_data, 0);
- err = mdbx_put(txn_guard.get(), dbi, &b_key->value, &b_data->value,
- insert_flags);
+ err = insert(b_key, b_data, insert_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("uphill: bailout at insert-b due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial_count = commited_serial;
+ speculum = speculum_commited;
break;
}
failure_perror("mdbx_put(insert-b)", err);
}
+ if (!speculum_verify()) {
+ log_notice("uphill: bailout after insert-b, before commit");
+ goto bailout;
+ }
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("uphill: bailout at commit due '%s'", mdbx_strerror(err));
serial_count = commited_serial;
+ speculum = speculum_commited;
break;
}
+ speculum_commited = speculum;
commited_serial = a_serial;
txn_nops = 0;
+ if (!speculum_verify()) {
+ log_notice("uphill: bailout after insert-b, after commit");
+ goto bailout;
+ }
}
// обновляем данные в первой записи
@@ -132,52 +155,73 @@ bool testcase_hill::run() {
a_serial);
generate_pair(a_serial, a_key, a_data_0, 0);
checkdata("uphill: update-a", dbi, a_key->value, a_data_1->value);
- err = mdbx_replace(txn_guard.get(), dbi, &a_key->value, &a_data_0->value,
- &a_data_1->value, update_flags);
+ err = replace(a_key, a_data_0, a_data_1, update_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("uphill: bailout at update-a due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial_count = commited_serial;
+ speculum = speculum_commited;
break;
}
failure_perror("mdbx_replace(update-a: 1->0)", err);
}
+ if (!speculum_verify()) {
+ log_notice("uphill: bailout after update-a, before commit");
+ goto bailout;
+ }
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("uphill: bailout at commit due '%s'", mdbx_strerror(err));
serial_count = commited_serial;
+ speculum = speculum_commited;
break;
}
+ speculum_commited = speculum;
commited_serial = a_serial;
txn_nops = 0;
+ if (!speculum_verify()) {
+ log_notice("uphill: bailout after update-a, after commit");
+ goto bailout;
+ }
}
// удаляем вторую запись
log_trace("uphill: delete-b %" PRIu64, b_serial);
checkdata("uphill: delete-b", dbi, b_key->value, b_data->value);
- err = mdbx_del(txn_guard.get(), dbi, &b_key->value, &b_data->value);
+ err = remove(b_key, b_data);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("uphill: bailout at delete-b due '%s'", mdbx_strerror(err));
txn_restart(true, false);
serial_count = commited_serial;
+ speculum = speculum_commited;
break;
}
failure_perror("mdbx_del(b)", err);
}
+ if (!speculum_verify()) {
+ log_notice("uphill: bailout after delete-b, before commit");
+ goto bailout;
+ }
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("uphill: bailout at commit due '%s'", mdbx_strerror(err));
serial_count = commited_serial;
+ speculum = speculum_commited;
break;
}
+ speculum_commited = speculum;
commited_serial = a_serial;
txn_nops = 0;
+ if (!speculum_verify()) {
+ log_notice("uphill: bailout after delete-b, after commit");
+ goto bailout;
+ }
}
report(1);
@@ -204,101 +248,145 @@ bool testcase_hill::run() {
generate_pair(a_serial, a_key, a_data_0, 0);
generate_pair(a_serial, a_key, a_data_1, age_shift);
checkdata("downhill: update-a", dbi, a_key->value, a_data_0->value);
- err = mdbx_replace(txn_guard.get(), dbi, &a_key->value, &a_data_1->value,
- &a_data_0->value, update_flags);
+ err = replace(a_key, a_data_1, a_data_0, update_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("downhill: bailout at update-a due '%s'",
mdbx_strerror(err));
txn_end(true);
+ speculum = speculum_commited;
break;
}
failure_perror("mdbx_put(update-a: 0->1)", err);
}
+ if (!speculum_verify()) {
+ log_notice("downhill: bailout after update-a, before commit");
+ break;
+ }
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
+ speculum = speculum_commited;
break;
}
+ speculum_commited = speculum;
txn_nops = 0;
+ if (!speculum_verify()) {
+ log_notice("downhill: bailout after update-a, after commit");
+ break;
+ }
}
// создаем вторую запись из пары
log_trace("downhill: insert-b %" PRIu64, b_serial);
generate_pair(b_serial, b_key, b_data, 0);
- err = mdbx_put(txn_guard.get(), dbi, &b_key->value, &b_data->value,
- insert_flags);
+ err = insert(b_key, b_data, insert_flags);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("downhill: bailout at insert-a due '%s'",
mdbx_strerror(err));
txn_end(true);
+ speculum = speculum_commited;
break;
}
failure_perror("mdbx_put(insert-b)", err);
}
+ if (!speculum_verify()) {
+ log_notice("downhill: bailout after insert-b, before commit");
+ break;
+ }
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
+ speculum = speculum_commited;
break;
}
+ speculum_commited = speculum;
txn_nops = 0;
+ if (!speculum_verify()) {
+ log_notice("downhill: bailout after insert-b, after commit");
+ break;
+ }
}
// удаляем первую запись
log_trace("downhill: delete-a (age %" PRIu64 ") %" PRIu64, age_shift,
a_serial);
checkdata("downhill: delete-a", dbi, a_key->value, a_data_1->value);
- err = mdbx_del(txn_guard.get(), dbi, &a_key->value, &a_data_1->value);
+ err = remove(a_key, a_data_1);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("downhill: bailout at delete-a due '%s'",
mdbx_strerror(err));
txn_end(true);
+ speculum = speculum_commited;
break;
}
failure_perror("mdbx_del(a)", err);
}
+ if (!speculum_verify()) {
+ log_notice("downhill: bailout after delete-a, before commit");
+ break;
+ }
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
+ speculum = speculum_commited;
break;
}
+ speculum_commited = speculum;
txn_nops = 0;
+ if (!speculum_verify()) {
+ log_notice("downhill: bailout after delete-a, after commit");
+ break;
+ }
}
// удаляем вторую запись
log_trace("downhill: delete-b %" PRIu64, b_serial);
checkdata("downhill: delete-b", dbi, b_key->value, b_data->value);
- err = mdbx_del(txn_guard.get(), dbi, &b_key->value, &b_data->value);
+ err = remove(b_key, b_data);
if (unlikely(err != MDBX_SUCCESS)) {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
log_notice("downhill: bailout at delete-b due '%s'",
mdbx_strerror(err));
txn_end(true);
+ speculum = speculum_commited;
break;
}
failure_perror("mdbx_del(b)", err);
}
+ if (!speculum_verify()) {
+ log_notice("downhill: bailout after delete-b, before commit");
+ break;
+ }
if (++txn_nops >= config.params.batch_write) {
err = breakable_restart();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("downhill: bailout at commit due '%s'", mdbx_strerror(err));
+ speculum = speculum_commited;
break;
}
+ speculum_commited = speculum;
txn_nops = 0;
+ if (!speculum_verify()) {
+ log_notice("downhill: bailout after delete-b, after commit");
+ goto bailout;
+ }
}
report(1);
}
+ rc = speculum_verify();
+bailout:
if (txn_guard) {
err = breakable_commit();
if (unlikely(err != MDBX_SUCCESS))
@@ -312,10 +400,10 @@ bool testcase_hill::run() {
err = breakable_commit();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("hill: bailout-clean due '%s'", mdbx_strerror(err));
- return true;
+ return rc;
}
} else
db_table_close(dbi);
}
- return true;
+ return rc;
}
diff --git a/libs/libmdbx/src/test/jitter.cc b/libs/libmdbx/src/test/jitter.cc
index 82d1d764ff..c7b8bccb51 100644
--- a/libs/libmdbx/src/test/jitter.cc
+++ b/libs/libmdbx/src/test/jitter.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -15,10 +15,26 @@
#include "test.h"
bool testcase_jitter::run() {
+ int err;
+ size_t upper_limit = config.params.size_upper;
+ if (upper_limit < 1)
+ upper_limit = config.params.size_now * 2;
+
while (should_continue()) {
jitter_delay();
db_open();
+ if (upper_limit < 1) {
+ MDBX_envinfo info;
+ err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &info,
+ sizeof(info));
+ if (err)
+ failure_perror("mdbx_env_info_ex()", err);
+ upper_limit = (info.mi_geo.upper < INTPTR_MAX)
+ ? (intptr_t)info.mi_geo.upper
+ : INTPTR_MAX;
+ }
+
if (flipcoin()) {
jitter_delay();
txn_begin(true);
@@ -27,19 +43,29 @@ bool testcase_jitter::run() {
txn_end(flipcoin());
}
+ const bool coin4size = flipcoin();
jitter_delay();
txn_begin(mode_readonly());
jitter_delay();
if (!mode_readonly()) {
fetch_canary();
update_canary(1);
- /* TODO:
- * - db_setsize()
- * ...
- */
+ err = mdbx_env_set_geometry(
+ db_guard.get(), -1, -1,
+ coin4size ? upper_limit * 2 / 3 : upper_limit * 3 / 2, -1, -1, -1);
+ if (err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE &&
+ err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE)
+ failure_perror("mdbx_env_set_geometry-1", err);
}
txn_end(flipcoin());
+ err = mdbx_env_set_geometry(
+ db_guard.get(), -1, -1,
+ !coin4size ? upper_limit * 2 / 3 : upper_limit * 3 / 2, -1, -1, -1);
+ if (err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE &&
+ err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE)
+ failure_perror("mdbx_env_set_geometry-2", err);
+
if (flipcoin()) {
jitter_delay();
txn_begin(true);
@@ -48,6 +74,12 @@ bool testcase_jitter::run() {
}
jitter_delay();
+ err =
+ mdbx_env_set_geometry(db_guard.get(), -1, -1, upper_limit, -1, -1, -1);
+ if (err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE &&
+ err != MDBX_MAP_FULL && err != MDBX_TOO_LARGE)
+ failure_perror("mdbx_env_set_geometry-3", err);
+
db_close();
/* just 'align' nops with other tests with batching */
diff --git a/libs/libmdbx/src/test/keygen.cc b/libs/libmdbx/src/test/keygen.cc
index 0110b049bf..c2098e6ef9 100644
--- a/libs/libmdbx/src/test/keygen.cc
+++ b/libs/libmdbx/src/test/keygen.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -72,7 +72,7 @@ serial_t injective(const serial_t serial,
}
void __hot maker::pair(serial_t serial, const buffer &key, buffer &value,
- serial_t value_age) {
+ serial_t value_age, const bool keylen_changeable) {
assert(mapping.width >= serial_minwith && mapping.width <= serial_maxwith);
assert(mapping.split <= mapping.width);
assert(mapping.mesh <= mapping.width);
@@ -131,14 +131,65 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value,
log_trace("keygen-pair: key %" PRIu64 ", value %" PRIu64, key_serial,
value_serial);
- mk(key_serial, key_essentials, *key);
- mk(value_serial, value_essentials, *value);
+ mk_begin(key_serial, key_essentials, *key);
+ mk_begin(value_serial, value_essentials, *value);
+
+#if 0 /* unused for now */
+ if (key->value.iov_len + value->value.iov_len > pair_maxlen) {
+ unsigned extra = key->value.iov_len + value->value.iov_len - pair_maxlen;
+ if (keylen_changeable &&
+ key->value.iov_len > std::max(8u, (unsigned)key_essentials.minlen)) {
+#if defined(__GNUC__) || defined(__clang__)
+ const bool coin = __builtin_parityll(serial) != 0;
+#else
+ const bool coin = INT64_C(0xF2CEECA9989BD96A) * int64_t(serial) < 0;
+#endif
+ if (coin) {
+ const unsigned gap =
+ key->value.iov_len - std::max(8u, (unsigned)key_essentials.minlen);
+ const unsigned chop = std::min(gap, extra);
+ log_trace("keygen-pair: chop %u key-len %u -> %u", chop,
+ (unsigned)key->value.iov_len,
+ (unsigned)key->value.iov_len - chop);
+ key->value.iov_len -= chop;
+ extra -= chop;
+ }
+ }
+ if (extra && value->value.iov_len >
+ std::max(8u, (unsigned)value_essentials.minlen)) {
+ const unsigned gap = value->value.iov_len -
+ std::max(8u, (unsigned)value_essentials.minlen);
+ const unsigned chop = std::min(gap, extra);
+ log_trace("keygen-pair: chop %u value-len %u -> %u", chop,
+ (unsigned)value->value.iov_len,
+ (unsigned)value->value.iov_len - chop);
+ value->value.iov_len -= chop;
+ extra -= chop;
+ }
+ if (keylen_changeable && extra &&
+ key->value.iov_len > std::max(8u, (unsigned)key_essentials.minlen)) {
+ const unsigned gap =
+ key->value.iov_len - std::max(8u, (unsigned)key_essentials.minlen);
+ const unsigned chop = std::min(gap, extra);
+ log_trace("keygen-pair: chop %u key-len %u -> %u", chop,
+ (unsigned)key->value.iov_len,
+ (unsigned)key->value.iov_len - chop);
+ key->value.iov_len -= chop;
+ extra -= chop;
+ }
+ }
+#else
+ (void)keylen_changeable;
+#endif /* unused for now */
+
+ mk_continue(key_serial, key_essentials, *key);
+ mk_continue(value_serial, value_essentials, *value);
if (log_enabled(logging::trace)) {
- char dump_key[128], dump_value[128];
+ char dump_key[4096], dump_value[4096];
log_trace("keygen-pair: key %s, value %s",
- mdbx_dkey(&key->value, dump_key, sizeof(dump_key)),
- mdbx_dkey(&value->value, dump_value, sizeof(dump_value)));
+ mdbx_dump_val(&key->value, dump_key, sizeof(dump_key)),
+ mdbx_dump_val(&value->value, dump_value, sizeof(dump_value)));
}
}
@@ -146,19 +197,22 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id,
unsigned thread_number) {
key_essentials.flags =
actor.table_flags & (MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT);
- assert(actor.keylen_min <= UINT8_MAX);
- key_essentials.minlen = (uint8_t)actor.keylen_min;
- assert(actor.keylen_max <= UINT16_MAX);
- key_essentials.maxlen = (uint16_t)actor.keylen_max;
+ assert(actor.keylen_min <= UINT16_MAX);
+ key_essentials.minlen = (uint16_t)actor.keylen_min;
+ assert(actor.keylen_max <= UINT32_MAX);
+ key_essentials.maxlen = std::min(
+ (uint32_t)actor.keylen_max,
+ (uint32_t)mdbx_limits_keysize_max(actor.pagesize, key_essentials.flags));
value_essentials.flags =
actor.table_flags & (MDBX_INTEGERDUP | MDBX_REVERSEDUP);
- assert(actor.datalen_min <= UINT8_MAX);
- value_essentials.minlen = (uint8_t)actor.datalen_min;
- assert(actor.datalen_max <= UINT16_MAX);
- value_essentials.maxlen = (uint16_t)actor.datalen_max;
+ assert(actor.datalen_min <= UINT16_MAX);
+ value_essentials.minlen = (uint16_t)actor.datalen_min;
+ assert(actor.datalen_max <= UINT32_MAX);
+ value_essentials.maxlen = std::min(
+ (uint32_t)actor.datalen_max,
+ (uint32_t)mdbx_limits_valsize_max(actor.pagesize, key_essentials.flags));
- assert(thread_number < 2);
(void)thread_number;
mapping = actor.keygen;
salt = (actor.keygen.seed + actor_id) * UINT64_C(14653293970879851569);
@@ -226,18 +280,25 @@ buffer alloc(size_t limit) {
return buffer(ptr);
}
-void __hot maker::mk(const serial_t serial, const essentials &params,
- result &out) {
// First stage of item generation: only picks the length of the item.
// The length is minlen plus (serial modulo the maxlen-minlen span) when the
// span is non-empty, otherwise exactly minlen. For items that are not
// MDBX_INTEGERKEY/MDBX_INTEGERDUP and came out shorter than 8 bytes, the
// length is raised to at least length(serial).
// Note: out.value.iov_base is no longer assigned in this stage.
+void __hot maker::mk_begin(const serial_t serial, const essentials &params,
+ result &out) {
assert(out.limit >= params.maxlen);
assert(params.maxlen >= params.minlen);
assert(params.maxlen >= length(serial));
- out.value.iov_base = out.bytes;
out.value.iov_len =
(params.maxlen > params.minlen)
? params.minlen + serial % (params.maxlen - params.minlen)
: params.minlen;
+ if ((params.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) == 0 &&
+ out.value.iov_len < 8)
+ out.value.iov_len = std::max(length(serial), out.value.iov_len);
+}
+
+void __hot maker::mk_continue(const serial_t serial, const essentials &params,
+ result &out) {
+ out.value.iov_base = out.bytes;
if (params.flags & (MDBX_INTEGERKEY | MDBX_INTEGERDUP)) {
assert(params.maxlen == params.minlen);
assert(params.minlen == 4 || params.minlen == 8);
@@ -251,17 +312,13 @@ void __hot maker::mk(const serial_t serial, const essentials &params,
unaligned::store(out.bytes + out.value.iov_len - 8, htobe64(serial));
} else {
out.u64 = htobe64(serial);
- if (out.value.iov_len < 8) {
- out.value.iov_len = std::max(length(serial), out.value.iov_len);
+ if (out.value.iov_len < 8)
out.value.iov_base = out.bytes + 8 - out.value.iov_len;
- }
}
} else {
out.u64 = htole64(serial);
if (out.value.iov_len > 8)
memset(out.bytes + 8, '\0', out.value.iov_len - 8);
- else
- out.value.iov_len = std::max(length(serial), out.value.iov_len);
}
assert(out.value.iov_len >= params.minlen);
diff --git a/libs/libmdbx/src/test/keygen.h b/libs/libmdbx/src/test/keygen.h
index d0299e1e7d..d25b88349f 100644
--- a/libs/libmdbx/src/test/keygen.h
+++ b/libs/libmdbx/src/test/keygen.h
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -93,7 +93,7 @@ struct result {
//-----------------------------------------------------------------------------
// Deleter functor that releases a `result` buffer with free().
// The std::unary_function base is commented out in the new revision.
-struct buffer_deleter : public std::unary_function<void, result *> {
+struct buffer_deleter /* : public std::unary_function<void, result *> */ {
void operator()(result *buffer) const { free(buffer); }
};
@@ -107,18 +107,25 @@ class maker {
serial_t salt;
// Per-item generation parameters (one instance for keys, one for values):
// minimal length, table flags and maximal length. The new revision widens
// every field (8->16 bit for minlen/flags, 16->32 bit for maxlen).
struct essentials {
- uint8_t minlen;
- uint8_t flags;
- uint16_t maxlen;
+ uint16_t minlen;
+ uint16_t flags;
+ uint32_t maxlen;
} key_essentials, value_essentials;
- static void mk(const serial_t serial, const essentials &params, result &out);
+ static void mk_begin(const serial_t serial, const essentials &params,
+ result &out);
+ static void mk_continue(const serial_t serial, const essentials &params,
+ result &out);
// Convenience wrapper: runs both generation stages (mk_begin, then
// mk_continue) back to back with the same arguments.
+ static void mk(const serial_t serial, const essentials &params, result &out) {
+ mk_begin(serial, params, out);
+ mk_continue(serial, params, out);
+ }
public:
maker() { memset(this, 0, sizeof(*this)); }
void pair(serial_t serial, const buffer &key, buffer &value,
- serial_t value_age);
+ serial_t value_age, const bool keylen_changeable);
void setup(const config::actor_params_pod &actor, unsigned actor_id,
unsigned thread_number);
void make_ordered();
diff --git a/libs/libmdbx/src/test/log.cc b/libs/libmdbx/src/test/log.cc
index 79544e11bb..2b22e28785 100644
--- a/libs/libmdbx/src/test/log.cc
+++ b/libs/libmdbx/src/test/log.cc
@@ -20,7 +20,7 @@ void failure(const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
fflushall();
- logging::output(logging::failure, fmt, ap);
+ logging::output_nocheckloglevel_ap(logging::failure, fmt, ap);
va_end(ap);
fflushall();
exit(EXIT_FAILURE);
@@ -37,29 +37,18 @@ void __noreturn failure_perror(const char *what, int errnum) {
//-----------------------------------------------------------------------------
// Logger callback handed to mdbx_setup_debug() by logging::setlevel().
// New revision: `priority` is converted directly to logging::loglevel.
// A null `function` is reported as "unknown"; a MDBX_LOG_FATAL message is
// additionally announced through log_error(). The line prefix is emitted
// without a log-level check, then msg/args are appended via feed_ap().
// The old revision's flag-to-level mapping and abort() on assertion are gone.
-static void mdbx_logger(int type, const char *function, int line,
+static void mdbx_logger(int priority, const char *function, int line,
const char *msg, va_list args) {
- logging::loglevel level = logging::info;
- if (type & MDBX_DBG_EXTRA)
- level = logging::extra;
- if (type & MDBX_DBG_TRACE)
- level = logging::trace;
- if (type & MDBX_DBG_PRINT)
- level = logging::verbose;
-
if (!function)
function = "unknown";
- if (type & MDBX_DBG_ASSERT) {
- log_error("mdbx: assertion failure: %s, %d", function, line);
- level = logging::failure;
- }
- if (logging::output(
- level,
- strncmp(function, "mdbx_", 5) == 0 ? "%s: " : "mdbx: %s: ", function))
- logging::feed_ap(msg, args);
- if (type & MDBX_DBG_ASSERT)
- abort();
+ if (priority == MDBX_LOG_FATAL)
+ log_error("mdbx: fatal failure: %s, %d", function, line);
+
+ logging::output_nocheckloglevel(
+ logging::loglevel(priority),
+ strncmp(function, "mdbx_", 5) == 0 ? "%s: " : "mdbx %s: ", function);
+ logging::feed_ap(msg, args);
}
namespace logging {
@@ -69,19 +58,16 @@ static std::string suffix;
static loglevel level;
static FILE *last;
// Stores the requested log level and registers mdbx_logger with the library.
// New revision: the level is passed straight to mdbx_setup_debug() together
// with a fixed set of debug flags (ASSERT | AUDIT | JITTER); the value
// returned by mdbx_setup_debug() is only traced.
-void setlevel(loglevel _level) {
- level = (_level > error) ? failure : _level;
- int mdbx_dbg_opts = MDBX_DBG_ASSERT | MDBX_DBG_JITTER | MDBX_DBG_DUMP;
- if (level <= trace)
- mdbx_dbg_opts |= MDBX_DBG_TRACE;
- if (level <= verbose)
- mdbx_dbg_opts |= MDBX_DBG_PRINT;
- int rc = mdbx_setup_debug(mdbx_dbg_opts, mdbx_logger);
+void setlevel(loglevel priority) {
+ level = priority;
+ int rc = mdbx_setup_debug(priority,
+ MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_JITTER,
+ mdbx_logger);
log_trace("set mdbx debug-opts: 0x%02x", rc);
}
// Applies the log level via setlevel() and remembers the line prefix.
-void setup(loglevel _level, const std::string &_prefix) {
- setlevel(_level);
+void setup(loglevel priority, const std::string &_prefix) {
+ setlevel(priority);
prefix = _prefix;
}
@@ -95,10 +81,10 @@ const char *level2str(const loglevel alevel) {
return "extra";
case trace:
return "trace";
+ case debug:
+ return "debug";
case verbose:
return "verbose";
- case info:
- return "info";
case notice:
return "notice";
case warning:
@@ -111,26 +97,28 @@ const char *level2str(const loglevel alevel) {
}
// Level-checked variadic entry point: returns false without printing when
// `priority` is lower than the configured level, otherwise forwards the
// arguments to output_nocheckloglevel_ap() and returns true.
bool output(const loglevel priority, const char *format, ...) {
- if (priority < level)
+ if (lower(priority, level))
return false;
va_list ap;
va_start(ap, format);
- output(priority, format, ap);
+ output_nocheckloglevel_ap(priority, format, ap);
va_end(ap);
return true;
}
-bool output(const logging::loglevel priority, const char *format, va_list ap) {
+void output_nocheckloglevel_ap(const logging::loglevel priority,
+ const char *format, va_list ap) {
if (last) {
putc('\n', last);
fflush(last);
+ if (last == stderr) {
+ putc('\n', stdout);
+ fflush(stdout);
+ }
last = nullptr;
}
- if (priority < level)
- return false;
-
chrono::time now = chrono::now_realtime();
struct tm tm;
#ifdef _MSC_VER
@@ -144,14 +132,14 @@ bool output(const logging::loglevel priority, const char *format, va_list ap) {
last = stdout;
fprintf(last,
- "[ %02d%02d%02d-%02d:%02d:%02d.%06d_%05u %-10s %.4s ] %s" /* TODO */,
+ "[ %02d%02d%02d-%02d:%02d:%02d.%06d_%05lu %-10s %.4s ] %s" /* TODO */,
tm.tm_year - 100, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min,
- tm.tm_sec, chrono::fractional2us(now.fractional), osal_getpid(),
+ tm.tm_sec, chrono::fractional2us(now.fractional), (long)osal_getpid(),
prefix.c_str(), level2str(priority), suffix.c_str());
va_list ones;
memset(&ones, 0, sizeof(ones)) /* zap MSVC and other stupid compilers */;
- if (priority >= error)
+ if (same_or_higher(priority, error))
va_copy(ones, ap);
vfprintf(last, format, ap);
@@ -178,29 +166,36 @@ bool output(const logging::loglevel priority, const char *format, va_list ap) {
break;
}
- if (priority >= error) {
+ if (same_or_higher(priority, error)) {
if (last != stderr) {
- fprintf(stderr, "[ %05u %-10s %.4s ] %s", osal_getpid(), prefix.c_str(),
- level2str(priority), suffix.c_str());
+ fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(),
+ prefix.c_str(), level2str(priority), suffix.c_str());
vfprintf(stderr, format, ones);
- if (end != '\n')
- putc('\n', stderr);
- fflush(stderr);
+ if (end == '\n')
+ fflush(stderr);
+ else
+ last = stderr;
}
va_end(ones);
}
-
- return true;
}
bool feed_ap(const char *format, va_list ap) {
if (!last)
return false;
+ if (last == stderr) {
+ va_list ones;
+ va_copy(ones, ap);
+ vfprintf(stdout, format, ones);
+ va_end(ones);
+ }
vfprintf(last, format, ap);
size_t len = strlen(format);
if (len && format[len - 1] == '\n') {
fflush(last);
+ if (last == stderr)
+ fflush(stdout);
last = nullptr;
}
return true;
@@ -242,73 +237,123 @@ void local_suffix::pop() {
local_suffix::~local_suffix() { suffix.erase(trim_pos); }
+// Emits a throttled progress indicator on stderr.
+// `active` tells whether the caller is making progress right now; it selects
+// between the "working" glyphs (spinner or dots) and the slower "@*" blink.
+void progress_canary(bool active) {
+  // Moment of the last indicator update; stays zero until the first one.
+  static chrono::time last_tick;
+  const chrono::time now = chrono::now_motonic();
+  const auto elapsed = now.fixedpoint - last_tick.fixedpoint;
+
+  // Never update more often than once per 42 ms.
+  if (elapsed < chrono::from_ms(42).fixedpoint)
+    return;
+
+  // If osal_progress_push() reports success there is nothing to draw here.
+  if (osal_progress_push(active)) {
+    last_tick = now;
+    return;
+  }
+
+  if (last_tick.fixedpoint == 0) {
+    // Very first update.
+    putc('>', stderr);
+    last_tick = now;
+  } else if (global::config::console_mode) {
+    if (!active) {
+      // Idle: blink at most once per two seconds.
+      if (elapsed > chrono::from_seconds(2).fixedpoint) {
+        last_tick = now;
+        fprintf(stderr, "%c\b", "@*"[now.utc & 1]);
+      }
+    } else {
+      // Active: redraw the spinner only when its phase changes.
+      static int shown_phase = -1;
+      const int phase = (now.fixedpoint >> 29) & 3;
+      if (phase != shown_phase) {
+        last_tick = now;
+        shown_phase = phase;
+        fprintf(stderr, "%c\b", "-\\|/"[phase]);
+      }
+    }
+  } else {
+    // Non-console output: plain glyphs, wrapped after 60 of them.
+    static int emitted;
+    char glyph = '\0';
+    if (active && elapsed > chrono::from_seconds(1).fixedpoint)
+      glyph = '.';
+    else if (elapsed > chrono::from_seconds(5).fixedpoint)
+      glyph = "@*"[now.utc & 1];
+
+    if (glyph != '\0') {
+      putc(glyph, stderr);
+      last_tick = now;
+      ++emitted;
+    }
+    if (emitted == 60) {
+      emitted = 0;
+      putc('\n', stderr);
+    }
+  }
+  fflush(stderr);
+}
+
} // namespace logging
// Logs a printf-style message at `extra` level when that level is enabled.
// Otherwise clears logging::last, so a following feed_ap() is dropped.
void log_extra(const char *msg, ...) {
- if (logging::extra >= logging::level) {
+ if (logging::same_or_higher(logging::extra, logging::level)) {
va_list ap;
va_start(ap, msg);
- logging::output(logging::extra, msg, ap);
+ logging::output_nocheckloglevel_ap(logging::extra, msg, ap);
va_end(ap);
} else
logging::last = nullptr;
}
// Logs a printf-style message at `trace` level when that level is enabled.
// Otherwise clears logging::last, so a following feed_ap() is dropped.
void log_trace(const char *msg, ...) {
- if (logging::trace >= logging::level) {
+ if (logging::same_or_higher(logging::trace, logging::level)) {
va_list ap;
va_start(ap, msg);
- logging::output(logging::trace, msg, ap);
+ logging::output_nocheckloglevel_ap(logging::trace, msg, ap);
va_end(ap);
} else
logging::last = nullptr;
}
// New in this revision (takes the slot of the former log_verbose body):
// logs a printf-style message at `debug` level when that level is enabled.
// Otherwise clears logging::last, so a following feed_ap() is dropped.
-void log_verbose(const char *msg, ...) {
- if (logging::verbose >= logging::level) {
+void log_debug(const char *msg, ...) {
+ if (logging::same_or_higher(logging::debug, logging::level)) {
va_list ap;
va_start(ap, msg);
- logging::output(logging::verbose, msg, ap);
+ logging::output_nocheckloglevel_ap(logging::debug, msg, ap);
va_end(ap);
} else
logging::last = nullptr;
}
// Replaces the former log_info: logs a printf-style message at `verbose`
// level when that level is enabled. Otherwise clears logging::last, so a
// following feed_ap() is dropped.
-void log_info(const char *msg, ...) {
- if (logging::info >= logging::level) {
+void log_verbose(const char *msg, ...) {
+ if (logging::same_or_higher(logging::verbose, logging::level)) {
va_list ap;
va_start(ap, msg);
- logging::output(logging::info, msg, ap);
+ logging::output_nocheckloglevel_ap(logging::verbose, msg, ap);
va_end(ap);
} else
logging::last = nullptr;
}
// Logs a printf-style message at `notice` level when that level is enabled.
// Otherwise clears logging::last, so a following feed_ap() is dropped.
void log_notice(const char *msg, ...) {
- if (logging::notice >= logging::level) {
+ if (logging::same_or_higher(logging::notice, logging::level)) {
va_list ap;
va_start(ap, msg);
- logging::output(logging::notice, msg, ap);
+ logging::output_nocheckloglevel_ap(logging::notice, msg, ap);
va_end(ap);
} else
logging::last = nullptr;
}
// Logs a printf-style message at `warning` level when that level is enabled.
// Otherwise clears logging::last, so a following feed_ap() is dropped.
void log_warning(const char *msg, ...) {
- if (logging::warning >= logging::level) {
+ if (logging::same_or_higher(logging::warning, logging::level)) {
va_list ap;
va_start(ap, msg);
- logging::output(logging::warning, msg, ap);
+ logging::output_nocheckloglevel_ap(logging::warning, msg, ap);
va_end(ap);
} else
logging::last = nullptr;
}
void log_error(const char *msg, ...) {
- if (logging::error >= logging::level) {
+ if (logging::same_or_higher(logging::error, logging::level)) {
va_list ap;
va_start(ap, msg);
- logging::output(logging::error, msg, ap);
+ logging::output_nocheckloglevel_ap(logging::error, msg, ap);
va_end(ap);
} else
logging::last = nullptr;
@@ -319,7 +364,7 @@ void log_trouble(const char *where, const char *what, int errnum) {
}
// Tells whether messages of the given priority pass the configured level.
bool log_enabled(const logging::loglevel priority) {
- return (priority >= logging::level);
+ return logging::same_or_higher(priority, logging::level);
}
void log_flush(void) { fflushall(); }
diff --git a/libs/libmdbx/src/test/log.h b/libs/libmdbx/src/test/log.h
index 7d6b4012f1..bb8f997b36 100644
--- a/libs/libmdbx/src/test/log.h
+++ b/libs/libmdbx/src/test/log.h
@@ -15,6 +15,7 @@
#pragma once
#include "base.h"
+#include "chrono.h"
void __noreturn usage(void);
void __noreturn __printf_args(1, 2) failure(const char *fmt, ...);
@@ -24,27 +25,47 @@ const char *test_strerror(int errnum);
namespace logging {
// Test-harness log levels. In the new revision each enumerator takes the
// value of the corresponding MDBX_LOG_* constant instead of a local ordinal;
// `info` is dropped and `debug` is introduced.
// NOTE: the static_assert in lower() requires MDBX_LOG_EXTRA > MDBX_LOG_FATAL,
// i.e. numerically larger values are the more verbose ones.
enum loglevel {
- extra,
- trace,
- verbose,
- info,
- notice,
- warning,
- error,
- failure,
+ extra = MDBX_LOG_EXTRA,
+ trace = MDBX_LOG_TRACE,
+ debug = MDBX_LOG_DEBUG,
+ verbose = MDBX_LOG_VERBOSE,
+ notice = MDBX_LOG_NOTICE,
+ warning = MDBX_LOG_WARN,
+ error = MDBX_LOG_ERROR,
+ failure = MDBX_LOG_FATAL
};
// True when `left` is a lower (less important) level than `right`.
// Relies on the numeric ordering checked by the static_assert: the most
// verbose level (EXTRA) has a greater value than the most severe (FATAL).
+inline bool lower(loglevel left, loglevel right) {
+ static_assert(MDBX_LOG_EXTRA > MDBX_LOG_FATAL, "WTF?");
+ return left > right;
+}
+
// Exact negation of lower(): true when `left` compares numerically less than
// or equal to `right`.
+inline bool same_or_higher(loglevel left, loglevel right) {
+ return left <= right;
+}
+
const char *level2str(const loglevel level);
-void setup(loglevel level, const std::string &prefix);
+void setup(loglevel priority, const std::string &prefix);
void setup(const std::string &prefix);
-void setlevel(loglevel level);
+void setlevel(loglevel priority);
-bool output(const loglevel priority, const char *format, va_list ap);
+void output_nocheckloglevel_ap(const loglevel priority, const char *format,
+ va_list ap);
bool __printf_args(2, 3)
output(const loglevel priority, const char *format, ...);
bool feed_ap(const char *format, va_list ap);
bool __printf_args(1, 2) feed(const char *format, ...);
// Variadic convenience wrapper around output_nocheckloglevel_ap(): packs the
// arguments into a va_list and forwards them; performs no log-level check.
+void inline __printf_args(2, 3)
+ output_nocheckloglevel(const loglevel priority, const char *format, ...) {
+ va_list ap;
+ va_start(ap, format);
+ output_nocheckloglevel_ap(priority, format, ap);
+ va_end(ap);
+}
+
+void progress_canary(bool active);
+
class local_suffix {
protected:
size_t trim_pos;
@@ -66,8 +87,8 @@ public:
void __printf_args(1, 2) log_extra(const char *msg, ...);
void __printf_args(1, 2) log_trace(const char *msg, ...);
+void __printf_args(1, 2) log_debug(const char *msg, ...);
void __printf_args(1, 2) log_verbose(const char *msg, ...);
-void __printf_args(1, 2) log_info(const char *msg, ...);
void __printf_args(1, 2) log_notice(const char *msg, ...);
void __printf_args(1, 2) log_warning(const char *msg, ...);
void __printf_args(1, 2) log_error(const char *msg, ...);
diff --git a/libs/libmdbx/src/test/long_stochastic.sh b/libs/libmdbx/src/test/long_stochastic.sh
index 954d18268a..58ec6d5c6e 100644
--- a/libs/libmdbx/src/test/long_stochastic.sh
+++ b/libs/libmdbx/src/test/long_stochastic.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
if ! which make cc c++ tee lz4 >/dev/null; then
echo "Please install the following prerequisites: make cc c++ tee lz4" >&2
exit 1
@@ -7,19 +7,42 @@ fi
set -euo pipefail
UNAME="$(uname -s 2>/dev/null || echo Unknown)"
+
+## NOTE: Valgrind could produce some false-positive warnings
+## in multi-process environment with shared memory.
+## For instance, when the process "A" explicitly marks a memory
+## region as "undefined", the process "B" fill it,
+## and after this process "A" read such region, etc.
+#VALGRIND="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt"
+
+###############################################################################
+# 1. clean data from prev runs and examine available RAM
+
+if [[ -v VALGRIND && ! -z "$VALGRIND" ]]; then
+ rm -f valgrind-*.log
+else
+ VALGRIND=time
+fi
+
+WANNA_MOUNT=0
case ${UNAME} in
Linux)
MAKE=make
if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then
+ for old_test_dir in $(ls -d /dev/shm/mdbx-test.[0-9]*); do
+ rm -rf $old_test_dir
+ done
TESTDB_DIR="/dev/shm/mdbx-test.$$"
fi
mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/*
+
if LC_ALL=C free | grep -q -i available; then
ram_avail_mb=$(($(LC_ALL=C free | grep -i Mem: | tr -s [:blank:] ' ' | cut -d ' ' -f 7) / 1024))
else
ram_avail_mb=$(($(LC_ALL=C free | grep -i Mem: | tr -s [:blank:] ' ' | cut -d ' ' -f 4) / 1024))
fi
;;
+
FreeBSD)
MAKE=gmake
if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then
@@ -27,18 +50,46 @@ case ${UNAME} in
umount $old_test_dir && rm -r $old_test_dir
done
TESTDB_DIR="/tmp/mdbx-test.$$"
- rm -rf $TESTDB_DIR && mkdir -p $TESTDB_DIR && mount -t tmpfs tmpfs $TESTDB_DIR
+ rm -rf $TESTDB_DIR && mkdir -p $TESTDB_DIR
+ WANNA_MOUNT=1
else
mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/*
fi
+
ram_avail_mb=$(($(LC_ALL=C vmstat -s | grep -ie '[0-9] pages free$' | cut -d p -f 1) * ($(LC_ALL=C vmstat -s | grep -ie '[0-9] bytes per page$' | cut -d b -f 1) / 1024) / 1024))
;;
+
+ Darwin)
+ MAKE=make
+ if [[ ! -v TESTDB_DIR || -z "$TESTDB_DIR" ]]; then
+ for vol in $(ls -d /Volumes/mdx[0-9]*[0-9]tst); do
+ disk=$(mount | grep $vol | cut -d ' ' -f 1)
+ echo "umount: volume $vol disk $disk"
+ hdiutil unmount $vol -force
+ hdiutil detach $disk
+ done
+ TESTDB_DIR="/Volumes/mdx$$tst"
+ WANNA_MOUNT=1
+ else
+ mkdir -p $TESTDB_DIR && rm -f $TESTDB_DIR/*
+ fi
+
+ pagesize=$(($(LC_ALL=C vm_stat | grep -o 'page size of [0-9]\+ bytes' | cut -d' ' -f 4) / 1024))
+ freepages=$(LC_ALL=C vm_stat | grep '^Pages free:' | grep -o '[0-9]\+\.$' | cut -d'.' -f 1)
+ ram_avail_mb=$((pagesize * freepages / 1024))
+ echo "pagesize ${pagesize}K, freepages ${freepages}, ram_avail_mb ${ram_avail_mb}"
+
+ ;;
+
*)
echo "FIXME: ${UNAME} not supported by this script"
exit 2
;;
esac
+###############################################################################
+# 2. estimate reasonable RAM space for test-db
+
echo "=== ${ram_avail_mb}M RAM available"
ram_reserve4logs_mb=1234
if [ $ram_avail_mb -lt $ram_reserve4logs_mb ]; then
@@ -70,16 +121,47 @@ if [ $db_size_mb -gt 3072 ]; then
fi
echo "=== use ${db_size_mb}M for DB"
-${MAKE} TESTDB=${TESTDB_DIR}/smoke.db TESTLOG=${TESTDB_DIR}/smoke.log check
+###############################################################################
+# 3. Create test-directory in ramfs/tmpfs, i.e. create/format/mount if required
+case ${UNAME} in
+ Linux)
+ ;;
+
+ FreeBSD)
+ if [[ WANNA_MOUNT ]]; then
+ mount -t tmpfs tmpfs $TESTDB_DIR
+ fi
+ ;;
+
+ Darwin)
+ if [[ WANNA_MOUNT ]]; then
+ ramdisk_size_mb=$((42 + db_size_mb * 2 + ram_reserve4logs_mb))
+ number_of_sectors=$((ramdisk_size_mb * 2048))
+ ramdev=$(hdiutil attach -nomount ram://${number_of_sectors})
+ diskutil erasevolume ExFAT "mdx$$tst" ${ramdev}
+ fi
+ ;;
+
+ *)
+ echo "FIXME: ${UNAME} not supported by this script"
+ exit 2
+ ;;
+esac
+
+###############################################################################
+# 4. Run basic test, i.e. `make check`
+
+${MAKE} TEST_DB=${TESTDB_DIR}/smoke.db TEST_LOG=${TESTDB_DIR}/smoke.log check
rm -f ${TESTDB_DIR}/*
###############################################################################
+# 5. run stochastic iterations
function rep9 { printf "%*s" $1 '' | tr ' ' '9'; }
function join { local IFS="$1"; shift; echo "$*"; }
function bit2option { local -n arr=$1; (( ($2&(1<<$3)) != 0 )) && echo -n '+' || echo -n '-'; echo "${arr[$3]}"; }
-options=(writemap coalesce lifo)
+options=(writemap coalesce lifo notls)
function bits2list {
local -n arr=$1
@@ -95,13 +177,13 @@ function probe {
echo "=============================================== $(date)"
echo "${caption}: $*"
rm -f ${TESTDB_DIR}/* \
- && ./mdbx_test --ignore-dbfull --repeat=42 --pathname=${TESTDB_DIR}/long.db "$@" | lz4 > ${TESTDB_DIR}/long.log.lz4 \
- && ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \
- && ([ ! -e ${TESTDB_DIR}/long.db-copy ] || ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \
+ && ${VALGRIND} ./mdbx_test --ignore-dbfull --repeat=42 --pathname=${TESTDB_DIR}/long.db "$@" | lz4 > ${TESTDB_DIR}/long.log.lz4 \
+ && ${VALGRIND} ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db | tee ${TESTDB_DIR}/long-chk.log \
+ && ([ ! -e ${TESTDB_DIR}/long.db-copy ] || ${VALGRIND} ./mdbx_chk -nvvv ${TESTDB_DIR}/long.db-copy | tee ${TESTDB_DIR}/long-chk-copy.log) \
|| (echo "FAILED"; exit 1)
}
-###############################################################################
+#------------------------------------------------------------------------------
count=0
for nops in $(seq 2 6); do
diff --git a/libs/libmdbx/src/test/main.cc b/libs/libmdbx/src/test/main.cc
index 959359a515..c00837fa72 100644
--- a/libs/libmdbx/src/test/main.cc
+++ b/libs/libmdbx/src/test/main.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -14,9 +14,100 @@
#include "test.h"
+#if !(defined(_WIN32) || defined(_WIN64))
+#include <sys/resource.h>
+#include <sys/time.h>
+#endif /* !Windows */
+
void __noreturn usage(void) {
- printf("usage:\n"
- "\tFIXME\n");
+ puts(
+ "usage:\n"
+ " --help or -h Show this text\n"
+ "Common parameters:\n"
+ " --pathname=... Path and/or name of database files\n"
+ " --repeat=N Set repeat counter\n"
+ " --threads=N Number of thread (unsunpported for now)\n"
+ " --timeout=N[s|m|h|d] Set timeout in seconds/minutes/hours/days\n"
+ " --failfast[=YES/no] Lill all actors on first failure/error\n"
+ " --max-readers=N See mdbx_env_set_maxreaders() description\n"
+ " --max-tables=N Se mdbx_env_set_maxdbs() description\n"
+ " --dump-config[=YES/no] Dump entire test config before run\n"
+ " --progress[=YES/no] Enable/disable progress `canary`\n"
+ " --console[=yes/no] Enable/disable console-like output\n"
+ " --cleanup-before[=YES/no] Cleanup/remove and re-create database\n"
+ " --cleanup-after[=YES/no] Cleanup/remove database after completion\n"
+ "Database size control:\n"
+ " --pagesize=... Database page size: min, max, 256..65536\n"
+ " --size-lower=N[K|M|G|T] Lower-bound of size in Kb/Mb/Gb/Tb\n"
+ " --size-upper Upper-bound of size in Kb/Mb/Gb/Tb\n"
+ " --size Initial size in Kb/Mb/Gb/Tb\n"
+ " --shrink-threshold Shrink threshold in Kb/Mb/Gb/Tb\n"
+ " --growth-step Grow step in Kb/Mb/Gb/Tb\n"
+ "Predefined complext scenarios/cases:\n"
+ " --case=... Only `basic` scenario implemented for now\n"
+ " basic == Simultaneous multi-process execution\n"
+ " of test-actors: nested,hill,ttl,copy,append,jitter,try\n"
+ "Test actors:\n"
+ " --hill Fill-up and empty-down\n"
+ " by CRUD-operation quads\n"
+ " --ttl Stochastic time-to-live simulation\n"
+ " --nested Nested transactionы\n"
+ " with stochastic-size bellows\n"
+ " --jitter Jitter/delays simulation\n"
+ " --try Try write-transaction, no more\n"
+ " --copy Online copy/backup\n"
+ " --append Append-mode insertions\n"
+ " --dead.reader Dead-reader simulator\n"
+ " --dead.writer Dead-writer simulator\n"
+ "Actor options:\n"
+ " --batch.read=N Read-operations batch size\n"
+ " --batch.write=N Write-operations batch size\n"
+ " --delay=N | --no-delay (no)Delay test-actor before start\n"
+ " --wait4ops=N | --no-wait4ops (no)Wait for previous test-actor\n"
+ " completes # ops before start\n"
+ " --duration=N[s|m|h|d] Define running duration\n"
+ " --nops=N[K|M|G|T] Define number of operations/steps\n"
+ " --inject-writefault[=yes|NO] TBD (see the source code)\n"
+ " --drop[=yes|NO] Drop key-value space/table on "
+ "completion\n"
+ " --ignore-dbfull[=yes|NO] Ignore MDBX_MAP_FULL error\n"
+ " --speculum[=yes|NO] Use internal `speculum` to check "
+ "dataset\n"
+ "Keys and Value:\n"
+ " --keylen.min=N Minimal keys length\n"
+ " --keylen.max=N Miximal keys length\n"
+ " --datalen.min=N Minimal data length\n"
+ " --datalen.max=N Miximal data length\n"
+ " --keygen.width=N TBD (see the source code)\n"
+ " --keygen.mesh=N TBD (see the source code)\n"
+ " --keygen.seed=N TBD (see the source code)\n"
+ " --keygen.split=N TBD (see the source code)\n"
+ " --keygen.rotate=N TBD (see the source code)\n"
+ " --keygen.offset=N TBD (see the source code)\n"
+ " --keygen.case=random Generator case (only `random` for now)\n"
+ "Database operation mode:\n"
+ " --mode={[+-]FLAG}[,[+-]FLAG]...\n"
+ " nosubdir == MDBX_NOSUBDIR\n"
+ " rdonly == MDBX_RDONLY\n"
+ " nometasync == MDBX_NOMETASYNC\n"
+ " lifo == MDBX_LIFORECLAIM\n"
+ " coalesce == MDBX_COALESCE\n"
+ " nosync-safe == MDBX_SAFE_NOSYNC\n"
+ " writemap == MDBX_WRITEMAP\n"
+ " mapasync == MDBX_MAPASYNC\n"
+ " nosync-utterly == MDBX_UTTERLY_NOSYNC\n"
+ " perturb == MDBX_PAGEPERTURB\n"
+ " notls == MDBX_NOTLS\n"
+ " nordahead == MDBX_NORDAHEAD\n"
+ " nomeminit == MDBX_NOMEMINIT\n"
+ "Key-value space/table options:\n"
+ " --table={[+-]FLAG}[,[+-]FLAG]...\n"
+ " key.reverse == MDBX_REVERSEKEY\n"
+ " key.integer == MDBX_INTEGERKEY\n"
+ " data.dups == MDBX_DUPSORT\n"
+ " data.integer == MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT\n"
+ " data.fixed == MDBX_DUPFIXED | MDBX_DUPSORT\n"
+ " data.reverse == MDBX_REVERSEDUP | MDBX_DUPSORT\n");
exit(EXIT_FAILURE);
}
@@ -25,17 +116,15 @@ void __noreturn usage(void) {
void actor_params::set_defaults(const std::string &tmpdir) {
pathname_log = "";
loglevel =
-#ifdef NDEBUG
- logging::info;
-#elif defined(_WIN32) || defined(_WIN64)
+#if defined(NDEBUG) || defined(_WIN32) || defined(_WIN64)
logging::verbose;
#else
logging::trace;
#endif
pathname_db = tmpdir + "mdbx-test.db";
- mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NORDAHEAD |
- MDBX_NOMEMINIT | MDBX_COALESCE | MDBX_LIFORECLAIM;
+ mode_flags = MDBX_NOSUBDIR | MDBX_WRITEMAP | MDBX_MAPASYNC | MDBX_NOMEMINIT |
+ MDBX_COALESCE | MDBX_LIFORECLAIM | MDBX_ACCEDE;
table_flags = MDBX_DUPSORT;
size_lower = -1;
@@ -73,6 +162,7 @@ void actor_params::set_defaults(const std::string &tmpdir) {
drop_table = false;
ignore_dbfull = false;
+ speculum = false;
max_readers = 42;
max_tables = 42;
@@ -82,7 +172,8 @@ void actor_params::set_defaults(const std::string &tmpdir) {
global::config::cleanup_before = true;
global::config::cleanup_after = true;
global::config::failfast = true;
- global::config::progress_indicator = osal_istty(STDERR_FILENO);
+ global::config::progress_indicator = true;
+ global::config::console_mode = osal_istty(STDERR_FILENO);
}
namespace global {
@@ -103,6 +194,7 @@ bool cleanup_before;
bool cleanup_after;
bool failfast;
bool progress_indicator;
+bool console_mode;
} /* namespace config */
} /* namespace global */
@@ -130,7 +222,7 @@ int main(int argc, char *const argv[]) {
#endif /* _DEBUG */
if (argc < 2)
- failure("No parameters given\n");
+ failure("No parameters given. Try --help\n");
if (argc == 2 && strncmp(argv[1], global::thunk_param_prefix,
strlen(global::thunk_param_prefix)) == 0)
@@ -139,6 +231,10 @@ int main(int argc, char *const argv[]) {
? EXIT_SUCCESS
: EXIT_FAILURE;
+ if (argc == 2 &&
+ (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0))
+ usage();
+
actor_params params;
params.set_defaults(osal_tempdir());
global::config::dump_config = true;
@@ -168,8 +264,8 @@ int main(int argc, char *const argv[]) {
}
if (config::parse_option(argc, argv, narg, "pagesize", params.pagesize,
- mdbx_limits_pgsize_min(),
- mdbx_limits_pgsize_max())) {
+ int(mdbx_limits_pgsize_min()),
+ int(mdbx_limits_pgsize_max()))) {
const unsigned keylen_max = params.mdbx_keylen_max();
if (params.keylen_min > keylen_max)
params.keylen_min = keylen_max;
@@ -182,6 +278,17 @@ int main(int argc, char *const argv[]) {
params.datalen_max = datalen_max;
continue;
}
+ if (config::parse_option(argc, argv, narg, "repeat", params.nrepeat,
+ config::no_scale))
+ continue;
+ if (config::parse_option(argc, argv, narg, "threads", params.nthreads,
+ config::no_scale, 1, 64))
+ continue;
+ if (config::parse_option(argc, argv, narg, "timeout",
+ global::config::timeout_duration_seconds,
+ config::duration, 1))
+ continue;
+
if (config::parse_option_intptr(argc, argv, narg, "size-lower",
params.size_lower,
mdbx_limits_dbsize_min(params.pagesize),
@@ -231,17 +338,6 @@ int main(int argc, char *const argv[]) {
keycase_setup(value, params);
continue;
}
-
- if (config::parse_option(argc, argv, narg, "repeat", params.nrepeat,
- config::no_scale))
- continue;
- if (config::parse_option(argc, argv, narg, "threads", params.nthreads,
- config::no_scale, 1, 64))
- continue;
- if (config::parse_option(argc, argv, narg, "timeout",
- global::config::timeout_duration_seconds,
- config::duration, 1))
- continue;
if (config::parse_option(argc, argv, narg, "keylen.min", params.keylen_min,
config::no_scale, params.mdbx_keylen_min(),
params.mdbx_keylen_max())) {
@@ -296,6 +392,8 @@ int main(int argc, char *const argv[]) {
if (config::parse_option(argc, argv, narg, "ignore-dbfull",
params.ignore_dbfull))
continue;
+ if (config::parse_option(argc, argv, narg, "speculum", params.speculum))
+ continue;
if (config::parse_option(argc, argv, narg, "dump-config",
global::config::dump_config))
continue;
@@ -316,7 +414,7 @@ int main(int argc, char *const argv[]) {
params.delaystart = 0;
continue;
}
- if (config::parse_option(argc, argv, narg, "no-wait", nullptr)) {
+ if (config::parse_option(argc, argv, narg, "no-wait4ops", nullptr)) {
params.waitfor_nops = 0;
continue;
}
@@ -346,6 +444,10 @@ int main(int argc, char *const argv[]) {
configure_actor(last_space_id, ac_deadwrite, value, params);
continue;
}
+ if (config::parse_option(argc, argv, narg, "try", nullptr)) {
+ configure_actor(last_space_id, ac_try, value, params);
+ continue;
+ }
if (config::parse_option(argc, argv, narg, "copy", nullptr)) {
configure_actor(last_space_id, ac_copy, value, params);
continue;
@@ -358,17 +460,24 @@ int main(int argc, char *const argv[]) {
configure_actor(last_space_id, ac_ttl, value, params);
continue;
}
+ if (config::parse_option(argc, argv, narg, "nested", nullptr)) {
+ configure_actor(last_space_id, ac_nested, value, params);
+ continue;
+ }
if (config::parse_option(argc, argv, narg, "failfast",
global::config::failfast))
continue;
if (config::parse_option(argc, argv, narg, "progress",
global::config::progress_indicator))
continue;
+ if (config::parse_option(argc, argv, narg, "console",
+ global::config::console_mode))
+ continue;
if (*argv[narg] != '-')
testcase_setup(argv[narg], params, last_space_id);
else
- failure("Unknown option '%s'\n", argv[narg]);
+ failure("Unknown option '%s'. Try --help\n", argv[narg]);
}
if (global::config::dump_config)
@@ -456,8 +565,8 @@ int main(int argc, char *const argv[]) {
if (!actor)
continue;
- log_info("actor #%u, id %d, pid %u: %s\n", actor->actor_id,
- actor->space_id, pid, status2str(status));
+ log_verbose("actor #%u, id %d, pid %ld: %s\n", actor->actor_id,
+ actor->space_id, (long)pid, status2str(status));
if (status > as_running) {
left -= 1;
if (status != as_successful) {
@@ -480,9 +589,37 @@ int main(int argc, char *const argv[]) {
log_notice("RESULT: %s\n", failed ? "Failed" : "Successful");
if (global::config::cleanup_before) {
if (failed)
- log_info("skip cleanup");
+ log_verbose("skip cleanup");
else
cleanup();
}
+
+#if !(defined(_WIN32) || defined(_WIN64))
+ struct rusage spent;
+ if (!getrusage(global::singlemode ? RUSAGE_SELF : RUSAGE_CHILDREN, &spent)) {
+ log_notice("%6s: user %f, system %f", "CPU",
+ spent.ru_utime.tv_sec + spent.ru_utime.tv_usec * 1e-6,
+ spent.ru_stime.tv_sec + spent.ru_stime.tv_usec * 1e-6);
+#if defined(__linux__) || defined(__gnu_linux__) || defined(__FreeBSD__) || \
+ defined(__NetBSD__) || defined(__OpenBSD__) || defined(__BSD__) || \
+ defined(__bsdi__) || defined(__DragonFly__) || defined(__APPLE__) || \
+ defined(__MACH__) || defined(__sun)
+ log_notice("%6s: read %ld, write %ld", "IOPs", spent.ru_inblock,
+ spent.ru_oublock);
+ if (spent.ru_maxrss > 0)
+ log_notice("%6s: %ld Kb", "RAM",
+ spent.ru_maxrss
+#if defined(__sun)
+ * getpagesize() / 1024u
+#elif defined(__APPLE__)
+ / 1024u
+#endif
+ );
+ log_notice("%6s: reclaims %ld, faults %ld, swaps %ld", "Paging",
+ spent.ru_minflt, spent.ru_majflt, spent.ru_nswap);
+#endif /* Linux */
+ }
+#endif /* !Windows */
+
return failed ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/libs/libmdbx/src/test/osal-unix.cc b/libs/libmdbx/src/test/osal-unix.cc
index 0157bace23..4b8694c708 100644
--- a/libs/libmdbx/src/test/osal-unix.cc
+++ b/libs/libmdbx/src/test/osal-unix.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -21,112 +21,266 @@
#include <sys/wait.h>
#include <unistd.h>
-#ifdef __APPLE__
+#ifndef MDBX_LOCKING
+#error "Opps, MDBX_LOCKING is undefined!"
+#endif
+
+#if defined(__APPLE__) && (MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
+ MDBX_LOCKING == MDBX_LOCKING_POSIX2008)
#include "darwin/pthread_barrier.c"
+#endif /* __APPLE__ && MDBX_LOCKING >= MDBX_LOCKING_POSIX2001 */
+
+#if MDBX_LOCKING == MDBX_LOCKING_SYSV
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */
+
+#if MDBX_LOCKING == MDBX_LOCKING_POSIX1988
+#include <semaphore.h>
+
+#if __cplusplus >= 201103L
+#include <atomic>
+static __inline __maybe_unused int atomic_decrement(std::atomic_int *p) {
+ return std::atomic_fetch_sub(p, 1) - 1;
+}
+#else
+static __inline __maybe_unused int atomic_decrement(volatile int *p) {
+#if defined(__GNUC__) || defined(__clang__)
+ return __sync_sub_and_fetch(p, 1);
+#elif defined(_MSC_VER)
+ STATIC_ASSERT(sizeof(volatile long) == sizeof(volatile int));
+ return _InterlockedDecrement((volatile long *)p);
+#elif defined(__APPLE__)
+ return OSAtomicDecrement32Barrier((volatile int *)p);
+#else
+#error FIXME: Unsupported compiler
#endif
+}
+#endif /* C++11 */
+#endif /* MDBX_LOCKING == MDBX_LOCKING_POSIX1988 */
+
+#if MDBX_LOCKING == MDBX_LOCKING_SYSV
+static int ipc;
+static pid_t ipc_overlord_pid;
+static void ipc_remove(void) {
+ if (ipc_overlord_pid == getpid())
+ semctl(ipc, 0, IPC_RMID, nullptr);
+}
+#else
struct shared_t {
+#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
+ MDBX_LOCKING == MDBX_LOCKING_POSIX2008
pthread_barrier_t barrier;
pthread_mutex_t mutex;
- size_t conds_size;
- pthread_cond_t conds[1];
+ size_t count;
+ pthread_cond_t events[1];
+#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
+ struct {
+#if __cplusplus >= 201103L
+ std::atomic_int countdown;
+#else
+ volatile int countdown;
+#endif /* C++11 */
+ sem_t sema;
+ } barrier;
+ size_t count;
+ sem_t events[1];
+#else
+#error "FIXME"
+#endif /* MDBX_LOCKING */
};
-
static shared_t *shared;
+#endif /* MDBX_LOCKING != MDBX_LOCKING_SYSV */
void osal_wait4barrier(void) {
+#if MDBX_LOCKING == MDBX_LOCKING_SYSV
+ struct sembuf op;
+ op.sem_num = 0;
+ op.sem_op = -1;
+ op.sem_flg = IPC_NOWAIT;
+ if (semop(ipc, &op, 1))
+ failure_perror("semop(dec)", errno);
+ op.sem_op = 0;
+ op.sem_flg = 0;
+ if (semop(ipc, &op, 1))
+ failure_perror("semop(wait)", errno);
+#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
+ MDBX_LOCKING == MDBX_LOCKING_POSIX2008
assert(shared != nullptr && shared != MAP_FAILED);
- int rc = pthread_barrier_wait(&shared->barrier);
- if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) {
- failure_perror("pthread_barrier_wait(shared)", rc);
- }
+ int err = pthread_barrier_wait(&shared->barrier);
+ if (err != 0 && err != PTHREAD_BARRIER_SERIAL_THREAD)
+ failure_perror("pthread_barrier_wait(shared)", err);
+#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
+ assert(shared != nullptr && shared != MAP_FAILED);
+ int err = (atomic_decrement(&shared->barrier.countdown) > 0 &&
+ sem_wait(&shared->barrier.sema))
+ ? errno
+ : 0;
+ if (err != 0)
+ failure_perror("sem_wait(shared)", err);
+ if (sem_post(&shared->barrier.sema))
+ failure_perror("sem_post(shared)", errno);
+#else
+#error "FIXME"
+#endif /* MDBX_LOCKING */
}
void osal_setup(const std::vector<actor_config> &actors) {
+#if MDBX_LOCKING == MDBX_LOCKING_SYSV
+ if (ipc_overlord_pid)
+ failure("ipc already created by %ld pid", (long)ipc_overlord_pid);
+ ipc_overlord_pid = getpid();
+#ifndef SEM_A
+#define SEM_A S_IRUSR
+#endif
+#ifndef SEM_R
+#define SEM_R S_IWUSR
+#endif
+ ipc = semget(IPC_PRIVATE, actors.size() + 2, IPC_CREAT | SEM_A | SEM_R);
+ if (ipc < 0)
+ failure_perror("semget(IPC_PRIVATE, shared_sems)", errno);
+ if (atexit(ipc_remove))
+ failure_perror("atexit(ipc_remove)", errno);
+ if (semctl(ipc, 0, SETVAL, (int)(actors.size() + 1)))
+ failure_perror("semctl(SETVAL.0, shared_sems)", errno);
+ for (size_t i = 1; i < actors.size() + 2; ++i)
+ if (semctl(ipc, i, SETVAL, 1))
+ failure_perror("semctl(SETVAL.N, shared_sems)", errno);
+#else
assert(shared == nullptr);
-
- pthread_mutexattr_t mutexattr;
- int rc = pthread_mutexattr_init(&mutexattr);
- if (rc)
- failure_perror("pthread_mutexattr_init()", rc);
- rc = pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED);
- if (rc)
- failure_perror("pthread_mutexattr_setpshared()", rc);
-
- pthread_barrierattr_t barrierattr;
- rc = pthread_barrierattr_init(&barrierattr);
- if (rc)
- failure_perror("pthread_barrierattr_init()", rc);
- rc = pthread_barrierattr_setpshared(&barrierattr, PTHREAD_PROCESS_SHARED);
- if (rc)
- failure_perror("pthread_barrierattr_setpshared()", rc);
-
- pthread_condattr_t condattr;
- rc = pthread_condattr_init(&condattr);
- if (rc)
- failure_perror("pthread_condattr_init()", rc);
- rc = pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED);
- if (rc)
- failure_perror("pthread_condattr_setpshared()", rc);
-
shared = (shared_t *)mmap(
- nullptr, sizeof(shared_t) + actors.size() * sizeof(pthread_cond_t),
- PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ nullptr, sizeof(shared_t) + actors.size() * sizeof(shared->events[0]),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS
+#ifdef MAP_HASSEMAPHORE
+ | MAP_HASSEMAPHORE
+#endif
+ ,
+ -1, 0);
if (MAP_FAILED == (void *)shared)
failure_perror("mmap(shared_conds)", errno);
- rc = pthread_mutex_init(&shared->mutex, &mutexattr);
- if (rc)
- failure_perror("pthread_mutex_init(shared)", rc);
+ shared->count = actors.size() + 1;
- rc = pthread_barrier_init(&shared->barrier, &barrierattr, actors.size() + 1);
- if (rc)
- failure_perror("pthread_barrier_init(shared)", rc);
+#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
+ MDBX_LOCKING == MDBX_LOCKING_POSIX2008
+ pthread_barrierattr_t barrierattr;
+ int err = pthread_barrierattr_init(&barrierattr);
+ if (err)
+ failure_perror("pthread_barrierattr_init()", err);
+ err = pthread_barrierattr_setpshared(&barrierattr, PTHREAD_PROCESS_SHARED);
+ if (err)
+ failure_perror("pthread_barrierattr_setpshared()", err);
+
+ err = pthread_barrier_init(&shared->barrier, &barrierattr, shared->count);
+ if (err)
+ failure_perror("pthread_barrier_init(shared)", err);
+ pthread_barrierattr_destroy(&barrierattr);
- const size_t n = actors.size() + 1;
- for (size_t i = 0; i < n; ++i) {
- pthread_cond_t *event = &shared->conds[i];
- rc = pthread_cond_init(event, &condattr);
- if (rc)
- failure_perror("pthread_cond_init(shared)", rc);
+ pthread_mutexattr_t mutexattr;
+ err = pthread_mutexattr_init(&mutexattr);
+ if (err)
+ failure_perror("pthread_mutexattr_init()", err);
+ err = pthread_mutexattr_setpshared(&mutexattr, PTHREAD_PROCESS_SHARED);
+ if (err)
+ failure_perror("pthread_mutexattr_setpshared()", err);
+
+ pthread_condattr_t condattr;
+ err = pthread_condattr_init(&condattr);
+ if (err)
+ failure_perror("pthread_condattr_init()", err);
+ err = pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED);
+ if (err)
+ failure_perror("pthread_condattr_setpshared()", err);
+
+ err = pthread_mutex_init(&shared->mutex, &mutexattr);
+ if (err)
+ failure_perror("pthread_mutex_init(shared)", err);
+
+ for (size_t i = 0; i < shared->count; ++i) {
+ pthread_cond_t *event = &shared->events[i];
+ err = pthread_cond_init(event, &condattr);
+ if (err)
+ failure_perror("pthread_cond_init(shared)", err);
log_trace("osal_setup: event(shared pthread_cond) %" PRIuPTR " -> %p", i,
- event);
+ __Wpedantic_format_voidptr(event));
}
- shared->conds_size = actors.size() + 1;
-
- pthread_barrierattr_destroy(&barrierattr);
pthread_condattr_destroy(&condattr);
pthread_mutexattr_destroy(&mutexattr);
+#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
+ shared->barrier.countdown = shared->count;
+ if (sem_init(&shared->barrier.sema, true, 1))
+ failure_perror("sem_init(shared.barrier)", errno);
+ for (size_t i = 0; i < shared->count; ++i) {
+ sem_t *event = &shared->events[i];
+ if (sem_init(event, true, 0))
+ failure_perror("sem_init(shared.event)", errno);
+ log_trace("osal_setup: event(shared sem_init) %" PRIuPTR " -> %p", i,
+ __Wpedantic_format_voidptr(event));
+ }
+#else
+#error "FIXME"
+#endif /* MDBX_LOCKING */
+#endif /* MDBX_LOCKING != MDBX_LOCKING_SYSV */
}
void osal_broadcast(unsigned id) {
- assert(shared != nullptr && shared != MAP_FAILED);
log_trace("osal_broadcast: event %u", id);
- if (id >= shared->conds_size)
+#if MDBX_LOCKING == MDBX_LOCKING_SYSV
+ if (semctl(ipc, id + 1, SETVAL, 0))
+ failure_perror("semctl(SETVAL)", errno);
+#else
+ assert(shared != nullptr && shared != MAP_FAILED);
+ if (id >= shared->count)
failure("osal_broadcast: id > limit");
- int rc = pthread_cond_broadcast(shared->conds + id);
- if (rc)
- failure_perror("sem_post(shared)", rc);
+#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
+ MDBX_LOCKING == MDBX_LOCKING_POSIX2008
+ int err = pthread_cond_broadcast(shared->events + id);
+ if (err)
+ failure_perror("pthread_cond_broadcast(shared)", err);
+#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
+ if (sem_post(shared->events + id))
+ failure_perror("sem_post(shared)", errno);
+#else
+#error "FIXME"
+#endif /* MDBX_LOCKING */
+#endif /* MDBX_LOCKING != MDBX_LOCKING_SYSV */
}
int osal_waitfor(unsigned id) {
- assert(shared != nullptr && shared != MAP_FAILED);
-
log_trace("osal_waitfor: event %u", id);
- if (id >= shared->conds_size)
+#if MDBX_LOCKING == MDBX_LOCKING_SYSV
+ struct sembuf op;
+ memset(&op, 0, sizeof(op));
+ op.sem_num = (short)(id + 1);
+ int rc = semop(ipc, &op, 1) ? errno : MDBX_SUCCESS;
+#else
+ assert(shared != nullptr && shared != MAP_FAILED);
+ if (id >= shared->count)
failure("osal_waitfor: id > limit");
+#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
+ MDBX_LOCKING == MDBX_LOCKING_POSIX2008
int rc = pthread_mutex_lock(&shared->mutex);
if (rc != 0)
failure_perror("pthread_mutex_lock(shared)", rc);
- rc = pthread_cond_wait(shared->conds + id, &shared->mutex);
+ rc = pthread_cond_wait(shared->events + id, &shared->mutex);
if (rc && rc != EINTR)
failure_perror("pthread_cond_wait(shared)", rc);
rc = pthread_mutex_unlock(&shared->mutex);
if (rc != 0)
failure_perror("pthread_mutex_unlock(shared)", rc);
+#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
+ int rc = sem_wait(shared->events + id) ? errno : 0;
+ if (rc == 0 && sem_post(shared->events + id))
+ failure_perror("sem_post(shared)", errno);
+#else
+#error "FIXME"
+#endif /* MDBX_LOCKING */
+#endif /* MDBX_LOCKING != MDBX_LOCKING_SYSV */
return (rc == 0) ? true : false;
}
@@ -149,21 +303,69 @@ bool actor_config::osal_deserialize(const char *str, const char *end,
//-----------------------------------------------------------------------------
+static pid_t overlord_pid;
+
+static volatile sig_atomic_t sigusr1_head, sigusr2_head;
+static void handler_SIGUSR(int signum) {
+ switch (signum) {
+ case SIGUSR1:
+ sigusr1_head += 1;
+ return;
+ case SIGUSR2:
+ sigusr2_head += 1;
+ return;
+ default:
+ abort();
+ }
+}
+
+bool osal_progress_push(bool active) {
+ if (overlord_pid) {
+ if (kill(overlord_pid, active ? SIGUSR1 : SIGUSR2))
+ failure_perror("osal_progress_push: kill(overload)", errno);
+ return true;
+ }
+
+ return false;
+}
+
+//-----------------------------------------------------------------------------
+
static std::unordered_map<pid_t, actor_status> childs;
-static void handler_SIGCHLD(int unused) { (void)unused; }
+static volatile sig_atomic_t sigalarm_head;
+static void handler_SIGCHLD(int signum) {
+ if (signum == SIGALRM)
+ sigalarm_head += 1;
+}
mdbx_pid_t osal_getpid(void) { return getpid(); }
int osal_delay(unsigned seconds) { return sleep(seconds) ? errno : 0; }
int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) {
- if (childs.empty())
- signal(SIGCHLD, handler_SIGCHLD);
+ if (childs.empty()) {
+ struct sigaction act;
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = handler_SIGCHLD;
+ sigaction(SIGCHLD, &act, nullptr);
+ sigaction(SIGALRM, &act, nullptr);
+ act.sa_handler = handler_SIGUSR;
+ sigaction(SIGUSR1, &act, nullptr);
+ sigaction(SIGUSR2, &act, nullptr);
+
+ sigset_t mask;
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+ sigaddset(&mask, SIGUSR1);
+ sigaddset(&mask, SIGUSR2);
+ sigprocmask(SIG_UNBLOCK, &mask, nullptr);
+ }
pid = fork();
if (pid == 0) {
+ overlord_pid = getppid();
const bool result = test_execute(config);
exit(result ? EXIT_SUCCESS : EXIT_FAILURE);
}
@@ -171,7 +373,8 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) {
if (pid < 0)
return errno;
- log_trace("osal_actor_start: fork pid %i for %u", pid, config.actor_id);
+ log_trace("osal_actor_start: fork pid %ld for %u", (long)pid,
+ config.actor_id);
childs[pid] = as_running;
return 0;
}
@@ -186,60 +389,67 @@ void osal_killall_actors(void) {
}
int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) {
- struct timespec ts;
- ts.tv_nsec = 0;
- ts.tv_sec = (timeout > INT_MAX) ? INT_MAX : timeout;
-retry:
- int status, options = WNOHANG;
+ static sig_atomic_t sigalarm_tail;
+ alarm(0) /* cancel prev timeout */;
+ sigalarm_tail = sigalarm_head /* reset timeout flag */;
+
+ int options = WNOHANG;
+ if (timeout) {
+ alarm((timeout > INT_MAX) ? INT_MAX : timeout);
+ options = 0;
+ }
+
#ifdef WUNTRACED
options |= WUNTRACED;
#endif
#ifdef WCONTINUED
options |= WCONTINUED;
#endif
- pid = waitpid(0, &status, options);
-
- if (pid > 0) {
- if (WIFEXITED(status))
- childs[pid] =
- (WEXITSTATUS(status) == EXIT_SUCCESS) ? as_successful : as_failed;
- else if (WCOREDUMP(status))
- childs[pid] = as_coredump;
- else if (WIFSIGNALED(status))
- childs[pid] = as_killed;
- else if (WIFSTOPPED(status))
- childs[pid] = as_debuging;
- else if (WIFCONTINUED(status))
- childs[pid] = as_running;
- else {
- assert(false);
- }
- return 0;
- }
- if (pid == 0) {
- /* child still running */
- if (ts.tv_sec == 0 && ts.tv_nsec == 0)
- ts.tv_nsec = 1;
- if (nanosleep(&ts, &ts) == 0) {
- /* timeout and no signal from child */
- pid = 0;
+ while (sigalarm_tail == sigalarm_head) {
+ int status;
+ pid = waitpid(0, &status, options);
+
+ if (pid > 0) {
+ if (WIFEXITED(status))
+ childs[pid] =
+ (WEXITSTATUS(status) == EXIT_SUCCESS) ? as_successful : as_failed;
+ else if (WCOREDUMP(status))
+ childs[pid] = as_coredump;
+ else if (WIFSIGNALED(status))
+ childs[pid] = as_killed;
+ else if (WIFSTOPPED(status))
+ childs[pid] = as_debugging;
+ else if (WIFCONTINUED(status))
+ childs[pid] = as_running;
+ else {
+ assert(false);
+ }
return 0;
}
- if (errno == EINTR)
- goto retry;
- }
- switch (errno) {
- case EINTR:
- pid = 0;
- return 0;
+ static sig_atomic_t sigusr1_tail, sigusr2_tail;
+ if (sigusr1_tail != sigusr1_head) {
+ sigusr1_tail = sigusr1_head;
+ logging::progress_canary(true);
+ if (pid < 0 && errno == EINTR)
+ continue;
+ }
+ if (sigusr2_tail != sigusr2_head) {
+ sigusr2_tail = sigusr2_head;
+ logging::progress_canary(false);
+ if (pid < 0 && errno == EINTR)
+ continue;
+ }
- case ECHILD:
- default:
- pid = 0;
- return errno;
+ if (pid == 0)
+ break;
+
+ int err = errno;
+ if (err != EINTR)
+ return err;
}
+ return 0 /* timeout */;
}
void osal_yield(void) {
@@ -254,9 +464,17 @@ void osal_udelay(unsigned us) {
static unsigned threshold_us;
if (threshold_us == 0) {
+#if defined(_POSIX_CPUTIME) && _POSIX_CPUTIME > -1 && \
+ defined(CLOCK_PROCESS_CPUTIME_ID)
if (clock_getres(CLOCK_PROCESS_CPUTIME_ID, &ts)) {
int rc = errno;
- failure_perror("clock_getres(CLOCK_PROCESS_CPUTIME_ID)", rc);
+ log_warning("clock_getres(CLOCK_PROCESS_CPUTIME_ID), failed errno %d",
+ rc);
+ }
+#endif /* CLOCK_PROCESS_CPUTIME_ID */
+ if (threshold_us == 0 && clock_getres(CLOCK_MONOTONIC, &ts)) {
+ int rc = errno;
+ failure_perror("clock_getres(CLOCK_MONOTONIC)", rc);
}
chrono::time threshold = chrono::from_timespec(ts);
assert(threshold.seconds() == 0);
diff --git a/libs/libmdbx/src/test/osal-windows.cc b/libs/libmdbx/src/test/osal-windows.cc
index 975d8268f5..6de674651b 100644
--- a/libs/libmdbx/src/test/osal-windows.cc
+++ b/libs/libmdbx/src/test/osal-windows.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -16,6 +16,7 @@
static std::unordered_map<unsigned, HANDLE> events;
static HANDLE hBarrierSemaphore, hBarrierEvent;
+static HANDLE hProgressActiveEvent, hProgressPassiveEvent;
static int waitstatus2errcode(DWORD result) {
switch (result) {
@@ -85,6 +86,16 @@ void osal_setup(const std::vector<actor_config> &actors) {
if (!hBarrierEvent)
failure_perror("CreateEvent(BarrierEvent)", GetLastError());
hBarrierEvent = make_inheritable(hBarrierEvent);
+
+ hProgressActiveEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+ if (!hProgressActiveEvent)
+ failure_perror("CreateEvent(ProgressActiveEvent)", GetLastError());
+ hProgressActiveEvent = make_inheritable(hProgressActiveEvent);
+
+ hProgressPassiveEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+ if (!hProgressPassiveEvent)
+ failure_perror("CreateEvent(ProgressPassiveEvent)", GetLastError());
+ hProgressPassiveEvent = make_inheritable(hProgressPassiveEvent);
}
void osal_broadcast(unsigned id) {
@@ -112,6 +123,8 @@ const std::string
actor_config::osal_serialize(simple_checksum &checksum) const {
checksum.push(hBarrierSemaphore);
checksum.push(hBarrierEvent);
+ checksum.push(hProgressActiveEvent);
+ checksum.push(hProgressPassiveEvent);
HANDLE hWait = INVALID_HANDLE_VALUE;
if (wait4id) {
@@ -125,8 +138,8 @@ actor_config::osal_serialize(simple_checksum &checksum) const {
checksum.push(hSignal);
}
- return format("%p.%p.%p.%p", hBarrierSemaphore, hBarrierEvent, hWait,
- hSignal);
+ return format("%p.%p.%p.%p.%p.%p", hBarrierSemaphore, hBarrierEvent, hWait,
+ hSignal, hProgressActiveEvent, hProgressPassiveEvent);
}
bool actor_config::osal_deserialize(const char *str, const char *end,
@@ -137,17 +150,22 @@ bool actor_config::osal_deserialize(const char *str, const char *end,
assert(hBarrierSemaphore == 0);
assert(hBarrierEvent == 0);
+ assert(hProgressActiveEvent == 0);
+ assert(hProgressPassiveEvent == 0);
assert(events.empty());
HANDLE hWait, hSignal;
- if (sscanf_s(copy.c_str(), "%p.%p.%p.%p", &hBarrierSemaphore, &hBarrierEvent,
- &hWait, &hSignal) != 4) {
+ if (sscanf_s(copy.c_str(), "%p.%p.%p.%p.%p.%p", &hBarrierSemaphore,
+ &hBarrierEvent, &hWait, &hSignal, &hProgressActiveEvent,
+ &hProgressPassiveEvent) != 6) {
TRACE("<< osal_deserialize: failed\n");
return false;
}
checksum.push(hBarrierSemaphore);
checksum.push(hBarrierEvent);
+ checksum.push(hProgressActiveEvent);
+ checksum.push(hProgressPassiveEvent);
if (wait4id) {
checksum.push(hWait);
@@ -168,6 +186,17 @@ bool actor_config::osal_deserialize(const char *str, const char *end,
typedef std::pair<HANDLE, actor_status> child;
static std::unordered_map<mdbx_pid_t, child> childs;
+bool osal_progress_push(bool active) {
+ if (!childs.empty()) {
+ if (!SetEvent(active ? hProgressActiveEvent : hProgressPassiveEvent))
+ failure_perror("osal_progress_push: SetEvent(overlord.progress)",
+ GetLastError());
+ return true;
+ }
+
+ return false;
+}
+
static void ArgvQuote(std::string &CommandLine, const std::string &Argument,
bool Force = false)
@@ -314,7 +343,7 @@ actor_status osal_actor_info(const mdbx_pid_t pid) {
break;
case EXCEPTION_BREAKPOINT:
case EXCEPTION_SINGLE_STEP:
- status = as_debuging;
+ status = as_debugging;
break;
case STATUS_CONTROL_C_EXIT:
status = as_killed;
@@ -344,32 +373,45 @@ void osal_killall_actors(void) {
int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) {
std::vector<HANDLE> handles;
- handles.reserve(childs.size());
+ handles.reserve(childs.size() + 2);
+ handles.push_back(hProgressActiveEvent);
+ handles.push_back(hProgressPassiveEvent);
for (const auto &pair : childs)
if (pair.second.second <= as_running)
handles.push_back(pair.second.first);
- DWORD rc =
- MsgWaitForMultipleObjectsEx((DWORD)handles.size(), &handles[0],
- (timeout > 60) ? 60 * 1000 : timeout * 1000,
- QS_ALLINPUT | QS_ALLPOSTMESSAGE, 0);
+ while (true) {
+ DWORD rc =
+ MsgWaitForMultipleObjectsEx((DWORD)handles.size(), &handles[0],
+ (timeout > 60) ? 60 * 1000 : timeout * 1000,
+ QS_ALLINPUT | QS_ALLPOSTMESSAGE, 0);
- if (rc >= WAIT_OBJECT_0 && rc < WAIT_OBJECT_0 + handles.size()) {
- pid = 0;
- for (const auto &pair : childs)
- if (pair.second.first == handles[rc - WAIT_OBJECT_0]) {
- pid = pair.first;
- break;
- }
- return 0;
- }
+ if (rc == WAIT_OBJECT_0) {
+ logging::progress_canary(true);
+ continue;
+ }
+ if (rc == WAIT_OBJECT_0 + 1) {
+ logging::progress_canary(false);
+ continue;
+ }
- if (rc == WAIT_TIMEOUT) {
- pid = 0;
- return 0;
- }
+ if (rc >= WAIT_OBJECT_0 + 2 && rc < WAIT_OBJECT_0 + handles.size()) {
+ pid = 0;
+ for (const auto &pair : childs)
+ if (pair.second.first == handles[rc - WAIT_OBJECT_0]) {
+ pid = pair.first;
+ break;
+ }
+ return 0;
+ }
- return waitstatus2errcode(rc);
+ if (rc == WAIT_TIMEOUT) {
+ pid = 0;
+ return 0;
+ }
+
+ return waitstatus2errcode(rc);
+ }
}
void osal_yield(void) { SwitchToThread(); }
diff --git a/libs/libmdbx/src/test/osal.h b/libs/libmdbx/src/test/osal.h
index 5acf7ad094..6d0e1c4eb6 100644
--- a/libs/libmdbx/src/test/osal.h
+++ b/libs/libmdbx/src/test/osal.h
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -26,6 +26,8 @@ void osal_killall_actors(void);
int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout);
void osal_wait4barrier(void);
+bool osal_progress_push(bool active);
+
mdbx_pid_t osal_getpid(void);
int osal_delay(unsigned seconds);
void osal_udelay(unsigned us);
diff --git a/libs/libmdbx/src/test/pcrf/CMakeLists.txt b/libs/libmdbx/src/test/pcrf/CMakeLists.txt
index 399c33f88f..8bd3e3d859 100644
--- a/libs/libmdbx/src/test/pcrf/CMakeLists.txt
+++ b/libs/libmdbx/src/test/pcrf/CMakeLists.txt
@@ -1,7 +1,5 @@
set(TARGET pcrf_test)
-project(${TARGET})
-
add_executable(${TARGET} pcrf_test.c)
-
+target_include_directories(${TARGET} PRIVATE "${PROJECT_SOURCE_DIR}")
target_link_libraries(${TARGET} mdbx)
diff --git a/libs/libmdbx/src/test/pcrf/pcrf_test.c b/libs/libmdbx/src/test/pcrf/pcrf_test.c
index 213c8b1d80..206bd21d4e 100644
--- a/libs/libmdbx/src/test/pcrf/pcrf_test.c
+++ b/libs/libmdbx/src/test/pcrf/pcrf_test.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2016-2017 Leonid Yuriev <leo@yuriev.ru>.
+ * Copyright 2016-2019 Leonid Yuriev <leo@yuriev.ru>.
* Copyright 2015 Vladimir Romanov
* <https://www.linkedin.com/in/vladimirromanov>, Yota Lab.
*
@@ -36,7 +36,7 @@
(int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), \
(int)((addr)&0xff)
-char opt_db_path[PATH_MAX] = "/root/lmdbx_bench2";
+char opt_db_path[PATH_MAX] = "./mdbx_bench2";
static MDBX_env *env;
#define REC_COUNT 10240000
int64_t ids[REC_COUNT * 10];
@@ -105,12 +105,12 @@ static void db_connect() {
MDBX_dbi dbi_ip;
MDBX_CHECK(mdbx_env_create(&env));
- MDBX_CHECK(
- mdbx_env_set_mapsize(env, REC_COUNT * sizeof(session_data_t) * 10));
+ MDBX_CHECK(mdbx_env_set_geometry(
+ env, 0, 0, REC_COUNT * sizeof(session_data_t) * 10, -1, -1, -1));
MDBX_CHECK(mdbx_env_set_maxdbs(env, 30));
MDBX_CHECK(mdbx_env_open(env, opt_db_path,
MDBX_CREATE | MDBX_WRITEMAP | MDBX_MAPASYNC |
- MDBX_NOSYNC | MDBX_LIFORECLAIM,
+ MDBX_SAFE_NOSYNC | MDBX_LIFORECLAIM,
0664));
MDBX_txn *txn;
@@ -126,7 +126,7 @@ static void db_connect() {
printf("Connection open\n");
}
-static void create_record(int64_t record_id) {
+static void create_record(uint64_t record_id) {
MDBX_dbi dbi_session;
MDBX_dbi dbi_session_id;
MDBX_dbi dbi_event;
@@ -136,11 +136,13 @@ static void create_record(int64_t record_id) {
session_data_t data;
// transaction init
snprintf(data.session_id1, sizeof(data.session_id1),
- "prefix%02ld_%02ld.fill.fill.fill.fill.fill.fill;%ld",
- record_id % 3 + 1, record_id % 9 + 1, record_id);
+ "prefix%02u_%02u.fill.fill.fill.fill.fill.fill;%" PRIu64,
+ (unsigned)(record_id % 3) + 1, (unsigned)(record_id % 9) + 1,
+ record_id);
snprintf(data.session_id2, sizeof(data.session_id2),
- "dprefix%ld;%ld.fill.fill.;suffix", record_id,
- record_id % 1000000000 + 99999);
+ "dprefix%" PRIu64 ";%" PRIu64 ".fill.fill.;suffix", record_id,
+ (record_id + UINT64_C(1442695040888963407)) %
+ UINT64_C(6364136223846793005));
snprintf(data.ip, sizeof(data.ip), "%d.%d.%d.%d",
IP_PRINTF_ARG_HOST(record_id & 0xFFFFFFFF));
event.obj_id = record_id;
@@ -241,8 +243,9 @@ static void get_db_stat(const char *db, int64_t *ms_branch_pages,
MDBX_CHECK(mdbx_dbi_open(txn, db, MDBX_CREATE, &dbi));
MDBX_CHECK(mdbx_dbi_stat(txn, dbi, &stat, sizeof(stat)));
mdbx_txn_abort(txn);
- printf("%15s | %15ld | %5u | %10ld | %10ld | %11ld |\n", db,
- stat.ms_branch_pages, stat.ms_depth, stat.ms_entries,
+ printf("%15s | %15" PRIu64 " | %5u | %10" PRIu64 " | %10" PRIu64
+ " | %11" PRIu64 " |\n",
+ db, stat.ms_branch_pages, stat.ms_depth, stat.ms_entries,
stat.ms_leaf_pages, stat.ms_overflow_pages);
(*ms_branch_pages) += stat.ms_branch_pages;
(*ms_leaf_pages) += stat.ms_leaf_pages;
@@ -253,8 +256,8 @@ static void periodic_stat(void) {
int64_t ms_leaf_pages = 0;
MDBX_stat mst;
MDBX_envinfo mei;
- MDBX_CHECK(mdbx_env_stat(env, &mst, sizeof(mst)));
- MDBX_CHECK(mdbx_env_info(env, &mei, sizeof(mei)));
+ MDBX_CHECK(mdbx_env_stat_ex(env, NULL, &mst, sizeof(mst)));
+ MDBX_CHECK(mdbx_env_info_ex(env, NULL, &mei, sizeof(mei)));
printf("Environment Info\n");
printf(" Pagesize: %u\n", mst.ms_psize);
if (mei.mi_geo.lower != mei.mi_geo.upper) {
@@ -287,8 +290,8 @@ static void periodic_stat(void) {
get_db_stat("session_id", &ms_branch_pages, &ms_leaf_pages);
get_db_stat("event", &ms_branch_pages, &ms_leaf_pages);
get_db_stat("ip", &ms_branch_pages, &ms_leaf_pages);
- printf("%15s | %15ld | %5s | %10s | %10ld | %11s |\n", "", ms_branch_pages,
- "", "", ms_leaf_pages, "");
+ printf("%15s | %15" PRIu64 " | %5s | %10s | %10" PRIu64 " | %11s |\n", "",
+ ms_branch_pages, "", "", ms_leaf_pages, "");
static int64_t prev_add_count;
static int64_t prev_del_count;
@@ -297,26 +300,32 @@ static void periodic_stat(void) {
static int64_t t = -1;
if (t > 0) {
int64_t delta = (getClockUs() - t);
- printf(
- "CPS: add %ld, delete %ld, items processed - %ldK data=%ldK key=%ldK\n",
- (mdbx_add_count - prev_add_count) * 1000000 / delta,
- (mdbx_del_count - prev_del_count) * 1000000 / delta, obj_id / 1024,
- mdbx_data_size / 1024, mdbx_key_size / 1024);
- printf("usage data=%ld%%", ((mdbx_data_size + mdbx_key_size) * 100) /
- ((ms_leaf_pages + ms_branch_pages) * 4096));
+ printf("CPS: add %" PRIu64 ", delete %" PRIu64
+ ", items processed - %" PRIu64 "K data=%" PRIu64 "K key=%" PRIu64
+ "K\n",
+ (mdbx_add_count - prev_add_count) * 1000000 / delta,
+ (mdbx_del_count - prev_del_count) * 1000000 / delta, obj_id / 1024,
+ mdbx_data_size / 1024, mdbx_key_size / 1024);
+ printf("usage data=%" PRIu64 "%%",
+ ((mdbx_data_size + mdbx_key_size) * 100) /
+ ((ms_leaf_pages + ms_branch_pages) * 4096));
if (prev_add_time != mdbx_add_time) {
- printf(" Add : %ld c/s", (mdbx_add_count - prev_add_count) * 1000000 /
- (mdbx_add_time - prev_add_time));
+ printf(" Add : %" PRIu64 " c/s", (mdbx_add_count - prev_add_count) *
+ 1000000 /
+ (mdbx_add_time - prev_add_time));
}
if (prev_del_time != mdbx_del_time) {
- printf(" Del : %ld c/s", (mdbx_del_count - prev_del_count) * 1000000 /
- (mdbx_del_time - prev_del_time));
+ printf(" Del : %" PRIu64 " c/s", (mdbx_del_count - prev_del_count) *
+ 1000000 /
+ (mdbx_del_time - prev_del_time));
}
if (mdbx_add_time) {
- printf(" tAdd : %ld c/s", mdbx_add_count * 1000000 / mdbx_add_time);
+ printf(" tAdd : %" PRIu64 " c/s",
+ mdbx_add_count * 1000000 / mdbx_add_time);
}
if (mdbx_del_time) {
- printf(" tDel : %ld c/s", mdbx_del_count * 1000000 / mdbx_del_time);
+ printf(" tDel : %" PRIu64 " c/s",
+ mdbx_del_count * 1000000 / mdbx_del_time);
}
puts("");
}
@@ -385,14 +394,14 @@ int main(int argc, char **argv) {
id = get_id_from_pool();
delete_record(id);
}
- // for (i = 0; i < 50; i++) {
- // int64_t id = obj_id++;
- // create_record(id);
- // add_id_to_pool(id);
- // }
- // int64_t id = obj_id++;
- // create_record(id);
- // add_id_to_pool(id);
+ // for (i = 0; i < 50; i++) {
+ // int64_t id = obj_id++;
+ // create_record(id);
+ // add_id_to_pool(id);
+ // }
+ // int64_t id = obj_id++;
+ // create_record(id);
+ // add_id_to_pool(id);
int64_t now = getClockUs();
if ((now - t) > 10000000L) {
periodic_stat();
diff --git a/libs/libmdbx/src/test/test.cc b/libs/libmdbx/src/test/test.cc
index e9e925e9a9..ff1676274b 100644
--- a/libs/libmdbx/src/test/test.cc
+++ b/libs/libmdbx/src/test/test.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -37,6 +37,8 @@ const char *testcase2str(const actor_testcase testcase) {
return "append";
case ac_ttl:
return "ttl";
+ case ac_nested:
+ return "nested";
}
}
@@ -45,8 +47,8 @@ const char *status2str(actor_status status) {
default:
assert(false);
return "?!";
- case as_debuging:
- return "debuging";
+ case as_debugging:
+ return "debugging";
case as_running:
return "running";
case as_successful:
@@ -76,15 +78,16 @@ const char *keygencase2str(const keygen_case keycase) {
//-----------------------------------------------------------------------------
-int testcase::oom_callback(MDBX_env *env, int pid, mdbx_tid_t tid, uint64_t txn,
- unsigned gap, int retry) {
+int testcase::oom_callback(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid,
+ uint64_t txn, unsigned gap, size_t space,
+ int retry) {
testcase *self = (testcase *)mdbx_env_get_userctx(env);
if (retry == 0)
- log_notice("oom_callback: waitfor pid %u, thread %" PRIuPTR
- ", txn #%" PRIu64 ", gap %d",
- pid, (size_t)tid, txn, gap);
+    /* Arguments are cast/typed to match their conversion specifiers:
+     * pid -> %lu (unsigned long), gap is unsigned -> %u. */
+    log_notice("oom_callback: waitfor pid %lu, thread %" PRIuPTR
+               ", txn #%" PRIu64 ", gap %u, space %zu",
+               (unsigned long)pid, (size_t)tid, txn, gap, space);
if (self->should_continue(true)) {
osal_yield();
@@ -186,7 +189,8 @@ int testcase::breakable_commit() {
if (err == MDBX_MAP_FULL && config.params.ignore_dbfull) {
rc = err;
err = mdbx_txn_abort(txn);
- if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH))
+ if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH &&
+ err != MDBX_BAD_TXN))
failure_perror("mdbx_txn_abort()", err);
} else
failure_perror("mdbx_txn_commit()", err);
@@ -196,6 +200,20 @@ int testcase::breakable_commit() {
return rc;
}
+/* Reports how much of the transaction's space is still unused, as the ratio
+ * leftover / (leftover + dirty) scaled by 256.
+ *
+ * Returns 0 when `txn` is null or when mdbx_txn_info() reports no space at
+ * all. A failing mdbx_txn_info() is reported through failure_perror(). */
+unsigned testcase::txn_underutilization_x256(MDBX_txn *txn) const {
+  if (!txn)
+    return 0;
+
+  MDBX_txn_info info;
+  int err = mdbx_txn_info(txn, &info, false);
+  if (unlikely(err != MDBX_SUCCESS))
+    failure_perror("mdbx_txn_info()", err);
+
+  const size_t left = size_t(info.txn_space_leftover);
+  const size_t total =
+      size_t(info.txn_space_leftover) + size_t(info.txn_space_dirty);
+  if (total == 0)
+    return 0;
+
+  /* (total >> 8) is zero for totals below 256 and must not be used as a
+   * divisor. For such small totals left * 256 cannot overflow, so the ratio
+   * is computed directly instead. */
+  const size_t unit = total >> 8;
+  return (unsigned)(unit ? left / unit : left * 256 / total);
+}
+
void testcase::txn_end(bool abort) {
log_trace(">> txn_end(%s)", abort ? "abort" : "commit");
assert(txn_guard);
@@ -203,7 +221,8 @@ void testcase::txn_end(bool abort) {
MDBX_txn *txn = txn_guard.release();
if (abort) {
int err = mdbx_txn_abort(txn);
- if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH))
+ if (unlikely(err != MDBX_SUCCESS && err != MDBX_THREAD_MISMATCH &&
+ err != MDBX_BAD_TXN))
failure_perror("mdbx_txn_abort()", err);
} else {
txn_inject_writefault(txn);
@@ -215,18 +234,18 @@ void testcase::txn_end(bool abort) {
log_trace("<< txn_end(%s)", abort ? "abort" : "commit");
}
-void testcase::cursor_open(unsigned dbi) {
- log_trace(">> cursor_open(%u)", dbi);
+void testcase::cursor_open(MDBX_dbi handle) {
+ log_trace(">> cursor_open(%u)", handle);
assert(!cursor_guard);
assert(txn_guard);
MDBX_cursor *cursor = nullptr;
- int rc = mdbx_cursor_open(txn_guard.get(), dbi, &cursor);
+ int rc = mdbx_cursor_open(txn_guard.get(), handle, &cursor);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_cursor_open()", rc);
cursor_guard.reset(cursor);
- log_trace("<< cursor_open(%u)", dbi);
+ log_trace("<< cursor_open(%u)", handle);
}
void testcase::cursor_close() {
@@ -264,8 +283,9 @@ void testcase::txn_inject_writefault(MDBX_txn *txn) {
if (config.params.inject_writefaultn && txn) {
if (config.params.inject_writefaultn <= nops_completed &&
(mdbx_txn_flags(txn) & MDBX_RDONLY) == 0) {
- log_info("== txn_inject_writefault(): got %u nops or more, inject FAULT",
- config.params.inject_writefaultn);
+ log_verbose(
+ "== txn_inject_writefault(): got %u nops or more, inject FAULT",
+ config.params.inject_writefaultn);
log_flush();
#if defined(_WIN32) || defined(_WIN64) || defined(_WINDOWS)
TerminateProcess(GetCurrentProcess(), 42);
@@ -305,21 +325,9 @@ bool testcase::wait4start() {
}
void testcase::kick_progress(bool active) const {
- chrono::time now = chrono::now_motonic();
- if (active) {
- static int last_point = -1;
- int point = (now.fixedpoint >> 29) & 3;
- if (point != last_point) {
- last.progress_timestamp = now;
- fprintf(stderr, "%c\b", "-\\|/"[last_point = point]);
- fflush(stderr);
- }
- } else if (now.fixedpoint - last.progress_timestamp.fixedpoint >
- chrono::from_seconds(2).fixedpoint) {
- last.progress_timestamp = now;
- fprintf(stderr, "%c\b", "@*"[now.utc & 1]);
- fflush(stderr);
- }
+ if (!global::config::progress_indicator)
+ return;
+ logging::progress_canary(active);
}
void testcase::report(size_t nops_done) {
@@ -328,11 +336,10 @@ void testcase::report(size_t nops_done) {
return;
nops_completed += nops_done;
- log_verbose("== complete +%" PRIuPTR " iteration, total %" PRIuPTR " done",
- nops_done, nops_completed);
+ log_debug("== complete +%" PRIuPTR " iteration, total %" PRIuPTR " done",
+ nops_done, nops_completed);
- if (global::config::progress_indicator)
- kick_progress(true);
+ kick_progress(true);
if (config.signal_nops && !signalled &&
config.signal_nops <= nops_completed) {
@@ -387,7 +394,7 @@ bool testcase::should_continue(bool check_timeout_only) const {
nops_completed >= config.params.test_nops)
result = false;
- if (result && global::config::progress_indicator)
+ if (result)
kick_progress(false);
return result;
@@ -430,14 +437,14 @@ void testcase::update_canary(uint64_t increment) {
log_trace("<< update_canary: sequence = %" PRIu64, canary_now.y);
}
-int testcase::db_open__begin__table_create_open_clean(MDBX_dbi &dbi) {
+int testcase::db_open__begin__table_create_open_clean(MDBX_dbi &handle) {
db_open();
int err, retry_left = 42;
for (;;) {
txn_begin(false);
- dbi = db_table_open(true);
- db_table_clear(dbi);
+ handle = db_table_open(true);
+ db_table_clear(handle);
err = breakable_commit();
if (likely(err == MDBX_SUCCESS)) {
txn_begin(false);
@@ -464,7 +471,7 @@ MDBX_dbi testcase::db_table_open(bool create) {
failure("snprintf(tablename): %d", rc);
tablename = tablename_buf;
}
- log_verbose("use %s table", tablename ? tablename : "MAINDB");
+ log_debug("use %s table", tablename ? tablename : "MAINDB");
MDBX_dbi handle = 0;
int rc = mdbx_dbi_open(txn_guard.get(), tablename,
@@ -490,9 +497,9 @@ void testcase::db_table_drop(MDBX_dbi handle) {
}
}
-void testcase::db_table_clear(MDBX_dbi handle) {
+void testcase::db_table_clear(MDBX_dbi handle, MDBX_txn *txn) {
log_trace(">> testcase::db_table_clear, handle %u", handle);
- int rc = mdbx_drop(txn_guard.get(), handle, false);
+ int rc = mdbx_drop(txn ? txn : txn_guard.get(), handle, false);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror("mdbx_drop(delete=false)", rc);
log_trace("<< testcase::db_table_clear");
@@ -510,7 +517,7 @@ void testcase::db_table_close(MDBX_dbi handle) {
void testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check,
MDBX_val expected_valued) {
MDBX_val actual_value = expected_valued;
- int rc = mdbx_get2(txn_guard.get(), handle, &key2check, &actual_value);
+ int rc = mdbx_get_nearest(txn_guard.get(), handle, &key2check, &actual_value);
if (unlikely(rc != MDBX_SUCCESS))
failure_perror(step, rc);
if (!is_samedata(&actual_value, &expected_valued))
@@ -560,6 +567,9 @@ bool test_execute(const actor_config &config_const) {
case ac_ttl:
test.reset(new testcase_ttl(config, pid));
break;
+ case ac_nested:
+ test.reset(new testcase_nested(config, pid));
+ break;
default:
test.reset(new testcase(config, pid));
break;
@@ -582,13 +592,13 @@ bool test_execute(const actor_config &config_const) {
}
if (config.params.nrepeat == 1)
- log_info("test successed");
+ log_verbose("test successed");
else {
if (config.params.nrepeat)
- log_info("test successed (iteration %zi of %zi)", iter,
- size_t(config.params.nrepeat));
+ log_verbose("test successed (iteration %zi of %zi)", iter,
+ size_t(config.params.nrepeat));
else
- log_info("test successed (iteration %zi)", iter);
+ log_verbose("test successed (iteration %zi)", iter);
config.params.keygen.seed += INT32_C(0xA4F4D37B);
}
@@ -599,3 +609,135 @@ bool test_execute(const actor_config &config_const) {
return false;
}
}
+
+//-----------------------------------------------------------------------------
+
+/* Stores the pair via mdbx_put() and, when the speculum mirror is enabled and
+ * the put succeeded, records the same pair in the in-memory speculum.
+ * Returns the mdbx_put() result unchanged. */
+int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata,
+                     unsigned flags) {
+  const int rc =
+      mdbx_put(txn_guard.get(), dbi, &akey->value, &adata->value, flags);
+  if (rc != MDBX_SUCCESS || !config.params.speculum)
+    return rc;
+
+  /* A successful put must correspond to a pair the mirror did not hold. */
+  const bool is_new = speculum.emplace(S(akey), S(adata)).second;
+  assert(is_new);
+  (void)is_new;
+  return rc;
+}
+
+/* Replaces the value stored under `akey` via mdbx_replace() and returns its
+ * result. When the speculum mirror is enabled, the {key, old_data} pair is
+ * removed from it and {key, new_data} is added before the database call. */
+int testcase::replace(const keygen::buffer &akey,
+ const keygen::buffer &new_data,
+ const keygen::buffer &old_data, unsigned flags) {
+ if (config.params.speculum) {
+ /* NOTE(review): the mirror is updated before mdbx_replace() runs and is
+ * not rolled back here if that call fails; presumably callers resync the
+ * speculum on error - confirm against the call sites. */
+ const auto S_key = S(akey);
+ const auto S_old = S(old_data);
+ const auto S_new = S(new_data);
+ /* The old pair is expected to be present exactly once. */
+ const auto removed = speculum.erase(SET::key_type(S_key, S_old));
+ assert(removed == 1);
+ (void)removed;
+ /* The new pair is expected not to be present yet. */
+ const bool inserted = speculum.emplace(S_key, S_new).second;
+ assert(inserted);
+ (void)inserted;
+ }
+ return mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value,
+ &old_data->value, flags);
+}
+
+/* Deletes the given key/value pair via mdbx_del() and returns its result.
+ * When the speculum mirror is enabled, the pair is first erased from it and
+ * is expected to have been present exactly once. */
+int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) {
+  if (config.params.speculum) {
+    const auto erased = speculum.erase(SET::key_type(S(akey), S(adata)));
+    assert(erased == 1);
+    (void)erased;
+  }
+
+  MDBX_txn *const current = txn_guard.get();
+  return mdbx_del(current, dbi, &akey->value, &adata->value);
+}
+
+/* Compares the table contents against the in-memory speculum mirror.
+ *
+ * Walks the table with a private cursor (MDBX_FIRST, then MDBX_NEXT) in
+ * lockstep with an iterator over `speculum`, logging every pair found only in
+ * the table ("extra") or only in the mirror ("lost").
+ *
+ * Returns true when no mismatch was logged, or immediately when the speculum
+ * mode is disabled. Cursor errors other than MDBX_NOTFOUND are reported
+ * through failure_perror(). */
+bool testcase::speculum_verify() {
+ if (!config.params.speculum)
+ return true;
+
+ /* A transaction is needed for the cursor; start one if none is active. */
+ if (!txn_guard)
+ txn_begin(true);
+
+ /* Scratch buffers for mdbx_dump_val() when formatting log messages. */
+ char dump_key[128], dump_value[128];
+ char dump_mkey[128], dump_mvalue[128];
+
+ MDBX_cursor *cursor;
+ int err = mdbx_cursor_open(txn_guard.get(), dbi, &cursor);
+ if (err != MDBX_SUCCESS)
+ failure_perror("mdbx_cursor_open()", err);
+
+ bool rc = true;
+ /* akey/avalue: current pair from the table; mkey/mvalue: current pair from
+ * the mirror, wrapped as MDBX_val for dumping only. */
+ MDBX_val akey, avalue;
+ MDBX_val mkey, mvalue;
+ err = mdbx_cursor_get(cursor, &akey, &avalue, MDBX_FIRST);
+
+ /* extra/lost count the mismatches, n counts loop iterations; all three are
+ * used only in the log messages. */
+ unsigned extra = 0, lost = 0, n = 0;
+ assert(std::is_sorted(speculum.cbegin(), speculum.cend(), ItemCompare(this)));
+ auto it = speculum.cbegin();
+ while (true) {
+ if (err != MDBX_SUCCESS) {
+ /* No valid table pair: make the views below empty. */
+ akey.iov_len = avalue.iov_len = 0;
+ akey.iov_base = avalue.iov_base = nullptr;
+ }
+ const auto S_key = S(akey);
+ const auto S_data = S(avalue);
+ if (it != speculum.cend()) {
+ mkey.iov_base = (void *)it->first.c_str();
+ mkey.iov_len = it->first.size();
+ mvalue.iov_base = (void *)it->second.c_str();
+ mvalue.iov_len = it->second.size();
+ }
+ /* NOTE(review): the ordering tests below use the data_view relational
+ * operators, while the mirror itself is ordered by ItemCompare; presumably
+ * both orders agree for the tables this is used with - confirm. */
+ if (err == MDBX_SUCCESS && it != speculum.cend() && S_key == it->first &&
+ S_data == it->second) {
+ /* Same pair on both sides: advance both. */
+ ++it;
+ err = mdbx_cursor_get(cursor, &akey, &avalue, MDBX_NEXT);
+ } else if (err == MDBX_SUCCESS &&
+ (it == speculum.cend() || S_key < it->first ||
+ (S_key == it->first && S_data < it->second))) {
+ /* Table pair sorts before the mirror's current pair, or the mirror is
+ * exhausted: the table holds a pair the mirror does not. Advance the
+ * table side only. */
+ extra += 1;
+ if (it != speculum.cend()) {
+ log_error("extra pair %u/%u: db{%s, %s} < mi{%s, %s}", n, extra,
+ mdbx_dump_val(&akey, dump_key, sizeof(dump_key)),
+ mdbx_dump_val(&avalue, dump_value, sizeof(dump_value)),
+ mdbx_dump_val(&mkey, dump_mkey, sizeof(dump_mkey)),
+ mdbx_dump_val(&mvalue, dump_mvalue, sizeof(dump_mvalue)));
+ } else {
+ log_error("extra pair %u/%u: db{%s, %s} < mi.END", n, extra,
+ mdbx_dump_val(&akey, dump_key, sizeof(dump_key)),
+ mdbx_dump_val(&avalue, dump_value, sizeof(dump_value)));
+ }
+ err = mdbx_cursor_get(cursor, &akey, &avalue, MDBX_NEXT);
+ rc = false;
+ } else if (it != speculum.cend() &&
+ (err == MDBX_NOTFOUND || S_key > it->first ||
+ (S_key == it->first && S_data > it->second))) {
+ /* Table is exhausted or its pair sorts after the mirror's current pair:
+ * the mirror holds a pair the table does not. Advance the mirror side
+ * only. */
+ lost += 1;
+ if (err == MDBX_NOTFOUND) {
+ log_error("lost pair %u/%u: db.END > mi{%s, %s}", n, lost,
+ mdbx_dump_val(&mkey, dump_mkey, sizeof(dump_mkey)),
+ mdbx_dump_val(&mvalue, dump_mvalue, sizeof(dump_mvalue)));
+ } else {
+ log_error("lost pair %u/%u: db{%s, %s} > mi{%s, %s}", n, lost,
+ mdbx_dump_val(&akey, dump_key, sizeof(dump_key)),
+ mdbx_dump_val(&avalue, dump_value, sizeof(dump_value)),
+ mdbx_dump_val(&mkey, dump_mkey, sizeof(dump_mkey)),
+ mdbx_dump_val(&mvalue, dump_mvalue, sizeof(dump_mvalue)));
+ }
+ ++it;
+ rc = false;
+ } else if (err == MDBX_NOTFOUND && it == speculum.cend()) {
+ /* Both sides exhausted: done. */
+ break;
+ } else if (err != MDBX_SUCCESS) {
+ failure_perror("mdbx_cursor_get()", err);
+ } else {
+ assert(!"WTF?");
+ }
+ n += 1;
+ }
+
+ mdbx_cursor_close(cursor);
+ return rc;
+}
diff --git a/libs/libmdbx/src/test/test.h b/libs/libmdbx/src/test/test.h
index fb5ad4ee83..630059ac3f 100644
--- a/libs/libmdbx/src/test/test.h
+++ b/libs/libmdbx/src/test/test.h
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -22,6 +22,24 @@
#include "osal.h"
#include "utils.h"
+#include <deque>
+#include <set>
+#include <stack>
+#include <tuple>
+
+/* Detect std::string_view unless the build system already decided.
+ * __has_include is only used after checking that the operator exists, since
+ * merely mentioning it inside #if is an error on preprocessors lacking it. */
+#ifndef HAVE_cxx17_std_string_view
+#if defined(__has_include)
+#if __cplusplus >= 201703L && __has_include(<string_view>)
+#define HAVE_cxx17_std_string_view 1
+#endif
+#endif /* __has_include */
+#ifndef HAVE_cxx17_std_string_view
+#define HAVE_cxx17_std_string_view 0
+#endif
+#endif /* HAVE_cxx17_std_string_view */
+
+#if HAVE_cxx17_std_string_view
+#include <string_view>
+#endif
+
bool test_execute(const actor_config &config);
std::string thunk_param(const actor_config &config);
void testcase_setup(const char *casename, actor_params &params,
@@ -49,25 +67,26 @@ extern bool cleanup_before;
extern bool cleanup_after;
extern bool failfast;
extern bool progress_indicator;
+extern bool console_mode;
} /* namespace config */
} /* namespace global */
//-----------------------------------------------------------------------------
-struct db_deleter : public std::unary_function<void, MDBX_env *> {
+struct db_deleter /* : public std::unary_function<void, MDBX_env *> */ {
void operator()(MDBX_env *env) const { mdbx_env_close(env); }
};
-struct txn_deleter : public std::unary_function<void, MDBX_txn *> {
+struct txn_deleter /* : public std::unary_function<void, MDBX_txn *> */ {
void operator()(MDBX_txn *txn) const {
int rc = mdbx_txn_abort(txn);
if (rc)
- log_trouble(mdbx_func_, "mdbx_txn_abort()", rc);
+ log_trouble(__func__, "mdbx_txn_abort()", rc);
}
};
-struct cursor_deleter : public std::unary_function<void, MDBX_cursor *> {
+struct cursor_deleter /* : public std::unary_function<void, MDBX_cursor *> */ {
void operator()(MDBX_cursor *cursor) const { mdbx_cursor_close(cursor); }
};
@@ -79,9 +98,45 @@ typedef std::unique_ptr<MDBX_cursor, cursor_deleter> scoped_cursor_guard;
class testcase {
protected:
+#if HAVE_cxx17_std_string_view
+ using data_view = std::string_view;
+#else
+ using data_view = std::string;
+#endif
+ static inline data_view S(const MDBX_val &v) {
+ return data_view(static_cast<const char *>(v.iov_base), v.iov_len);
+ }
+ static inline data_view S(const keygen::buffer &b) { return S(b->value); }
+
+ using Item = std::pair<std::string, std::string>;
+  /* Ordering predicate for speculum items. Keys are compared with mdbx_cmp();
+   * when the table has MDBX_DUPSORT and the keys compare equal, the values
+   * are compared with mdbx_dcmp(). Both calls use the owning testcase's
+   * current transaction and table handle. */
+  struct ItemCompare {
+    const testcase *context;
+    ItemCompare(const testcase *owner) : context(owner) {}
+
+    bool operator()(const Item &a, const Item &b) const {
+      MDBX_val lhs, rhs;
+      lhs.iov_base = (void *)a.first.data();
+      lhs.iov_len = a.first.size();
+      rhs.iov_base = (void *)b.first.data();
+      rhs.iov_len = b.first.size();
+      const int by_key =
+          mdbx_cmp(context->txn_guard.get(), context->dbi, &lhs, &rhs);
+
+      const bool dupsort =
+          (context->config.params.table_flags & MDBX_DUPSORT) != 0;
+      if (by_key != 0 || !dupsort)
+        return by_key < 0;
+
+      /* Equal keys in a dupsort table: fall back to the values. */
+      lhs.iov_base = (void *)a.second.data();
+      lhs.iov_len = a.second.size();
+      rhs.iov_base = (void *)b.second.data();
+      rhs.iov_len = b.second.size();
+      return mdbx_dcmp(context->txn_guard.get(), context->dbi, &lhs, &rhs) < 0;
+    }
+  };
+ using SET = std::set<Item, ItemCompare>;
+
const actor_config &config;
const mdbx_pid_t pid;
+ MDBX_dbi dbi;
scoped_db_guard db_guard;
scoped_txn_guard txn_guard;
scoped_cursor_guard cursor_guard;
@@ -95,12 +150,23 @@ protected:
struct {
mdbx_canary canary;
- mutable chrono::time progress_timestamp;
} last;
- static int oom_callback(MDBX_env *env, int pid, mdbx_tid_t tid, uint64_t txn,
- unsigned gap, int retry);
+ SET speculum;
+ bool speculum_verify();
+ int insert(const keygen::buffer &akey, const keygen::buffer &adata,
+ unsigned flags);
+ int replace(const keygen::buffer &akey, const keygen::buffer &new_value,
+ const keygen::buffer &old_value, unsigned flags);
+ int remove(const keygen::buffer &akey, const keygen::buffer &adata);
+ static int oom_callback(MDBX_env *env, mdbx_pid_t pid, mdbx_tid_t tid,
+ uint64_t txn, unsigned gap, size_t space, int retry);
+
+ bool is_nested_txn_available() const {
+ return (config.params.mode_flags & MDBX_WRITEMAP) == 0;
+ }
+ void kick_progress(bool active) const;
void db_prepare();
void db_open();
void db_close();
@@ -109,21 +175,21 @@ protected:
void txn_end(bool abort);
int breakable_restart();
void txn_restart(bool abort, bool readonly, unsigned flags = 0);
- void cursor_open(unsigned dbi);
+ void cursor_open(MDBX_dbi handle);
void cursor_close();
void txn_inject_writefault(void);
void txn_inject_writefault(MDBX_txn *txn);
void fetch_canary();
void update_canary(uint64_t increment);
- void kick_progress(bool active) const;
void checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check,
MDBX_val expected_valued);
+ unsigned txn_underutilization_x256(MDBX_txn *txn) const;
MDBX_dbi db_table_open(bool create);
void db_table_drop(MDBX_dbi handle);
- void db_table_clear(MDBX_dbi handle);
+ void db_table_clear(MDBX_dbi handle, MDBX_txn *txn = nullptr);
void db_table_close(MDBX_dbi handle);
- int db_open__begin__table_create_open_clean(MDBX_dbi &dbi);
+ int db_open__begin__table_create_open_clean(MDBX_dbi &handle);
bool wait4start();
void report(size_t nops_done);
@@ -131,13 +197,12 @@ protected:
bool should_continue(bool check_timeout_only = false) const;
void generate_pair(const keygen::serial_t serial, keygen::buffer &out_key,
- keygen::buffer &out_value, keygen::serial_t data_age = 0) {
- keyvalue_maker.pair(serial, out_key, out_value, data_age);
+ keygen::buffer &out_value, keygen::serial_t data_age) {
+ keyvalue_maker.pair(serial, out_key, out_value, data_age, false);
}
- void generate_pair(const keygen::serial_t serial,
- keygen::serial_t data_age = 0) {
- generate_pair(serial, key, data, data_age);
+ void generate_pair(const keygen::serial_t serial) {
+ keyvalue_maker.pair(serial, key, data, 0, true);
}
bool mode_readonly() const {
@@ -146,7 +211,8 @@ protected:
public:
testcase(const actor_config &config, const mdbx_pid_t pid)
- : config(config), pid(pid), signalled(false), nops_completed(0) {
+ : config(config), pid(pid), signalled(false), nops_completed(0),
+ speculum(ItemCompare(this)) {
start_timestamp.reset();
memset(&last, 0, sizeof(last));
}
@@ -161,49 +227,52 @@ class testcase_ttl : public testcase {
public:
testcase_ttl(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid) {}
- bool run();
+ bool run() override;
};
class testcase_hill : public testcase {
+ using inherited = testcase;
+ SET speculum_commited;
+
public:
testcase_hill(const actor_config &config, const mdbx_pid_t pid)
- : testcase(config, pid) {}
- bool run();
+ : testcase(config, pid), speculum_commited(ItemCompare(this)) {}
+ bool run() override;
};
class testcase_append : public testcase {
public:
testcase_append(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid) {}
- bool run();
+ bool run() override;
};
class testcase_deadread : public testcase {
public:
testcase_deadread(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid) {}
- bool run();
+ bool run() override;
};
class testcase_deadwrite : public testcase {
public:
testcase_deadwrite(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid) {}
- bool run();
+ bool run() override;
};
class testcase_jitter : public testcase {
public:
testcase_jitter(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid) {}
- bool run();
+ bool run() override;
};
class testcase_try : public testcase {
public:
testcase_try(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid) {}
- bool run();
+ bool run() override;
};
class testcase_copy : public testcase {
@@ -214,5 +283,31 @@ public:
testcase_copy(const actor_config &config, const mdbx_pid_t pid)
: testcase(config, pid),
copy_pathname(config.params.pathname_db + "-copy") {}
- bool run();
+ bool run() override;
+};
+
+/* Test case that works with a stack of transactions; see the definitions of
+ * setup()/run()/teardown() for the actual scenario. */
+class testcase_nested : public testcase {
+  using inherited = testcase;
+  using FIFO = std::deque<std::pair<uint64_t, unsigned>>;
+
+  uint64_t serial;
+  FIFO fifo;
+  /* One entry per pushed transaction: its guard plus saved copies of a
+   * uint64_t, a FIFO and a SET. */
+  std::stack<std::tuple<scoped_txn_guard, uint64_t, FIFO, SET>> stack;
+
+  bool trim_tail(unsigned window_width);
+  bool grow_head(unsigned head_count);
+  bool pop_txn(bool abort);
+  /* Pops with a randomly chosen abort/commit decision; a different coin is
+   * used depending on whether nested transactions are available. */
+  bool pop_txn() {
+    return pop_txn(inherited::is_nested_txn_available() ? flipcoin_x3()
+                                                        : flipcoin_x2());
+  }
+  void push_txn();
+  bool stochastic_breakable_restart_with_nested(bool force_restart = false);
+
+public:
+  /* `serial` is zero-initialized so that it never holds an indeterminate
+   * value. */
+  testcase_nested(const actor_config &config, const mdbx_pid_t pid)
+      : testcase(config, pid), serial(0) {}
+  bool setup() override;
+  bool run() override;
+  bool teardown() override;
};
diff --git a/libs/libmdbx/src/test/test.vcxproj b/libs/libmdbx/src/test/test.vcxproj
deleted file mode 100644
index 93de00e3a0..0000000000
--- a/libs/libmdbx/src/test/test.vcxproj
+++ /dev/null
@@ -1,209 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
- <ItemGroup Label="ProjectConfigurations">
- <ProjectConfiguration Include="Debug|Win32">
- <Configuration>Debug</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|Win32">
- <Configuration>Release</Configuration>
- <Platform>Win32</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Debug|x64">
- <Configuration>Debug</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
- <ProjectConfiguration Include="Release|x64">
- <Configuration>Release</Configuration>
- <Platform>x64</Platform>
- </ProjectConfiguration>
- </ItemGroup>
- <ItemGroup>
- <ProjectReference Include="..\dll.vcxproj">
- <Project>{6d19209b-ece7-4b9c-941c-0aa2b484f199}</Project>
- </ProjectReference>
- </ItemGroup>
- <PropertyGroup Label="Globals">
- <ProjectGuid>{30E29CE6-E6FC-4D32-AA07-46A55FAF3A31}</ProjectGuid>
- <Keyword>Win32Proj</Keyword>
- <RootNamespace>mdbxtest</RootNamespace>
- <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>true</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <CharacterSet>MultiByte</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>false</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- <CharacterSet>MultiByte</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>true</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <CharacterSet>MultiByte</CharacterSet>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
- <ConfigurationType>Application</ConfigurationType>
- <UseDebugLibraries>false</UseDebugLibraries>
- <PlatformToolset>v140</PlatformToolset>
- <WholeProgramOptimization>true</WholeProgramOptimization>
- <CharacterSet>MultiByte</CharacterSet>
- </PropertyGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
- <ImportGroup Label="ExtensionSettings">
- </ImportGroup>
- <ImportGroup Label="Shared">
- </ImportGroup>
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
- <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
- </ImportGroup>
- <PropertyGroup Label="UserMacros" />
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <LinkIncremental>true</LinkIncremental>
- <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
- <IntDir>$(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
- <TargetName>mdbx_test</TargetName>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <LinkIncremental>false</LinkIncremental>
- <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
- <IntDir>$(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
- <TargetName>mdbx_test</TargetName>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
- <LinkIncremental>true</LinkIncremental>
- <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
- <IntDir>$(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
- <TargetName>mdbx_test</TargetName>
- </PropertyGroup>
- <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
- <LinkIncremental>false</LinkIncremental>
- <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir>
- <IntDir>$(SolutionDir)$(Platform)\$(Configuration)\$(ProjectName)\</IntDir>
- <TargetName>mdbx_test</TargetName>
- </PropertyGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
- <ClCompile>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <WarningLevel>Level4</WarningLevel>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;MDBX_DEBUG=1;%(PreprocessorDefinitions);LIBMDBX_IMPORTS=1</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>test.h</PrecompiledHeaderFile>
- <TreatWarningAsError>true</TreatWarningAsError>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <AdditionalDependencies>winmm.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
- <ClCompile>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <WarningLevel>Level4</WarningLevel>
- <Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>_DEBUG;_CONSOLE;MDBX_DEBUG=1;%(PreprocessorDefinitions);LIBMDBX_IMPORTS=1</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>test.h</PrecompiledHeaderFile>
- <TreatWarningAsError>true</TreatWarningAsError>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <AdditionalDependencies>winmm.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
- <ClCompile>
- <WarningLevel>Level4</WarningLevel>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <Optimization>MaxSpeed</Optimization>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions);LIBMDBX_IMPORTS=1</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>test.h</PrecompiledHeaderFile>
- <OmitFramePointers>true</OmitFramePointers>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <OptimizeReferences>true</OptimizeReferences>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <AdditionalDependencies>winmm.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
- <ClCompile>
- <WarningLevel>Level4</WarningLevel>
- <PrecompiledHeader>Use</PrecompiledHeader>
- <Optimization>MaxSpeed</Optimization>
- <FunctionLevelLinking>true</FunctionLevelLinking>
- <IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions);LIBMDBX_IMPORTS=1</PreprocessorDefinitions>
- <SDLCheck>true</SDLCheck>
- <PrecompiledHeaderFile>test.h</PrecompiledHeaderFile>
- <OmitFramePointers>true</OmitFramePointers>
- </ClCompile>
- <Link>
- <SubSystem>Console</SubSystem>
- <EnableCOMDATFolding>true</EnableCOMDATFolding>
- <OptimizeReferences>true</OptimizeReferences>
- <GenerateDebugInformation>true</GenerateDebugInformation>
- <AdditionalDependencies>winmm.lib;%(AdditionalDependencies)</AdditionalDependencies>
- </Link>
- </ItemDefinitionGroup>
- <ItemGroup>
- <ClInclude Include="base.h" />
- <ClInclude Include="chrono.h" />
- <ClInclude Include="config.h" />
- <ClInclude Include="keygen.h" />
- <ClInclude Include="log.h" />
- <ClInclude Include="osal.h" />
- <ClInclude Include="test.h" />
- <ClInclude Include="utils.h" />
- </ItemGroup>
- <ItemGroup>
- <ClCompile Include="ttl.cc" />
- <ClCompile Include="append.cc" />
- <ClCompile Include="cases.cc" />
- <ClCompile Include="chrono.cc" />
- <ClCompile Include="config.cc" />
- <ClCompile Include="copy.cc" />
- <ClCompile Include="dead.cc" />
- <ClCompile Include="hill.cc" />
- <ClCompile Include="try.cc" />
- <ClCompile Include="jitter.cc" />
- <ClCompile Include="keygen.cc" />
- <ClCompile Include="log.cc" />
- <ClCompile Include="main.cc" />
- <ClCompile Include="osal-windows.cc">
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
- <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
- </ClCompile>
- <ClCompile Include="test.cc" />
- <ClCompile Include="utils.cc" />
- </ItemGroup>
- <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
- <ImportGroup Label="ExtensionTargets">
- </ImportGroup>
-</Project>
diff --git a/libs/libmdbx/src/test/try.cc b/libs/libmdbx/src/test/try.cc
index 150abd36de..adb0113096 100644
--- a/libs/libmdbx/src/test/try.cc
+++ b/libs/libmdbx/src/test/try.cc
@@ -1,4 +1,4 @@
-#include "test.h"
+#include "test.h"
bool testcase_try::run() {
db_open();
diff --git a/libs/libmdbx/src/test/ttl.cc b/libs/libmdbx/src/test/ttl.cc
index b2650f0c13..782a8b4d56 100644
--- a/libs/libmdbx/src/test/ttl.cc
+++ b/libs/libmdbx/src/test/ttl.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -29,11 +29,10 @@ static unsigned edge2count(uint64_t edge, unsigned count_max) {
}
bool testcase_ttl::run() {
- MDBX_dbi dbi;
int err = db_open__begin__table_create_open_clean(dbi);
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("ttl: bailout-prepare due '%s'", mdbx_strerror(err));
- return true;
+ return false;
}
/* LY: тест "эмуляцией time-to-live":
@@ -54,18 +53,8 @@ bool testcase_ttl::run() {
/* LY: для параметризации используем подходящие параметры, которые не имеют
* здесь смысла в первоначальном значении. */
- const unsigned window_max_lower =
-#ifdef __APPLE__
- 333;
-#else
- 999;
-#endif
- const unsigned count_max_lower =
-#ifdef __APPLE__
- 333;
-#else
- 999;
-#endif
+ const unsigned window_max_lower = 333;
+ const unsigned count_max_lower = 333;
const unsigned window_max = (config.params.batch_read > window_max_lower)
? config.params.batch_read
@@ -73,8 +62,8 @@ bool testcase_ttl::run() {
const unsigned count_max = (config.params.batch_write > count_max_lower)
? config.params.batch_write
: count_max_lower;
- log_info("ttl: using `batch_read` value %u for window_max", window_max);
- log_info("ttl: using `batch_write` value %u for count_max", count_max);
+ log_verbose("ttl: using `batch_read` value %u for window_max", window_max);
+ log_verbose("ttl: using `batch_write` value %u for count_max", count_max);
uint64_t seed =
prng64_map2_white(config.params.keygen.seed) + config.actor_id;
@@ -87,14 +76,16 @@ bool testcase_ttl::run() {
std::deque<std::pair<uint64_t, unsigned>> fifo;
uint64_t serial = 0;
+ bool rc = false;
while (should_continue()) {
const uint64_t salt = prng64_white(seed) /* mdbx_txn_id(txn_guard.get()) */;
- const unsigned window_width = edge2window(salt, window_max);
+ const unsigned window_width =
+ flipcoin_x4() ? 0 : edge2window(salt, window_max);
unsigned head_count = edge2count(salt, count_max);
- log_verbose("ttl: step #%zu (serial %" PRIu64
- ", window %u, count %u) salt %" PRIu64,
- nops_completed, serial, window_width, head_count, salt);
+ log_debug("ttl: step #%zu (serial %" PRIu64
+ ", window %u, count %u) salt %" PRIu64,
+ nops_completed, serial, window_width, head_count, salt);
if (window_width) {
while (fifo.size() > window_width) {
@@ -104,7 +95,7 @@ bool testcase_ttl::run() {
tail_count);
fifo.pop_back();
for (unsigned n = 0; n < tail_count; ++n) {
- log_trace("ttl: remove-tail %" PRIu64, serial);
+ log_trace("ttl: remove-tail %" PRIu64, tail_serial);
generate_pair(tail_serial);
err = mdbx_del(txn_guard.get(), dbi, &key->value, &data->value);
if (unlikely(err != MDBX_SUCCESS)) {
@@ -158,7 +149,9 @@ bool testcase_ttl::run() {
serial = fifo.front().first;
fifo.pop_front();
}
+
report(1);
+ rc = true;
}
bailout:
@@ -170,10 +163,10 @@ bailout:
err = breakable_commit();
if (unlikely(err != MDBX_SUCCESS)) {
log_notice("ttl: bailout-clean due '%s'", mdbx_strerror(err));
- return true;
+ return false;
}
} else
db_table_close(dbi);
}
- return true;
+ return rc;
}
diff --git a/libs/libmdbx/src/test/utils.cc b/libs/libmdbx/src/test/utils.cc
index d9b3538b99..311cf544ef 100644
--- a/libs/libmdbx/src/test/utils.cc
+++ b/libs/libmdbx/src/test/utils.cc
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -331,6 +331,9 @@ double double_from_upper(uint64_t salt) {
}
bool flipcoin() { return bleach32((uint32_t)entropy_ticks()) & 1; }
+bool flipcoin_x2() { return (bleach32((uint32_t)entropy_ticks()) & 3) == 0; }
+bool flipcoin_x3() { return (bleach32((uint32_t)entropy_ticks()) & 7) == 0; }
+bool flipcoin_x4() { return (bleach32((uint32_t)entropy_ticks()) & 15) == 0; }
bool jitter(unsigned probability_percent) {
const uint32_t top = UINT32_MAX - UINT32_MAX % 100;
diff --git a/libs/libmdbx/src/test/utils.h b/libs/libmdbx/src/test/utils.h
index d1b859acd4..2a5a54de6d 100644
--- a/libs/libmdbx/src/test/utils.h
+++ b/libs/libmdbx/src/test/utils.h
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright 2017-2019 Leonid Yuriev <leo@yuriev.ru>
* and other libmdbx authors: please see AUTHORS file.
* All rights reserved.
@@ -151,7 +151,7 @@ template <typename T> static __inline T load(const void *ptr) {
#if defined(_MSC_VER) && \
(defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64))
return *(const T __unaligned *)ptr;
-#elif UNALIGNED_OK
+#elif MDBX_UNALIGNED_OK
return *(const T *)ptr;
#else
T local;
@@ -161,14 +161,14 @@ template <typename T> static __inline T load(const void *ptr) {
memcpy(&local, (const T *)ptr, sizeof(T));
#endif /* __GNUC__ || __clang__ */
return local;
-#endif /* UNALIGNED_OK */
+#endif /* MDBX_UNALIGNED_OK */
}
template <typename T> static __inline void store(void *ptr, const T &value) {
#if defined(_MSC_VER) && \
(defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64))
*((T __unaligned *)ptr) = value;
-#elif UNALIGNED_OK
+#elif MDBX_UNALIGNED_OK
*(volatile T *)ptr = value;
#else
#if defined(__GNUC__) || defined(__clang__)
@@ -176,7 +176,7 @@ template <typename T> static __inline void store(void *ptr, const T &value) {
#else
memcpy(ptr, &value, sizeof(T));
#endif /* __GNUC__ || __clang__ */
-#endif /* UNALIGNED_OK */
+#endif /* MDBX_UNALIGNED_OK */
}
} /* namespace unaligned */
@@ -355,5 +355,8 @@ uint64_t prng64(void);
void prng_fill(void *ptr, size_t bytes);
bool flipcoin();
+bool flipcoin_x2();
+bool flipcoin_x3();
+bool flipcoin_x4();
bool jitter(unsigned probability_percent);
void jitter_delay(bool extra = false);