Diffstat (limited to 'plugins/Dbx_kv/src/hamsterdb')
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/AUTHORS  6
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/COPYING  202
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/CREDITS  6
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/NEWS  1
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/README  261
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/config.h  10
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb.h  2535
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb.hpp  711
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_int.h  319
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_ola.h  244
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_srv.h  118
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/include/ham/msstdint.h  259
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/include/ham/types.h  143
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/0root/root.h  102
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/abi.h  68
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/dynamic_array.h  157
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/error.cc  117
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/error.h  120
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/mutex.h  53
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/packstart.h  74
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/packstop.h  36
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/pickle.h  119
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/scoped_ptr.h  54
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/spinlock.h  127
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/util.cc  36
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1base/util.h  62
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1errorinducer/errorinducer.cc  31
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1errorinducer/errorinducer.h  116
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1globals/globals.cc  60
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1globals/globals.h  89
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1mem/mem.cc  60
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1mem/mem.h  151
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1os/file.h  154
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1os/os.cc  29
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1os/os.h  73
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1os/os_posix.cc  474
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1os/os_win32.cc  542
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1os/socket.h  75
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/1rb/rb.h  977
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2config/db_config.h  73
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2config/env_config.h  102
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2device/device.h  124
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2device/device_disk.h  238
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2device/device_factory.h  52
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2device/device_inmem.h  181
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2lsn_manager/lsn_manager.h  57
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2lsn_manager/lsn_manager_test.h  54
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2page/page.cc  103
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2page/page.h  435
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2page/page_collection.h  182
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2protobuf/Makefile.am  15
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2protobuf/Makefile.in  627
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2protobuf/messages.proto  457
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2protobuf/protocol.h  147
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2protoserde/Makefile.am  5
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2protoserde/Makefile.in  451
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2protoserde/messages.h  1839
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2protoserde/messages.proto  646
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2queue/queue.h  131
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/2worker/worker.h  106
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager.cc  85
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager.h  231
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_disk.cc  637
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_disk.h  196
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_factory.h  44
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_inmem.cc  148
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_inmem.h  75
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_check.cc  325
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_cursor.cc  561
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_cursor.h  246
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_erase.cc  233
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_find.cc  226
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_flags.h  95
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_base.h  475
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_default.h  532
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_pax.h  141
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index.cc  269
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index.h  455
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index_factory.h  445
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_insert.cc  214
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_base.h  114
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_binary.h  273
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_pod.h  261
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_varlen.h  533
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_node.h  175
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_node_proxy.h  609
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_base.h  64
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_default.h  424
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_duplicate.h  1557
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_inline.h  230
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_internal.h  230
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_stats.cc  181
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_stats.h  179
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_update.cc  436
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_update.h  113
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_visit.cc  117
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_visitor.h  70
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3btree/upfront_index.h  684
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3cache/cache.h  244
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3changeset/changeset.cc  113
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3changeset/changeset.h  118
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3journal/journal.cc  862
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3journal/journal.h  329
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_entries.h  208
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_state.h  104
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_test.h  58
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager.cc  798
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager.h  155
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_state.h  121
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_test.h  76
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_worker.h  97
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4context/context.h  57
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4cursor/cursor.cc  1119
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4cursor/cursor.h  555
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4db/db.cc  143
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4db/db.h  232
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4db/db_local.cc  1776
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4db/db_local.h  278
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4db/db_remote.cc  635
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4db/db_remote.h  131
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env.cc  333
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env.h  210
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env_header.h  184
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env_local.cc  760
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env_local.h  192
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env_local_test.h  56
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env_remote.cc  445
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env_remote.h  125
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4env/env_test.h  60
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4txn/txn.h  298
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_cursor.cc  368
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_cursor.h  170
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_factory.h  63
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_local.cc  676
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_local.h  566
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_remote.cc  108
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_remote.h  98
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/5hamsterdb/hamsterdb.cc  1633
-rw-r--r--  plugins/Dbx_kv/src/hamsterdb/src/5hamsterdb/hola.cc  704
139 files changed, 41937 insertions, 0 deletions
diff --git a/plugins/Dbx_kv/src/hamsterdb/AUTHORS b/plugins/Dbx_kv/src/hamsterdb/AUTHORS
new file mode 100644
index 0000000000..6b0d2a235f
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/AUTHORS
@@ -0,0 +1,6 @@
+
+Ger Hobbelt (http://www.hobbelt.com, http://www.hebbut.net - THANKS!)
+ ham_env_get_parameters, ham_db_get_parameters and functions for approximate
+ matching, minor bugfixes and performance improvements plus documentation
+ fixes/improvements; a complete rewrite of the freelist code with HUGE
+ performance gains - THANKS!
diff --git a/plugins/Dbx_kv/src/hamsterdb/COPYING b/plugins/Dbx_kv/src/hamsterdb/COPYING
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/COPYING
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/plugins/Dbx_kv/src/hamsterdb/CREDITS b/plugins/Dbx_kv/src/hamsterdb/CREDITS
new file mode 100644
index 0000000000..d2571aeebd
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/CREDITS
@@ -0,0 +1,6 @@
+
+Jul 20, 2009
+ham_env_get_parameters, ham_db_get_parameters and functions for approximate
+matching, minor bugfixes and performance improvements plus documentation
+improvements were written by Ger Hobbelt, http://www.hobbelt.com,
+http://www.hebbut.net - THANKS!
diff --git a/plugins/Dbx_kv/src/hamsterdb/NEWS b/plugins/Dbx_kv/src/hamsterdb/NEWS
new file mode 100644
index 0000000000..da7acb2eb3
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/NEWS
@@ -0,0 +1 @@
+See http://hamsterdb.com for up-to-date news about the project.
diff --git a/plugins/Dbx_kv/src/hamsterdb/README b/plugins/Dbx_kv/src/hamsterdb/README
new file mode 100644
index 0000000000..66fba73f22
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/README
@@ -0,0 +1,261 @@
+hamsterdb 2.1.10 Mon 23 Feb 23:07:52 CET 2015
+(C) Christoph Rupp, chris@crupp.de; http://www.hamsterdb.com
+
+This is the README file of hamsterdb.
+
+Contents:
+
+1. About
+
+hamsterdb is a database engine written in C/C++. It is fast, production-proven
+and easy to use.
+
+This release has several bug fixes (see below for a list). Thanks to those who
+reported them and invested lots of time to come up with samples to reproduce
+the bugs.
+
+The flag HAM_RECORD_NUMBER is deprecated. It is replaced with
+HAM_RECORD_NUMBER64 for 64bit record numbers, and HAM_RECORD_NUMBER32 for
+32bit record numbers.
+
+A major change under the hood: dirty pages are now flushed asynchronously.
+Expect performance improvements in this release, and more to come in the
+next releases.
+
+2. Changes
+
+New Features
+* Added Cursor.TryFind to hamsterdb-dotnet
+ (thanks, mjmckp <matthew.j.m.peacock@gmail.com>)
+* The page cache eviction was moved to a background thread
+* When reading records from mmapped storage, a pointer into the storage
+ is returned and the record data is no longer copied
+
+Bugfixes
+* Fixed FreeBSD compilation errors (thanks, Heping Wen)
+* issue #46: fixed segfault in approx. matching (thanks, Joel
+ Jacobson)
+* issue #45: fixed segfault in Journal recovery (thanks, Michael
+ Moellney)
+* issue #44: approx. matching returned the wrong key (thanks, Joel
+ Jacobson)
+* issue #43: fixed segfault when flushing transactions (thanks, Joel
+ Jacobson)
+* Fixed compilation error on debian Wheezy, gcc 4.7.2, 32bit (thanks,
+ Thomas Fähnle)
+* Fixed compilation error on OSX (thanks, Daniel Lemire)
+* issue #42: ham_cursor_find returned wrong key w/ approx. matching and
+ transactions
+* Fixed large file support on linux (thanks, Thomas Fähnle)
+
+Other Changes
+* Default compilation flag is now -O3
+* Added a new parameter HAM_PARAM_POSIX_FADVISE (thanks, Thomas Fähnle)
+* Removed dependency on malloc.h
+* The github wiki is now linked into documentation/wiki
+* The macro HAM_API_REVISION is now deprecated; use HAM_VERSION_* instead
+* Deprecated HAM_RECORD_NUMBER (use HAM_RECORD_NUMBER64 instead);
+ introduced a new flag HAM_RECORD_NUMBER32 for 32bit record numbers
+* Implemented ham_cursor_get_record_size() for remote access
+
+To see a list of all changes, look in the file ChangeLog.
+
+3. Roadmap
+- See https://github.com/cruppstahl/hamsterdb/wiki/Roadmap
+
+4. Features
+
+- PRO: SIMD instructions for lookups
+- PRO: transparent AES encryption
+- PRO: transparent CRC32 verification
+- PRO: transparent compression for journal, keys and records using
+ zlib, snappy, lzf or lzo
+- PRO: compression for uint32 keys
+
+- Very fast sorted B+Tree with variable length keys
+- Basic schema support for POD types (e.g. uint32, uint64, real32 etc)
+- Very fast analytical functions
+- Can run as an in-memory database
+- Multiple databases in one file
+- Record number databases ("auto-increment")
+- Duplicate keys
+- Logging and recovery
+- Unlimited number of parallel Transactions
+- Partial reading/writing of records
+- Network access (remote databases) via TCP/Protocol Buffers
+- Very fast database cursors
+- Configurable page size, cache size, key size etc
+- Runs on Linux, Unices, Microsoft Windows and other architectures
+- Uses memory mapped I/O for fast disk access (but falls back to read/write if
+ mmap is not available)
+- Uses 64bit file pointers and supports huge files (>2 GB)
+- Easy to use and well-documented
+- Open source and released under APL 2.0 license
+- Wrappers for C++, Java, .NET, Erlang, Python, Ada and others
+
+5. Known Issues/Bugs
+
+None.
+
+6. Compiling
+
+6.1 Linux, MacOS and other Unix systems
+
+To compile hamsterdb, run ./configure, make, make install.
+
+Run `./configure --help' for more options (i.e. static/dynamic library,
+build with debugging symbols etc).
+
+6.2 Microsoft Visual Studio
+
+A Solution file is provided for Microsoft Visual C++ in the "win32" folder
+for MSVC 2008 and MSVC 2010.
+All libraries can be downloaded precompiled from the hamsterdb webpage.
+
+To download Microsoft Visual Studio Express Edition for free, go to
+http://msdn.microsoft.com/vstudio/express/visualc/default.aspx.
+
+6.3 Dependencies
+
+On Ubuntu, the following packages are required:
+ - libdb-dev (optional)
+ - protobuf-compiler
+ - libprotobuf-dev
+ - libgoogle-perftools-dev
+ - libboost-system-dev
+ - libboost-thread-dev
+ - libboost-dev
+ - (libuv needs to be installed from sources - see
+ https://github.com/joyent/libuv)
+
+For Windows, precompiled dependencies are available here:
+https://github.com/cruppstahl/hamsterdb-alien
+
+7. Testing and Example Code
+
+Make automatically compiles several example programs in the directory
+'samples'. To see hamsterdb in action, just run 'samples/db1'
+or any other sample ('win32/out/samples/db1/db1.exe' on Windows platforms).
+
+8. API Documentation
+
+The header files in 'include/ham' have extensive comments. Also, a doxygen
+script is available; run 'make doc' to start doxygen. The generated
+documentation is also available on the hamsterdb web page.
+
+9. Other Ways to Compile hamsterdb
+
+If you want to compile hamsterdb without using the provided ./configure
+environment, you have to set some preprocessor macros:
+
+DEBUG enable debugging output and diagnostic checks (slow!)
+HAM_32BIT compile for 32bit (alias: WIN32)
+HAM_64BIT compile for 64bit (alias: WIN64, also needs WIN32)
+
+Also, if you compile for windows, you have to compile the file
+'src/os_win32.cc' and ignore the file 'src/os_posix.cc'. Vice versa on
+non-Windows platforms.
+
+10. Porting hamsterdb
+
+Porting hamsterdb shouldn't be too difficult. All operating
+system dependent functions are declared in 'src/os.h' and defined
+in 'src/os_win32.cc' or 'src/os_posix.cc'.
+Other compiler- and OS-specific macros are in 'include/ham/types.h'.
+Most likely, these are the only files which have to be touched. Also see item
+9) for important macros.
+
+11. Migrating files from older versions
+
+Usually, hamsterdb releases are backwards compatible. There are some exceptions,
+though. In this case tools are provided to migrate the database. First, export
+your existing database with ham_export linked against the old version.
+(ham_export links statically and will NOT be confused if your system has a
+newer version of hamsterdb installed). Then use the newest version of
+ham_import to import the data into a new database. You can find ham_export
+and ham_import in the "tools" subdirectory.
+
+ Example (ham_export of 2.1.2 was renamed to ham_export-2.1.2 to document
+ that it's an older version):
+
+ ham_export-2.1.2 input.db | ham_import --stdin output.db
+
+12. Licensing
+
+hamsterdb is released under the APL 2.0 license, which allows
+unrestricted use for commercial and non-commercial applications. See the
+file COPYING for more information.
+
+A commercial, closed-source version, hamsterdb pro, with additional functionality
+is available on request. See http://hamsterdb.com for more information.
+
+13. Contact
+
+Author of hamsterdb is
+ Christoph Rupp
+ Paul-Preuss-Str. 63
+ 80995 Muenchen/Germany
+ email: chris@crupp.de
+ web: http://www.hamsterdb.com
+
+14. Other Copyrights
+
+The Google Protocol Buffers ("protobuf") library is Copyright 2008, Google Inc.
+It has the following license:
+
+ Copyright 2008, Google Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Google Inc. nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ Code generated by the Protocol Buffer compiler is owned by the owner
+ of the input file used when generating it. This code is not
+ standalone and requires a support library to be linked with it. This
+ support library is itself covered by the above license.
+
+The libuv library is part of the Node project: http://nodejs.org/
+libuv may be distributed alone under Node's license:
+
+ Copyright Joyent, Inc. and other Node contributors. All rights reserved.
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to
+ deal in the Software without restriction, including without limitation the
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ sell copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ IN THE SOFTWARE.
+
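[Editorial aside, not part of the committed files: section 7 of the README points at samples/db1 as the quickest way to see hamsterdb in action. The sketch below shows roughly what such a sample does with the C API declared in include/ham/hamsterdb.h further down in this patch. Only ham_env_create's signature is visible in this section; ham_env_create_db, ham_db_insert, ham_db_find, ham_db_close and ham_env_close are assumed from the 2.1.x API, and error handling is reduced to a check against HAM_SUCCESS.]

    #include <stdio.h>
    #include <string.h>
    #include <ham/hamsterdb.h>

    int main(void) {
      ham_env_t *env;
      ham_db_t *db;
      ham_key_t key;
      ham_record_t record;

      /* keys and records must be zero-initialized before use */
      memset(&key, 0, sizeof(key));
      memset(&record, 0, sizeof(record));

      /* create a new Environment and one Database (name 1) inside it */
      if (ham_env_create(&env, "test.db", 0, 0644, 0) != HAM_SUCCESS)
        return 1;
      if (ham_env_create_db(env, &db, 1, 0, 0) != HAM_SUCCESS)
        return 1;

      key.data = (void *)"hello";
      key.size = 6;                   /* including the terminating zero byte */
      record.data = (void *)"world";
      record.size = 6;

      /* insert the pair, then look it up again */
      if (ham_db_insert(db, 0, &key, &record, 0) != HAM_SUCCESS)
        return 1;
      if (ham_db_find(db, 0, &key, &record, 0) == HAM_SUCCESS)
        printf("found: %s\n", (const char *)record.data);

      ham_db_close(db, 0);
      ham_env_close(env, 0);
      return 0;
    }
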
diff --git a/plugins/Dbx_kv/src/hamsterdb/config.h b/plugins/Dbx_kv/src/hamsterdb/config.h
new file mode 100644
index 0000000000..d1fbc4d2f5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/config.h
@@ -0,0 +1,10 @@
+#define _CRT_SECURE_NO_WARNINGS
+
+#define HAM_EXPORT
+
+#define BOOST_SYSTEM_NO_DEPRECATED
+
+#define HAVE_MMAP 1
+#define HAVE_UNMMAP 1
+
+#pragma warning(disable:4100 4127 4512)
\ No newline at end of file
diff --git a/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb.h b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb.h
new file mode 100644
index 0000000000..668cfc7cde
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb.h
@@ -0,0 +1,2535 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file hamsterdb.h
+ * @brief Include file for hamsterdb Embedded Storage
+ * @author Christoph Rupp, chris@crupp.de
+ * @version 2.1.10
+ *
+ * @mainpage
+ *
+ * This manual documents the hamsterdb C API. hamsterdb is a key/value database
+ * that is linked directly into your application, avoiding all the overhead
+ * that is related to external databases and RDBMS systems.
+ *
+ * This header file declares all functions and macros that are needed to use
+ * hamsterdb. The comments are formatted in Doxygen style and can be extracted
+ * to automagically generate documentation. The documentation is also available
+ * online here: <a href="http://hamsterdb.com/public/scripts/html_www">
+ http://hamsterdb.com/public/scripts/html_www</a>.
+ *
+ * In addition, there's a tutorial book hosted on github:
+ * <a href="http://github.com/cruppstahl/hamsterdb/wiki/Tutorial">
+ http://github.com/cruppstahl/hamsterdb/wiki/Tutorial</a>.
+ *
+ * If you want to create or open Databases or Environments (a collection of
+ * multiple Databases), the following functions will be interesting for you:
+ * <table>
+ * <tr><td>@ref ham_env_create</td><td>Creates an Environment</td></tr>
+ * <tr><td>@ref ham_env_open</td><td>Opens an Environment</td></tr>
+ * <tr><td>@ref ham_env_close</td><td>Closes an Environment</td></tr>
+ * <tr><td>@ref ham_env_create_db</td><td>Creates a Database in an
+ Environment</td></tr>
+ * <tr><td>@ref ham_env_open_db</td><td>Opens a Database from an
+ Environment</td></tr>
+ * <tr><td>@ref ham_db_close</td><td>Closes a Database</td></tr>
+ * </table>
+ *
+ * To insert, lookup or delete key/value pairs, the following functions are
+ * used:
+ * <table>
+ * <tr><td>@ref ham_db_insert</td><td>Inserts a key/value pair into a
+ Database</td></tr>
+ * <tr><td>@ref ham_db_find</td><td>Lookup of a key/value pair in a
+ Database</td></tr>
+ * <tr><td>@ref ham_db_erase</td><td>Erases a key/value pair from a
+ Database</td></tr>
+ * </table>
+ *
+ * Alternatively, you can use Cursors to iterate over a Database:
+ * <table>
+ * <tr><td>@ref ham_cursor_create</td><td>Creates a new Cursor</td></tr>
+ * <tr><td>@ref ham_cursor_find</td><td>Positions the Cursor on a key</td></tr>
+ * <tr><td>@ref ham_cursor_insert</td><td>Inserts a new key/value pair with a
+ Cursor</td></tr>
+ * <tr><td>@ref ham_cursor_erase</td><td>Deletes the key/value pair that
+ the Cursor points to</td></tr>
+ * <tr><td>@ref ham_cursor_overwrite</td><td>Overwrites the value of the current key</td></tr>
+ * <tr><td>@ref ham_cursor_move</td><td>Moves the Cursor to the first, next,
+ previous or last key in the Database</td></tr>
+ * <tr><td>@ref ham_cursor_close</td><td>Closes the Cursor</td></tr>
+ * </table>
+ *
+ * If you want to use Transactions, then the following functions are required:
+ * <table>
+ * <tr><td>@ref ham_txn_begin</td><td>Begins a new Transaction</td></tr>
+ * <tr><td>@ref ham_txn_commit</td><td>Commits the current
+ Transaction</td></tr>
+ * <tr><td>@ref ham_txn_abort</td><td>Aborts the current Transaction</td></tr>
+ * </table>
+ *
+ * hamsterdb supports remote Databases. The server can be embedded
+ * into your application or run standalone (see tools/hamzilla for a Unix
+ * daemon or Win32 service which hosts Databases). If you want to embed the
+ * server then the following functions have to be used:
+ * <table>
+ * <tr><td>@ref ham_srv_init</td><td>Initializes the server</td></tr>
+ * <tr><td>@ref ham_srv_add_env</td><td>Adds an Environment to the
+ server. The Environment with all its Databases will then be available
+ remotely.</td></tr>
+ * <tr><td>@ref ham_srv_close</td><td>Closes the server and frees all allocated
+ resources</td></tr>
+ * </table>
+ *
+ * If you need help then you're always welcome to use the <a
+ href="https://groups.google.com/forum/?fromgroups#!forum/hamsterdb-user">
+ mailing list</a>,
+ * drop a message (chris at crupp dot de) or use the <a
+ href="http://hamsterdb.com/index/contact">contact form</a>.
+ *
+ * Have fun!
+ */
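
[Editorial aside: a minimal sketch of the Cursor traversal described in the table above. ham_cursor_create, ham_cursor_move and ham_cursor_close are only referenced, not declared, in the visible part of this header, so their signatures here are assumed from the 2.1.x API; HAM_CURSOR_FIRST/HAM_CURSOR_NEXT are the move flags mentioned for ham_cursor_move.]

    #include <stdio.h>
    #include <string.h>
    #include <ham/hamsterdb.h>

    /* print every key/record pair of an open Database */
    static void dump_database(ham_db_t *db) {
      ham_cursor_t *cursor;
      ham_key_t key;
      ham_record_t record;
      ham_status_t st;
      uint32_t flags = HAM_CURSOR_FIRST;

      memset(&key, 0, sizeof(key));
      memset(&record, 0, sizeof(record));

      if (ham_cursor_create(&cursor, db, 0, 0) != HAM_SUCCESS)
        return;

      /* start at the first key, then keep moving to the next one;
         the loop ends with HAM_KEY_NOT_FOUND after the last pair */
      while ((st = ham_cursor_move(cursor, &key, &record, flags)) == HAM_SUCCESS) {
        printf("%.*s: %u bytes\n", (int)key.size,
               (const char *)key.data, (unsigned)record.size);
        flags = HAM_CURSOR_NEXT;
      }

      ham_cursor_close(cursor);
    }
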
+
+#ifndef HAM_HAMSTERDB_H
+#define HAM_HAMSTERDB_H
+
+#include <ham/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* deprecated */
+#define HAM_API_REVISION 3
+
+/**
+ * The version numbers
+ *
+ * @remark A change of the major revision means a significant update
+ * with a lot of new features and API changes.
+ *
+ * The minor version means a significant update without API changes, and the
+ * revision is incremented for each release with minor improvements only.
+ *
+ * The file version describes the version of the binary database format.
+ * hamsterdb is neither backwards- nor forwards-compatible regarding file
+ * format changes.
+ *
+ * If a file was created with hamsterdb pro then the msb of the file version
+ * is set. hamsterdb pro is able to open files created with hamsterdb (APL
+ * version), but not vice versa.
+ *
+ * History of file versions:
+ * 2.1.0: introduced the file version; version is 0
+ * 2.1.3: new btree format, file format cleanups; version is 1
+ * 2.1.4: new btree format for duplicate keys/var. length keys; version is 2
+ * 2.1.5: new freelist; version is 3
+ * 2.1.10: changes in btree node format; version is 4
+ */
+#define HAM_VERSION_MAJ 2
+#define HAM_VERSION_MIN 1
+#define HAM_VERSION_REV 10
+#define HAM_FILE_VERSION 4
+
+/**
+ * The hamsterdb Database structure
+ *
+ * This structure is allocated in @ref ham_env_create_db and
+ * @ref ham_env_open_db. It is deleted in @a ham_db_close.
+ */
+struct ham_db_t;
+typedef struct ham_db_t ham_db_t;
+
+/**
+ * The hamsterdb Environment structure
+ *
+ * This structure is allocated with @ref ham_env_create and @ref ham_env_open
+ * and is deleted in @ref ham_env_close.
+ */
+struct ham_env_t;
+typedef struct ham_env_t ham_env_t;
+
+/**
+ * A Database Cursor
+ *
+ * A Cursor is used for bi-directionally traversing the Database and
+ * for inserting/deleting/searching Database items.
+ *
+ * This structure is allocated with @ref ham_cursor_create and deleted with
+ * @ref ham_cursor_close.
+ */
+struct ham_cursor_t;
+typedef struct ham_cursor_t ham_cursor_t;
+
+/**
+ * A generic record.
+ *
+ * A record represents data items in hamsterdb. Before using a record, it
+ * is important to initialize all record fields with zeroes, i.e. with
+ * the C library routines memset(3) or bzero(2).
+ *
+ * When hamsterdb returns a record structure, the pointer to the record
+ * data is provided in @a data. This pointer is only temporary and will be
+ * overwritten by subsequent hamsterdb API calls using the same Transaction
+ * (or, if Transactions are disabled, using the same Database). The pointer
+ * will also be invalidated after the Transaction is aborted or committed.
+ *
+ * To avoid this, the calling application can allocate the @a data pointer.
+ * In this case, you have to set the flag @ref HAM_RECORD_USER_ALLOC. The
+ * @a size parameter will then return the size of the record. It's the
+ * responsibility of the caller to make sure that the @a data parameter is
+ * large enough for the record.
+ *
+ * The record->data pointer is not threadsafe. For threadsafe access it is
+ * recommended to use @a HAM_RECORD_USER_ALLOC or have each thread manage its
+ * own Transaction.
+ */
+typedef struct {
+ /** The size of the record data, in bytes */
+ uint32_t size;
+
+ /** Pointer to the record data */
+ void *data;
+
+ /** The record flags; see @ref HAM_RECORD_USER_ALLOC */
+ uint32_t flags;
+
+ /** Offset for partial reading/writing; see @ref HAM_PARTIAL */
+ uint32_t partial_offset;
+
+ /** Size for partial reading/writing; see @ref HAM_PARTIAL */
+ uint32_t partial_size;
+
+} ham_record_t;
+
+/** Flag for @ref ham_record_t (only really useful in combination with
+ * @ref ham_cursor_move, @ref ham_cursor_find and @ref ham_db_find)
+ */
+#define HAM_RECORD_USER_ALLOC 1
+
+/**
+ * A macro to statically initialize a @ref ham_record_t structure.
+ *
+ * Usage:
+ * ham_record_t rec = ham_make_record(ptr, size);
+ */
+#define ham_make_record(PTR, SIZE) { SIZE, PTR, 0 }
+
+/**
+ * A generic key.
+ *
+ * A key represents key items in hamsterdb. Before using a key, it
+ * is important to initialize all key fields with zeroes, i.e. with
+ * the C library routines memset(3) or bzero(2).
+ *
+ * hamsterdb usually uses keys to insert, delete or search for items.
+ * However, when using Database Cursors and the function @ref ham_cursor_move,
+ * hamsterdb also returns keys. In this case, the pointer to the key
+ * data is provided in @a data. This pointer is only temporary and will be
+ * overwritten by subsequent calls to @ref ham_cursor_move using the
+ * same Transaction (or, if Transactions are disabled, using the same Database).
+ * The pointer will also be invalidated after the Transaction is aborted
+ * or committed.
+ *
+ * To avoid this, the calling application can allocate the @a data pointer.
+ * In this case, you have to set the flag @ref HAM_KEY_USER_ALLOC. The
+ * @a size parameter will then return the size of the key. It's the
+ * responsibility of the caller to make sure that the @a data parameter is
+ * large enough for the key.
+ *
+ * The key->data pointer is not threadsafe. For threadsafe access it is
+ * recommended to use @a HAM_KEY_USER_ALLOC or have each thread manage its
+ * own Transaction.
+ */
+typedef struct {
+ /** The size of the key, in bytes */
+ uint16_t size;
+
+ /** The data of the key */
+ void *data;
+
+ /** The key flags; see @ref HAM_KEY_USER_ALLOC */
+ uint32_t flags;
+
+ /** For internal use */
+ uint32_t _flags;
+
+} ham_key_t;
+
+/**
+ * A macro to statically initialize a @ref ham_key_t structure.
+ *
+ * Usage:
+ * ham_key_t key = ham_make_key(ptr, size);
+ */
+#define ham_make_key(PTR, SIZE) { SIZE, PTR, 0 }
+
+/** Flag for @ref ham_key_t (only really useful in combination with
+ * @ref ham_cursor_move, @ref ham_cursor_find and @ref ham_db_find)
+ */
+#define HAM_KEY_USER_ALLOC 1
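
[Editorial aside: a sketch of the HAM_RECORD_USER_ALLOC pattern described above, i.e. letting the application own the record buffer so that returned data is not invalidated by subsequent hamsterdb calls. ham_db_find is assumed from the 2.1.x API; the helper name fetch_into is hypothetical.]

    #include <string.h>
    #include <ham/hamsterdb.h>

    /* look up a key and have hamsterdb copy the record into a
       caller-owned buffer instead of returning a temporary pointer */
    static ham_status_t fetch_into(ham_db_t *db, const char *name, char *buffer) {
      ham_key_t key;
      ham_record_t record;

      memset(&key, 0, sizeof(key));
      memset(&record, 0, sizeof(record));

      key.data = (void *)name;
      key.size = (uint16_t)(strlen(name) + 1);

      /* the caller must make sure |buffer| is large enough for the
         record; record.size is filled in when the call returns */
      record.data = buffer;
      record.flags = HAM_RECORD_USER_ALLOC;

      return ham_db_find(db, 0, &key, &record, 0);
    }
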
+
+/**
+ * A named parameter.
+ *
+ * These parameter structures are used for functions like @ref ham_env_open,
+ * @ref ham_env_create, etc. to pass variable length parameter lists.
+ *
+ * The lists are always arrays of type ham_parameter_t, with a terminating
+ * element of { 0, NULL}, e.g.
+ *
+ * <pre>
+ * ham_parameter_t parameters[] = {
+ * { HAM_PARAM_CACHE_SIZE, 2 * 1024 * 1024 }, // set cache size to 2 mb
+ * { HAM_PARAM_PAGE_SIZE, 4096 }, // set page size to 4 kb
+ * { 0, NULL }
+ * };
+ * </pre>
+ */
+typedef struct {
+ /** The name of the parameter; all HAM_PARAM_*-constants */
+ uint32_t name;
+
+ /** The value of the parameter. */
+ uint64_t value;
+
+} ham_parameter_t;
+
+
+/**
+ * @defgroup ham_key_types hamsterdb Key Types
+ * @{
+ */
+
+/** A binary blob without type; sorted by memcmp */
+#define HAM_TYPE_BINARY 0
+/** A binary blob without type; sorted by callback function */
+#define HAM_TYPE_CUSTOM 1
+/** An unsigned 8-bit integer */
+#define HAM_TYPE_UINT8 3
+/** An unsigned 16-bit integer */
+#define HAM_TYPE_UINT16 5
+/** An unsigned 32-bit integer */
+#define HAM_TYPE_UINT32 7
+/** An unsigned 64-bit integer */
+#define HAM_TYPE_UINT64 9
+/** A 32-bit float */
+#define HAM_TYPE_REAL32 11
+/** A 64-bit double */
+#define HAM_TYPE_REAL64 12
+
+/**
+ * @}
+ */
+
+
+/**
+ * @defgroup ham_status_codes hamsterdb Status Codes
+ * @{
+ */
+
+/** Operation completed successfully */
+#define HAM_SUCCESS ( 0)
+/** Invalid record size */
+#define HAM_INV_RECORD_SIZE ( -2)
+/** Invalid key size */
+#define HAM_INV_KEY_SIZE ( -3)
+/* deprecated */
+#define HAM_INV_KEYSIZE HAM_INV_KEY_SIZE
+/** Invalid page size (must be 1024 or a multiple of 2048) */
+#define HAM_INV_PAGE_SIZE ( -4)
+/* deprecated */
+#define HAM_INV_PAGESIZE HAM_INV_PAGE_SIZE
+/** Memory allocation failed - out of memory */
+#define HAM_OUT_OF_MEMORY ( -6)
+/** Invalid function parameter */
+#define HAM_INV_PARAMETER ( -8)
+/** Invalid file header */
+#define HAM_INV_FILE_HEADER ( -9)
+/** Invalid file version */
+#define HAM_INV_FILE_VERSION (-10)
+/** Key was not found */
+#define HAM_KEY_NOT_FOUND (-11)
+/** Tried to insert a key which already exists */
+#define HAM_DUPLICATE_KEY (-12)
+/** Internal Database integrity violated */
+#define HAM_INTEGRITY_VIOLATED (-13)
+/** Internal hamsterdb error */
+#define HAM_INTERNAL_ERROR (-14)
+/** Tried to modify the Database, but the file was opened as read-only */
+#define HAM_WRITE_PROTECTED (-15)
+/** Database record not found */
+#define HAM_BLOB_NOT_FOUND (-16)
+/** Generic file I/O error */
+#define HAM_IO_ERROR (-18)
+/** Function is not yet implemented */
+#define HAM_NOT_IMPLEMENTED (-20)
+/** File not found */
+#define HAM_FILE_NOT_FOUND (-21)
+/** Operation would block */
+#define HAM_WOULD_BLOCK (-22)
+/** Object was not initialized correctly */
+#define HAM_NOT_READY (-23)
+/** Database limits reached */
+#define HAM_LIMITS_REACHED (-24)
+/** Object was already initialized */
+#define HAM_ALREADY_INITIALIZED (-27)
+/** Database needs recovery */
+#define HAM_NEED_RECOVERY (-28)
+/** Cursor must be closed prior to Transaction abort/commit */
+#define HAM_CURSOR_STILL_OPEN (-29)
+/** Record filter or file filter not found */
+#define HAM_FILTER_NOT_FOUND (-30)
+/** Operation conflicts with another Transaction */
+#define HAM_TXN_CONFLICT (-31)
+/* internal use: key was erased in a Transaction */
+#define HAM_KEY_ERASED_IN_TXN (-32)
+/** Database cannot be closed because it is modified in a Transaction */
+#define HAM_TXN_STILL_OPEN (-33)
+/** Cursor does not point to a valid item */
+#define HAM_CURSOR_IS_NIL (-100)
+/** Database not found */
+#define HAM_DATABASE_NOT_FOUND (-200)
+/** Database name already exists */
+#define HAM_DATABASE_ALREADY_EXISTS (-201)
+/** Database already open, or: Database handle is already initialized */
+#define HAM_DATABASE_ALREADY_OPEN (-202)
+/** Environment already open, or: Environment handle is already initialized */
+#define HAM_ENVIRONMENT_ALREADY_OPEN (-203)
+/** Invalid log file header */
+#define HAM_LOG_INV_FILE_HEADER (-300)
+/** Remote I/O error/Network error */
+#define HAM_NETWORK_ERROR (-400)
+
+/**
+ * @}
+ */
+
+
+/**
+ * @defgroup ham_static hamsterdb Static Functions
+ * @{
+ */
+
+/**
+ * A typedef for a custom error handler function
+ *
+ * This error handler can be used in combination with
+ * @ref ham_set_errhandler().
+ *
+ * @param message The error message
+ * @param level The error level:
+ * <ul>
+ * <li>@ref HAM_DEBUG_LEVEL_DEBUG (0) </li> a debug message
+ * <li>@ref HAM_DEBUG_LEVEL_NORMAL (1) </li> a normal error message
+ * <li>2</li> reserved
+ * <li>@ref HAM_DEBUG_LEVEL_FATAL (3) </li> a fatal error message
+ * </ul>
+ *
+ * @sa error_levels
+ */
+typedef void HAM_CALLCONV (*ham_errhandler_fun)(int level, const char *message);
+
+/** A debug message */
+#define HAM_DEBUG_LEVEL_DEBUG 0
+
+/** A normal error message */
+#define HAM_DEBUG_LEVEL_NORMAL 1
+
+/** A fatal error message */
+#define HAM_DEBUG_LEVEL_FATAL 3
+
+/**
+ * Sets the global error handler
+ *
+ * This handler will receive all debug messages that are emitted
+ * by hamsterdb. You can install the default handler by setting @a f to 0.
+ *
+ * The default error handler prints all messages to stderr. To install a
+ * different logging facility, you can provide your own error handler.
+ *
+ * Note that the callback function must have the same calling convention
+ * as the hamsterdb library.
+ *
+ * @param f A pointer to the error handler function, or NULL to restore
+ * the default handler
+ */
+HAM_EXPORT void HAM_CALLCONV
+ham_set_errhandler(ham_errhandler_fun f);
+
+/**
+ * Translates a hamsterdb status code to a descriptive error string
+ *
+ * @param status The hamsterdb status code
+ *
+ * @return A pointer to a descriptive error string
+ */
+HAM_EXPORT const char * HAM_CALLCONV
+ham_strerror(ham_status_t status);
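
[Editorial aside: a sketch of a custom error handler matching the ham_errhandler_fun typedef, combined with ham_strerror to turn a status code into text. Both declarations appear directly above; the ham_env_create/ham_env_close calls are only for illustration and their signatures are assumed from the 2.1.x API.]

    #include <stdio.h>
    #include <ham/hamsterdb.h>

    /* route hamsterdb's diagnostic output into the application's own log;
       the handler uses the same calling convention as the library */
    static void HAM_CALLCONV
    log_handler(int level, const char *message) {
      fprintf(stderr, "hamsterdb [level %d]: %s\n", level, message);
    }

    static void example(void) {
      ham_env_t *env;
      ham_status_t st;

      ham_set_errhandler(log_handler);      /* install the custom handler */

      st = ham_env_create(&env, "test.db", 0, 0644, 0);
      if (st != HAM_SUCCESS)
        fprintf(stderr, "ham_env_create failed: %s\n", ham_strerror(st));
      else
        ham_env_close(env, 0);

      ham_set_errhandler(0);                /* restore the default handler */
    }
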
+
+/**
+ * Returns the version of the hamsterdb library
+ *
+ * @param major If not NULL, will return the major version number
+ * @param minor If not NULL, will return the minor version number
+ * @param revision If not NULL, will return the revision version number
+ */
+HAM_EXPORT void HAM_CALLCONV
+ham_get_version(uint32_t *major, uint32_t *minor,
+ uint32_t *revision);
+
+/**
+ * @}
+ */
+
+
+/**
+ * @defgroup ham_env hamsterdb Environment Functions
+ * @{
+ */
+
+/**
+ * Creates a Database Environment
+ *
+ * A Database Environment is a collection of Databases, which are all stored
+ * in one physical file (or in-memory). The maximum number of Databases
+ * depends on the page size; the default is above 600.
+ *
+ * Each Database in an Environment is identified by a positive 16bit
+ * value (except 0 and values at or above 0xf000).
+ * Databases in an Environment can be created with @ref ham_env_create_db
+ * or opened with @ref ham_env_open_db.
+ *
+ * Specify a URL instead of a filename (i.e.
+ * "ham://localhost:8080/customers.db") to access a remote hamsterdb Server.
+ *
+ * To enable ACID Transactions, supply the flag @ref HAM_ENABLE_TRANSACTIONS.
+ * By default, hamsterdb will use a Journal for recovering the Environment
+ * and its data in case of a crash, and also to re-apply committed Transactions
+ * which were not yet flushed to disk. This Journalling can be disabled
+ * with the flag @ref HAM_DISABLE_RECOVERY. (It is disabled if the Environment
+ * is in-memory.)
+ *
+ * If Transactions are not required, but hamsterdb should still be able to
+ * recover in case of a crash or power outage, then the flag
+ * @ref HAM_ENABLE_RECOVERY will enable the Journal (without allowing
+ * Transactions.)
+ *
+ * For performance reasons the Journal does not use fsync(2) (or
+ * FlushFileBuffers on Win32) to flush modified buffers to disk. Use the flag
+ * @ref HAM_ENABLE_FSYNC to force the use of fsync.
+ *
+ * @param env A pointer to an Environment handle
+ * @param filename The filename of the Environment file. If the file already
+ * exists, it is overwritten. Can be NULL for an In-Memory
+ * Environment. Can be a URL ("ham://<hostname>:<port>/<environment>")
+ * for remote access.
+ * @param flags Optional flags for opening the Environment, combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_ENABLE_FSYNC</li> Flushes all file handles after
+ * committing or aborting a Transaction using fsync(), fdatasync()
+ *        or FlushFileBuffers(). This flag has no effect
+ * if Transactions are disabled. Slows down performance but makes
+ * sure that all file handles and operating system caches are
+ * transferred to disk, thus providing a stronger durability.
+ * <li>@ref HAM_IN_MEMORY</li> Creates an In-Memory Environment. No
+ * file will be created, and the Database contents are lost after
+ * the Environment is closed. The @a filename parameter can
+ * be NULL. Do <b>NOT</b> specify @a cache_size other than 0.
+ * <li>@ref HAM_DISABLE_MMAP</li> Do not use memory mapped files for I/O.
+ * By default, hamsterdb checks if it can use mmap,
+ * since mmap is faster than read/write. For performance
+ * reasons, this flag should not be used.
+ * <li>@ref HAM_CACHE_UNLIMITED</li> Do not limit the cache. Nearly as
+ * fast as an In-Memory Database. Not allowed in combination
+ * with a limited cache size.
+ * <li>@ref HAM_ENABLE_TRANSACTIONS</li> Enables Transactions for this
+ * Environment. This flag implies @ref HAM_ENABLE_RECOVERY.
+ * <li>@ref HAM_ENABLE_RECOVERY</li> Enables logging/recovery for this
+ * Environment. Not allowed in combination with @ref HAM_IN_MEMORY.
+ * <li>@ref HAM_DISABLE_RECOVERY</li> Disables logging/recovery for this
+ * Environment.
+ * <li>@ref HAM_FLUSH_WHEN_COMMITTED</li> Immediately flushes committed
+ * Transactions and writes them to the Btree. Disabled by default. If
+ * disabled then hamsterdb buffers committed Transactions and only starts
+ * flushing when too many Transactions were committed.
+ * </ul>
+ *
+ * @param mode File access rights for the new file. This is the @a mode
+ * parameter for creat(2). Ignored on Microsoft Windows. Default
+ * is 0644.
+ * @param param An array of ham_parameter_t structures. The following
+ * parameters are available:
+ * <ul>
+ * <li>@ref HAM_PARAM_CACHE_SIZE</li> The size of the Database cache,
+ * in bytes. The default size is defined in src/config.h
+ * as @a HAM_DEFAULT_CACHE_SIZE - usually 2MB
+ * <li>@ref HAM_PARAM_POSIX_FADVISE</li> Sets the "advice" for
+ * posix_fadvise(). Only on supported platforms. Allowed values are
+ * @ref HAM_POSIX_FADVICE_NORMAL (which is the default) or
+ * @ref HAM_POSIX_FADVICE_RANDOM.
+ * <li>@ref HAM_PARAM_PAGE_SIZE</li> The size of a file page, in
+ * bytes. It is recommended not to change the default size. The
+ * default size depends on hardware and operating system.
+ * Page sizes must be 1024 or a multiple of 2048.
+ * <li>@ref HAM_PARAM_FILE_SIZE_LIMIT</li> Sets a file size limit (in bytes).
+ * Disabled by default. Not allowed in combination with @ref HAM_IN_MEMORY.
+ * If the limit is exceeded, API functions return @ref HAM_LIMITS_REACHED.
+ * <li>@ref HAM_PARAM_LOG_DIRECTORY</li> The path of the log file
+ * and the journal files; default is the same path as the database
+ * file. Ignored for remote Environments.
+ * <li>@ref HAM_PARAM_NETWORK_TIMEOUT_SEC</li> Timeout (in seconds) when
+ * waiting for data from a remote server. By default, no timeout is set.
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if the @a env pointer is NULL or an
+ * invalid combination of flags or parameters was specified
+ * @return @ref HAM_IO_ERROR if the file could not be opened or
+ * reading/writing failed
+ * @return @ref HAM_INV_FILE_VERSION if the Environment version is not
+ * compatible with the library version
+ * @return @ref HAM_OUT_OF_MEMORY if memory could not be allocated
+ * @return @ref HAM_INV_PAGE_SIZE if @a page_size is not 1024 or
+ * a multiple of 2048
+ * @return @ref HAM_INV_KEY_SIZE if @a key_size is too large (at least 4
+ * keys must fit in a page)
+ * @return @ref HAM_WOULD_BLOCK if another process has locked the file
+ * @return @ref HAM_ENVIRONMENT_ALREADY_OPEN if @a env is already in use
+ *
+ * @sa ham_env_create
+ * @sa ham_env_close
+ * @sa ham_env_open
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_create(ham_env_t **env, const char *filename,
+ uint32_t flags, uint32_t mode, const ham_parameter_t *param);
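
[Editorial aside: a sketch that combines the flags and parameters documented above - a transactional Environment with a larger cache and an explicit page size. All names used here (HAM_ENABLE_TRANSACTIONS, HAM_PARAM_CACHE_SIZE, HAM_PARAM_PAGE_SIZE) appear in the documentation of ham_env_create; the helper name create_environment is hypothetical.]

    #include <ham/hamsterdb.h>

    static ham_status_t create_environment(ham_env_t **env) {
      /* 64 MB cache, 16 kb pages (a multiple of 2048), terminated by { 0, 0 } */
      ham_parameter_t params[] = {
        { HAM_PARAM_CACHE_SIZE, 64 * 1024 * 1024 },
        { HAM_PARAM_PAGE_SIZE,  16 * 1024 },
        { 0, 0 }
      };

      /* HAM_ENABLE_TRANSACTIONS implies HAM_ENABLE_RECOVERY (journalling) */
      return ham_env_create(env, "customers.db", HAM_ENABLE_TRANSACTIONS,
                    0644, &params[0]);
    }
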
+
+/**
+ * Opens an existing Database Environment
+ *
+ * This function opens an existing Database Environment.
+ *
+ * A Database Environment is a collection of Databases, which are all stored
+ * in one physical file (or in-memory).
+ *
+ * Each Database in an Environment is identified by a positive 16bit
+ * value (except 0 and values at or above 0xf000).
+ * Databases in an Environment can be created with @ref ham_env_create_db
+ * or opened with @ref ham_env_open_db.
+ *
+ * Specify a URL instead of a filename (i.e.
+ * "ham://localhost:8080/customers.db") to access a remote hamsterdb Server.
+ *
+ * Also see the documentation @ref ham_env_create about Transactions, Recovery
+ * and the use of fsync.
+ *
+ * @param env A valid Environment handle
+ * @param filename The filename of the Environment file, or URL of a hamsterdb
+ * Server
+ * @param flags Optional flags for opening the Environment, combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_READ_ONLY </li> Opens the file for reading only.
+ * Operations that need write access (i.e. @ref ham_db_insert) will
+ * return @ref HAM_WRITE_PROTECTED.
+ * <li>@ref HAM_ENABLE_FSYNC</li> Flushes all file handles after
+ * committing or aborting a Transaction using fsync(), fdatasync()
+ * or FlushFileBuffers(). This flag has no effect
+ * if Transactions are disabled. Slows down performance but makes
+ * sure that all file handles and operating system caches are
+ * transferred to disk, thus providing a stronger durability.
+ * <li>@ref HAM_DISABLE_MMAP </li> Do not use memory mapped files for I/O.
+ * By default, hamsterdb checks if it can use mmap,
+ * since mmap is faster than read/write. For performance
+ * reasons, this flag should not be used.
+ * <li>@ref HAM_CACHE_UNLIMITED </li> Do not limit the cache. Nearly as
+ * fast as an In-Memory Database. Not allowed in combination
+ * with a limited cache size.
+ * <li>@ref HAM_ENABLE_TRANSACTIONS </li> Enables Transactions for this
+ * Environment. This flag implies @ref HAM_ENABLE_RECOVERY.
+ * <li>@ref HAM_ENABLE_RECOVERY </li> Enables logging/recovery for this
+ * Environment. Will return @ref HAM_NEED_RECOVERY, if the Environment
+ * is in an inconsistent state. Not allowed in combination
+ * with @ref HAM_IN_MEMORY.
+ * <li>@ref HAM_DISABLE_RECOVERY</li> Disables logging/recovery for this
+ * Environment.
+ * <li>@ref HAM_AUTO_RECOVERY </li> Automatically recover the Environment,
+ * if necessary. This flag implies @ref HAM_ENABLE_RECOVERY.
+ * <li>@ref HAM_FLUSH_WHEN_COMMITTED</li> Immediately flushes committed
+ * Transactions and writes them to the Btree. Disabled by default. If
+ * disabled then hamsterdb buffers committed Transactions and only starts
+ * flushing when too many Transactions were committed.
+ * </ul>
+ * @param param An array of ham_parameter_t structures. The following
+ * parameters are available:
+ * <ul>
+ * <li>@ref HAM_PARAM_CACHE_SIZE </li> The size of the Database cache,
+ * in bytes. The default size is defined in src/config.h
+ * as @a HAM_DEFAULT_CACHE_SIZE - usually 2MB
+ * <li>@ref HAM_PARAM_POSIX_FADVISE</li> Sets the "advice" for
+ * posix_fadvise(). Only on supported platforms. Allowed values are
+ * @ref HAM_POSIX_FADVICE_NORMAL (which is the default) or
+ * @ref HAM_POSIX_FADVICE_RANDOM.
+ * <li>@ref HAM_PARAM_FILE_SIZE_LIMIT</li> Sets a file size limit (in bytes).
+ * Disabled by default. If the limit is exceeded, API functions
+ * return @ref HAM_LIMITS_REACHED.
+ * <li>@ref HAM_PARAM_LOG_DIRECTORY</li> The path of the log file
+ * and the journal files; default is the same path as the database
+ * file. Ignored for remote Environments.
+ * <li>@ref HAM_PARAM_NETWORK_TIMEOUT_SEC</li> Timeout (in seconds) when
+ * waiting for data from a remote server. By default, no timeout is set.
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success.
+ * @return @ref HAM_INV_PARAMETER if the @a env pointer is NULL or an
+ * invalid combination of flags was specified
+ * @return @ref HAM_FILE_NOT_FOUND if the file does not exist
+ * @return @ref HAM_IO_ERROR if the file could not be opened or reading failed
+ * @return @ref HAM_INV_FILE_VERSION if the Environment version is not
+ * compatible with the library version.
+ * @return @ref HAM_OUT_OF_MEMORY if memory could not be allocated
+ * @return @ref HAM_WOULD_BLOCK if another process has locked the file
+ * @return @ref HAM_NEED_RECOVERY if the Database is in an inconsistent state
+ * @return @ref HAM_LOG_INV_FILE_HEADER if the logfile is corrupt
+ * @return @ref HAM_ENVIRONMENT_ALREADY_OPEN if @a env is already in use
+ * @return @ref HAM_NETWORK_ERROR if a remote server is not reachable
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_open(ham_env_t **env, const char *filename,
+ uint32_t flags, const ham_parameter_t *param);
+
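+/*
+ * A minimal usage sketch for @ref ham_env_open: open an existing file
+ * read-only, without additional parameters:
+ *
+ *   ham_env_t *env;
+ *   ham_status_t st = ham_env_open(&env, "customers.db", HAM_READ_ONLY, 0);
+ *   if (st == HAM_FILE_NOT_FOUND) {
+ *     // the file does not exist yet; create it with ham_env_create instead
+ *   }
+ */
+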
+/**
+ * Retrieve the current value for a given Environment setting
+ *
+ * Only those values requested by the parameter array will be stored.
+ *
+ * The following parameters are supported:
+ * <ul>
+ * <li>HAM_PARAM_CACHE_SIZE</li> returns the cache size
+ * <li>HAM_PARAM_PAGE_SIZE</li> returns the page size
+ * <li>HAM_PARAM_MAX_DATABASES</li> returns the max. number of
+ * Databases of this Database's Environment
+ * <li>HAM_PARAM_FLAGS</li> returns the flags which were used to
+ * open or create this Database
+ * <li>HAM_PARAM_FILEMODE</li> returns the @a mode parameter which
+ * was specified when creating this Database
+ * <li>HAM_PARAM_FILENAME</li> returns the filename (the @a value
+ * of this parameter is a const char * pointer casted to a
+ * uint64_t variable)
+ * <li>@ref HAM_PARAM_LOG_DIRECTORY</li> The path of the log file
+ * and the journal files. Ignored for remote Environments.
+ * <li>@ref HAM_PARAM_JOURNAL_COMPRESSION</li> Returns the
+ * selected algorithm for journal compression, or 0 if compression
+ * is disabled
+ * </ul>
+ *
+ * @param env A valid Environment handle
+ * @param param An array of ham_parameter_t structures
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if the @a env pointer is NULL or
+ * @a param is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_get_parameters(ham_env_t *env, ham_parameter_t *param);
+
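+/*
+ * A short sketch for @ref ham_env_get_parameters, assuming @a env was
+ * created or opened as shown above and that the parameter array is
+ * terminated with a zeroed element; the requested values are returned in
+ * the @a value member of each entry:
+ *
+ *   ham_parameter_t query[] = {
+ *     { HAM_PARAM_CACHE_SIZE, 0 },
+ *     { HAM_PARAM_PAGE_SIZE,  0 },
+ *     { 0, 0 }
+ *   };
+ *   if (ham_env_get_parameters(env, &query[0]) == HAM_SUCCESS) {
+ *     uint64_t cache_size = query[0].value;
+ *     uint64_t page_size  = query[1].value;
+ *   }
+ */
+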
+/**
+ * Creates a new Database in a Database Environment
+ *
+ * An Environment can contain a (limited) number of Databases; the exact
+ * limit depends on the page size and is above 600.
+ *
+ * Each Database in an Environment is identified by a positive 16bit
+ * value. 0 and values at or above 0xf000 are reserved.
+ *
+ * This function initializes the ham_db_t handle (the second parameter).
+ * When the handle is no longer in use, it should be closed with
+ * @ref ham_db_close. Alternatively, the Database handle is closed
+ * automatically if @ref ham_env_close is called with the flag
+ * @ref HAM_AUTO_CLEANUP.
+ *
+ * A Database can (and should) be configured and optimized for the data that
+ * is inserted. The data is described through flags and parameters. hamsterdb
+ * differentiates between several data characteristics, and offers predefined
+ * "types" to describe the keys. In general, the default key type
+ * (@ref HAM_TYPE_BINARY) is slower than the other types, and
+ * fixed-length binary keys (@ref HAM_TYPE_BINARY in combination with
+ * @ref HAM_PARAM_KEY_SIZE) are faster than variable-length binary
+ * keys. It is therefore recommended to always set the key size and record
+ * size, although this is not required.
+ *
+ * Internally, hamsterdb uses two different layouts ("default" and "pax")
+ * depending on the settings specified by the user. The "default" layout
+ * is enabled for variable-length keys or if duplicate keys are enabled.
+ * For fixed-length keys (without duplicates) the "pax" layout is chosen.
+ * The "pax" layout is more compact and usually faster.
+ *
+ * A word of warning regarding the use of fixed length binary keys
+ * (@ref HAM_TYPE_CUSTOM or @ref HAM_TYPE_BINARY in combination with
+ * @ref HAM_PARAM_KEY_SIZE): if your key size is too large, only a few keys
+ * will fit in a Btree node. The Btree fanout will then be very low, which will
+ * decrease performance. In such cases it might be better to NOT specify
+ * the key size; then hamsterdb will store keys as blobs if they are too large.
+ *
+ * See the Wiki documentation for <a href=
+ "https://github.com/cruppstahl/hamsterdb/wiki/Evaluating-and-Benchmarking">
+ * Evaluating and Benchmarking</a> on how to test different configurations and
+ * optimize for performance.
+ *
+ * The key type is set with @ref HAM_PARAM_KEY_TYPE and can have either
+ * of the following values:
+ *
+ * <ul>
+ * <li>HAM_TYPE_BINARY</li> This is the default key type: a binary blob.
+ * Internally, hamsterdb uses memcmp(3) for the sort order. Key size depends
+ * on @ref HAM_PARAM_KEY_SIZE and is unlimited (@ref HAM_KEY_SIZE_UNLIMITED)
+ * by default.
+ * <li>HAM_TYPE_CUSTOM</li> Similar to @ref HAM_TYPE_BINARY, but
+ * uses a callback function for the sort order. This function is supplied
+ * by the application with @ref ham_db_set_compare_func.
+ * <li>HAM_TYPE_UINT8</li> Key is an 8bit (1 byte) unsigned integer
+ * <li>HAM_TYPE_UINT16</li> Key is a 16bit (2 byte) unsigned integer
+ * <li>HAM_TYPE_UINT32</li> Key is a 32bit (4 byte) unsigned integer
+ * <li>HAM_TYPE_UINT64</li> Key is a 64bit (8 byte) unsigned integer
+ * <li>HAM_TYPE_REAL32</li> Key is a 32bit (4 byte) float
+ * <li>HAM_TYPE_REAL64</li> Key is a 64bit (8 byte) double
+ * </ul>
+ *
+ * If the key type is omitted then @ref HAM_TYPE_BINARY is the default.
+ *
+ * If binary/custom keys are so big that they cannot be stored in the Btree,
+ * then the full key will be stored in an overflow area, which has
+ * performance implications when accessing such keys.
+ *
+ * In addition, you can specify the flag @ref HAM_ENABLE_DUPLICATE_KEYS
+ * to insert duplicate keys, i.e. to model 1:n or n:m relationships.
+ *
+ * If the size of the records is always constant, then
+ * @ref HAM_PARAM_RECORD_SIZE should be used to specify this size. This allows
+ * hamsterdb to optimize the record storage, and small records will
+ * automatically be stored in the Btree's leaf nodes instead of a separately
+ * allocated blob, allowing faster access.
+ * A record size of 0 is valid and suited for boolean values ("key exists"
+ * vs "key doesn't exist"). The default record size is
+ * @ref HAM_RECORD_SIZE_UNLIMITED.
+ *
+ * @param env A valid Environment handle.
+ * @param db A valid Database handle, which will point to the created
+ * Database. To close the handle, use @ref ham_db_close.
+ * @param name The name of the Database. If a Database with this name
+ * already exists, the function will fail with
+ * @ref HAM_DATABASE_ALREADY_EXISTS. Database names from 0xf000 to
+ * 0xffff and 0 are reserved.
+ * @param flags Optional flags for creating the Database, combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_ENABLE_DUPLICATE_KEYS </li> Enable duplicate keys for this
+ * Database. By default, duplicate keys are disabled.
+ * <li>@ref HAM_RECORD_NUMBER32 </li> Creates an "auto-increment" Database.
+ * Keys in Record Number Databases are automatically assigned an
+ * incrementing 32bit value. If key->data is not NULL
+ * (and key->flags is @ref HAM_KEY_USER_ALLOC), the value of the current
+ * key is returned in @a key. If key->data is NULL and key->size is 0,
+ * key->data is temporarily allocated by hamsterdb.
+ * <li>@ref HAM_RECORD_NUMBER64 </li> Creates an "auto-increment" Database.
+ * Keys in Record Number Databases are automatically assigned an
+ * incrementing 64bit value. If key->data is not NULL
+ * (and key->flags is @ref HAM_KEY_USER_ALLOC), the value of the current
+ * key is returned in @a key. If key->data is NULL and key->size is 0,
+ * key->data is temporarily allocated by hamsterdb.
+ * </ul>
+ *
+ * @param params An array of ham_parameter_t structures. The following
+ * parameters are available:
+ * <ul>
+ * <li>@ref HAM_PARAM_KEY_TYPE </li> The type of the keys in the B+Tree
+ * index. The default is @ref HAM_TYPE_BINARY. See above for more
+ * information.
+ * <li>@ref HAM_PARAM_KEY_SIZE </li> The (fixed) size of the keys in
+ * the B+Tree index; or @ref HAM_KEY_SIZE_UNLIMITED for unlimited and
+ * variable keys (this is the default).
+ * <li>@ref HAM_PARAM_RECORD_SIZE </li> The (fixed) size of the records;
+ * or @ref HAM_RECORD_SIZE_UNLIMITED if there was no fixed record size
+ * specified (this is the default).
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if the @a env pointer is NULL or an
+ * invalid combination of flags was specified
+ * @return @ref HAM_DATABASE_ALREADY_EXISTS if a Database with this @a name
+ * already exists in this Environment
+ * @return @ref HAM_OUT_OF_MEMORY if memory could not be allocated
+ * @return @ref HAM_LIMITS_REACHED if the maximum number of Databases per
+ * Environment was already created
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_create_db(ham_env_t *env, ham_db_t **db,
+ uint16_t name, uint32_t flags, const ham_parameter_t *params);
+
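+/*
+ * A usage sketch for @ref ham_env_create_db: create Database 1 with
+ * fixed-size 32bit integer keys and a fixed record size of 8 bytes,
+ * assuming @a env was created as shown above and that the parameter array
+ * is terminated with a zeroed element:
+ *
+ *   ham_db_t *db;
+ *   ham_parameter_t params[] = {
+ *     { HAM_PARAM_KEY_TYPE, HAM_TYPE_UINT32 },
+ *     { HAM_PARAM_RECORD_SIZE, 8 },
+ *     { 0, 0 }
+ *   };
+ *   ham_status_t st = ham_env_create_db(env, &db, 1, 0, &params[0]);
+ */
+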
+/**
+ * Opens a Database in a Database Environment
+ *
+ * Each Database in an Environment is identified by a positive 16bit
+ * value (except 0 and values at or above 0xf000).
+ *
+ * This function initializes the ham_db_t handle (the second parameter).
+ * When the handle is no longer in use, it should be closed with
+ * @ref ham_db_close. Alternatively, the Database handle is closed
+ * automatically if @ref ham_env_close is called with the flag
+ * @ref HAM_AUTO_CLEANUP.
+ *
+ * @param env A valid Environment handle
+ * @param db A valid Database handle, which will point to the opened
+ * Database. To close the handle, use @ref ham_db_close.
+ * @param name The name of the Database. If a Database with this name
+ * does not exist, the function will fail with
+ * @ref HAM_DATABASE_NOT_FOUND.
+ * @param flags Optional flags for opening the Database, combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_READ_ONLY </li> Opens the Database for reading only.
+ * Operations that need write access (e.g. @ref ham_db_insert) will
+ * return @ref HAM_WRITE_PROTECTED.
+ * </ul>
+ * @param params Reserved; set to NULL
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if the @a env pointer is NULL or an
+ * invalid combination of flags was specified
+ * @return @ref HAM_DATABASE_NOT_FOUND if a Database with this @a name
+ * does not exist in this Environment.
+ * @return @ref HAM_DATABASE_ALREADY_OPEN if this Database was already
+ * opened
+ * @return @ref HAM_OUT_OF_MEMORY if memory could not be allocated
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_open_db(ham_env_t *env, ham_db_t **db,
+ uint16_t name, uint32_t flags, const ham_parameter_t *params);
+
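+/*
+ * A usage sketch for @ref ham_env_open_db, falling back to
+ * @ref ham_env_create_db if the Database does not yet exist:
+ *
+ *   ham_db_t *db;
+ *   ham_status_t st = ham_env_open_db(env, &db, 1, 0, 0);
+ *   if (st == HAM_DATABASE_NOT_FOUND)
+ *     st = ham_env_create_db(env, &db, 1, 0, 0);
+ */
+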
+/**
+ * Renames a Database in an Environment.
+ *
+ * @param env A valid Environment handle.
+ * @param oldname The old name of the existing Database. If a Database
+ * with this name does not exist, the function will fail with
+ * @ref HAM_DATABASE_NOT_FOUND.
+ * @param newname The new name of this Database. If a Database
+ * with this name already exists, the function will fail with
+ * @ref HAM_DATABASE_ALREADY_EXISTS.
+ * @param flags Optional flags for renaming the Database, combined with
+ * bitwise OR; unused, set to 0.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if the @a env pointer is NULL or if
+ * the new Database name is reserved
+ * @return @ref HAM_DATABASE_NOT_FOUND if a Database with this @a name
+ * does not exist in this Environment
+ * @return @ref HAM_DATABASE_ALREADY_EXISTS if a Database with the new name
+ * already exists
+ * @return @ref HAM_OUT_OF_MEMORY if memory could not be allocated
+ * @return @ref HAM_NOT_READY if the Environment @a env was not initialized
+ * correctly (i.e. not yet opened or created)
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_rename_db(ham_env_t *env, uint16_t oldname,
+ uint16_t newname, uint32_t flags);
+
+/**
+ * Deletes a Database from an Environment
+ *
+ * @param env A valid Environment handle
+ * @param name The name of the Database to delete. If a Database
+ * with this name does not exist, the function will fail with
+ * @ref HAM_DATABASE_NOT_FOUND. If the Database was already opened,
+ * the function will fail with @ref HAM_DATABASE_ALREADY_OPEN.
+ * @param flags Optional flags for deleting the Database; unused, set to 0.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if the @a env pointer is NULL or if
+ * the Database name is reserved
+ * @return @ref HAM_DATABASE_NOT_FOUND if a Database with this @a name
+ * does not exist
+ * @return @ref HAM_DATABASE_ALREADY_OPEN if a Database with this name is
+ * still open
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_erase_db(ham_env_t *env, uint16_t name, uint32_t flags);
+
+/* internal flag - only flush committed transactions, not the btree pages */
+#define HAM_FLUSH_COMMITTED_TRANSACTIONS 1
+
+/**
+ * Flushes the Environment
+ *
+ * This function flushes the Environment caches and writes the whole file
+ * to disk. All Databases of this Environment are flushed as well.
+ *
+ * Since In-Memory Databases do not have a file on disk, the
+ * function will have no effect and will return @ref HAM_SUCCESS.
+ *
+ * @param env A valid Environment handle
+ * @param flags Optional flags for flushing; unused, set to 0
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a env is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_flush(ham_env_t *env, uint32_t flags);
+
+/* internal use only - don't lock mutex */
+#define HAM_DONT_LOCK 0xf0000000
+
+/**
+ * Returns the names of all Databases in an Environment
+ *
+ * This function returns the names of all Databases and the number of
+ * Databases in an Environment.
+ *
+ * The memory for @a names must be allocated by the user. @a count
+ * must be the size of @a names when calling the function, and will be
+ * the number of Databases when the function returns. The function returns
+ * @ref HAM_LIMITS_REACHED if @a names is not big enough; in this case, the
+ * caller should resize the array and call the function again.
+ *
+ * @param env A valid Environment handle
+ * @param names Pointer to an array for the Database names
+ * @param count Pointer to the size of the array; will be used to store the
+ * number of Databases when the function returns.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a env, @a names or @a count is NULL
+ * @return @ref HAM_LIMITS_REACHED if @a names is not large enough to hold
+ * all Database names
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_get_database_names(ham_env_t *env, uint16_t *names,
+ uint32_t *count);
+
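+/*
+ * A usage sketch for @ref ham_env_get_database_names with a caller-allocated
+ * array; on @ref HAM_LIMITS_REACHED the array would have to be enlarged and
+ * the call repeated:
+ *
+ *   uint16_t names[32];
+ *   uint32_t count = sizeof(names) / sizeof(names[0]);
+ *   ham_status_t st = ham_env_get_database_names(env, &names[0], &count);
+ *   if (st == HAM_SUCCESS) {
+ *     for (uint32_t i = 0; i < count; i++) {
+ *       // names[i] is the name of the i'th Database
+ *     }
+ *   }
+ */
+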
+/**
+ * Closes the Database Environment
+ *
+ * This function closes the Database Environment. It also frees the
+ * memory resources allocated in the @a env handle, and tries to truncate
+ * the file (see below).
+ *
+ * If the flag @ref HAM_AUTO_CLEANUP is specified, hamsterdb automatically
+ * calls @ref ham_db_close with flag @ref HAM_AUTO_CLEANUP on all open
+ * Databases (which closes all open Databases and their Cursors). This
+ * invalidates the ham_db_t and ham_cursor_t handles!
+ *
+ * If the flag is not specified, the application must close all Database
+ * handles with @ref ham_db_close to prevent memory leaks.
+ *
+ * This function also aborts all Transactions which were not yet committed,
+ * and therefore renders all Transaction handles invalid. If the flag
+ * @ref HAM_TXN_AUTO_COMMIT is specified, all Transactions will be committed.
+ *
+ * This function also tries to truncate the file and "cut off" unused space
+ * at the end of the file to reduce the file size. This feature is disabled
+ * on Win32 if memory mapped I/O is used (see @ref HAM_DISABLE_MMAP).
+ *
+ * @param env A valid Environment handle
+ * @param flags Optional flags for closing the handle. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_AUTO_CLEANUP. Calls @ref ham_db_close with the flag
+ * @ref HAM_AUTO_CLEANUP on every open Database
+ * <li>@ref HAM_TXN_AUTO_COMMIT. Automatically commit all open
+ * Transactions
+ * <li>@ref HAM_TXN_AUTO_ABORT. Automatically abort all open
+ * Transactions; this is the default behaviour
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a env is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_close(ham_env_t *env, uint32_t flags);
+
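+/*
+ * A typical shutdown sketch: closing the Environment with
+ * @ref HAM_AUTO_CLEANUP also closes all open Databases and Cursors; open
+ * Transactions are aborted unless @ref HAM_TXN_AUTO_COMMIT is added:
+ *
+ *   ham_env_close(env, HAM_AUTO_CLEANUP);
+ */
+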
+/**
+ * @}
+ */
+
+
+/**
+ * @defgroup ham_txn hamsterdb Transaction Functions
+ * @{
+ */
+
+/**
+ * The hamsterdb Transaction structure
+ *
+ * This structure is allocated with @ref ham_txn_begin and deleted with
+ * @ref ham_txn_commit or @ref ham_txn_abort.
+ */
+struct ham_txn_t;
+typedef struct ham_txn_t ham_txn_t;
+
+/**
+ * Begins a new Transaction
+ *
+ * A Transaction is an atomic sequence of Database operations. A new
+ * sequence is started with @ref ham_txn_begin. To write all operations of this
+ * sequence to the Database use @ref ham_txn_commit. To abort and cancel
+ * this sequence use @ref ham_txn_abort.
+ *
+ * In order to use Transactions, the Environment has to be created or
+ * opened with the flag @ref HAM_ENABLE_TRANSACTIONS.
+ *
+ * You can create as many Transactions as you want (older versions of
+ * hamsterdb did not allow creating more than one Transaction in parallel).
+ *
+ * @param txn Pointer to a pointer of a Transaction structure
+ * @param env A valid Environment handle
+ * @param name An optional Transaction name
+ * @param reserved A reserved pointer; always set to NULL
+ * @param flags Optional flags for beginning the Transaction, combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_TXN_READ_ONLY </li> This Transaction is read-only and
+ * will not modify the Database.
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_OUT_OF_MEMORY if memory allocation failed
+ */
+HAM_EXPORT ham_status_t
+ham_txn_begin(ham_txn_t **txn, ham_env_t *env, const char *name,
+ void *reserved, uint32_t flags);
+
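+/*
+ * A sketch of the typical Transaction pattern, assuming @a env was created
+ * with @ref HAM_ENABLE_TRANSACTIONS and that @a key and @a record were
+ * filled as described for @ref ham_db_insert further below:
+ *
+ *   ham_txn_t *txn;
+ *   ham_status_t st = ham_txn_begin(&txn, env, 0, 0, 0);
+ *   if (st != HAM_SUCCESS)
+ *     return st;
+ *   st = ham_db_insert(db, txn, &key, &record, 0);
+ *   if (st == HAM_SUCCESS)
+ *     st = ham_txn_commit(txn, 0);
+ *   else
+ *     ham_txn_abort(txn, 0);
+ */
+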
+/** Flag for @ref ham_txn_begin */
+#define HAM_TXN_READ_ONLY 1
+
+/* Internal flag for @ref ham_txn_begin */
+#define HAM_TXN_TEMPORARY 2
+
+/**
+ * Retrieves the Transaction name
+ *
+ * @returns NULL if the name was not assigned or if @a txn is invalid
+ */
+HAM_EXPORT const char *
+ham_txn_get_name(ham_txn_t *txn);
+
+/**
+ * Commits a Transaction
+ *
+ * This function applies the sequence of Database operations.
+ *
+ * Note that the function will fail with @ref HAM_CURSOR_STILL_OPEN if
+ * a Cursor was attached to this Transaction (with @ref ham_cursor_create
+ * or @ref ham_cursor_clone), and the Cursor was not closed.
+ *
+ * @param txn Pointer to a Transaction structure
+ * @param flags Optional flags for committing the Transaction, combined with
+ * bitwise OR. Unused, set to 0.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_IO_ERROR if writing to the file failed
+ * @return @ref HAM_CURSOR_STILL_OPEN if there are Cursors attached to this
+ * Transaction
+ */
+HAM_EXPORT ham_status_t
+ham_txn_commit(ham_txn_t *txn, uint32_t flags);
+
+/**
+ * Aborts a Transaction
+ *
+ * This function aborts (= cancels) the sequence of Database operations.
+ *
+ * Note that the function will fail with @ref HAM_CURSOR_STILL_OPEN if
+ * a Cursor was attached to this Transaction (with @ref ham_cursor_create
+ * or @ref ham_cursor_clone), and the Cursor was not closed.
+ *
+ * @param txn Pointer to a Transaction structure
+ * @param flags Optional flags for aborting the Transaction, combined with
+ * bitwise OR. Unused, set to 0.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_IO_ERROR if writing to the Database file or logfile failed
+ * @return @ref HAM_CURSOR_STILL_OPEN if there are Cursors attached to this
+ * Transaction
+ */
+HAM_EXPORT ham_status_t
+ham_txn_abort(ham_txn_t *txn, uint32_t flags);
+
+/**
+ * @}
+ */
+
+
+/**
+ * @defgroup ham_database hamsterdb Database Functions
+ * @{
+ */
+
+/** Flag for @ref ham_env_open, @ref ham_env_create.
+ * This flag is non persistent. */
+#define HAM_ENABLE_FSYNC 0x00000001
+
+/* unused 0x00000002 */
+
+/** Flag for @ref ham_env_open, @ref ham_env_open_db.
+ * This flag is non persistent. */
+#define HAM_READ_ONLY 0x00000004
+
+/* unused 0x00000008 */
+
+/* unused 0x00000010 */
+
+/* reserved 0x00000020 */
+
+/* unused 0x00000040 */
+
+/** Flag for @ref ham_env_create.
+ * This flag is non persistent. */
+#define HAM_IN_MEMORY 0x00000080
+
+/* reserved: DB_USE_MMAP (not persistent) 0x00000100 */
+
+/** Flag for @ref ham_env_open, @ref ham_env_create.
+ * This flag is non persistent. */
+#define HAM_DISABLE_MMAP 0x00000200
+
+/* deprecated */
+#define HAM_RECORD_NUMBER HAM_RECORD_NUMBER64
+
+/** Flag for @ref ham_env_create_db.
+ * This flag is persisted in the Database. */
+#define HAM_RECORD_NUMBER32 0x00001000
+
+/** Flag for @ref ham_env_create_db.
+ * This flag is persisted in the Database. */
+#define HAM_RECORD_NUMBER64 0x00002000
+
+/** Flag for @ref ham_env_create_db.
+ * This flag is persisted in the Database. */
+#define HAM_ENABLE_DUPLICATE_KEYS 0x00004000
+/* deprecated */
+#define HAM_ENABLE_DUPLICATES HAM_ENABLE_DUPLICATE_KEYS
+
+/** Flag for @ref ham_env_create, @ref ham_env_open.
+ * This flag is non persistent. */
+#define HAM_ENABLE_RECOVERY 0x00008000
+
+/** Flag for @ref ham_env_open.
+ * This flag is non persistent. */
+#define HAM_AUTO_RECOVERY 0x00010000
+
+/** Flag for @ref ham_env_create, @ref ham_env_open.
+ * This flag is non persistent. */
+#define HAM_ENABLE_TRANSACTIONS 0x00020000
+
+/** Flag for @ref ham_env_open, @ref ham_env_create.
+ * This flag is non persistent. */
+#define HAM_CACHE_UNLIMITED 0x00040000
+
+/** Flag for @ref ham_env_create, @ref ham_env_open.
+ * This flag is non persistent. */
+#define HAM_DISABLE_RECOVERY 0x00080000
+
+/* internal use only! (not persistent) */
+#define HAM_IS_REMOTE_INTERNAL 0x00200000
+
+/* internal use only! (not persistent) */
+#define HAM_DISABLE_RECLAIM_INTERNAL 0x00400000
+
+/* internal use only! (persistent) */
+#define HAM_FORCE_RECORDS_INLINE 0x00800000
+
+/** Flag for @ref ham_env_open, @ref ham_env_create.
+ * This flag is non persistent. */
+#define HAM_FLUSH_WHEN_COMMITTED 0x01000000
+
+/** Pro: Flag for @ref ham_env_open, @ref ham_env_create.
+ * This flag is non persistent. */
+#define HAM_ENABLE_CRC32 0x02000000
+
+/**
+ * Returns the last error code
+ *
+ * @note This API is deprecated! It will be removed in one of the
+ * next versions.
+ *
+ * @param db A valid Database handle
+ *
+ * @return The last error code which was returned by one of the
+ * hamsterdb API functions. Use @ref ham_strerror to translate
+ * this code to a descriptive string
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_get_error(ham_db_t *db);
+
+/**
+ * Typedef for a key comparison function
+ *
+ * @remark This function compares two index keys. It returns -1 if @a lhs
+ * ("left-hand side", the parameter on the left side) is smaller than
+ * @a rhs ("right-hand side"), 0 if both keys are equal, and 1 if @a lhs
+ * is larger than @a rhs.
+ */
+typedef int HAM_CALLCONV (*ham_compare_func_t)(ham_db_t *db,
+ const uint8_t *lhs, uint32_t lhs_length,
+ const uint8_t *rhs, uint32_t rhs_length);
+
+/**
+ * Sets the comparison function
+ *
+ * The comparison function compares two index keys. It returns -1 if the
+ * first key is smaller, +1 if the first key is larger, or 0 if both
+ * keys are equal.
+ *
+ * Supplying a comparison function is only allowed for the key type
+ * @ref HAM_TYPE_CUSTOM; see the documentation of @ref ham_env_create_db
+ * for more information.
+ *
+ * @param db A valid Database handle
+ * @param foo A pointer to the compare function
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ * @return @ref HAM_INV_PARAMETER if the database's key type was not
+ * specified as @ref HAM_TYPE_CUSTOM
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_set_compare_func(ham_db_t *db, ham_compare_func_t foo);
+
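+/*
+ * A sketch of a custom comparison callback for @ref HAM_TYPE_CUSTOM keys;
+ * this one simply reproduces memcmp-like ordering (requires <string.h>):
+ *
+ *   static int HAM_CALLCONV
+ *   my_compare(ham_db_t *db, const uint8_t *lhs, uint32_t lhs_length,
+ *               const uint8_t *rhs, uint32_t rhs_length) {
+ *     uint32_t len = lhs_length < rhs_length ? lhs_length : rhs_length;
+ *     int cmp = memcmp(lhs, rhs, len);
+ *     (void)db;  // unused in this example
+ *     if (cmp != 0)
+ *       return cmp < 0 ? -1 : +1;
+ *     if (lhs_length < rhs_length) return -1;
+ *     if (lhs_length > rhs_length) return +1;
+ *     return 0;
+ *   }
+ *
+ *   // register it after creating the Database with HAM_TYPE_CUSTOM keys
+ *   ham_db_set_compare_func(db, my_compare);
+ */
+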
+/**
+ * Searches an item in the Database
+ *
+ * This function searches the Database for @a key. If the key
+ * is found, @a record will receive the record of this item and
+ * @ref HAM_SUCCESS is returned. If the key is not found, the function
+ * returns @ref HAM_KEY_NOT_FOUND.
+ *
+ * A ham_record_t structure should be initialized with
+ * zeroes before it is used. This can be done with the C library
+ * routines memset(3) or bzero(2).
+ *
+ * If the function completes successfully, the @a record pointer is
+ * initialized with the size of the record (in @a record.size) and the
+ * actual record data (in @a record.data). If the record is empty,
+ * @a size is 0 and @a data points to NULL.
+ *
+ * The @a data pointer is a temporary pointer and will be overwritten
+ * by subsequent hamsterdb API calls using the same Transaction
+ * (or, if Transactions are disabled, using the same Database).
+ * You can alter this behaviour by allocating the @a data pointer in
+ * the application and setting @a record.flags to @ref HAM_RECORD_USER_ALLOC.
+ * Make sure that the allocated buffer is large enough.
+ *
+ * When specifying @ref HAM_DIRECT_ACCESS, the @a data pointer will point
+ * directly to the record that is stored in hamsterdb; the data can be modified,
+ * but the pointer must not be reallocated or freed. The flag @ref
+ * HAM_DIRECT_ACCESS is only allowed in In-Memory Databases and not if
+ * Transactions are enabled.
+ *
+ * @ref ham_db_find cannot search for duplicate keys. If @a key has
+ * multiple duplicates, only the first duplicate is returned.
+ *
+ * You can read only portions of the record by specifying the flag
+ * @ref HAM_PARTIAL. In this case, hamsterdb will read
+ * <b>record->partial_size</b> bytes of the record data at offset
+ * <b>record->partial_offset</b>. If necessary, the record data will
+ * be limited to the original record size. The number of actually read
+ * bytes is returned in <b>record->partial_size</b>. The original size of
+ * the record is stored in <b>record->size</b>.
+ *
+ * @ref HAM_PARTIAL is not allowed if record->size is <= 8 or if Transactions
+ * are enabled. In such a case, @ref HAM_INV_PARAMETER is returned.
+ *
+ * If Transactions are enabled (see @ref HAM_ENABLE_TRANSACTIONS) and
+ * @a txn is NULL then hamsterdb will create a temporary Transaction.
+ * When moving the Cursor, and the new key is currently modified in an
+ * active Transaction (one that is not yet committed or aborted) then
+ * hamsterdb will skip this key and move to the next/previous one. However, if
+ * @a flags are 0 (and the Cursor is not moved), and @a key or @a record
+ * is NOT NULL, then hamsterdb will return the error @ref HAM_TXN_CONFLICT.
+ *
+ * @param db A valid Database handle
+ * @param txn A Transaction handle, or NULL
+ * @param key The key of the item
+ * @param record The record of the item
+ * @param flags Optional flags for searching, which can be combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_FIND_LT_MATCH </li> Cursor 'find' flag 'Less Than': the
+ * cursor is moved to point at the last record whose key
+ * is less than the specified key. When such a record cannot
+ * be located, an error is returned.
+ * <li>@ref HAM_FIND_GT_MATCH </li> Cursor 'find' flag 'Greater Than':
+ * the cursor is moved to point at the first record whose key is
+ * larger than the specified key. When such a record cannot be
+ * located, an error is returned.
+ * <li>@ref HAM_FIND_LEQ_MATCH </li> Cursor 'find' flag 'Less or Equal':
+ * the cursor is moved to point at the record whose key matches
+ * the specified key and when such a record is not available
+ * the cursor is moved to point at the last record whose key
+ * is less than the specified key. When such a record cannot be
+ * located, an error is returned.
+ * <li>@ref HAM_FIND_GEQ_MATCH </li> Cursor 'find' flag 'Greater or
+ * Equal': the cursor is moved to point at the record whose key
+ * matches the specified key and when such a record
+ * is not available the cursor is moved to point at the first
+ * record whose key is larger than the specified key.
+ * When such a record cannot be located, an error is returned.
+ * <li>@ref HAM_FIND_NEAR_MATCH </li> Cursor 'find' flag 'Any Near Or
+ * Equal': the cursor is moved to point at the record whose
+ * key matches the specified key and when such a record is
+ * not available the cursor is moved to point at either the
+ * last record whose key is less than the specified key or
+ * the first record whose key is larger than the specified
+ * key, whichever of these records is located first.
+ * When such records cannot be located, an error is returned.
+ * <li>@ref HAM_DIRECT_ACCESS </li> Only for In-Memory Databases
+ * and not if Transactions are enabled!
+ * Returns a direct pointer to the data blob stored by the
+ * hamsterdb engine. This pointer must not be resized or freed,
+ * but the data in this memory can be modified.
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a db, @a key or @a record is NULL
+ * @return @ref HAM_INV_PARAMETER if @a HAM_DIRECT_ACCESS is specified,
+ * but the Database is not an In-Memory Database.
+ * @return @ref HAM_INV_PARAMETER if @a HAM_DIRECT_ACCESS and
+ * @a HAM_ENABLE_TRANSACTIONS were both specified.
+ * @return @ref HAM_INV_PARAMETER if @ref HAM_PARTIAL is set but record
+ * size is <= 8 or Transactions are enabled
+ * @return @ref HAM_KEY_NOT_FOUND if the @a key does not exist
+ * @return @ref HAM_TXN_CONFLICT if the same key was inserted in another
+ * Transaction which was not yet committed or aborted
+ *
+ * @remark When either or both @ref HAM_FIND_LT_MATCH and/or @ref
+ * HAM_FIND_GT_MATCH have been specified as flags, the @a key structure
+ * will be overwritten when an approximate match was found: the
+ * @a key and @a record structures will then point at the located
+ * @a key and @a record. In this case the caller should ensure @a key
+ * points at a structure which must adhere to the same restrictions
+ * and conditions as specified for @ref ham_cursor_move(...,
+ * HAM_CURSOR_NEXT).
+ *
+ * @sa HAM_RECORD_USER_ALLOC
+ * @sa HAM_KEY_USER_ALLOC
+ * @sa ham_record_t
+ * @sa ham_key_t
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_find(ham_db_t *db, ham_txn_t *txn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
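+/*
+ * A lookup sketch for @ref ham_db_find with zero-initialized key and record
+ * structures, assuming a Database with 32bit integer keys:
+ *
+ *   uint32_t id = 42;
+ *   ham_key_t key = {0};
+ *   ham_record_t record = {0};
+ *   key.data = &id;
+ *   key.size = sizeof(id);
+ *   ham_status_t st = ham_db_find(db, 0, &key, &record, 0);
+ *   if (st == HAM_SUCCESS) {
+ *     // record.data/record.size are valid until the next hamsterdb call
+ *   }
+ *   else if (st == HAM_KEY_NOT_FOUND) {
+ *     // the key does not exist
+ *   }
+ */
+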
+/**
+ * Inserts a Database item
+ *
+ * This function inserts a key/record pair as a new Database item.
+ *
+ * If the key already exists in the Database, error @ref HAM_DUPLICATE_KEY
+ * is returned.
+ *
+ * If you wish to overwrite an existing entry specify the
+ * flag @ref HAM_OVERWRITE.
+ *
+ * You can write only portions of the record by specifying the flag
+ * @ref HAM_PARTIAL. In this case, hamsterdb will write <b>partial_size</b>
+ * bytes of the record data at offset <b>partial_offset</b>. The full record
+ * size will always be given in <b>record->size</b>! If
+ * partial_size+partial_offset exceeds record->size then partial_size will
+ * be limited. To shrink or grow the record, adjust record->size.
+ * @ref HAM_PARTIAL automatically overwrites existing records.
+ * Gaps will be filled with null-bytes if the record did not yet exist.
+ *
+ * @ref HAM_PARTIAL is not allowed if record->size is <= 8 or if Transactions
+ * are enabled. In such a case, @ref HAM_INV_PARAMETER is returned.
+ *
+ * If you wish to insert a duplicate key specify the flag @ref HAM_DUPLICATE.
+ * (Note that the Database has to be created with @ref HAM_ENABLE_DUPLICATE_KEYS
+ * in order to use duplicate keys.)
+ * The duplicate key is inserted after all other duplicate keys (see
+ * @ref HAM_DUPLICATE_INSERT_LAST).
+ *
+ * Record Number Databases (created with @ref HAM_RECORD_NUMBER32 or
+ * @ref HAM_RECORD_NUMBER64) expect either an empty @a key (with a size of
+ * 0 and data pointing to NULL), or a user-supplied key (with key.flag
+ * @ref HAM_KEY_USER_ALLOC and a valid data pointer).
+ * If key.size is 0 and key.data is NULL, hamsterdb will temporarily
+ * allocate memory for key->data, which will then point to a 4-byte (or 8-byte)
+ * unsigned integer.
+ *
+ * For very fast sequential inserts please use @ref ham_cursor_insert in
+ * combination with the flag @ref HAM_HINT_APPEND.
+ *
+ * @param db A valid Database handle
+ * @param txn A Transaction handle, or NULL
+ * @param key The key of the new item
+ * @param record The record of the new item
+ * @param flags Optional flags for inserting. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_OVERWRITE. If the @a key already exists, the record is
+ * overwritten. Otherwise, the key is inserted. Flag is not
+ * allowed in combination with @ref HAM_DUPLICATE.
+ * <li>@ref HAM_DUPLICATE. If the @a key already exists, a duplicate
+ * key is inserted. The key is inserted before the already
+ * existing key, or according to the sort order. Flag is not
+ * allowed in combination with @ref HAM_OVERWRITE.
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a db, @a key or @a record is NULL
+ * @return @ref HAM_INV_PARAMETER if the Database is a Record Number Database
+ * and the key is invalid (see above)
+ * @return @ref HAM_INV_PARAMETER if @ref HAM_PARTIAL is set but record
+ * size is <= 8 or Transactions are enabled
+ * @return @ref HAM_INV_PARAMETER if the flags @ref HAM_OVERWRITE <b>and</b>
+ * @ref HAM_DUPLICATE were specified, or if @ref HAM_DUPLICATE
+ * was specified, but the Database was not created with
+ * flag @ref HAM_ENABLE_DUPLICATE_KEYS.
+ * @return @ref HAM_INV_PARAMETER if @ref HAM_PARTIAL is specified and
+ * record->partial_offset+record->partial_size exceeds the
+ * record->size
+ * @return @ref HAM_WRITE_PROTECTED if you tried to insert a key in a read-only
+ * Database
+ * @return @ref HAM_TXN_CONFLICT if the same key was inserted in another
+ * Transaction which was not yet committed or aborted
+ * @return @ref HAM_INV_KEY_SIZE if the key size is larger than the
+ * @a HAM_PARAM_KEY_SIZE parameter specified for
+ * @ref ham_env_create_db
+ * OR if the key's size is greater than the Btree key size (see
+ * @ref HAM_PARAM_KEY_SIZE).
+ * @return @ref HAM_INV_RECORD_SIZE if the record size is different from
+ * the one specified with @a HAM_PARAM_RECORD_SIZE
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_insert(ham_db_t *db, ham_txn_t *txn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
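+/*
+ * An insert sketch for @ref ham_db_insert, overwriting an existing record
+ * if the key is already present:
+ *
+ *   uint32_t id = 42;
+ *   char value[] = "some record data";
+ *   ham_key_t key = {0};
+ *   ham_record_t record = {0};
+ *   key.data = &id;
+ *   key.size = sizeof(id);
+ *   record.data = value;
+ *   record.size = sizeof(value);
+ *   ham_status_t st = ham_db_insert(db, 0, &key, &record, HAM_OVERWRITE);
+ */
+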
+/**
+ * Flag for @ref ham_db_insert and @ref ham_cursor_insert
+ *
+ * When specified with @ref ham_db_insert, and the given key already
+ * stores duplicates in the Database, the first
+ * duplicate record will be overwritten.
+ *
+ * When used with @ref ham_cursor_insert and assuming the same
+ * conditions, the duplicate currently referenced by the Cursor
+ * will be overwritten.
+*/
+#define HAM_OVERWRITE 0x0001
+
+/** Flag for @ref ham_db_insert and @ref ham_cursor_insert */
+#define HAM_DUPLICATE 0x0002
+
+/** Flag for @ref ham_cursor_insert */
+#define HAM_DUPLICATE_INSERT_BEFORE 0x0004
+
+/** Flag for @ref ham_cursor_insert */
+#define HAM_DUPLICATE_INSERT_AFTER 0x0008
+
+/** Flag for @ref ham_cursor_insert */
+#define HAM_DUPLICATE_INSERT_FIRST 0x0010
+
+/** Flag for @ref ham_cursor_insert */
+#define HAM_DUPLICATE_INSERT_LAST 0x0020
+
+/** Flag for @ref ham_db_find, @ref ham_cursor_find, @ref ham_cursor_move */
+#define HAM_DIRECT_ACCESS 0x0040
+
+/** Flag for @ref ham_db_insert, @ref ham_cursor_insert, @ref ham_db_find,
+ * @ref ham_cursor_find, @ref ham_cursor_move */
+#define HAM_PARTIAL 0x0080
+
+/* Internal flag for @ref ham_db_find, @ref ham_cursor_find,
+ * @ref ham_cursor_move */
+#define HAM_FORCE_DEEP_COPY 0x0100
+
+/**
+ * Flag for @ref ham_cursor_insert
+ *
+ * Mutually exclusive with flag @ref HAM_HINT_PREPEND.
+ *
+ * Hints the hamsterdb engine that the current key will
+ * compare as @e larger than any key already existing in the Database.
+ * The hamsterdb engine will verify this postulation and when found not
+ * to be true, will revert to a regular insert operation
+ * as if this flag was not specified. The incurred cost then is only one
+ * additional key comparison.
+ */
+#define HAM_HINT_APPEND 0x00080000
+
+/**
+ * Flag for @ref ham_cursor_insert
+ *
+ * Mutually exclusive with flag @ref HAM_HINT_APPEND.
+ *
+ * Hints the hamsterdb engine that the current key will
+ * compare as @e smaller than any key already existing in the Database.
+ * The hamsterdb engine will verify this postulation and when found not
+ * to be true, will revert to a regular insert operation
+ * as if this flag was not specified. The incurred cost then is only one
+ * additional key comparison.
+ */
+#define HAM_HINT_PREPEND 0x00100000
+
+/**
+ * Flag mask to extract the common hint flags from a find/move/insert/erase
+ * flag value.
+ */
+#define HAM_HINTS_MASK 0x001F0000
+
+/**
+ * Erases a Database item
+ *
+ * This function erases a Database item. If the item @a key
+ * does not exist, @ref HAM_KEY_NOT_FOUND is returned.
+ *
+ * Note that ham_db_erase cannot erase a single duplicate key. If the key
+ * has multiple duplicates, all duplicates of this key will be erased. Use
+ * @ref ham_cursor_erase to erase a specific duplicate key.
+ *
+ * @param db A valid Database handle
+ * @param txn A Transaction handle, or NULL
+ * @param key The key to delete
+ * @param flags Optional flags for erasing; unused, set to 0
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a db or @a key is NULL
+ * @return @ref HAM_WRITE_PROTECTED if you tried to erase a key from a read-only
+ * Database
+ * @return @ref HAM_KEY_NOT_FOUND if @a key was not found
+ * @return @ref HAM_TXN_CONFLICT if the same key was inserted in another
+ * Transaction which was not yet committed or aborted
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_erase(ham_db_t *db, ham_txn_t *txn, ham_key_t *key, uint32_t flags);
+
+/* internal flag for ham_db_erase() - do not use */
+#define HAM_ERASE_ALL_DUPLICATES 1
+
+/**
+ * Returns the number of keys stored in the Database
+ *
+ * You can specify the flag @ref HAM_SKIP_DUPLICATES if you do not want
+ * to include any duplicates in the count. This will also speed up the
+ * counting.
+ *
+ * @param db A valid Database handle
+ * @param txn A Transaction handle, or NULL
+ * @param flags Optional flags:
+ * <ul>
+ * <li>@ref HAM_SKIP_DUPLICATES. Excludes any duplicates from
+ * the count
+ * </ul>
+ * @param keycount A reference to a variable which will receive
+ * the calculated key count
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a db or @a keycount is NULL or when
+ * @a flags contains an invalid flag set
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_get_key_count(ham_db_t *db, ham_txn_t *txn, uint32_t flags,
+ uint64_t *keycount);
+
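+/*
+ * A short sketch for @ref ham_db_get_key_count, counting unique keys only:
+ *
+ *   uint64_t keycount = 0;
+ *   ham_status_t st = ham_db_get_key_count(db, 0, HAM_SKIP_DUPLICATES,
+ *                 &keycount);
+ */
+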
+/**
+ * Retrieve the current value for a given Database setting
+ *
+ * Only those values requested by the parameter array will be stored.
+ *
+ * The following parameters are supported:
+ * <ul>
+ * <li>HAM_PARAM_FLAGS</li> returns the flags which were used to
+ * open or create this Database
+ * <li>HAM_PARAM_DATABASE_NAME</li> returns the Database name
+ * <li>HAM_PARAM_KEY_TYPE</li> returns the Btree key type
+ * <li>HAM_PARAM_KEY_SIZE</li> returns the Btree key size
+ * or @ref HAM_KEY_SIZE_UNLIMITED if there was no fixed key size
+ * specified.
+ * <li>HAM_PARAM_RECORD_SIZE</li> returns the record size,
+ * or @ref HAM_RECORD_SIZE_UNLIMITED if there was no fixed record size
+ * specified.
+ * <li>HAM_PARAM_MAX_KEYS_PER_PAGE</li> returns the maximum number
+ * of keys per page. This number is precise if the key size is fixed
+ * and duplicates are disabled; otherwise it's an estimate.
+ * <li>@ref HAM_PARAM_RECORD_COMPRESSION</li> Returns the
+ * selected algorithm for record compression, or 0 if compression
+ * is disabled
+ * <li>@ref HAM_PARAM_KEY_COMPRESSION</li> Returns the
+ * selected algorithm for key compression, or 0 if compression
+ * is disabled
+ * </ul>
+ *
+ * @param db A valid Database handle
+ * @param param An array of ham_parameter_t structures
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if the @a db pointer is NULL or
+ * @a param is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_get_parameters(ham_db_t *db, ham_parameter_t *param);
+
+/** Parameter name for @ref ham_env_open, @ref ham_env_create;
+ * Journal files are switched whenever the number of new Transactions exceeds
+ * this threshold. */
+#define HAM_PARAM_JOURNAL_SWITCH_THRESHOLD 0x00001
+
+/** Parameter name for @ref ham_env_open, @ref ham_env_create;
+ * sets the cache size */
+#define HAM_PARAM_CACHE_SIZE 0x00000100
+/* deprecated */
+#define HAM_PARAM_CACHESIZE HAM_PARAM_CACHE_SIZE
+
+/** Parameter name for @ref ham_env_create; sets the page size */
+#define HAM_PARAM_PAGE_SIZE 0x00000101
+/* deprecated */
+#define HAM_PARAM_PAGESIZE HAM_PARAM_PAGE_SIZE
+
+/** Parameter name for @ref ham_env_create_db; sets the key size */
+#define HAM_PARAM_KEY_SIZE 0x00000102
+/* deprecated */
+#define HAM_PARAM_KEYSIZE HAM_PARAM_KEY_SIZE
+
+/** Parameter name for @ref ham_env_get_parameters; retrieves the number
+ * of maximum Databases */
+#define HAM_PARAM_MAX_DATABASES 0x00000103
+
+/** Parameter name for @ref ham_env_create_db; sets the key type */
+#define HAM_PARAM_KEY_TYPE 0x00000104
+
+/** Parameter name for @ref ham_env_open, @ref ham_env_create;
+ * sets the path of the log files */
+#define HAM_PARAM_LOG_DIRECTORY 0x00000105
+
+/** hamsterdb pro: Parameter name for @ref ham_env_open, @ref ham_env_create;
+ * sets the AES encryption key */
+#define HAM_PARAM_ENCRYPTION_KEY 0x00000106
+
+/** Parameter name for @ref ham_env_open, @ref ham_env_create;
+ * sets the network timeout (in seconds) */
+#define HAM_PARAM_NETWORK_TIMEOUT_SEC 0x00000107
+
+/** Parameter name for @ref ham_env_create_db; sets the record size */
+#define HAM_PARAM_RECORD_SIZE 0x00000108
+
+/** Parameter name for @ref ham_env_create, @ref ham_env_open; sets a
+ * limit for the file size (in bytes) */
+#define HAM_PARAM_FILE_SIZE_LIMIT 0x00000109
+
+/** Parameter name for @ref ham_env_create, @ref ham_env_open; sets the
+ * parameter for posix_fadvise() */
+#define HAM_PARAM_POSIX_FADVISE 0x00000110
+
+/** Value for @ref HAM_PARAM_POSIX_FADVISE */
+#define HAM_POSIX_FADVICE_NORMAL 0
+
+/** Value for @ref HAM_PARAM_POSIX_FADVISE */
+#define HAM_POSIX_FADVICE_RANDOM 1
+
+/** Value for unlimited record sizes */
+#define HAM_RECORD_SIZE_UNLIMITED ((uint32_t)-1)
+
+/** Value for unlimited key sizes */
+#define HAM_KEY_SIZE_UNLIMITED ((uint16_t)-1)
+
+/** Retrieves the Database/Environment flags as were specified at the time of
+ * @ref ham_env_create/@ref ham_env_open invocation. */
+#define HAM_PARAM_FLAGS 0x00000200
+
+/** Retrieves the filesystem file access mode as was specified at the time
+ * of @ref ham_env_create/@ref ham_env_open invocation. */
+#define HAM_PARAM_FILEMODE 0x00000201
+
+/**
+ * Return a <code>const char *</code> pointer to the current
+ * Environment/Database file name in the @ref uint64_t value
+ * member, when the Database is actually stored on disk.
+ *
+ * In-memory Databases will return a NULL (0) pointer instead.
+ */
+#define HAM_PARAM_FILENAME 0x00000202
+
+/**
+ * Retrieve the Database 'name' number of this @ref ham_db_t Database within
+ * the current @ref ham_env_t Environment.
+*/
+#define HAM_PARAM_DATABASE_NAME 0x00000203
+
+/**
+ * Retrieve the maximum number of keys per page; this number depends on the
+ * currently active page and key sizes. Can be an estimate if keys do not
+ * have constant sizes or if duplicate keys are used.
+ */
+#define HAM_PARAM_MAX_KEYS_PER_PAGE 0x00000204
+
+/**
+ * hamsterdb pro: Parameter name for @ref ham_env_create, @ref ham_env_open;
+ * enables compression for the journal.
+ */
+#define HAM_PARAM_JOURNAL_COMPRESSION 0x00001000
+
+/**
+ * hamsterdb pro: Parameter name for @ref ham_env_create_db,
+ * @ref ham_env_open_db; enables compression for the records of
+ * a Database.
+ */
+#define HAM_PARAM_RECORD_COMPRESSION 0x00001001
+
+/**
+ * hamsterdb pro: Parameter name for @ref ham_env_create_db,
+ * @ref ham_env_open_db; enables compression for the keys of
+ * a Database.
+ */
+#define HAM_PARAM_KEY_COMPRESSION 0x00001002
+
+/** hamsterdb pro: helper macro for disabling compression */
+#define HAM_COMPRESSOR_NONE 0
+
+/**
+ * hamsterdb pro: selects zlib compression
+ * http://www.zlib.net/
+ */
+#define HAM_COMPRESSOR_ZLIB 1
+
+/**
+ * hamsterdb pro: selects google snappy compression
+ * http://code.google.com/p/snappy
+ */
+#define HAM_COMPRESSOR_SNAPPY 2
+
+/**
+ * hamsterdb pro: selects lzf compression
+ * http://oldhome.schmorp.de/marc/liblzf.html
+ */
+#define HAM_COMPRESSOR_LZF 3
+
+/**
+ * hamsterdb pro: selects lzo compression
+ * http://www.oberhumer.com/opensource/lzo
+ */
+#define HAM_COMPRESSOR_LZO 4
+
+/**
+ * Retrieves the Environment handle of a Database
+ *
+ * @param db A valid Database handle
+ *
+ * @return The Environment handle
+ */
+HAM_EXPORT ham_env_t *HAM_CALLCONV
+ham_db_get_env(ham_db_t *db);
+
+/**
+ * Returns the kind of key match which produced this key as it was
+ * returned by @ref ham_db_find() or @ref ham_cursor_find().
+ *
+ * This routine assumes the key was passed back by one of the @ref ham_db_find
+ * and @ref ham_cursor_find functions and not used by any other hamsterdb
+ * functions after that.
+ *
+ * As such, this function produces an answer akin to the 'sign' of the
+ * specified key as it was returned by the find operation.
+ *
+ * @param key A valid key
+ *
+ * @return 1 (greater than) or -1 (less than) when the given key is an
+ * approximate result / zero (0) otherwise. Specifically:
+ * <ul>
+ * <li>+1 when the key is greater than the item searched for (key
+ * was a GT match)
+ * <li>-1 when the key is less than the item searched for (key was
+ * a LT match)
+ * <li>zero (0) otherwise (key was an EQ (EXACT) match)
+ * </ul>
+ */
+HAM_EXPORT int HAM_CALLCONV
+ham_key_get_approximate_match_type(ham_key_t *key);
+
+/**
+ * Closes the Database
+ *
+ * This function flushes the Database and then closes the file handle.
+ * It also frees the memory resources allocated in the @a db handle.
+ *
+ * If the flag @ref HAM_AUTO_CLEANUP is specified, hamsterdb automatically
+ * calls @ref ham_cursor_close on all open Cursors. This invalidates the
+ * ham_cursor_t handle!
+ *
+ * If the flag is not specified, the application must close all Database
+ * Cursors with @ref ham_cursor_close to prevent memory leaks.
+ *
+ * This function also aborts all Transactions which were not yet committed,
+ * and therefore renders all Transaction handles invalid. If the flag
+ * @ref HAM_TXN_AUTO_COMMIT is specified, all Transactions will be committed.
+ *
+ * @param db A valid Database handle
+ * @param flags Optional flags for closing the Database. Possible values are:
+ * <ul>
+ * <li>@ref HAM_AUTO_CLEANUP. Automatically closes all open Cursors
+ * <li>@ref HAM_TXN_AUTO_COMMIT. Automatically commit all open
+ * Transactions
+ * <li>@ref HAM_TXN_AUTO_ABORT. Automatically abort all open
+ * Transactions; this is the default behaviour
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a db is NULL
+ * @return @ref HAM_CURSOR_STILL_OPEN if not all Cursors of this Database
+ * were closed, and @ref HAM_AUTO_CLEANUP was not specified
+ * @return @ref HAM_TXN_STILL_OPEN if this Database is modified by a
+ * currently active Transaction
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_close(ham_db_t *db, uint32_t flags);
+
+/** Flag for @ref ham_db_close, @ref ham_env_close */
+#define HAM_AUTO_CLEANUP 1
+
+/** @internal (Internal) flag for @ref ham_db_close, @ref ham_env_close */
+#define HAM_DONT_CLEAR_LOG 2
+
+/** Automatically abort all open Transactions (the default) */
+#define HAM_TXN_AUTO_ABORT 4
+
+/** Automatically commit all open Transactions */
+#define HAM_TXN_AUTO_COMMIT 8
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup ham_cursor hamsterdb Cursor Functions
+ * @{
+ */
+
+/**
+ * Creates a Database Cursor
+ *
+ * Creates a new Database Cursor. Cursors can be used to
+ * traverse the Database from start to end or vice versa. Cursors
+ * can also be used to insert, delete or search Database items.
+ *
+ * A newly created Cursor does not point to any item in the Database.
+ *
+ * The application should close all Cursors of a Database before closing
+ * the Database.
+ *
+ * If Transactions are enabled (@ref HAM_ENABLE_TRANSACTIONS), but @a txn
+ * is NULL, then each Cursor operation (e.g. @ref ham_cursor_insert,
+ * @ref ham_cursor_find etc) will create its own, temporary Transaction
+ * <b>only</b> for the lifetime of this operation and not for the lifetime
+ * of the whole Cursor!
+ *
+ * @param db A valid Database handle
+ * @param txn A Transaction handle, or NULL
+ * @param flags Optional flags for creating the Cursor; unused, set to 0
+ * @param cursor A pointer to a pointer which is allocated for the
+ * new Cursor handle
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a db or @a cursor is NULL
+ * @return @ref HAM_OUT_OF_MEMORY if the new structure could not be allocated
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_create(ham_cursor_t **cursor, ham_db_t *db, ham_txn_t *txn,
+ uint32_t flags);
+
+/**
+ * Clones a Database Cursor
+ *
+ * Clones an existing Cursor. The new Cursor will point to
+ * exactly the same item as the old Cursor. If the old Cursor did not point
+ * to any item, neither will the new Cursor.
+ *
+ * If the old Cursor is bound to a Transaction, then the new Cursor will
+ * also be bound to this Transaction.
+ *
+ * @param src The existing Cursor
+ * @param dest A pointer to a pointer, which is allocated for the
+ * cloned Cursor handle
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a src or @a dest is NULL
+ * @return @ref HAM_OUT_OF_MEMORY if the new structure could not be allocated
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_clone(ham_cursor_t *src, ham_cursor_t **dest);
+
+/**
+ * Moves the Cursor
+ *
+ * Moves the Cursor. Use the @a flags to specify the direction.
+ * After the move, key and record of the item are returned, if @a key
+ * and/or @a record are valid pointers.
+ *
+ * If the direction is not specified, the Cursor will not move. Do not
+ * specify a direction if you want to fetch the key and/or record of
+ * the current item.
+ *
+ * When specifying @ref HAM_DIRECT_ACCESS, the @a data pointer will point
+ * directly to the record that is stored in hamsterdb; the data can be modified,
+ * but the pointer must not be reallocated or freed. The flag @ref
+ * HAM_DIRECT_ACCESS is only allowed in In-Memory Databases and not if
+ * Transactions are enabled.
+ *
+ * You can read only portions of the record by specifying the flag
+ * @ref HAM_PARTIAL. In this case, hamsterdb will read
+ * <b>record->partial_size</b> bytes of the record data at offset
+ * <b>record->partial_offset</b>. If necessary, the record data will
+ * be limited to the original record size. The number of actually read
+ * bytes is returned in <b>record->partial_size</b>. The original size of
+ * the record is stored in <b>record->size</b>.
+ *
+ * @ref HAM_PARTIAL is not allowed if record->size is <= 8 or if Transactions
+ * are enabled. In such a case, @ref HAM_INV_PARAMETER is returned.
+ *
+ * If Transactions are enabled (see @ref HAM_ENABLE_TRANSACTIONS), and
+ * the Cursor moves next or previous to a key which is currently modified
+ * in an active Transaction (one that is not yet committed or aborted), then
+ * hamsterdb will skip the modified key. (This behavior is different from, e.g.,
+ * @ref ham_cursor_find, which would return the error @ref HAM_TXN_CONFLICT).
+ *
+ * If a key has duplicates and any of the duplicates is currently modified
+ * in another active Transaction, then ALL duplicate keys are skipped when
+ * moving to the next or previous key.
+ *
+ * If the first (@ref HAM_CURSOR_FIRST) or last (@ref HAM_CURSOR_LAST) key
+ * is requested, and the current key (or any of its duplicates) is currently
+ * modified in an active Transaction, then @ref HAM_TXN_CONFLICT is
+ * returned.
+ *
+ * If this Cursor is nil (i.e. because it was not yet used or the Cursor's
+ * item was erased) then the flag @a HAM_CURSOR_NEXT (or @a
+ * HAM_CURSOR_PREVIOUS) will be identical to @a HAM_CURSOR_FIRST (or
+ * @a HAM_CURSOR_LAST).
+ *
+ * @param cursor A valid Cursor handle
+ * @param key An optional pointer to a @ref ham_key_t structure. If this
+ * pointer is not NULL, the key of the new item is returned.
+ * Note that key->data will point to temporary data. This pointer
+ * will be invalidated by subsequent hamsterdb API calls. See
+ * @ref HAM_KEY_USER_ALLOC on how to change this behaviour.
+ * @param record An optional pointer to a @ref ham_record_t structure. If this
+ * pointer is not NULL, the record of the new item is returned.
+ * Note that record->data will point to temporary data. This pointer
+ * will be invalidated by subsequent hamsterdb API calls. See
+ * @ref HAM_RECORD_USER_ALLOC on how to change this behaviour.
+ * @param flags The flags for this operation. They are used to specify
+ * the direction for the "move". If you do not specify a direction,
+ * the Cursor will remain on the current position.
+ * <ul>
+ * <li>@ref HAM_CURSOR_FIRST </li> positions the Cursor on the first
+ * item in the Database
+ * <li>@ref HAM_CURSOR_LAST </li> positions the Cursor on the last
+ * item in the Database
+ * <li>@ref HAM_CURSOR_NEXT </li> positions the Cursor on the next
+ * item in the Database; if the Cursor does not point to any
+ * item, the function behaves as if direction was
+ * @ref HAM_CURSOR_FIRST.
+ * <li>@ref HAM_CURSOR_PREVIOUS </li> positions the Cursor on the
+ * previous item in the Database; if the Cursor does not point to
+ * any item, the function behaves as if direction was
+ * @ref HAM_CURSOR_LAST.
+ * <li>@ref HAM_SKIP_DUPLICATES </li> skips duplicate keys of the
+ * current key. Not allowed in combination with
+ * @ref HAM_ONLY_DUPLICATES.
+ *      <li>@ref HAM_ONLY_DUPLICATES </li> only moves through duplicate keys
+ * of the current key. Not allowed in combination with
+ * @ref HAM_SKIP_DUPLICATES.
+ * <li>@ref HAM_DIRECT_ACCESS </li> Only for In-Memory Databases and
+ * not if Transactions are enabled!
+ * Returns a direct pointer to the data blob stored by the
+ * hamsterdb engine. This pointer must not be resized or freed,
+ * but the data in this memory can be modified.
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a cursor is NULL, or if an invalid
+ * combination of flags was specified
+ * @return @ref HAM_INV_PARAMETER if @ref HAM_PARTIAL is set but record
+ * size is <= 8 or Transactions are enabled
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item, but
+ * key and/or record were requested
+ * @return @ref HAM_KEY_NOT_FOUND if @a cursor points to the first (or last)
+ * item, and a move to the previous (or next) item was
+ * requested
+ * @return @ref HAM_INV_PARAMETER if @a HAM_DIRECT_ACCESS is specified,
+ * but the Database is not an In-Memory Database.
+ * @return @ref HAM_INV_PARAMETER if @a HAM_DIRECT_ACCESS and
+ * @a HAM_ENABLE_TRANSACTIONS were both specified.
+ * @return @ref HAM_INV_PARAMETER if @ref HAM_PARTIAL is specified and
+ * record->partial_offset+record->partial_size exceeds the
+ * record->size
+ * @return @ref HAM_TXN_CONFLICT if @ref HAM_CURSOR_FIRST or @ref
+ * HAM_CURSOR_LAST is specified but the first (or last) key or
+ * any of its duplicates is currently modified in an active
+ * Transaction
+ *
+ * @sa HAM_RECORD_USER_ALLOC
+ * @sa HAM_KEY_USER_ALLOC
+ * @sa ham_record_t
+ * @sa ham_key_t
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_move(ham_cursor_t *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
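+/*
+ * Illustrative sketch (not part of the original header): a full traversal
+ * of a Database with ham_cursor_move. It assumes "db" is an open ham_db_t
+ * handle; error handling is kept to a minimum.
+ *
+ *   ham_cursor_t *cursor;
+ *   ham_key_t key = {0};
+ *   ham_record_t record = {0};
+ *
+ *   if (ham_cursor_create(&cursor, db, 0, 0) == HAM_SUCCESS) {
+ *     // a nil Cursor combined with HAM_CURSOR_NEXT starts at the first key
+ *     while (ham_cursor_move(cursor, &key, &record,
+ *                 HAM_CURSOR_NEXT) == HAM_SUCCESS) {
+ *       // key.data/key.size and record.data/record.size describe the
+ *       // current item; both point to temporary memory
+ *     }
+ *     ham_cursor_close(cursor);
+ *   }
+ */
+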
+/** Flag for @ref ham_cursor_move */
+#define HAM_CURSOR_FIRST 0x0001
+
+/** Flag for @ref ham_cursor_move */
+#define HAM_CURSOR_LAST 0x0002
+
+/** Flag for @ref ham_cursor_move */
+#define HAM_CURSOR_NEXT 0x0004
+
+/** Flag for @ref ham_cursor_move */
+#define HAM_CURSOR_PREVIOUS 0x0008
+
+/** Flag for @ref ham_cursor_move and @ref ham_db_get_key_count */
+#define HAM_SKIP_DUPLICATES 0x0010
+
+/** Flag for @ref ham_cursor_move */
+#define HAM_ONLY_DUPLICATES 0x0020
+
+/**
+ * Overwrites the current record
+ *
+ * This function overwrites the record of the current item.
+ *
+ * @param cursor A valid Cursor handle
+ * @param record A valid record structure
+ * @param flags Optional flags for overwriting the item; unused, set to 0
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a cursor or @a record is NULL
+ * @return @ref HAM_INV_PARAMETER if @a cursor points to an item with
+ * duplicates and duplicate sorting is enabled
+ * @return @ref HAM_INV_PARAMETER if duplicate sorting is enabled
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item
+ * @return @ref HAM_TXN_CONFLICT if the same key was inserted in another
+ * Transaction which was not yet committed or aborted
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_overwrite(ham_cursor_t *cursor, ham_record_t *record,
+ uint32_t flags);
+
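+/*
+ * Illustrative sketch (not part of the original header): locate a key with
+ * the Cursor, then replace its record in place with ham_cursor_overwrite.
+ * "cursor" is assumed to be a valid Cursor handle; error handling is omitted.
+ *
+ *   ham_key_t key = {0};
+ *   ham_record_t record = {0};
+ *
+ *   key.data = (void *)"counter";
+ *   key.size = 8;               // includes the terminating zero-byte
+ *
+ *   if (ham_cursor_find(cursor, &key, 0, 0) == HAM_SUCCESS) {
+ *     uint64_t value = 42;
+ *     record.data = &value;
+ *     record.size = sizeof(value);
+ *     ham_cursor_overwrite(cursor, &record, 0);  // flags must be 0
+ *   }
+ */
+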
+/**
+ * Searches with a key and points the Cursor to the key found, retrieves
+ * the located record
+ *
+ * Searches for an item in the Database and points the Cursor to this item.
+ * If the item could not be found, the Cursor is not modified.
+ *
+ * Note that @ref ham_cursor_find cannot search for duplicate keys. If @a key
+ * has multiple duplicates, only the first duplicate is returned.
+ *
+ * When specifying @ref HAM_DIRECT_ACCESS, the @a data pointer will point
+ * directly to the record that is stored in hamsterdb; the data can be modified,
+ * but the pointer must not be reallocated or freed. The flag @ref
+ * HAM_DIRECT_ACCESS is only allowed in In-Memory Databases and not if
+ * Transactions are enabled.
+ *
+ * You can read only portions of the record by specifying the flag
+ * @ref HAM_PARTIAL. In this case, hamsterdb will read
+ * <b>record->partial_size</b> bytes of the record data at offset
+ * <b>record->partial_offset</b>. If necessary, the record data will
+ * be limited to the original record size. The number of actually read
+ * bytes is returned in <b>record->partial_size</b>. The original size of
+ * the record is stored in <b>record->size</b>.
+ *
+ * @ref HAM_PARTIAL is not allowed if record->size is <= 8 or if Transactions
+ * are enabled. In such a case, @ref HAM_INV_PARAMETER is returned.
+ *
+ * When @ref HAM_FIND_LT_MATCH and/or @ref HAM_FIND_GT_MATCH
+ * have been specified as flags, the @a key structure will be overwritten
+ * when an approximate match is found: the @a key and @a record
+ * structures will then point at the located @a key (and @a record).
+ * In this case the caller should ensure that @a key points at a structure
+ * which adheres to the same restrictions and conditions as specified
+ * for @ref ham_cursor_move(...,HAM_CURSOR_*):
+ * key->data will point to temporary data upon return. This pointer
+ * will be invalidated by subsequent hamsterdb API calls using the same
+ * Transaction (or the same Database, if Transactions are disabled). See
+ * @ref HAM_KEY_USER_ALLOC on how to change this behaviour.
+ *
+ * Further note that the @a key structure must be non-const at all times as its
+ * internal flag bits may be written to. This is done for your benefit, as
+ * you may pass the returned @a key structure to
+ * @ref ham_key_get_approximate_match_type() to retrieve additional
+ * information about the precise nature of the returned key: the sign value
+ * produced by @ref ham_key_get_approximate_match_type() tells you which kind
+ * of match (equal, less than, greater than) occurred. This is useful for
+ * discerning between the various possible successful answers produced by
+ * combinations of @ref HAM_FIND_LT_MATCH and @ref HAM_FIND_GT_MATCH.
+ *
+ * @param cursor A valid Cursor handle
+ * @param key A pointer to a @ref ham_key_t structure. If this
+ * pointer is not NULL, the key of the new item is returned.
+ * Note that key->data will point to temporary data. This pointer
+ * will be invalidated by subsequent hamsterdb API calls. See
+ * @a HAM_KEY_USER_ALLOC on how to change this behaviour.
+ * @param record Optional pointer to a @ref ham_record_t structure. If this
+ * pointer is not NULL, the record of the new item is returned.
+ * Note that record->data will point to temporary data. This pointer
+ * will be invalidated by subsequent hamsterdb API calls. See
+ * @ref HAM_RECORD_USER_ALLOC on how to change this behaviour.
+ * @param flags Optional flags for searching, which can be combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ *        <li>@ref HAM_FIND_LT_MATCH </li> Cursor 'find' flag 'Less Than': the
+ *          cursor is moved to point at the last record whose key
+ *          is less than the specified key. When such a record cannot
+ *          be located, an error is returned.
+ *        <li>@ref HAM_FIND_GT_MATCH </li> Cursor 'find' flag 'Greater Than':
+ *          the cursor is moved to point at the first record whose key is
+ *          larger than the specified key. When such a record cannot be
+ *          located, an error is returned.
+ *        <li>@ref HAM_FIND_LEQ_MATCH </li> Cursor 'find' flag 'Less or EQual':
+ *          the cursor is moved to point at the record whose key matches
+ *          the specified key and, when such a record is not available,
+ *          the cursor is moved to point at the last record whose key
+ *          is less than the specified key. When such a record cannot be
+ *          located, an error is returned.
+ *        <li>@ref HAM_FIND_GEQ_MATCH </li> Cursor 'find' flag 'Greater or
+ *          Equal': the cursor is moved to point at the record whose key
+ *          matches the specified key and, when such a record
+ *          is not available, the cursor is moved to point at the first
+ *          record whose key is larger than the specified key.
+ *          When such a record cannot be located, an error is returned.
+ *        <li>@ref HAM_FIND_NEAR_MATCH </li> Cursor 'find' flag 'Any Near Or
+ *          Equal': the cursor is moved to point at the record whose
+ *          key matches the specified key and, when such a record is
+ *          not available, the cursor is moved to point at either the
+ *          last record whose key is less than the specified key or
+ *          the first record whose key is larger than the specified
+ *          key, whichever of these records is located first.
+ *          When such records cannot be located, an error is returned.
+ * <li>@ref HAM_DIRECT_ACCESS </li> Only for In-Memory Databases and
+ * not if Transactions are enabled!
+ * Returns a direct pointer to the data blob stored by the
+ * hamsterdb engine. This pointer must not be resized or freed,
+ * but the data in this memory can be modified.
+ * </ul>
+ *
+ * <b>Remark</b>
+ * For Approximate Matching the returned match will either match the
+ * key exactly or be the first key available above or below the
+ * given key when an exact match could not be found; 'find' does NOT
+ * spend any effort on determining which of the two is the
+ * 'nearest' to the given key when both a key above and a key below the
+ * one given exist; 'find' simply returns the first one found.
+ * As such, @ref HAM_FIND_NEAR_MATCH is the simplest possible combination
+ * of the @ref HAM_FIND_LEQ_MATCH and @ref HAM_FIND_GEQ_MATCH flags.
+ *
+ * Note that the flags @ref HAM_FIND_LT_MATCH, @ref HAM_FIND_GT_MATCH,
+ * @ref HAM_FIND_LEQ_MATCH and @ref HAM_FIND_GEQ_MATCH may be bitwise OR-ed
+ * to form functional combinations.
+ *
+ * @return @ref HAM_SUCCESS upon success. Mind the remarks about the
+ * @a key flags being adjusted and the useful invocation of
+ * @ref ham_key_get_approximate_match_type() afterwards.
+ * @return @ref HAM_INV_PARAMETER if @a db, @a key or @a record is NULL
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item
+ * @return @ref HAM_KEY_NOT_FOUND if no suitable @a key (record) exists
+ * @return @ref HAM_INV_PARAMETER if @a HAM_DIRECT_ACCESS is specified,
+ * but the Database is not an In-Memory Database.
+ * @return @ref HAM_INV_PARAMETER if @a HAM_DIRECT_ACCESS and
+ * @a HAM_ENABLE_TRANSACTIONS were both specified.
+ * @return @ref HAM_INV_PARAMETER if @ref HAM_PARTIAL is set but record
+ * size is <= 8 or Transactions are enabled
+ * @return @ref HAM_TXN_CONFLICT if the same key was inserted in another
+ * Transaction which was not yet committed or aborted
+ *
+ * @sa HAM_KEY_USER_ALLOC
+ * @sa ham_key_t
+ * @sa HAM_RECORD_USER_ALLOC
+ * @sa ham_record_t
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_find(ham_cursor_t *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
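+/*
+ * Illustrative sketch (not part of the original header): approximate
+ * matching with ham_cursor_find. "cursor" is assumed to belong to a
+ * Database with uint32 keys; error handling is omitted.
+ *
+ *   uint32_t wanted = 42;
+ *   ham_key_t key = {0};
+ *   ham_record_t record = {0};
+ *
+ *   key.data = &wanted;
+ *   key.size = sizeof(wanted);
+ *
+ *   if (ham_cursor_find(cursor, &key, &record,
+ *               HAM_FIND_GEQ_MATCH) == HAM_SUCCESS) {
+ *     int sign = ham_key_get_approximate_match_type(&key);
+ *     // sign == 0: exact match for 42
+ *     // sign  > 0: key now holds the smallest key greater than 42
+ *   }
+ */
+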
+/* internal flag */
+#define HAM_FIND_EXACT_MATCH 0x4000
+
+/**
+ * Cursor 'find' flag 'Less Than': return the nearest match below the
+ * given key, whether an exact match exists or not.
+ */
+#define HAM_FIND_LT_MATCH 0x1000
+
+/**
+ * Cursor 'find' flag 'Greater Than': return the nearest match above the
+ * given key, whether an exact match exists or not.
+ */
+#define HAM_FIND_GT_MATCH 0x2000
+
+/**
+ * Cursor 'find' flag 'Less or EQual': return the nearest match below the
+ * given key, when an exact match does not exist.
+ *
+ * May be combined with @ref HAM_FIND_GEQ_MATCH to accept any 'near' key, or
+ * you can use the @ref HAM_FIND_NEAR_MATCH constant as a shorthand for that.
+ */
+#define HAM_FIND_LEQ_MATCH (HAM_FIND_LT_MATCH | HAM_FIND_EXACT_MATCH)
+
+/**
+ * Cursor 'find' flag 'Greater or Equal': return the nearest match above
+ * the given key, when an exact match does not exist.
+ *
+ * May be combined with @ref HAM_FIND_LEQ_MATCH to accept any 'near' key,
+ * or you can use the @ref HAM_FIND_NEAR_MATCH constant as a shorthand for that.
+ */
+#define HAM_FIND_GEQ_MATCH (HAM_FIND_GT_MATCH | HAM_FIND_EXACT_MATCH)
+
+/**
+ * Cursor 'find' flag 'Any Near Or Equal': return a match directly below or
+ * above the given key, when an exact match does not exist.
+ *
+ * Be aware that the returned match will either match the key exactly or
+ * be the first key available above or below the given key when an
+ * exact match could not be found; 'find' does NOT spend any effort on
+ * determining which of the two is the 'nearest' to the given key
+ * when both a key above and a key below the one given exist; 'find'
+ * simply returns the first one found. As such, this flag is the simplest
+ * possible combination of the @ref HAM_FIND_LEQ_MATCH and
+ * @ref HAM_FIND_GEQ_MATCH flags.
+ */
+#define HAM_FIND_NEAR_MATCH (HAM_FIND_LT_MATCH | HAM_FIND_GT_MATCH \
+ | HAM_FIND_EXACT_MATCH)
+
+/**
+ * Inserts a Database item and points the Cursor to the inserted item
+ *
+ * This function inserts a key/record pair as a new Database item.
+ * If the key already exists in the Database, error @ref HAM_DUPLICATE_KEY
+ * is returned.
+ *
+ * If you wish to overwrite an existing entry specify the
+ * flag @ref HAM_OVERWRITE. The use of this flag is not allowed in combination
+ * with @ref HAM_DUPLICATE.
+ *
+ * If you wish to insert a duplicate key specify the flag @ref HAM_DUPLICATE.
+ * (In order to use duplicate keys, the Database has to be created with
+ * @ref HAM_ENABLE_DUPLICATE_KEYS.)
+ * By default, the duplicate key is inserted after all other duplicate keys
+ * (see @ref HAM_DUPLICATE_INSERT_LAST). This behaviour can be overwritten by
+ * specifying @ref HAM_DUPLICATE_INSERT_FIRST, @ref HAM_DUPLICATE_INSERT_BEFORE
+ * or @ref HAM_DUPLICATE_INSERT_AFTER.
+ *
+ * You can write only portions of the record by specifying the flag
+ * @ref HAM_PARTIAL. In this case, hamsterdb will write <b>partial_size</b>
+ * bytes of the record data at offset <b>partial_offset</b>. If necessary, the
+ * record data will grow. Gaps will be filled with null-bytes, if the record
+ * did not yet exist.
+ *
+ * @ref HAM_PARTIAL is not allowed if record->size is <= 8 or if Transactions
+ * are enabled. In such a case, @ref HAM_INV_PARAMETER is returned.
+ *
+ * Specify the flag @ref HAM_HINT_APPEND if you insert sequential data
+ * and the current @a key is greater than any other key in this Database.
+ * In this case hamsterdb will optimize the insert algorithm. hamsterdb will
+ * verify that this key is the greatest; if not, it will perform a normal
+ * insert. This flag is the default for Record Number Databases.
+ *
+ * Specify the flag @ref HAM_HINT_PREPEND if you insert sequential data
+ * and the current @a key is lower than any other key in this Database.
+ * In this case hamsterdb will optimize the insert algorithm. hamsterdb will
+ * verify that this key is the lowest; if not, it will perform a normal
+ * insert.
+ *
+ * After inserting, the Cursor will point to the new item. If inserting
+ * the item failed, the Cursor is not modified.
+ *
+ * Record Number Databases (created with @ref HAM_RECORD_NUMBER32 or
+ * @ref HAM_RECORD_NUMBER64) expect either an empty @a key (with a size of
+ * 0 and data pointing to NULL), or a user-supplied key (with key.flag
+ * @ref HAM_KEY_USER_ALLOC and a valid data pointer).
+ * If key.size is 0 and key.data is NULL, hamsterdb will temporarily
+ * allocate memory for key->data, which will then point to a 4-byte (or 8-byte)
+ * unsigned integer.
+ *
+ * @param cursor A valid Cursor handle
+ * @param key A valid key structure
+ * @param record A valid record structure
+ * @param flags Optional flags for inserting the item, combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_OVERWRITE. If the @a key already exists, the record is
+ * overwritten. Otherwise, the key is inserted. Not allowed in
+ * combination with @ref HAM_DUPLICATE.
+ * <li>@ref HAM_DUPLICATE. If the @a key already exists, a duplicate
+ * key is inserted. Same as @ref HAM_DUPLICATE_INSERT_LAST. Not
+ *            allowed in combination with @ref HAM_OVERWRITE.
+ * <li>@ref HAM_DUPLICATE_INSERT_BEFORE. If the @a key already exists,
+ * a duplicate key is inserted before the duplicate pointed
+ * to by the Cursor. Not allowed if duplicate sorting is enabled.
+ * <li>@ref HAM_DUPLICATE_INSERT_AFTER. If the @a key already exists,
+ * a duplicate key is inserted after the duplicate pointed
+ * to by the Cursor. Not allowed if duplicate sorting is enabled.
+ * <li>@ref HAM_DUPLICATE_INSERT_FIRST. If the @a key already exists,
+ * a duplicate key is inserted as the first duplicate of
+ * the current key. Not allowed if duplicate sorting is enabled.
+ * <li>@ref HAM_DUPLICATE_INSERT_LAST. If the @a key already exists,
+ * a duplicate key is inserted as the last duplicate of
+ * the current key. Not allowed if duplicate sorting is enabled.
+ * <li>@ref HAM_HINT_APPEND. Hints the hamsterdb engine that the
+ * current key will compare as @e larger than any key already
+ * existing in the Database. The hamsterdb engine will verify
+ * this postulation and when found not to be true, will revert
+ * to a regular insert operation as if this flag was not
+ * specified. The incurred cost then is only one additional key
+ * comparison. Mutually exclusive with flag @ref HAM_HINT_PREPEND.
+ * This is the default for Record Number Databases.
+ * <li>@ref HAM_HINT_PREPEND. Hints the hamsterdb engine that the
+ * current key will compare as @e lower than any key already
+ * existing in the Database. The hamsterdb engine will verify
+ * this postulation and when found not to be true, will revert
+ * to a regular insert operation as if this flag was not
+ * specified. The incurred cost then is only one additional key
+ * comparison. Mutually exclusive with flag @ref HAM_HINT_APPEND.
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a key or @a record is NULL
+ * @return @ref HAM_INV_PARAMETER if the Database is a Record Number Database
+ * and the key is invalid (see above)
+ * @return @ref HAM_INV_PARAMETER if @ref HAM_PARTIAL is set but record
+ * size is <= 8 or Transactions are enabled
+ * @return @ref HAM_INV_PARAMETER if the flags @ref HAM_OVERWRITE <b>and</b>
+ * @ref HAM_DUPLICATE were specified, or if @ref HAM_DUPLICATE
+ * was specified, but the Database was not created with
+ * flag @ref HAM_ENABLE_DUPLICATE_KEYS.
+ * @return @ref HAM_WRITE_PROTECTED if you tried to insert a key to a read-only
+ * Database.
+ * @return @ref HAM_INV_KEY_SIZE if the key size is different from
+ * the one specified with @a HAM_PARAM_KEY_SIZE
+ * @return @ref HAM_INV_RECORD_SIZE if the record size is different from
+ * the one specified with @a HAM_PARAM_RECORD_SIZE
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item
+ * @return @ref HAM_TXN_CONFLICT if the same key was inserted in another
+ * Transaction which was not yet committed or aborted
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_insert(ham_cursor_t *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
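+/*
+ * Illustrative sketch (not part of the original header): inserting a key
+ * and attaching a duplicate record to it. "cursor" is assumed to belong
+ * to a Database created with HAM_ENABLE_DUPLICATE_KEYS; error handling
+ * is omitted.
+ *
+ *   ham_key_t key = {0};
+ *   ham_record_t record = {0};
+ *
+ *   key.data = (void *)"color";
+ *   key.size = 6;               // includes the terminating zero-byte
+ *
+ *   record.data = (void *)"red";
+ *   record.size = 4;
+ *   ham_cursor_insert(cursor, &key, &record, 0);
+ *
+ *   record.data = (void *)"blue";
+ *   record.size = 5;
+ *   ham_cursor_insert(cursor, &key, &record, HAM_DUPLICATE);
+ *   // the Cursor now points at the newly inserted duplicate
+ */
+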
+/**
+ * Erases the current key
+ *
+ * Erases a key from the Database. If the erase was
+ * successful, the Cursor is invalidated and no longer points to
+ * any item. In case of an error, the Cursor is not modified.
+ *
+ * If the Database was opened with the flag @ref HAM_ENABLE_DUPLICATE_KEYS,
+ * this function erases only the duplicate item to which the Cursor refers.
+ *
+ * @param cursor A valid Cursor handle
+ * @param flags Unused, set to 0
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if @a cursor is NULL
+ * @return @ref HAM_WRITE_PROTECTED if you tried to erase a key from a read-only
+ * Database
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item
+ * @return @ref HAM_TXN_CONFLICT if the same key was inserted in another
+ * Transaction which was not yet committed or aborted
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_erase(ham_cursor_t *cursor, uint32_t flags);
+
+/**
+ * Returns the number of duplicate keys
+ *
+ * Returns the number of duplicate keys of the item to which the
+ * Cursor currently refers.
+ * Returns 1 if the key has no duplicates.
+ *
+ * @param cursor A valid Cursor handle
+ * @param count Returns the number of duplicate keys
+ * @param flags Optional flags; unused, set to 0.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item
+ * @return @ref HAM_INV_PARAMETER if @a cursor or @a count is NULL
+ * @return @ref HAM_TXN_CONFLICT if the same key was inserted in another
+ * Transaction which was not yet committed or aborted
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_get_duplicate_count(ham_cursor_t *cursor,
+ uint32_t *count, uint32_t flags);
+
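+/*
+ * Illustrative sketch (not part of the original header): counting and then
+ * enumerating the duplicates of the current key. "cursor" is assumed to
+ * point at an existing item; error handling is omitted.
+ *
+ *   uint32_t count = 0;
+ *   ham_record_t record = {0};
+ *
+ *   if (ham_cursor_get_duplicate_count(cursor, &count, 0) == HAM_SUCCESS) {
+ *     // visit the remaining duplicates of the current key, but no other keys
+ *     while (ham_cursor_move(cursor, 0, &record,
+ *                 HAM_CURSOR_NEXT | HAM_ONLY_DUPLICATES) == HAM_SUCCESS) {
+ *       // record.data/record.size describe the current duplicate
+ *     }
+ *   }
+ */
+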
+/**
+ * Returns the current cursor position in the duplicate list
+ *
+ * Returns the position in the duplicate list of the current key. The position
+ * is 0-based.
+ *
+ * @param cursor A valid Cursor handle
+ * @param position Returns the duplicate position
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item
+ * @return @ref HAM_INV_PARAMETER if @a cursor or @a position is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_get_duplicate_position(ham_cursor_t *cursor,
+ uint32_t *position);
+
+/**
+ * Returns the record size of the current key
+ *
+ * Returns the record size of the item to which the Cursor currently refers.
+ *
+ * @param cursor A valid Cursor handle
+ * @param size Returns the record size, in bytes
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item
+ * @return @ref HAM_INV_PARAMETER if @a cursor or @a size is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_get_record_size(ham_cursor_t *cursor, uint64_t *size);
+
+/**
+ * Closes a Database Cursor
+ *
+ * Closes a Cursor and frees allocated memory. All Cursors
+ * should be closed before closing the Database (see @ref ham_db_close).
+ *
+ * @param cursor A valid Cursor handle
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_CURSOR_IS_NIL if the Cursor does not point to an item
+ * @return @ref HAM_INV_PARAMETER if @a cursor is NULL
+ *
+ * @sa ham_db_close
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_close(ham_cursor_t *cursor);
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* HAM_HAMSTERDB_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb.hpp b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb.hpp
new file mode 100644
index 0000000000..68892ac2d7
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb.hpp
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file hamsterdb.hpp
+ * @author Christoph Rupp, chris@crupp.de
+ * @version 2.1.10
+ *
+ * This C++ wrapper class is a very tight wrapper around the C API. It does
+ * not attempt to be STL compatible.
+ *
+ * All functions throw exceptions of class @ref hamsterdb::error in case
+ * of an error.
+ * Please refer to the C API documentation for more information. You can find
+ * it here: http://hamsterdb.com/?page=doxygen&module=globals.html
+ *
+ */
+
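+/*
+ * Illustrative sketch (not part of the original header): the typical
+ * create/insert/find round-trip with the C++ wrapper. It assumes the
+ * file "test.db" may be created in the working directory.
+ *
+ *   #include <ham/hamsterdb.hpp>
+ *
+ *   try {
+ *     hamsterdb::env env;
+ *     env.create("test.db");
+ *
+ *     hamsterdb::db db;
+ *     db = env.create_db(1);   // assignment transfers ownership
+ *
+ *     int k = 1, v = 42;
+ *     hamsterdb::key key(&k, sizeof(k));
+ *     hamsterdb::record record(&v, sizeof(v));
+ *     db.insert(&key, &record);
+ *
+ *     hamsterdb::record found = db.find(&key);
+ *     // found.get_data() points at the stored value
+ *   }
+ *   catch (hamsterdb::error &e) {
+ *     // e.get_string() returns a readable description
+ *   }
+ */
+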
+#ifndef HAM_HAMSTERDB_HPP
+#define HAM_HAMSTERDB_HPP
+
+#include <ham/hamsterdb.h>
+#include <ham/hamsterdb_int.h>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#if defined(_MSC_VER) && defined(_DEBUG) && !defined(_CRTDBG_MAP_ALLOC)
+# define _CRTDBG_MAP_ALLOC
+# include <crtdbg.h>
+#endif
+
+/**
+ * @defgroup ham_cpp hamsterdb C++ API wrapper
+ * @{
+ */
+
+/**
+ * The global hamsterdb namespace.
+ */
+namespace hamsterdb {
+
+class txn;
+class db;
+class env;
+
+/**
+ * An error class.
+ *
+ * The hamsterdb C++ API throws this class as Exceptions.
+ */
+class error {
+ public:
+ /** Constructor */
+ error(ham_status_t st)
+ : m_errno(st) {
+ };
+
+ /** Returns the error code. */
+ ham_status_t get_errno() const {
+ return (m_errno);
+ }
+
+ /** Returns an English error description. */
+ const char *get_string() const {
+ return (ham_strerror(m_errno));
+ }
+
+private:
+ ham_status_t m_errno;
+};
+
+/**
+ * A key class.
+ *
+ * This class wraps structures of type ham_key_t.
+ */
+class key {
+ public:
+ /** Constructor */
+ key(void *data = 0, uint16_t size = 0, uint32_t flags = 0) {
+ memset(&m_key, 0, sizeof(m_key));
+ m_key.data = data;
+ m_key.size = size;
+ m_key.flags = flags;
+ if (m_key.size != size) // check for overflow
+ throw error(HAM_INV_KEYSIZE);
+ }
+
+ /** Copy constructor. */
+ key(const key &other)
+ : m_key(other.m_key) {
+ }
+
+ /** Assignment operator. */
+ key &operator=(const key &other) {
+ if (&other != this)
+ m_key = other.m_key;
+ return (*this);
+ }
+
+ /** Returns the key data. */
+ void *get_data() const {
+ return (m_key.data);
+ }
+
+ /** Sets the key data. */
+ void set_data(void *data) {
+ m_key.data = data;
+ }
+
+ /** Returns the size of the key. */
+ uint16_t get_size() const {
+ return (m_key.size);
+ }
+
+ /** Sets the size of the key. */
+ void set_size(uint16_t size) {
+ m_key.size = size;
+ }
+
+ /** Template assignment */
+ template <class T>
+ void set(T &t) {
+ set_data(&t);
+ set_size(sizeof(t));
+ }
+
+ /** Returns the flags of the key. */
+ uint32_t get_flags() const {
+ return (m_key.flags);
+ }
+
+ /** Sets the flags of the key. */
+ void set_flags(uint32_t flags) {
+ m_key.flags = flags;
+ }
+
+ /** Returns a pointer to the internal ham_key_t structure. */
+ ham_key_t *get_handle() {
+ return (&m_key);
+ }
+
+ /** Returns 'sign' of Approximate Match */
+ int get_approximate_match_type() {
+ return (ham_key_get_approximate_match_type(&m_key));
+ }
+
+private:
+ ham_key_t m_key;
+};
+
+/**
+ * A record class.
+ *
+ * This class wraps structures of type ham_record_t.
+ */
+class record {
+ public:
+ /** Constructor */
+ record(void *data = 0, uint32_t size = 0, uint32_t flags = 0) {
+ memset(&m_rec, 0, sizeof(m_rec));
+ m_rec.data = data;
+ m_rec.size = size;
+ m_rec.flags = flags;
+ }
+
+ /** Copy constructor. */
+ record(const record &other)
+ : m_rec(other.m_rec) {
+ }
+
+ /** Assignment operator. */
+ record &operator=(const record &other) {
+ m_rec = other.m_rec;
+ return (*this);
+ }
+
+ /** Returns the record data. */
+ void *get_data() const {
+ return (m_rec.data);
+ }
+
+ /** Sets the record data. */
+ void set_data(void *data) {
+ m_rec.data = data;
+ }
+
+ /** Returns the size of the record. */
+ uint32_t get_size() const {
+ return (m_rec.size);
+ }
+
+ /** Sets the size of the record. */
+ void set_size(uint32_t size) {
+ m_rec.size = size;
+ }
+
+ /** Returns the flags of the record. */
+ uint32_t get_flags() const {
+ return (m_rec.flags);
+ }
+
+ /** Sets the flags of the record. */
+ void set_flags(uint32_t flags) {
+ m_rec.flags = flags;
+ }
+
+ /** Returns a pointer to the internal ham_record_t structure. */
+ ham_record_t *get_handle() {
+ return (&m_rec);
+ }
+
+ protected:
+ ham_record_t m_rec;
+};
+
+
+/**
+ * A Transaction class
+ *
+ * This class wraps structures of type ham_txn_t.
+ */
+class txn {
+ public:
+ /** Constructor */
+ txn(ham_txn_t *t = 0)
+ : m_txn(t) {
+ }
+
+ /** Abort the Transaction */
+ void abort() {
+ ham_status_t st = ham_txn_abort(m_txn, 0);
+ if (st)
+ throw error(st);
+ }
+
+ /** Commit the Transaction */
+ void commit() {
+ ham_status_t st = ham_txn_commit(m_txn, 0);
+ if (st)
+ throw error(st);
+ }
+
+ std::string get_name() {
+ const char *p = ham_txn_get_name(m_txn);
+ return (p ? p : "");
+ }
+
+ /** Returns a pointer to the internal ham_txn_t structure. */
+ ham_txn_t *get_handle() {
+ return (m_txn);
+ }
+
+ protected:
+ ham_txn_t *m_txn;
+};
+
+
+/**
+ * A Database class.
+ *
+ * This class wraps the ham_db_t Database handles.
+ */
+class db {
+ public:
+ /** Set error handler function. */
+ static void set_errhandler(ham_errhandler_fun f) {
+ ham_set_errhandler(f);
+ }
+
+ /** Retrieves the hamsterdb library version. */
+ static void get_version(uint32_t *major, uint32_t *minor,
+ uint32_t *revision) {
+ ham_get_version(major, minor, revision);
+ }
+
+ /** Constructor */
+ db()
+ : m_db(0) {
+ }
+
+ /** Destructor - automatically closes the Database, if necessary. */
+ ~db() {
+ close();
+ }
+
+ /**
+ * Assignment operator.
+ *
+ * <b>Important!</b> This operator transfers the ownership of the
+ * Database handle.
+ */
+ db &operator=(const db &other) {
+ db &rhs = (db &)other;
+ if (this == &other)
+ return (*this);
+ close();
+ m_db = rhs.m_db;
+ rhs.m_db = 0;
+ return (*this);
+ }
+
+ /** Returns the last Database error. */
+ ham_status_t get_error() {
+ return (ham_db_get_error(m_db));
+ }
+
+ /** Sets the comparison function. */
+ void set_compare_func(ham_compare_func_t foo) {
+ ham_status_t st = ham_db_set_compare_func(m_db, foo);
+ if (st)
+ throw error(st);
+ }
+
+ /** Finds a record by looking up the key. */
+ record find(txn *t, key *k, uint32_t flags = 0) {
+ record r;
+ ham_status_t st = ham_db_find(m_db,
+ t ? t->get_handle() : 0,
+ k ? k->get_handle() : 0,
+ r.get_handle(), flags);
+ if (st)
+ throw error(st);
+ return (r);
+ }
+
+ /** Finds a record by looking up the key. */
+ record &find(txn *t, key *k, record *r, uint32_t flags = 0) {
+ ham_status_t st = ham_db_find(m_db,
+ t ? t->get_handle() : 0,
+ k ? k->get_handle() : 0,
+ r->get_handle(), flags);
+ if (st)
+ throw error(st);
+ return (*r);
+ }
+
+ /** Finds a record by looking up the key. */
+ record find(key *k, uint32_t flags = 0) {
+ return (find(0, k, flags));
+ }
+
+ /** Inserts a key/record pair. */
+ void insert(txn *t, key *k, record *r, uint32_t flags = 0) {
+ ham_status_t st = ham_db_insert(m_db,
+ t ? t->get_handle() : 0,
+ k ? k->get_handle() : 0,
+ r ? r->get_handle() : 0, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Inserts a key/record pair. */
+ void insert(key *k, record *r, uint32_t flags=0) {
+ insert(0, k, r, flags);
+ }
+
+ /** Erases a key/record pair. */
+ void erase(key *k, uint32_t flags = 0) {
+ erase(0, k, flags);
+ }
+
+ /** Erases a key/record pair. */
+ void erase(txn *t, key *k, uint32_t flags = 0) {
+ ham_status_t st = ham_db_erase(m_db,
+ t ? t->get_handle() : 0,
+ k ? k->get_handle() : 0, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Returns number of items in the Database. */
+ uint64_t get_key_count(ham_txn_t *txn = 0, uint32_t flags = 0) {
+ uint64_t count = 0;
+ ham_status_t st = ham_db_get_key_count(m_db, txn, flags, &count);
+ if (st)
+ throw error(st);
+ return (count);
+ }
+
+ /** Retrieves Database parameters. */
+ void get_parameters(ham_parameter_t *param) {
+ ham_status_t st = ham_db_get_parameters(m_db, param);
+ if (st)
+ throw error(st);
+ }
+
+ /** Closes the Database. */
+ void close(uint32_t flags = 0) {
+ if (!m_db)
+ return;
+ // disable auto-cleanup; all objects will be destroyed when
+ // going out of scope
+ flags &= ~HAM_AUTO_CLEANUP;
+ ham_status_t st = ham_db_close(m_db, flags);
+ if (st)
+ throw error(st);
+ m_db = 0;
+ }
+
+ /** Returns a pointer to the internal ham_db_t structure. */
+ ham_db_t *get_handle() {
+ return (m_db);
+ }
+
+protected:
+ friend class env;
+
+ /* Copy Constructor. Is protected and should not be used. */
+ db(ham_db_t *db)
+ : m_db(db) {
+ }
+
+ private:
+ ham_db_t *m_db;
+};
+
+
+/**
+ * A Database Cursor.
+ *
+ * This class wraps the ham_cursor_t Cursor handles.
+ */
+class cursor {
+ public:
+ /** Constructor */
+ cursor(db *db = 0, txn *t = 0, uint32_t flags = 0)
+ : m_cursor(0) {
+ create(db, t, flags);
+ }
+
+ /** Constructor */
+ cursor(txn *t, db *db = 0, uint32_t flags = 0)
+ : m_cursor(0) {
+ create(db, t, flags);
+ }
+
+ /** Destructor - automatically closes the Cursor, if necessary. */
+ ~cursor() {
+ close();
+ }
+
+ /** Creates a new Cursor. */
+ void create(db *db, txn *t = 0, uint32_t flags = 0) {
+ if (m_cursor)
+ close();
+ if (db) {
+ ham_status_t st = ham_cursor_create(&m_cursor, db->get_handle(),
+ t ? t->get_handle() : 0, flags);
+ if (st)
+ throw error(st);
+ }
+ }
+
+ /** Clones the Cursor. */
+ cursor clone() {
+ ham_cursor_t *dest;
+ ham_status_t st = ham_cursor_clone(m_cursor, &dest);
+ if (st)
+ throw error(st);
+ return (cursor(dest));
+ }
+
+ /** Moves the Cursor, and retrieves the key/record of the new position. */
+ void move(key *k, record *r, uint32_t flags = 0) {
+ ham_status_t st = ham_cursor_move(m_cursor, k ? k->get_handle() : 0,
+ r ? r->get_handle() : 0, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Moves the Cursor to the first Database element. */
+ void move_first(key *k = 0, record *r = 0) {
+ move(k, r, HAM_CURSOR_FIRST);
+ }
+
+ /** Moves the Cursor to the last Database element. */
+ void move_last(key *k = 0, record *r = 0) {
+ move(k, r, HAM_CURSOR_LAST);
+ }
+
+ /** Moves the Cursor to the next Database element. */
+ void move_next(key *k = 0, record *r = 0) {
+ move(k, r, HAM_CURSOR_NEXT);
+ }
+
+ /** Moves the Cursor to the previous Database element. */
+ void move_previous(key *k = 0, record *r = 0) {
+ move(k, r, HAM_CURSOR_PREVIOUS);
+ }
+
+ /** Overwrites the current record. */
+ void overwrite(record *r, uint32_t flags = 0) {
+ ham_status_t st = ham_cursor_overwrite(m_cursor,
+ r ? r->get_handle() : 0, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Finds a key. */
+ void find(key *k, record *r = 0, uint32_t flags = 0) {
+ ham_status_t st = ham_cursor_find(m_cursor, k->get_handle(),
+ (r ? r->get_handle() : 0), flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Inserts a key/record pair. */
+ void insert(key *k, record *r, uint32_t flags = 0) {
+ ham_status_t st = ham_cursor_insert(m_cursor, k ? k->get_handle() : 0,
+ r ? r->get_handle() : 0, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Erases the current key/record pair. */
+ void erase(uint32_t flags = 0) {
+ ham_status_t st = ham_cursor_erase(m_cursor, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Returns the number of duplicate keys. */
+ uint32_t get_duplicate_count(uint32_t flags = 0) {
+ uint32_t c;
+ ham_status_t st = ham_cursor_get_duplicate_count(m_cursor, &c, flags);
+ if (st)
+ throw error(st);
+ return (c);
+ }
+
+ /** Returns the size of the current record. */
+ uint64_t get_record_size() {
+ uint64_t s;
+ ham_status_t st = ham_cursor_get_record_size(m_cursor, &s);
+ if (st)
+ throw error(st);
+ return (s);
+ }
+
+ /** Closes the Cursor. */
+ void close() {
+ if (!m_cursor)
+ return;
+ ham_status_t st = ham_cursor_close(m_cursor);
+ if (st)
+ throw error(st);
+ m_cursor = 0;
+ }
+
+ protected:
+ /* Copy Constructor. Is protected and should not be used. */
+ cursor(ham_cursor_t *c) {
+ m_cursor = c;
+ }
+
+ private:
+ ham_cursor_t *m_cursor;
+};
+
+/**
+ * An Environment class.
+ *
+ * This class wraps the ham_env_t structure.
+ */
+class env {
+ public:
+ /** Constructor */
+ env()
+ : m_env(0) {
+ }
+
+    /** Destructor - automatically closes the Environment, if necessary. */
+ ~env() {
+ close();
+ }
+
+ /** Creates a new Environment. */
+ void create(const char *filename, uint32_t flags = 0,
+ uint32_t mode = 0644, const ham_parameter_t *param = 0) {
+ ham_status_t st = ham_env_create(&m_env, filename, flags, mode, param);
+ if (st)
+ throw error(st);
+ }
+
+ /** Opens an existing Environment. */
+ void open(const char *filename, uint32_t flags = 0,
+ const ham_parameter_t *param = 0) {
+ ham_status_t st = ham_env_open(&m_env, filename, flags, param);
+ if (st)
+ throw error(st);
+ }
+
+ /** Flushes the Environment to disk. */
+ void flush(uint32_t flags = 0) {
+ ham_status_t st = ham_env_flush(m_env, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Creates a new Database in the Environment. */
+ db create_db(uint16_t name, uint32_t flags = 0,
+ const ham_parameter_t *param = 0) {
+ ham_db_t *dbh;
+
+ ham_status_t st = ham_env_create_db(m_env, &dbh, name, flags, param);
+ if (st)
+ throw error(st);
+
+ return (hamsterdb::db(dbh));
+ }
+
+ /** Opens an existing Database in the Environment. */
+ db open_db(uint16_t name, uint32_t flags = 0,
+ const ham_parameter_t *param = 0) {
+ ham_db_t *dbh;
+
+ ham_status_t st = ham_env_open_db(m_env, &dbh, name, flags, param);
+ if (st)
+ throw error(st);
+
+ return (hamsterdb::db(dbh));
+ }
+
+ /** Renames an existing Database in the Environment. */
+ void rename_db(uint16_t oldname, uint16_t newname, uint32_t flags = 0) {
+ ham_status_t st = ham_env_rename_db(m_env, oldname, newname, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Deletes a Database from the Environment. */
+ void erase_db(uint16_t name, uint32_t flags = 0) {
+ ham_status_t st = ham_env_erase_db(m_env, name, flags);
+ if (st)
+ throw error(st);
+ }
+
+ /** Begin a new Transaction */
+ txn begin(const char *name = 0) {
+ ham_txn_t *h;
+ ham_status_t st = ham_txn_begin(&h, m_env, name, 0, 0);
+ if (st)
+ throw error(st);
+ return (txn(h));
+ }
+
+
+ /** Closes the Environment. */
+ void close(uint32_t flags = 0) {
+ if (!m_env)
+ return;
+ // disable auto-cleanup; all objects will be destroyed when
+ // going out of scope
+ flags &= ~HAM_AUTO_CLEANUP;
+ ham_status_t st = ham_env_close(m_env, flags);
+ if (st)
+ throw error(st);
+ m_env = 0;
+ }
+
+ /** Retrieves Environment parameters. */
+ void get_parameters(ham_parameter_t *param) {
+ ham_status_t st = ham_env_get_parameters(m_env, param);
+ if (st)
+ throw error(st);
+ }
+
+ /** Get all Database names. */
+ std::vector<uint16_t> get_database_names() {
+ uint32_t count = 32;
+ ham_status_t st;
+ std::vector<uint16_t> v(count);
+
+ for (;;) {
+ st = ham_env_get_database_names(m_env, &v[0], &count);
+ if (!st)
+ break;
+ if (st && st!=HAM_LIMITS_REACHED)
+ throw error(st);
+ count += 16;
+ v.resize(count);
+ }
+
+ v.resize(count);
+ return (v);
+ }
+
+ private:
+ ham_env_t *m_env;
+};
+
+} // namespace hamsterdb
+
+/**
+ * @}
+ */
+
+#endif // HAM_HAMSTERDB_HPP
diff --git a/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_int.h b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_int.h
new file mode 100644
index 0000000000..ec05ece264
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_int.h
@@ -0,0 +1,319 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file hamsterdb_int.h
+ * @brief Internal hamsterdb Embedded Storage functions.
+ * @author Christoph Rupp, chris@crupp.de
+ *
+ * Please be aware that the interfaces in this file are mostly for internal
+ * use. Unlike those in hamsterdb.h they are not stable and can be changed
+ * with every new version.
+ *
+ */
+
+#ifndef HAM_HAMSTERDB_INT_H
+#define HAM_HAMSTERDB_INT_H
+
+#include <ham/hamsterdb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup ham_extended_api hamsterdb Enhanced API
+ * @{
+ */
+
+/** get the (non-persisted) flags of a key */
+#define ham_key_get_intflags(key) (key)->_flags
+
+/**
+ * set the flags of a key
+ *
+ * Note that the ham_find/ham_cursor_find/ham_cursor_find_ex flags must
+ * be defined such that those can peacefully co-exist with these; that's
+ * why those public flags start at the value 0x1000 (4096).
+ */
+#define ham_key_set_intflags(key, f) (key)->_flags=(f)
+
+/**
+ * Verifies the integrity of the Database
+ *
+ * This function is only interesting if you want to debug hamsterdb.
+ *
+ * @param db A valid Database handle
+ * @param flags Optional flags for the integrity check, combined with
+ * bitwise OR. Possible flags are:
+ * <ul>
+ * <li>@ref HAM_PRINT_GRAPH</li> Prints the Btree as a graph; stores
+ * the image as "graph.png" in the current working directory. It uses
+ * the "dot" tool from graphviz to generate the image.
+ * This functionality is only available in DEBUG builds!
+ * </ul>
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INTEGRITY_VIOLATED if the Database is broken
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_check_integrity(ham_db_t *db, uint32_t flags);
+
+/** Flag for ham_db_check_integrity */
+#define HAM_PRINT_GRAPH 1
+
+/**
+ * Set a user-provided context pointer
+ *
+ * This function sets a user-provided context pointer. This can be any
+ * arbitrary pointer; it is stored in the Database handle and can be
+ * retrieved with @a ham_get_context_data. It is mainly used by Wrappers
+ * and language bindings.
+ *
+ * @param db A valid Database handle
+ * @param data The pointer to the context data
+ */
+HAM_EXPORT void HAM_CALLCONV
+ham_set_context_data(ham_db_t *db, void *data);
+
+/**
+ * Retrieves a user-provided context pointer
+ *
+ * This function retrieves a user-provided context pointer. This can be any
+ * arbitrary pointer which was previously stored with @a ham_set_context_data.
+ *
+ * @param db A valid Database handle
+ * @param dont_lock Whether the Environment mutex should be locked or not;
+ * this is used to avoid recursive locks when retrieving the context
+ * data in a compare function
+ *
+ * @return The pointer to the context data
+ */
+HAM_EXPORT void * HAM_CALLCONV
+ham_get_context_data(ham_db_t *db, ham_bool_t dont_lock);
+
+/**
+ * Retrieves the Database handle of a Cursor
+ *
+ * @param cursor A valid Cursor handle
+ *
+ * @return The Database handle of @a cursor
+ */
+HAM_EXPORT ham_db_t * HAM_CALLCONV
+ham_cursor_get_database(ham_cursor_t *cursor);
+
+typedef struct min_max_avg_u32_t {
+ uint32_t min;
+ uint32_t max;
+ uint32_t avg;
+ uint32_t _total; /* for calculating the average */
+ uint32_t _instances; /* for calculating the average */
+} min_max_avg_u32_t;
+
+/* btree metrics */
+typedef struct btree_metrics_t {
+ /* the database name of the btree */
+ uint16_t database_name;
+
+ /* number of pages */
+ uint64_t number_of_pages;
+
+ /* number of keys */
+ uint64_t number_of_keys;
+
+ /* total btree space, including overhead */
+ uint64_t total_btree_space;
+
+ /* static overhead per page */
+ uint32_t overhead_per_page;
+
+ /* number of keys stored per page (w/o duplicates) */
+ min_max_avg_u32_t keys_per_page;
+
+ /* payload storage assigned to the KeyLists */
+ min_max_avg_u32_t keylist_ranges;
+
+ /* payload storage assigned to the RecordLists */
+ min_max_avg_u32_t recordlist_ranges;
+
+ /* storage assigned to the Indices (if available) */
+ min_max_avg_u32_t keylist_index;
+
+ /* storage assigned to the Indices (if available) */
+ min_max_avg_u32_t recordlist_index;
+
+ /* unused storage (i.e. gaps between pages, underfilled blocks etc) */
+ min_max_avg_u32_t keylist_unused;
+
+ /* unused storage (i.e. gaps between pages, underfilled blocks etc) */
+ min_max_avg_u32_t recordlist_unused;
+
+ /* number of blocks per page (if available) */
+ min_max_avg_u32_t keylist_blocks_per_page;
+
+ /* block sizes (if available) */
+ min_max_avg_u32_t keylist_block_sizes;
+} btree_metrics_t;
+
+/**
+ * Retrieves collected metrics from the hamsterdb Environment. Used mainly
+ * for testing.
+ * See below for the structure with the currently available metrics.
+ * This structure will change a lot; the first field is a version indicator
+ * that applications can use to verify that the structure layout is compatible.
+ *
+ * These metrics are NOT persisted to disk.
+ *
+ * Metrics marked "global" are stored globally and shared between multiple
+ * Environments.
+ */
+#define HAM_METRICS_VERSION 9
+
+typedef struct ham_env_metrics_t {
+ /* the version indicator - must be HAM_METRICS_VERSION */
+ uint16_t version;
+
+ /* number of total allocations for the whole lifetime of the process */
+ uint64_t mem_total_allocations;
+
+ /* currently active allocations for the whole process */
+ uint64_t mem_current_allocations;
+
+ /* current amount of memory allocated and tracked by the process
+ * (excludes memory used by the kernel or not allocated with
+ * malloc/free) */
+ uint64_t mem_current_usage;
+
+ /* peak usage of memory (for the whole process) */
+ uint64_t mem_peak_usage;
+
+ /* the heap size of this process */
+ uint64_t mem_heap_size;
+
+ /* amount of pages fetched from disk */
+ uint64_t page_count_fetched;
+
+ /* amount of pages written to disk */
+ uint64_t page_count_flushed;
+
+ /* number of index pages in this Environment */
+ uint64_t page_count_type_index;
+
+ /* number of blob pages in this Environment */
+ uint64_t page_count_type_blob;
+
+ /* number of page-manager pages in this Environment */
+ uint64_t page_count_type_page_manager;
+
+ /* number of successful freelist hits */
+ uint64_t freelist_hits;
+
+ /* number of freelist misses */
+ uint64_t freelist_misses;
+
+ /* number of successful cache hits */
+ uint64_t cache_hits;
+
+ /* number of cache misses */
+ uint64_t cache_misses;
+
+ /* number of blobs allocated */
+ uint64_t blob_total_allocated;
+
+ /* number of blobs read */
+ uint64_t blob_total_read;
+
+ /* (global) number of btree page splits */
+ uint64_t btree_smo_split;
+
+ /* (global) number of btree page merges */
+ uint64_t btree_smo_merge;
+
+ /* (global) number of extended keys */
+ uint64_t extended_keys;
+
+ /* (global) number of extended duplicate tables */
+ uint64_t extended_duptables;
+
+ /* number of bytes that the log/journal flushes to disk */
+ uint64_t journal_bytes_flushed;
+
+ /* PRO: log/journal bytes before compression */
+ uint64_t journal_bytes_before_compression;
+
+ /* PRO: log/journal bytes after compression */
+ uint64_t journal_bytes_after_compression;
+
+ /* PRO: record bytes before compression */
+ uint64_t record_bytes_before_compression;
+
+ /* PRO: record bytes after compression */
+ uint64_t record_bytes_after_compression;
+
+ /* PRO: key bytes before compression */
+ uint64_t key_bytes_before_compression;
+
+ /* PRO: key bytes after compression */
+ uint64_t key_bytes_after_compression;
+
+ /* PRO: set to the max. SIMD lane width (0 if SIMD is not available) */
+ int simd_lane_width;
+
+ /* btree metrics for leaf nodes */
+ btree_metrics_t btree_leaf_metrics;
+
+ /* btree metrics for internal nodes */
+ btree_metrics_t btree_internal_metrics;
+
+} ham_env_metrics_t;
+
+/**
+ * Retrieves the current metrics from an Environment
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_get_metrics(ham_env_t *env, ham_env_metrics_t *metrics);
+
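+/*
+ * Illustrative sketch (not part of this header): fetching the collected
+ * metrics from an open Environment "env"; error handling is omitted.
+ *
+ *   ham_env_metrics_t metrics;
+ *   memset(&metrics, 0, sizeof(metrics));
+ *
+ *   if (ham_env_get_metrics(env, &metrics) == HAM_SUCCESS) {
+ *     // metrics.version holds HAM_METRICS_VERSION for a compatible layout
+ *     // metrics.cache_hits and metrics.cache_misses describe the cache
+ *   }
+ */
+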
+/**
+ * Returns @ref HAM_TRUE if this hamsterdb library was compiled with debug
+ * diagnostics, checks and asserts
+ */
+HAM_EXPORT ham_bool_t HAM_CALLCONV
+ham_is_debug();
+
+/**
+ * Returns @ref HAM_TRUE if this hamsterdb library is the commercial
+ * closed-source "hamsterdb pro" edition
+ */
+HAM_EXPORT ham_bool_t HAM_CALLCONV
+ham_is_pro();
+
+/**
+ * Returns the end time of the evaluation period, if this is an evaluation
+ * license of the commercial closed-source "hamsterdb pro";
+ * returns 0 otherwise
+ */
+HAM_EXPORT uint32_t HAM_CALLCONV
+ham_is_pro_evaluation();
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* HAM_HAMSTERDB_INT_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_ola.h b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_ola.h
new file mode 100644
index 0000000000..f65b98b8b1
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_ola.h
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file hamsterdb_ola.h
+ * @brief Include file for hamsterdb OnLine Analytical functions
+ * @author Christoph Rupp, chris@crupp.de
+ * @version 2.1.10
+ *
+ * This API is EXPERIMENTAL!! The interface is not yet stable.
+ */
+
+#ifndef HAM_HAMSTERDB_OLA_H
+#define HAM_HAMSTERDB_OLA_H
+
+#include <ham/hamsterdb.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * A predicate function with context parameters returning a bool value.
+ *
+ * The predicate function is applied to various analytical functions
+ * of this API and is generally used to select keys where a predicate applies.
+ */
+typedef struct {
+ /** A function pointer; receives a key, returns a bool */
+ ham_bool_t (*predicate_func)(const void *key_data, uint16_t key_size,
+ void *context);
+
+ /** User-supplied context data */
+ void *context;
+
+} hola_bool_predicate_t;
+
+
+/**
+ * A structure which returns the result of an operation.
+ *
+ * For now, the result is either a @a uint64_t counter or a @a double value.
+ * The @a type parameter specifies which one is used; @a type's value is
+ * one of @a HAM_TYPE_UINT64 or @a HAM_TYPE_REAL64.
+ */
+typedef struct {
+ union {
+ /** The result as a 64bit unsigned integer */
+ uint64_t result_u64;
+
+ /** The result as a 64bit real */
+ double result_double;
+ } u;
+
+ /** The actual type in the union - one of the @a HAM_TYPE_* macros */
+ int type;
+
+} hola_result_t;
+
+
+/**
+ * Counts the keys in a Database
+ *
+ * This is a non-distinct count. If the Database has duplicate keys then
+ * they are included in the count.
+ *
+ * The actual count is returned in @a result->u.result_u64. @a result->type
+ * is set to @a HAM_TYPE_UINT64.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+hola_count(ham_db_t *db, ham_txn_t *txn, hola_result_t *result);
+
+/**
+ * Selectively counts the keys in a Database
+ *
+ * This is a non-distinct count. If the Database has duplicate keys then
+ * they are included in the count. The predicate function is applied to
+ * each key. If it returns true then the key (and its duplicates) is included
+ * in the count; otherwise the key is ignored.
+ *
+ * The actual count is returned in @a result->u.result_u64. @a result->type
+ * is set to @a HAM_TYPE_UINT64.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+hola_count_if(ham_db_t *db, ham_txn_t *txn, hola_bool_predicate_t *pred,
+ hola_result_t *result);
+
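+/*
+ * Illustrative sketch (not part of this header): counting all even keys of
+ * a uint32 Database "db" with hola_count_if; error handling is omitted.
+ *
+ *   static ham_bool_t
+ *   is_even(const void *key_data, uint16_t key_size, void *context) {
+ *     uint32_t value;
+ *     memcpy(&value, key_data, sizeof(value));
+ *     return ((value & 1) == 0) ? HAM_TRUE : HAM_FALSE;
+ *   }
+ *
+ *   hola_bool_predicate_t pred = { is_even, 0 };
+ *   hola_result_t result;
+ *
+ *   if (hola_count_if(db, 0, &pred, &result) == HAM_SUCCESS) {
+ *     // result.u.result_u64 holds the number of matching keys,
+ *     // duplicates included
+ *   }
+ */
+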
+/**
+ * Counts the distinct keys in a Database
+ *
+ * This is a distinct count. If the Database has duplicate keys then
+ * they are not included in the count.
+ *
+ * The actual count is returned in @a result->u.result_u64. @a result->type
+ * is set to @a HAM_TYPE_UINT64.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+hola_count_distinct(ham_db_t *db, ham_txn_t *txn, hola_result_t *result);
+
+/**
+ * Selectively counts the distinct keys in a Database
+ *
+ * This is a distinct count. If the Database has duplicate keys then
+ * they are not included in the count. The predicate function is applied to
+ * each key. If it returns true then the key is included in the count;
+ * otherwise the key is ignored.
+ *
+ * The actual count is returned in @a result->u.result_u64. @a result->type
+ * is set to @a HAM_TYPE_UINT64.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+hola_count_distinct_if(ham_db_t *db, ham_txn_t *txn,
+ hola_bool_predicate_t *pred, hola_result_t *result);
+
+/**
+ * Calculates the average of all keys.
+ *
+ * This is a non-distinct function and includes all duplicate keys.
+ *
+ * Internally, a 64bit counter is used for the calculation. This function
+ * does not protect against an overflow of this counter.
+ *
+ * The keys in the database (@a db) have to be numeric, which means that
+ * the Database's type must be one of @a HAM_TYPE_UINT8, @a HAM_TYPE_UINT16,
+ * @a HAM_TYPE_UINT32, @a HAM_TYPE_UINT64, @a HAM_TYPE_REAL32 or
+ * @a HAM_TYPE_REAL64.
+ *
+ * The actual result is returned in @a result->u.result_u64 or
+ * @a result->u.result_double, depending on the Database's configuration.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ * @return @ref HAM_INV_PARAMETER if the database is not numeric
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+hola_average(ham_db_t *db, ham_txn_t *txn, hola_result_t *result);
+
+/**
+ * Calculates the average of all keys where a predicate applies.
+ *
+ * This is a non-distinct function and includes all duplicate keys for which
+ * the predicate function returns true.
+ *
+ * Internally, a 64bit counter is used for the calculation. This function
+ * does not protect against an overflow of this counter.
+ *
+ * The keys in the database (@a db) have to be numeric, which means that
+ * the Database's type must be one of @a HAM_TYPE_UINT8, @a HAM_TYPE_UINT16,
+ * @a HAM_TYPE_UINT32, @a HAM_TYPE_UINT64, @a HAM_TYPE_REAL32 or
+ * @a HAM_TYPE_REAL64.
+ *
+ * The actual result is returned in @a result->u.result_u64 or
+ * @a result->u.result_double, depending on the Database's configuration.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ * @return @ref HAM_INV_PARAMETER if the database is not numeric
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+hola_average_if(ham_db_t *db, ham_txn_t *txn, hola_bool_predicate_t *pred,
+ hola_result_t *result);
+
+/**
+ * Calculates the sum of all keys.
+ *
+ * This is a non-distinct function and includes all duplicate keys.
+ *
+ * Internally, a 64bit counter is used for the calculation. This function
+ * does not protect against an overflow of this counter.
+ *
+ * The keys in the database (@a db) have to be numeric, which means that
+ * the Database's type must be one of @a HAM_TYPE_UINT8, @a HAM_TYPE_UINT16,
+ * @a HAM_TYPE_UINT32, @a HAM_TYPE_UINT64, @a HAM_TYPE_REAL32 or
+ * @a HAM_TYPE_REAL64.
+ *
+ * The actual result is returned in @a result->u.result_u64 or
+ * @a result->u.result_double, depending on the Database's configuration.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ * @return @ref HAM_INV_PARAMETER if the database is not numeric
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+hola_sum(ham_db_t *db, ham_txn_t *txn, hola_result_t *result);
+
+/**
+ * Calculates the sum of all keys where a predicate applies.
+ *
+ * This is a non-distinct function and includes all duplicate keys for which
+ * the predicate function returns true.
+ *
+ * Internally, a 64bit counter is used for the calculation. This function
+ * does not protect against an overflow of this counter.
+ *
+ * The keys in the database (@a db) have to be numeric, which means that
+ * the Database's type must be one of @a HAM_TYPE_UINT8, @a HAM_TYPE_UINT16,
+ * @a HAM_TYPE_UINT32, @a HAM_TYPE_UINT64, @a HAM_TYPE_REAL32 or
+ * @a HAM_TYPE_REAL64.
+ *
+ * The actual result is returned in @a result->u.result_u64 or
+ * @a result->u.result_double, depending on the Database's configuration.
+ *
+ * @return @ref HAM_SUCCESS upon success
+ * @return @ref HAM_INV_PARAMETER if one of the parameters is NULL
+ * @return @ref HAM_INV_PARAMETER if the database is not numeric
+ */
+HAM_EXPORT ham_status_t HAM_CALLCONV
+hola_sum_if(ham_db_t *db, ham_txn_t *txn, hola_bool_predicate_t *pred,
+ hola_result_t *result);
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* HAM_HAMSTERDB_OLA_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_srv.h b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_srv.h
new file mode 100644
index 0000000000..83ffef8f2e
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/include/ham/hamsterdb_srv.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HAM_HAMSTERDB_SRV_H
+#define HAM_HAMSTERDB_SRV_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <ham/hamsterdb.h>
+
+/**
+ * @defgroup ham_server hamsterdb Embedded Server
+ * @{
+ */
+
+/**
+ * A configuration structure for the embedded server
+ *
+ * Always initialize the full structure with zeroes before filling in
+ * individual fields.
+ */
+typedef struct {
+ /** The server port */
+ uint16_t port;
+
+  /** Path of the access log, or NULL if no log should be written
+ * - currently NOT USED! */
+ const char *access_log_path;
+
+ /** Path of the error log, or NULL if no log should be written
+ * - currently NOT USED! */
+ const char *error_log_path;
+
+} ham_srv_config_t;
+
+/**
+ * A server handle
+ */
+struct ham_srv_t;
+typedef struct ham_srv_t ham_srv_t;
+
+/**
+ * Initialize the server
+ *
+ * This function initializes a ham_srv_t handle and starts the hamsterdb
+ * database server on the port specified in the configuration object.
+ *
+ * @param config A configuration structure
+ * @param srv A pointer to a ham_srv_t pointer; will be allocated
+ * if this function returns successfully
+ *
+ * @return HAM_SUCCESS on success
+ * @return HAM_OUT_OF_MEMORY if memory could not be allocated
+ */
+extern ham_status_t
+ham_srv_init(ham_srv_config_t *config, ham_srv_t **srv);
+
+/**
+ * Add a hamsterdb Environment
+ *
+ * This function adds a new hamsterdb Environment to the server. The
+ * Environment has to be initialized properly by the caller. It will be
+ * served at ham://localhost:port/urlname, where @a port was specified
+ * for @ref ham_srv_init and @a urlname is the third parameter to this
+ * function.
+ *
+ * A client accessing this Environment will specify this URL as a filename,
+ * and hamsterdb will transparently connect to this server.
+ *
+ * @param srv A valid ham_srv_t handle
+ * @param env A valid hamsterdb Environment handle
+ * @param urlname URL of this Environment
+ *
+ * @return HAM_SUCCESS on success
+ * @return HAM_LIMITS_REACHED if more than the max. number of Environments
+ * were added (default limit: 128)
+ */
+extern ham_status_t
+ham_srv_add_env(ham_srv_t *srv, ham_env_t *env, const char *urlname);
+
+/**
+ * Release memory and clean up
+ *
+ * @param srv A valid ham_srv_t handle
+ *
+ * @warning
+ * This function will not close open handles (i.e. of Databases, Cursors
+ * or Transactions). The caller has to close the remaining Environment
+ * handles (@see ham_env_close).
+ */
+extern void
+ham_srv_close(ham_srv_t *srv);
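+
+/*
+ * A minimal usage sketch (assumes |env| is an open ham_env_t handle;
+ * error handling is omitted):
+ *
+ *   ham_srv_config_t cfg;
+ *   memset(&cfg, 0, sizeof(cfg));
+ *   cfg.port = 8080;
+ *
+ *   ham_srv_t *srv;
+ *   if (ham_srv_init(&cfg, &srv) == HAM_SUCCESS) {
+ *     ham_srv_add_env(srv, env, "/first.db");
+ *     // clients can now open "ham://localhost:8080/first.db"
+ *     ham_srv_close(srv);
+ *   }
+ */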
+
+/**
+ * @}
+ */
+
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* HAM_HAMSTERDB_SRV_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/include/ham/msstdint.h b/plugins/Dbx_kv/src/hamsterdb/include/ham/msstdint.h
new file mode 100644
index 0000000000..4fe0ef9a9b
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/include/ham/msstdint.h
@@ -0,0 +1,259 @@
+// ISO C9x compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
+//
+// Copyright (c) 2006-2013 Alexander Chemeris
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the product nor the names of its contributors may
+// be used to endorse or promote products derived from this software
+// without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#if _MSC_VER >= 1600 // [
+#include <stdint.h>
+#else // ] _MSC_VER >= 1600 [
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+// error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#ifdef __cplusplus
+extern "C" {
+#endif
+# include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+# define _W64 __w64
+# else
+# define _W64
+# endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 doesn't
+// realize that, e.g. char has the same size as __int8
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300)
+ typedef signed char int8_t;
+ typedef signed short int16_t;
+ typedef signed int int32_t;
+ typedef unsigned char uint8_t;
+ typedef unsigned short uint16_t;
+ typedef unsigned int uint32_t;
+#else
+ typedef signed __int8 int8_t;
+ typedef signed __int16 int16_t;
+ typedef signed __int32 int32_t;
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int16 uint16_t;
+ typedef unsigned __int32 uint32_t;
+#endif
+typedef signed __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t int_least8_t;
+typedef int16_t int_least16_t;
+typedef int32_t int_least32_t;
+typedef int64_t int_least64_t;
+typedef uint8_t uint_least8_t;
+typedef uint16_t uint_least16_t;
+typedef uint32_t uint_least32_t;
+typedef uint64_t uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t int_fast8_t;
+typedef int16_t int_fast16_t;
+typedef int32_t int_fast32_t;
+typedef int64_t int_fast64_t;
+typedef uint8_t uint_fast8_t;
+typedef uint16_t uint_fast16_t;
+typedef uint32_t uint_fast32_t;
+typedef uint64_t uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+ typedef signed __int64 intptr_t;
+ typedef unsigned __int64 uintptr_t;
+#else // _WIN64 ][
+ typedef _W64 signed int intptr_t;
+ typedef _W64 unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t intmax_t;
+typedef uint64_t uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN ((int8_t)_I8_MIN)
+#define INT8_MAX _I8_MAX
+#define INT16_MIN ((int16_t)_I16_MIN)
+#define INT16_MAX _I16_MAX
+#define INT32_MIN ((int32_t)_I32_MIN)
+#define INT32_MAX _I32_MAX
+#define INT64_MIN ((int64_t)_I64_MIN)
+#define INT64_MAX _I64_MAX
+#define UINT8_MAX _UI8_MAX
+#define UINT16_MAX _UI16_MAX
+#define UINT32_MAX _UI32_MAX
+#define UINT64_MAX _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN INT8_MIN
+#define INT_LEAST8_MAX INT8_MAX
+#define INT_LEAST16_MIN INT16_MIN
+#define INT_LEAST16_MAX INT16_MAX
+#define INT_LEAST32_MIN INT32_MIN
+#define INT_LEAST32_MAX INT32_MAX
+#define INT_LEAST64_MIN INT64_MIN
+#define INT_LEAST64_MAX INT64_MAX
+#define UINT_LEAST8_MAX UINT8_MAX
+#define UINT_LEAST16_MAX UINT16_MAX
+#define UINT_LEAST32_MAX UINT32_MAX
+#define UINT_LEAST64_MAX UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN INT8_MIN
+#define INT_FAST8_MAX INT8_MAX
+#define INT_FAST16_MIN INT16_MIN
+#define INT_FAST16_MAX INT16_MAX
+#define INT_FAST32_MIN INT32_MIN
+#define INT_FAST32_MAX INT32_MAX
+#define INT_FAST64_MIN INT64_MIN
+#define INT_FAST64_MAX INT64_MAX
+#define UINT_FAST8_MAX UINT8_MAX
+#define UINT_FAST16_MAX UINT16_MAX
+#define UINT_FAST32_MAX UINT32_MAX
+#define UINT_FAST64_MAX UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+# define INTPTR_MIN INT64_MIN
+# define INTPTR_MAX INT64_MAX
+# define UINTPTR_MAX UINT64_MAX
+#else // _WIN64 ][
+# define INTPTR_MIN INT32_MIN
+# define INTPTR_MAX INT32_MAX
+# define UINTPTR_MAX UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN INT64_MIN
+#define INTMAX_MAX INT64_MAX
+#define UINTMAX_MAX UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+# define PTRDIFF_MIN _I64_MIN
+# define PTRDIFF_MAX _I64_MAX
+#else // _WIN64 ][
+# define PTRDIFF_MIN _I32_MIN
+# define PTRDIFF_MAX _I32_MAX
+#endif // _WIN64 ]
+
+#define SIG_ATOMIC_MIN INT_MIN
+#define SIG_ATOMIC_MAX INT_MAX
+
+#ifndef SIZE_MAX // [
+# ifdef _WIN64 // [
+# define SIZE_MAX _UI64_MAX
+# else // _WIN64 ][
+# define SIZE_MAX _UI32_MAX
+# endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+# define WCHAR_MIN 0
+#endif // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+# define WCHAR_MAX _UI16_MAX
+#endif // WCHAR_MAX ]
+
+#define WINT_MIN 0
+#define WINT_MAX _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Limits of other integer types
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val) val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val) val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>.
+// Check out Issue 9 for the details.
+#ifndef INTMAX_C // [
+# define INTMAX_C INT64_C
+#endif // INTMAX_C ]
+#ifndef UINTMAX_C // [
+# define UINTMAX_C UINT64_C
+#endif // UINTMAX_C ]
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+#endif // _MSC_VER >= 1600 ]
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/plugins/Dbx_kv/src/hamsterdb/include/ham/types.h b/plugins/Dbx_kv/src/hamsterdb/include/ham/types.h
new file mode 100644
index 0000000000..54d75aa7e0
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/include/ham/types.h
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file types.h
+ * @brief Portable typedefs for hamsterdb Embedded Storage.
+ * @author Christoph Rupp, chris@crupp.de
+ *
+ */
+
+#ifndef HAM_TYPES_H
+#define HAM_TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Check the operating system and word size
+ */
+#ifdef WIN32
+# undef HAM_OS_WIN32
+# define HAM_OS_WIN32 1
+# ifdef WIN64
+# undef HAM_64BIT
+# define HAM_64BIT 1
+# elif WIN32
+# undef HAM_32BIT
+# define HAM_32BIT 1
+# else
+# error "Neither WIN32 nor WIN64 defined!"
+# endif
+#else /* posix? */
+# undef HAM_OS_POSIX
+# define HAM_OS_POSIX 1
+# if defined(__LP64__) || defined(__LP64) || __WORDSIZE == 64
+# undef HAM_64BIT
+# define HAM_64BIT 1
+# else
+# undef HAM_32BIT
+# define HAM_32BIT 1
+# endif
+#endif
+
+#if defined(HAM_OS_POSIX) && defined(HAM_OS_WIN32)
+#  error "Invalid configuration - HAM_OS_POSIX and HAM_OS_WIN32 are both defined"
+#endif
+
+/*
+ * improve memory debugging on WIN32 by using crtdbg.h (only MSVC
+ * compiler and debug builds!)
+ *
+ * make sure crtdbg.h is loaded before malloc.h!
+ */
+#if defined(_MSC_VER) && defined(HAM_OS_WIN32)
+# if (defined(WIN32) || defined(__WIN32)) && !defined(UNDER_CE)
+# if defined(DEBUG) || defined(_DEBUG)
+# ifndef _CRTDBG_MAP_ALLOC
+# define _CRTDBG_MAP_ALLOC 1
+# endif
+# endif
+# include <crtdbg.h>
+# include <malloc.h>
+# endif
+#endif
+
+/*
+ * Create the EXPORT macro for Microsoft Visual C++
+ */
+#ifndef HAM_EXPORT
+# ifdef _MSC_VER
+# define HAM_EXPORT __declspec(dllexport)
+# else
+# define HAM_EXPORT extern
+# endif
+#endif
+
+/*
+ * The default calling convention is cdecl
+ */
+#ifndef HAM_CALLCONV
+# define HAM_CALLCONV
+#endif
+
+/*
+ * Common typedefs. Since stdint.h is not available on older versions of
+ * Microsoft Visual Studio, they get declared here.
+ * http://msinttypes.googlecode.com/svn/trunk/stdint.h
+ */
+#if _MSC_VER
+# include <ham/msstdint.h>
+#else
+# include <stdint.h>
+#endif
+
+/* Deprecated typedefs; used prior to 2.1.9. Please do not use them! */
+typedef int64_t ham_s64_t;
+typedef uint64_t ham_u64_t;
+typedef int32_t ham_s32_t;
+typedef uint32_t ham_u32_t;
+typedef int16_t ham_s16_t;
+typedef uint16_t ham_u16_t;
+typedef int8_t ham_s8_t;
+typedef uint8_t ham_u8_t;
+
+/*
+ * Undefine macros to avoid macro redefinitions
+ */
+#undef HAM_INVALID_FD
+#undef HAM_FALSE
+#undef HAM_TRUE
+
+/**
+ * a boolean type
+ */
+typedef int ham_bool_t;
+#define HAM_FALSE 0
+#define HAM_TRUE (!HAM_FALSE)
+
+/**
+ * typedef for error- and status-code
+ */
+typedef int ham_status_t;
+
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* HAM_TYPES_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/0root/root.h b/plugins/Dbx_kv/src/hamsterdb/src/0root/root.h
new file mode 100644
index 0000000000..38e003b7c7
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/0root/root.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The root of all evil. This header file must be included *before all others*!
+ *
+ * @thread_safe: yes
+ * @exception_safe: nothrow
+ */
+
+#ifndef HAM_ROOT_H
+#define HAM_ROOT_H
+
+//#define HAM_ENABLE_HELGRIND 1
+
+// some feature macros in config.h must be set *before* inclusion
+// of any system headers to have the desired effect.
+// assume sane default values if there is no config.h.
+#ifdef HAVE_CONFIG_H
+# include "../config.h"
+#else
+# define HAVE_MMAP 1
+# define HAVE_UNMMAP 1
+# define HAVE_PREAD 1
+# define HAVE_PWRITE 1
+#endif
+
+#include "ham/types.h"
+
+// check for a valid build
+#if (!defined(HAM_DEBUG))
+# if (defined(_DEBUG) || defined(DEBUG))
+# define HAM_DEBUG 1
+# endif
+#endif
+
+// the default cache size is 2 MB
+#define HAM_DEFAULT_CACHE_SIZE (2 * 1024 * 1024)
+
+// the default page size is 16 KB
+#define HAM_DEFAULT_PAGE_SIZE (16 * 1024)
+
+// use tcmalloc?
+#if HAVE_GOOGLE_TCMALLOC_H == 1
+# if HAVE_LIBTCMALLOC_MINIMAL == 1
+# define HAM_USE_TCMALLOC 1
+# endif
+#endif
+
+#include <stddef.h>
+#define OFFSETOF(type, member) offsetof(type, member)
+
+// helper macros to improve CPU branch prediction
+#if defined __GNUC__
+# define likely(x) __builtin_expect ((x), 1)
+# define unlikely(x) __builtin_expect ((x), 0)
+#else
+# define likely(x) (x)
+# define unlikely(x) (x)
+#endif
+
+#ifdef WIN32
+// MSVC: disable warning about use of 'this' in base member initializer list
+# pragma warning(disable:4355)
+#  define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#endif
+
+// some compilers define min and max as macros; this leads to errors
+// when using std::min and std::max
+#ifdef min
+# undef min
+#endif
+
+#ifdef max
+# undef max
+#endif
+
+// a macro to cast pointers to u64 and vice versa to avoid compiler
+// warnings if the sizes of ptr and u64 are not equal
+#if defined(HAM_32BIT) && (!defined(_MSC_VER))
+# define U64_TO_PTR(p) (uint8_t *)(int)p
+# define PTR_TO_U64(p) (uint64_t)(int)p
+#else
+# define U64_TO_PTR(p) p
+# define PTR_TO_U64(p) p
+#endif
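+
+// e.g. (sketch; |data| is a hypothetical uint8_t pointer):
+//   uint64_t address = PTR_TO_U64(data);
+//   uint8_t *ptr = U64_TO_PTR(address);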
+
+#endif /* HAM_ROOT_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/abi.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/abi.h
new file mode 100644
index 0000000000..57c086f24c
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/abi.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Returns the demangled name of a class
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_ABI_H
+#define HAM_ABI_H
+
+#include "0root/root.h"
+
+#ifdef HAVE_GCC_ABI_DEMANGLE
+# include <cxxabi.h>
+#endif
+
+#include <string>
+#include <typeinfo>
+#include <stdlib.h>
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+template<class T> inline std::string
+get_classname(const T& t)
+{
+#ifdef HAVE_GCC_ABI_DEMANGLE
+ int status;
+ const std::type_info &ti = typeid(t);
+ char *name = abi::__cxa_demangle(ti.name(), 0, 0, &status);
+ if (!name)
+ return ("");
+ if (status) {
+ ::free(name);
+ return ("");
+ }
+ std::string s = name;
+ ::free(name);
+ return (s);
+#else
+ return ("");
+#endif
+}
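+
+// A usage sketch (|SomeClass| is a placeholder; a demangled name is only
+// returned if HAVE_GCC_ABI_DEMANGLE is defined, otherwise the string is
+// empty):
+//
+//   SomeClass instance;
+//   std::string name = get_classname(instance); // e.g. "hamsterdb::SomeClass"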
+
+} // namespace hamsterdb
+
+#endif /* HAM_ABI_H */
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/dynamic_array.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/dynamic_array.h
new file mode 100644
index 0000000000..8cd8e2c8b7
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/dynamic_array.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A class managing a dynamically sized array for arbitrary types
+ *
+ * @exception_safe: strong
+ * @thread_safe: no
+ */
+
+#ifndef HAM_DYNAMIC_ARRAY_H
+#define HAM_DYNAMIC_ARRAY_H
+
+#include "0root/root.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1mem/mem.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/*
+ * The DynamicArray class is a dynamic, resizable array. The internal memory
+ * is released when the DynamicArray instance is destructed.
+ *
+ * Unlike std::vector, the DynamicArray uses libc functions for constructing,
+ * copying and initializing elements; it is therefore only suitable for
+ * trivially copyable (POD) types.
+ */
+template<typename T>
+class DynamicArray
+{
+ public:
+ typedef T value_t;
+ typedef T *pointer_t;
+
+ DynamicArray(size_t size = 0)
+ : m_ptr(0), m_size(0), m_own(true) {
+ resize(size);
+ }
+
+ DynamicArray(size_t size, uint8_t fill_byte)
+ : m_ptr(0), m_size(0), m_own(true) {
+ resize(size);
+ if (m_ptr)
+ ::memset(m_ptr, fill_byte, sizeof(T) * m_size);
+ }
+
+ ~DynamicArray() {
+ clear();
+ }
+
+ void append(const T *ptr, size_t size) {
+ size_t old_size = m_size;
+ T *p = (T *)resize(m_size + size);
+ ::memcpy(p + old_size, ptr, sizeof(T) * size);
+ }
+
+ void copy(const T *ptr, size_t size) {
+ resize(size);
+ ::memcpy(m_ptr, ptr, sizeof(T) * size);
+ m_size = size;
+ }
+
+ void overwrite(uint32_t position, const T *ptr, size_t size) {
+ ::memcpy(((uint8_t *)m_ptr) + position, ptr, sizeof(T) * size);
+ }
+
+ T *resize(size_t size) {
+ if (size > m_size) {
+ m_ptr = Memory::reallocate<T>(m_ptr, sizeof(T) * size);
+ m_size = size;
+ }
+ return (m_ptr);
+ }
+
+ T *resize(size_t size, uint8_t fill_byte) {
+ resize(size);
+ if (m_ptr)
+ ::memset(m_ptr, fill_byte, sizeof(T) * size);
+ return (m_ptr);
+ }
+
+ size_t get_size() const {
+ return (m_size);
+ }
+
+ void set_size(size_t size) {
+ m_size = size;
+ }
+
+ T *get_ptr() {
+ return (m_ptr);
+ }
+
+ const T *get_ptr() const {
+ return (m_ptr);
+ }
+
+ void assign(T *ptr, size_t size) {
+ clear();
+ m_ptr = ptr;
+ m_size = size;
+ }
+
+ void clear(bool release_memory = true) {
+ if (m_own && release_memory)
+ Memory::release(m_ptr);
+ m_ptr = 0;
+ m_size = 0;
+ }
+
+ bool is_empty() const {
+ return (m_size == 0);
+ }
+
+ void disown() {
+ m_own = false;
+ }
+
+ private:
+ // Pointer to the data
+ T *m_ptr;
+
+ // The size of the array
+ size_t m_size;
+
+ // True if the destructor should free the pointer
+ bool m_own;
+};
+
+/*
+ * A ByteArray is a DynamicArray for bytes
+ */
+typedef DynamicArray<uint8_t> ByteArray;
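+
+/*
+ * A usage sketch:
+ *
+ *   ByteArray buffer;
+ *   buffer.copy((const uint8_t *)"abc", 3);
+ *   buffer.append((const uint8_t *)"de", 2);
+ *   // buffer.get_size() == 5; the memory is released in the destructor
+ */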
+
+} // namespace hamsterdb
+
+#endif // HAM_DYNAMIC_ARRAY_H
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/error.cc b/plugins/Dbx_kv/src/hamsterdb/src/1base/error.cc
new file mode 100644
index 0000000000..c7ebc530bb
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/error.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/util.h"
+#include "1globals/globals.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+void (*ham_test_abort)(void);
+
+static int
+dbg_snprintf(char *str, size_t size, const char *format, ...)
+{
+ int s;
+
+ va_list ap;
+ va_start(ap, format);
+ s = util_vsnprintf(str, size, format, ap);
+ va_end(ap);
+
+ return (s);
+}
+
+void HAM_CALLCONV
+default_errhandler(int level, const char *message)
+{
+#ifndef HAM_DEBUG
+ if (level == HAM_DEBUG_LEVEL_DEBUG)
+ return;
+#endif
+ fprintf(stderr, "%s\n", message);
+}
+
+void
+dbg_prepare(int level, const char *file, int line, const char *function,
+ const char *expr)
+{
+ Globals::ms_error_level = level;
+ Globals::ms_error_file = file;
+ Globals::ms_error_line = line;
+ Globals::ms_error_expr = expr;
+ Globals::ms_error_function = function;
+}
+
+void
+dbg_log(const char *format, ...)
+{
+ int s = 0;
+ char buffer[1024 * 4];
+
+ va_list ap;
+ va_start(ap, format);
+#ifdef HAM_DEBUG
+ s = dbg_snprintf(buffer, sizeof(buffer), "%s[%d]: ",
+ Globals::ms_error_file, Globals::ms_error_line);
+ util_vsnprintf(buffer + s, sizeof(buffer) - s, format, ap);
+#else
+ if (Globals::ms_error_function)
+ s = dbg_snprintf(buffer, sizeof(buffer), "%s: ",
+ Globals::ms_error_function);
+ util_vsnprintf(buffer + s, sizeof(buffer) - s, format, ap);
+#endif
+ va_end(ap);
+
+ Globals::ms_error_handler(Globals::ms_error_level, buffer);
+}
+
+/* coverity[+kill] */
+void
+dbg_verify_failed(int level, const char *file, int line, const char *function,
+ const char *expr)
+{
+ char buffer[1024 * 4];
+
+ if (!expr)
+ expr = "(none)";
+
+ dbg_snprintf(buffer, sizeof(buffer),
+ "ASSERT FAILED in file %s, line %d:\n\t\"%s\"\n",
+ file, line, expr);
+ buffer[sizeof(buffer) - 1] = '\0';
+
+ Globals::ms_error_handler(Globals::ms_error_level, buffer);
+
+ if (ham_test_abort)
+ ham_test_abort();
+ else
+ abort();
+}
+
+} // namespace hamsterdb
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/error.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/error.h
new file mode 100644
index 0000000000..f02a8a8c24
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/error.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Error handling routines, assert macros, logging facilities
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no (b/c of the logging macros)
+ */
+
+#ifndef HAM_ERROR_H
+#define HAM_ERROR_H
+
+#include "0root/root.h"
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// A generic exception for storing a status code
+//
+struct Exception
+{
+ Exception(ham_status_t st)
+ : code(st) {
+ }
+
+ ham_status_t code;
+};
+
+// the default error handler
+void HAM_CALLCONV
+default_errhandler(int level, const char *message);
+
+extern void
+dbg_prepare(int level, const char *file, int line, const char *function,
+ const char *expr);
+
+extern void
+dbg_log(const char *format, ...);
+
+#define CLANG_ANALYZER_NORETURN
+#if __clang__
+# if __has_feature(attribute_analyzer_noreturn)
+# undef CLANG_ANALYZER_NORETURN
+# define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn))
+# endif
+#endif
+
+// causes the actual abort()
+extern void
+dbg_verify_failed(int level, const char *file, int line,
+ const char *function, const char *expr) CLANG_ANALYZER_NORETURN;
+
+// a hook for unittests; will be triggered when an assert fails
+extern void (*ham_test_abort)();
+
+// if your compiler does not support __FUNCTION__, you can define it here:
+// #define __FUNCTION__ 0
+
+/*
+ * In debug mode, trace() messages are written to stderr and assert()
+ * is enabled.
+ *
+ * Not every preprocessor supports ellipsis as macro arguments; therefore
+ * the format string and its arguments have to be wrapped in an extra pair
+ * of parentheses, so the preprocessor treats them as a single argument.
+ * The output is not locked, which is why these macros are not thread-safe.
+ */
+#ifdef HAM_DEBUG
+# define ham_assert(e) while (!(e)) { \
+ hamsterdb::dbg_verify_failed(HAM_DEBUG_LEVEL_FATAL, __FILE__, \
+ __LINE__, __FUNCTION__, #e); \
+ break; \
+ }
+#else /* !HAM_DEBUG */
+# define ham_assert(e) (void)0
+#endif /* HAM_DEBUG */
+
+// ham_trace(), ham_log() and ham_verify() are available in every build
+#define ham_trace(f) do { \
+ hamsterdb::dbg_prepare(HAM_DEBUG_LEVEL_DEBUG, __FILE__, \
+ __LINE__, __FUNCTION__, 0); \
+ hamsterdb::dbg_log f; \
+ } while (0)
+
+#define ham_log(f) do { \
+ hamsterdb::dbg_prepare(HAM_DEBUG_LEVEL_NORMAL, __FILE__, \
+ __LINE__, __FUNCTION__, 0); \
+ hamsterdb::dbg_log f; \
+ } while (0)
+
+#define ham_verify(e) if (!(e)) { \
+ hamsterdb::dbg_verify_failed(HAM_DEBUG_LEVEL_FATAL, __FILE__, \
+ __LINE__, __FUNCTION__, #e); \
+ }
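+
+/*
+ * Usage sketch (|num_pages| and |page| are placeholders) - note the extra
+ * pair of parentheses around the arguments of ham_trace and ham_log:
+ *
+ *   ham_trace(("parameter %d is invalid", 2));
+ *   ham_log(("flushing %u pages", (unsigned)num_pages));
+ *   ham_assert(num_pages > 0);
+ *   ham_verify(page != 0);
+ */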
+
+} // namespace hamsterdb
+
+#endif /* HAM_ERROR_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/mutex.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/mutex.h
new file mode 100644
index 0000000000..0e09ae046c
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/mutex.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A operating-system dependent mutex
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_MUTEX_H
+#define HAM_MUTEX_H
+
+#include "0root/root.h"
+
+#define BOOST_ALL_NO_LIB // disable MSVC auto-linking
+#include <boost/version.hpp>
+#include <boost/thread/mutex.hpp>
+#include <boost/thread/recursive_mutex.hpp>
+#include <boost/thread/thread.hpp>
+#include <boost/thread/tss.hpp>
+#include <boost/thread/condition.hpp>
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+typedef boost::mutex::scoped_lock ScopedLock;
+typedef boost::thread Thread;
+typedef boost::condition Condition;
+typedef boost::mutex Mutex;
+typedef boost::recursive_mutex RecursiveMutex;
+
+} // namespace hamsterdb
+
+#endif /* HAM_MUTEX_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/packstart.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/packstart.h
new file mode 100644
index 0000000000..3a6b1981a7
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/packstart.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Macros for packing structures; should work with most compilers.
+ *
+ * Example usage:
+ *
+ * #include "packstart.h"
+ *
+ * typedef HAM_PACK_0 struct HAM_PACK_1 foo {
+ * int bar;
+ * } HAM_PACK_2 foo_t;
+ *
+ * #include "packstop.h"
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+/* This class does NOT include root.h! */
+
+#ifdef __GNUC__
+# if (((__GNUC__==2) && (__GNUC_MINOR__>=7)) || (__GNUC__>2))
+# define HAM_PACK_2 __attribute__ ((packed))
+# define _NEWGNUC_
+# endif
+#endif
+
+#ifdef __WATCOMC__
+# define HAM_PACK_0 _Packed
+#endif
+
+#if (defined(_MSC_VER) && (_MSC_VER >= 900)) || defined(__BORLANDC__)
+# define _NEWMSC_
+#endif
+#if !defined(_NEWGNUC_) && !defined(__WATCOMC__) && !defined(_NEWMSC_)
+# pragma pack(1)
+#endif
+#ifdef _NEWMSC_
+# pragma pack(push, 1)
+# define HAM_PACK_2 __declspec(align(1))
+#endif
+
+#if defined(_NEWMSC_) && !defined(_WIN32_WCE)
+# pragma pack(push, 1)
+# define HAM_PACK_2 __declspec(align(1))
+#endif
+
+#ifndef HAM_PACK_0
+# define HAM_PACK_0
+#endif
+
+#ifndef HAM_PACK_1
+# define HAM_PACK_1
+#endif
+
+#ifndef HAM_PACK_2
+# define HAM_PACK_2
+#endif
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/packstop.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/packstop.h
new file mode 100644
index 0000000000..a32566f4f9
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/packstop.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Macros for packing structures; should work with most compilers.
+ * See packstart.h for a usage example.
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+/* This class does NOT include root.h! */
+
+#if !defined(_NEWGNUC_) && !defined(__WATCOMC__) && !defined(_NEWMSC_)
+# pragma pack()
+#endif
+#ifdef _NEWMSC_
+# pragma pack(pop)
+#endif
+#if defined(_NEWMSC_) && !defined(_WIN32_WCE)
+# pragma pack(pop)
+#endif
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/pickle.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/pickle.h
new file mode 100644
index 0000000000..8927e08910
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/pickle.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Class for pickling/unpickling data to a buffer
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_PICKLE_H
+#define HAM_PICKLE_H
+
+#include "0root/root.h"
+
+#include "ham/types.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Pickle {
+ /* encodes a uint64 number and stores it in |p|; returns the number of
+ * bytes used */
+ static size_t encode_u64(uint8_t *p, uint64_t n) {
+ if (n <= 0xf) {
+ *p = (uint8_t)n;
+ return (1);
+ }
+ if (n <= 0xff) {
+ *(p + 1) = (n & 0xf0) >> 4;
+ *(p + 0) = n & 0xf;
+ return (2);
+ }
+ if (n <= 0xfff) {
+ *(p + 2) = (n & 0xf00) >> 8;
+ *(p + 1) = (n & 0xf0) >> 4;
+ *(p + 0) = n & 0xf;
+ return (3);
+ }
+ if (n <= 0xffff) {
+ *(p + 3) = (n & 0xf000) >> 12;
+ *(p + 2) = (n & 0xf00) >> 8;
+ *(p + 1) = (n & 0xf0) >> 4;
+ *(p + 0) = n & 0xf;
+ return (4);
+ }
+ if (n <= 0xfffff) {
+ *(p + 4) = (n & 0xf0000) >> 16;
+ *(p + 3) = (n & 0xf000) >> 12;
+ *(p + 2) = (n & 0xf00) >> 8;
+ *(p + 1) = (n & 0xf0) >> 4;
+ *(p + 0) = n & 0xf;
+ return (5);
+ }
+    if (n <= 0xffffff) {
+      *(p + 5) = (n & 0xf00000) >> 20;
+      *(p + 4) = (n & 0xf0000) >> 16;
+      *(p + 3) = (n & 0xf000) >> 12;
+      *(p + 2) = (n & 0xf00) >> 8;
+      *(p + 1) = (n & 0xf0) >> 4;
+      *(p + 0) = n & 0xf;
+      return (6);
+    }
+    if (n <= 0xfffffff) {
+      *(p + 6) = (n & 0xf000000) >> 24;
+      *(p + 5) = (n & 0xf00000) >> 20;
+      *(p + 4) = (n & 0xf0000) >> 16;
+      *(p + 3) = (n & 0xf000) >> 12;
+      *(p + 2) = (n & 0xf00) >> 8;
+      *(p + 1) = (n & 0xf0) >> 4;
+      *(p + 0) = n & 0xf;
+      return (7);
+    }
+    *(p + 7) = (n & 0xf0000000) >> 28;
+    *(p + 6) = (n & 0xf000000) >> 24;
+    *(p + 5) = (n & 0xf00000) >> 20;
+    *(p + 4) = (n & 0xf0000) >> 16;
+    *(p + 3) = (n & 0xf000) >> 12;
+    *(p + 2) = (n & 0xf00) >> 8;
+    *(p + 1) = (n & 0xf0) >> 4;
+    *(p + 0) = n & 0xf;
+    return (8);
+ }
+
+ /* decodes and returns a pickled number of |len| bytes */
+ static uint64_t decode_u64(size_t len, uint8_t *p) {
+ uint64_t ret = 0;
+
+ for (size_t i = 0; i < len - 1; i++) {
+ ret += *(p + (len - i - 1));
+ ret <<= 4;
+ }
+
+    // the last nibble is added without a further shift
+ return (ret + *p);
+ }
+};
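+
+/*
+ * A round-trip sketch (the encoding stores one nibble per byte):
+ *
+ *   uint8_t buffer[8];
+ *   size_t len = Pickle::encode_u64(buffer, 0x1234);   // len == 4
+ *   uint64_t n = Pickle::decode_u64(len, buffer);      // n == 0x1234
+ */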
+
+} // namespace hamsterdb
+
+#endif // HAM_PICKLE_H
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/scoped_ptr.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/scoped_ptr.h
new file mode 100644
index 0000000000..b920059aad
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/scoped_ptr.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A (stupid) smart pointer
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_SCOPED_PTR_H
+#define HAM_SCOPED_PTR_H
+
+#include "0root/root.h"
+
+#define BOOST_ALL_NO_LIB // disable MSVC auto-linking
+#include <boost/scoped_ptr.hpp>
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+template <typename T>
+struct ScopedPtr : public boost::scoped_ptr<T>
+{
+ ScopedPtr()
+ : boost::scoped_ptr<T>() {
+ }
+
+ ScopedPtr(T *t)
+ : boost::scoped_ptr<T>(t) {
+ }
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_SCOPED_PTR_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/spinlock.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/spinlock.h
new file mode 100644
index 0000000000..e9d917212c
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/spinlock.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A fast spinlock, taken from the boost documentation
+ * http://www.boost.org/doc/libs/1_57_0/doc/html/atomic/usage_examples.html
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_SPINLOCK_H
+#define HAM_SPINLOCK_H
+
+#include "0root/root.h"
+
+#include <stdio.h>
+#ifndef HAM_OS_WIN32
+# include <sched.h>
+# include <unistd.h>
+#endif
+#include <boost/atomic.hpp>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1base/mutex.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+#ifdef HAM_ENABLE_HELGRIND
+typedef Mutex Spinlock;
+#else
+
+class Spinlock {
+ typedef enum {
+ kLocked,
+ kUnlocked,
+ kSpinThreshold = 10
+ } LockState;
+
+ public:
+ Spinlock()
+ : m_state(kUnlocked) {
+ }
+
+ // Need user-defined copy constructor because boost::atomic<> is not
+ // copyable
+ Spinlock(const Spinlock &other)
+ : m_state(other.m_state.load()) {
+ }
+
+ void lock() {
+ int k = 0;
+ while (m_state.exchange(kLocked, boost::memory_order_acquire) == kLocked)
+ spin(++k);
+ }
+
+ void unlock() {
+ m_state.store(kUnlocked, boost::memory_order_release);
+ }
+
+ bool try_lock() {
+ return (m_state.exchange(kLocked, boost::memory_order_acquire)
+ != kLocked);
+ }
+
+ static void spin(int loop) {
+ if (loop < kSpinThreshold) {
+#ifdef HAM_OS_WIN32
+ ::Sleep(0);
+#elif HAVE_SCHED_YIELD
+ ::sched_yield();
+#else
+ ham_assert(!"Please implement me");
+#endif
+ }
+ else {
+#ifdef HAM_OS_WIN32
+ ::Sleep(25);
+#elif HAVE_USLEEP
+ ::usleep(25);
+#else
+ ham_assert(!"Please implement me");
+#endif
+ }
+ }
+
+ private:
+ boost::atomic<LockState> m_state;
+};
+#endif // HAM_ENABLE_HELGRIND
+
+class ScopedSpinlock {
+ public:
+ ScopedSpinlock(Spinlock &lock)
+ : m_spinlock(lock) {
+ m_spinlock.lock();
+ }
+
+ ~ScopedSpinlock() {
+ m_spinlock.unlock();
+ }
+
+ private:
+ Spinlock &m_spinlock;
+};
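+
+/*
+ * A usage sketch:
+ *
+ *   Spinlock lock;
+ *   {
+ *     ScopedSpinlock guard(lock);
+ *     // ... critical section ...
+ *   } // unlocked when |guard| goes out of scope
+ */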
+
+} // namespace hamsterdb
+
+#endif /* HAM_SPINLOCK_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/util.cc b/plugins/Dbx_kv/src/hamsterdb/src/1base/util.cc
new file mode 100644
index 0000000000..828fb3ec9d
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/util.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/util.h"
+
+namespace hamsterdb {
+
+int
+util_vsnprintf(char *str, size_t size, const char *format, va_list ap)
+{
+#if defined(HAM_OS_POSIX)
+ return vsnprintf(str, size, format, ap);
+#elif defined(HAM_OS_WIN32)
+ return _vsnprintf(str, size, format, ap);
+#else
+ (void)size;
+ return (vsprintf(str, format, ap));
+#endif
+}
+
+} // namespace hamsterdb
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1base/util.h b/plugins/Dbx_kv/src/hamsterdb/src/1base/util.h
new file mode 100644
index 0000000000..4e7857bd34
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1base/util.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Misc. utility classes and functions
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_UTIL_H
+#define HAM_UTIL_H
+
+#include "0root/root.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// vsnprintf replacement/wrapper
+//
+// uses vsprintf on platforms which do not define vsnprintf
+//
+extern int
+util_vsnprintf(char *str, size_t size, const char *format, va_list ap);
+
+//
+// snprintf replacement/wrapper
+//
+// uses _snprintf on platforms which do not provide snprintf
+//
+#ifndef HAM_OS_POSIX
+# define util_snprintf _snprintf
+#else
+# define util_snprintf snprintf
+#endif
+
+} // namespace hamsterdb
+
+#endif // HAM_UTIL_H
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1errorinducer/errorinducer.cc b/plugins/Dbx_kv/src/hamsterdb/src/1errorinducer/errorinducer.cc
new file mode 100644
index 0000000000..9f343c5ed6
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1errorinducer/errorinducer.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1errorinducer/errorinducer.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+ErrorInducer ErrorInducer::ms_instance;
+bool ErrorInducer::ms_is_active = false;
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1errorinducer/errorinducer.h b/plugins/Dbx_kv/src/hamsterdb/src/1errorinducer/errorinducer.h
new file mode 100644
index 0000000000..4a7b2107af
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1errorinducer/errorinducer.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Facility to simulate errors
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_ERRORINDUCER_H
+#define HAM_ERRORINDUCER_H
+
+#include "0root/root.h"
+
+#include <string.h>
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+// a macro to invoke errors
+#define HAM_INDUCE_ERROR(id) \
+ while (ErrorInducer::is_active()) { \
+ ham_status_t st = ErrorInducer::get_instance()->induce(id); \
+ if (st) \
+ throw Exception(st); \
+ break; \
+ }
+
+namespace hamsterdb {
+
+class ErrorInducer {
+ struct State {
+ State()
+ : loops(0), error(HAM_INTERNAL_ERROR) {
+ }
+
+ int loops;
+ ham_status_t error;
+ };
+
+ public:
+ enum Action {
+ // simulates a failure in Changeset::flush
+ kChangesetFlush,
+
+ // simulates a hang in hamserver-connect
+ kServerConnect,
+
+ kMaxActions
+ };
+
+ // Activates or deactivates the error inducer
+ static void activate(bool active) {
+ ms_is_active = active;
+ }
+
+ // Returns true if the error inducer is active
+ static bool is_active() {
+ return (ms_is_active);
+ }
+
+ // Returns the singleton instance
+ static ErrorInducer *get_instance() {
+ return (&ms_instance);
+ }
+
+ ErrorInducer() {
+ memset(&m_state[0], 0, sizeof(m_state));
+ }
+
+ void add(Action action, int loops,
+ ham_status_t error = HAM_INTERNAL_ERROR) {
+ m_state[action].loops = loops;
+ m_state[action].error = error;
+ }
+
+ ham_status_t induce(Action action) {
+ ham_assert(m_state[action].loops >= 0);
+ if (m_state[action].loops > 0 && --m_state[action].loops == 0)
+ return (m_state[action].error);
+ return (0);
+ }
+
+ private:
+ State m_state[kMaxActions];
+
+ // The singleton instance
+ static ErrorInducer ms_instance;
+
+ // Is the ErrorInducer active?
+ static bool ms_is_active;
+};
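+
+/*
+ * A usage sketch for unittests: the third call of
+ * HAM_INDUCE_ERROR(ErrorInducer::kChangesetFlush) will then throw
+ * an Exception with code HAM_INTERNAL_ERROR:
+ *
+ *   ErrorInducer::activate(true);
+ *   ErrorInducer::get_instance()->add(ErrorInducer::kChangesetFlush, 3,
+ *                  HAM_INTERNAL_ERROR);
+ */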
+
+} // namespace hamsterdb
+
+#endif /* HAM_ERRORINDUCER_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1globals/globals.cc b/plugins/Dbx_kv/src/hamsterdb/src/1globals/globals.cc
new file mode 100644
index 0000000000..9f5d184c55
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1globals/globals.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+uint64_t Globals::ms_extended_keys;
+
+uint64_t Globals::ms_extended_duptables;
+
+uint32_t Globals::ms_extended_threshold;
+
+uint32_t Globals::ms_duplicate_threshold;
+
+int Globals::ms_linear_threshold;
+
+int Globals::ms_error_level;
+
+const char *Globals::ms_error_file;
+
+int Globals::ms_error_line;
+
+const char *Globals::ms_error_expr;
+
+const char *Globals::ms_error_function;
+
+// the default error handler
+void HAM_CALLCONV default_errhandler(int level, const char *message);
+
+ham_errhandler_fun Globals::ms_error_handler = default_errhandler;
+
+uint64_t Globals::ms_bytes_before_compression;
+
+uint64_t Globals::ms_bytes_after_compression;
+
+bool Globals::ms_is_simd_enabled = true;
+
+} // namespace hamsterdb
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1globals/globals.h b/plugins/Dbx_kv/src/hamsterdb/src/1globals/globals.h
new file mode 100644
index 0000000000..efe3449e93
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1globals/globals.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Global variables; used for tests and metrics
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_GLOBALS_H
+#define HAM_GLOBALS_H
+
+#include "0root/root.h"
+
+#include "ham/types.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Globals {
+ // for counting extended keys
+ static uint64_t ms_extended_keys;
+
+ // for counting extended duplicate tables
+ static uint64_t ms_extended_duptables;
+
+ // Move every key > threshold to a blob. For testing purposes.
+ // TODO currently gets assigned at runtime
+ static uint32_t ms_extended_threshold;
+
+ // Create duplicate table if amount of duplicates > threshold. For testing
+ // purposes.
+ // TODO currently gets assigned at runtime
+ static uint32_t ms_duplicate_threshold;
+
+ // linear search threshold for the PAX layout
+ static int ms_linear_threshold;
+
+ // used in error.h/error.cc
+ static int ms_error_level;
+
+ // used in error.h/error.cc
+ static const char *ms_error_file;
+
+ // used in error.h/error.cc
+ static int ms_error_line;
+
+ // used in error.h/error.cc
+ static const char *ms_error_expr;
+
+ // used in error.h/error.cc
+ static const char *ms_error_function;
+
+ // used in error.h/error.cc
+ static ham_errhandler_fun ms_error_handler;
+
+ // PRO: Tracking key bytes before compression
+ static uint64_t ms_bytes_before_compression;
+
+ // PRO: Tracking key bytes after compression
+ static uint64_t ms_bytes_after_compression;
+
+ // PRO: enable/disable SIMD
+ static bool ms_is_simd_enabled;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_GLOBALS_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1mem/mem.cc b/plugins/Dbx_kv/src/hamsterdb/src/1mem/mem.cc
new file mode 100644
index 0000000000..58a00b87c3
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1mem/mem.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#ifdef HAM_USE_TCMALLOC
+# include <google/tcmalloc.h>
+# include <google/malloc_extension.h>
+#endif
+#include <stdlib.h>
+
+#include "ham/hamsterdb_int.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1os/file.h"
+#include "1mem/mem.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+uint64_t Memory::ms_peak_memory;
+uint64_t Memory::ms_total_allocations;
+uint64_t Memory::ms_current_allocations;
+
+void
+Memory::get_global_metrics(ham_env_metrics_t *metrics)
+{
+#ifdef HAM_USE_TCMALLOC
+ size_t value = 0;
+ MallocExtension::instance()->GetNumericProperty(
+ "generic.current_allocated_bytes", &value);
+ metrics->mem_current_usage = value;
+ if (ms_peak_memory < value)
+ ms_peak_memory = metrics->mem_peak_usage = value;
+ MallocExtension::instance()->GetNumericProperty(
+ "generic.heap_size", &value);
+ metrics->mem_heap_size = value;
+#endif
+
+ metrics->mem_total_allocations = ms_total_allocations;
+ metrics->mem_current_allocations = ms_current_allocations;
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1mem/mem.h b/plugins/Dbx_kv/src/hamsterdb/src/1mem/mem.h
new file mode 100644
index 0000000000..13f79b618c
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1mem/mem.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Memory handling
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no (because of the metrics counters)
+ */
+
+#ifndef HAM_MEM_H
+#define HAM_MEM_H
+
+#include "0root/root.h"
+
+#include <new>
+#include <stdlib.h>
+#ifdef HAM_USE_TCMALLOC
+# include <google/tcmalloc.h>
+#endif
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+struct ham_env_metrics_t;
+
+namespace hamsterdb {
+
+/*
+ * The static Memory class provides memory management functions in a common
+ * C++ namespace. The functions can allocate, reallocate and free memory
+ * while tracking usage statistics.
+ *
+ * If tcmalloc is used then additional metrics will be available.
+ *
+ * This class only has static members and methods. It does not have a
+ * constructor.
+ */
+class Memory {
+ public:
+  // allocates |size| bytes, cast into type |T *|;
+  // throws an Exception(HAM_OUT_OF_MEMORY) if the allocation fails.
+ // usage:
+ //
+ // char *p = Memory::allocate<char>(1024);
+ //
+ template<typename T>
+ static T *allocate(size_t size) {
+ ms_total_allocations++;
+ ms_current_allocations++;
+#ifdef HAM_USE_TCMALLOC
+ T *t = (T *)::tc_malloc(size);
+#else
+ T *t = (T *)::malloc(size);
+#endif
+ if (!t)
+ throw Exception(HAM_OUT_OF_MEMORY);
+ return (t);
+ }
+
+  // allocates |size| bytes and initializes them with zeroes;
+  // throws an Exception(HAM_OUT_OF_MEMORY) if the allocation fails.
+ // usage:
+ //
+ // const char *p = Memory::callocate<const char>(50);
+ //
+ template<typename T>
+ static T *callocate(size_t size) {
+ ms_total_allocations++;
+ ms_current_allocations++;
+
+#ifdef HAM_USE_TCMALLOC
+ T *t = (T *)::tc_calloc(1, size);
+#else
+ T *t = (T *)::calloc(1, size);
+#endif
+ if (!t)
+ throw Exception(HAM_OUT_OF_MEMORY);
+ return (t);
+ }
+
+  // re-allocates |ptr| to |size| bytes; throws an Exception(HAM_OUT_OF_MEMORY)
+  // if the allocation fails. |ptr| can be null on first use.
+ // usage:
+ //
+ // p = Memory::reallocate<char>(p, 100);
+ //
+ template<typename T>
+ static T *reallocate(T *ptr, size_t size) {
+ if (ptr == 0) {
+ ms_total_allocations++;
+ ms_current_allocations++;
+ }
+#ifdef HAM_USE_TCMALLOC
+ T *t = (T *)::tc_realloc(ptr, size);
+#else
+ T *t = (T *)::realloc(ptr, size);
+#endif
+ if (!t)
+ throw Exception(HAM_OUT_OF_MEMORY);
+ return (t);
+ }
+
+ // releases a memory block; can deal with NULL pointers.
+ static void release(void *ptr) {
+ if (ptr) {
+ ms_current_allocations--;
+#ifdef HAM_USE_TCMALLOC
+ ::tc_free(ptr);
+#else
+ ::free(ptr);
+#endif
+ }
+ }
+
+ // updates and returns the collected metrics
+ static void get_global_metrics(ham_env_metrics_t *metrics);
+
+ private:
+ // peak memory usage
+ static uint64_t ms_peak_memory;
+
+ // total memory allocations
+ static uint64_t ms_total_allocations;
+
+ // currently active allocations
+ static uint64_t ms_current_allocations;
+};
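+
+// Usage sketch (illustrative only; the sizes and the ham_env_metrics_t
+// instance below are made up for this example):
+//
+//     char *p = Memory::allocate<char>(1024);  // may throw HAM_OUT_OF_MEMORY
+//     p = Memory::reallocate<char>(p, 2048);   // grow the buffer
+//     ham_env_metrics_t metrics = {0};
+//     Memory::get_global_metrics(&metrics);    // fills the allocation counters
+//     Memory::release(p);                      // safe to call with NULL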
+
+} // namespace hamsterdb
+
+#endif /* HAM_MEM_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1os/file.h b/plugins/Dbx_kv/src/hamsterdb/src/1os/file.h
new file mode 100644
index 0000000000..df9049c6de
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1os/file.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A simple wrapper around a file handle. Throws exceptions in
+ * case of errors. Moves the file handle when copied.
+ *
+ * @exception_safe: strong
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_FILE_H
+#define HAM_FILE_H
+
+#include "0root/root.h"
+
+#include <stdio.h>
+#include <limits.h>
+
+#include "ham/types.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1os/os.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class File
+{
+ public:
+ enum {
+#ifdef HAM_OS_POSIX
+ kSeekSet = SEEK_SET,
+ kSeekEnd = SEEK_END,
+ kSeekCur = SEEK_CUR,
+ kMaxPath = PATH_MAX
+#else
+ kSeekSet = FILE_BEGIN,
+ kSeekEnd = FILE_END,
+ kSeekCur = FILE_CURRENT,
+ kMaxPath = MAX_PATH
+#endif
+ };
+
+ // Constructor: creates an empty File handle
+ File()
+ : m_fd(HAM_INVALID_FD), m_mmaph(HAM_INVALID_FD), m_posix_advice(0) {
+ }
+
+ // Copy constructor: moves ownership of the file handle
+ File(File &other)
+ : m_fd(other.m_fd), m_mmaph(other.m_mmaph),
+ m_posix_advice(other.m_posix_advice) {
+ other.m_fd = HAM_INVALID_FD;
+ other.m_mmaph = HAM_INVALID_FD;
+ }
+
+ // Destructor: closes the file
+ ~File() {
+ close();
+ }
+
+ // Assignment operator: moves ownership of the file handle
+  File &operator=(File &other) {
+    m_fd = other.m_fd;
+    m_mmaph = other.m_mmaph;
+    m_posix_advice = other.m_posix_advice;
+    other.m_fd = HAM_INVALID_FD;
+    other.m_mmaph = HAM_INVALID_FD;
+    return (*this);
+  }
+
+ // Creates a new file
+ void create(const char *filename, uint32_t mode);
+
+ // Opens an existing file
+ void open(const char *filename, bool read_only);
+
+ // Returns true if the file is open
+ bool is_open() const {
+ return (m_fd != HAM_INVALID_FD);
+ }
+
+ // Flushes a file
+ void flush();
+
+ // Sets the parameter for posix_fadvise()
+ void set_posix_advice(int parameter);
+
+ // Maps a file in memory
+ //
+ // mmap is called with MAP_PRIVATE - the allocated buffer
+ // is just a copy of the file; writing to the buffer will not alter
+ // the file itself.
+ void mmap(uint64_t position, size_t size, bool readonly,
+ uint8_t **buffer);
+
+ // Unmaps a buffer
+ void munmap(void *buffer, size_t size);
+
+ // Positional read from a file
+ void pread(uint64_t addr, void *buffer, size_t len);
+
+ // Positional write to a file
+ void pwrite(uint64_t addr, const void *buffer, size_t len);
+
+ // Write data to a file; uses the current file position
+ void write(const void *buffer, size_t len);
+
+ // Get the page allocation granularity of the operating system
+ static size_t get_granularity();
+
+ // Seek position in a file
+ void seek(uint64_t offset, int whence);
+
+ // Tell the position in a file
+ uint64_t tell();
+
+ // Returns the size of the file
+ uint64_t get_file_size();
+
+ // Truncate/resize the file
+ void truncate(uint64_t newsize);
+
+ // Closes the file descriptor
+ void close();
+
+ private:
+ // The file handle
+ ham_fd_t m_fd;
+
+ // The mmap handle - required for Win32
+ ham_fd_t m_mmaph;
+
+ // Parameter for posix_fadvise()
+ int m_posix_advice;
+};
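+
+// Usage sketch (illustrative only; the file name and buffer size are made
+// up, and each call below may throw an Exception on I/O errors):
+//
+//     File f;
+//     f.create("test.db", 0644);      // creates the file and locks it
+//     uint8_t buf[512] = {0};
+//     f.pwrite(0, buf, sizeof(buf));  // positional write at offset 0
+//     f.flush();                      // flush OS buffers to disk
+//     f.pread(0, buf, sizeof(buf));   // read the data back
+//     f.close();                      // also releases the lock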
+
+} // namespace hamsterdb
+
+#endif /* HAM_FILE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1os/os.cc b/plugins/Dbx_kv/src/hamsterdb/src/1os/os.cc
new file mode 100644
index 0000000000..8f8c0c991c
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1os/os.cc
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "1os/os.h"
+
+namespace hamsterdb {
+
+int
+os_get_simd_lane_width()
+{
+ // only supported in hamsterdb pro
+ return (0);
+}
+
+} // namespace hamsterdb
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1os/os.h b/plugins/Dbx_kv/src/hamsterdb/src/1os/os.h
new file mode 100644
index 0000000000..dd2f52a4dc
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1os/os.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Abstraction layer for operating system functions
+ *
+ * @exception_safe: basic // for socket
+ * @exception_safe: strong // for file
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_OS_H
+#define HAM_OS_H
+
+#include "0root/root.h"
+
+#include <stdio.h>
+#include <limits.h>
+
+#include "ham/types.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/*
+ * typedefs for posix
+ */
+#ifdef HAM_OS_POSIX
+typedef int ham_fd_t;
+typedef int ham_socket_t;
+# define HAM_INVALID_FD (-1)
+#endif
+
+/*
+ * typedefs for Windows 32- and 64-bit
+ */
+#ifdef HAM_OS_WIN32
+# ifdef CYGWIN
+typedef int ham_fd_t;
+typedef int ham_socket_t;
+# else
+typedef HANDLE ham_fd_t;
+typedef SOCKET ham_socket_t;
+# endif
+# define HAM_INVALID_FD (0)
+#endif
+
+// Returns the number of 32bit integers that the CPU can process in
+// parallel (the SIMD lane width)
+extern int
+os_get_simd_lane_width();
+
+} // namespace hamsterdb
+
+#endif /* HAM_OS_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1os/os_posix.cc b/plugins/Dbx_kv/src/hamsterdb/src/1os/os_posix.cc
new file mode 100644
index 0000000000..135899e7ea
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1os/os_posix.cc
@@ -0,0 +1,474 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE 1 // for O_LARGEFILE
+#define _FILE_OFFSET_BITS 64
+
+#include "0root/root.h"
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#if HAVE_MMAP
+# include <sys/mman.h>
+#endif
+#if HAVE_WRITEV
+# include <sys/uio.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1os/file.h"
+#include "1os/socket.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+#if 0
+# define os_log(x) ham_log(x)
+#else
+# define os_log(x)
+#endif
+
+static void
+lock_exclusive(int fd, bool lock)
+{
+#ifdef HAM_SOLARIS
+  // SunOS 5.9 doesn't have LOCK_* unless I include /usr/ucbinclude; but then,
+ // mmap behaves strangely (the first write-access to the mmapped buffer
+ // leads to a segmentation fault).
+ //
+ // Tell me if this troubles you/if you have suggestions for fixes.
+#else
+ int flags;
+
+ if (lock)
+ flags = LOCK_EX | LOCK_NB;
+ else
+ flags = LOCK_UN;
+
+ if (0 != flock(fd, flags)) {
+ ham_log(("flock failed with status %u (%s)", errno, strerror(errno)));
+ // it seems that linux does not only return EWOULDBLOCK, as stated
+ // in the documentation (flock(2)), but also other errors...
+ if (errno && lock)
+ throw Exception(HAM_WOULD_BLOCK);
+ throw Exception(HAM_IO_ERROR);
+ }
+#endif
+}
+
+static void
+enable_largefile(int fd)
+{
+ // not available on cygwin...
+#ifdef HAVE_O_LARGEFILE
+ int oflag = fcntl(fd, F_GETFL, 0);
+ fcntl(fd, F_SETFL, oflag | O_LARGEFILE);
+#endif
+}
+
+static void
+os_read(ham_fd_t fd, uint8_t *buffer, size_t len)
+{
+ os_log(("os_read: fd=%d, size=%lld", fd, len));
+
+ int r;
+ size_t total = 0;
+
+ while (total < len) {
+ r = read(fd, &buffer[total], len - total);
+ if (r < 0) {
+ ham_log(("os_read failed with status %u (%s)", errno, strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ if (r == 0)
+ break;
+ total += r;
+ }
+
+ if (total != len) {
+ ham_log(("os_read() failed with short read (%s)", strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+}
+
+static void
+os_write(ham_fd_t fd, const void *buffer, size_t len)
+{
+ int w;
+ size_t total = 0;
+ const char *p = (const char *)buffer;
+
+ while (total < len) {
+ w = ::write(fd, p + total, len - total);
+ if (w < 0) {
+ ham_log(("os_write failed with status %u (%s)", errno,
+ strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ if (w == 0)
+ break;
+ total += w;
+ }
+
+ if (total != len) {
+ ham_log(("os_write() failed with short read (%s)", strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+}
+
+size_t
+File::get_granularity()
+{
+ return ((size_t)sysconf(_SC_PAGE_SIZE));
+}
+
+void
+File::set_posix_advice(int advice)
+{
+ m_posix_advice = advice;
+ ham_assert(m_fd != HAM_INVALID_FD);
+
+#if HAVE_POSIX_FADVISE
+ if (m_posix_advice == HAM_POSIX_FADVICE_RANDOM) {
+ int r = ::posix_fadvise(m_fd, 0, 0, POSIX_FADV_RANDOM);
+ if (r != 0) {
+ ham_log(("posix_fadvise failed with status %d (%s)",
+ errno, strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ }
+#endif
+}
+
+void
+File::mmap(uint64_t position, size_t size, bool readonly, uint8_t **buffer)
+{
+ os_log(("File::mmap: fd=%d, position=%lld, size=%lld", m_fd, position, size));
+
+ int prot = PROT_READ;
+ if (!readonly)
+ prot |= PROT_WRITE;
+
+#if HAVE_MMAP
+ *buffer = (uint8_t *)::mmap(0, size, prot, MAP_PRIVATE, m_fd, position);
+ if (*buffer == (void *)-1) {
+ *buffer = 0;
+ ham_log(("mmap failed with status %d (%s)", errno, strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+#else
+ throw Exception(HAM_NOT_IMPLEMENTED);
+#endif
+
+#if HAVE_MADVISE
+ if (m_posix_advice == HAM_POSIX_FADVICE_RANDOM) {
+ int r = ::madvise(*buffer, size, MADV_RANDOM);
+ if (r != 0) {
+ ham_log(("madvise failed with status %d (%s)", errno, strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ }
+#endif
+}
+
+void
+File::munmap(void *buffer, size_t size)
+{
+ os_log(("File::munmap: size=%lld", size));
+
+#if HAVE_MUNMAP
+ int r = ::munmap(buffer, size);
+ if (r) {
+ ham_log(("munmap failed with status %d (%s)", errno, strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+#else
+ throw Exception(HAM_NOT_IMPLEMENTED);
+#endif
+}
+
+void
+File::pread(uint64_t addr, void *buffer, size_t len)
+{
+#if HAVE_PREAD
+ os_log(("File::pread: fd=%d, address=%lld, size=%lld", m_fd, addr,
+ len));
+
+ int r;
+ size_t total = 0;
+
+ while (total < len) {
+ r = ::pread(m_fd, (uint8_t *)buffer + total, len - total,
+ addr + total);
+ if (r < 0) {
+ ham_log(("File::pread failed with status %u (%s)", errno,
+ strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ if (r == 0)
+ break;
+ total += r;
+ }
+
+ if (total != len) {
+ ham_log(("File::pread() failed with short read (%s)", strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+#else
+ File::seek(addr, kSeekSet);
+ os_read(m_fd, (uint8_t *)buffer, len);
+#endif
+}
+
+void
+File::pwrite(uint64_t addr, const void *buffer, size_t len)
+{
+ os_log(("File::pwrite: fd=%d, address=%lld, size=%lld", m_fd, addr, len));
+
+#if HAVE_PWRITE
+ ssize_t s;
+ size_t total = 0;
+
+ while (total < len) {
+    s = ::pwrite(m_fd, (const uint8_t *)buffer + total, len - total,
+            addr + total);
+ if (s < 0) {
+ ham_log(("pwrite() failed with status %u (%s)", errno, strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ if (s == 0)
+ break;
+ total += s;
+ }
+
+ if (total != len) {
+ ham_log(("pwrite() failed with short read (%s)", strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+#else
+ seek(addr, kSeekSet);
+ write(buffer, len);
+#endif
+}
+
+void
+File::write(const void *buffer, size_t len)
+{
+ os_log(("File::write: fd=%d, size=%lld", m_fd, len));
+ os_write(m_fd, buffer, len);
+}
+
+void
+File::seek(uint64_t offset, int whence)
+{
+ os_log(("File::seek: fd=%d, offset=%lld, whence=%d", m_fd, offset, whence));
+ if (lseek(m_fd, offset, whence) < 0)
+ throw Exception(HAM_IO_ERROR);
+}
+
+uint64_t
+File::tell()
+{
+ uint64_t offset = lseek(m_fd, 0, SEEK_CUR);
+ os_log(("File::tell: fd=%d, offset=%lld", m_fd, offset));
+ if (offset == (uint64_t) - 1)
+ throw Exception(HAM_IO_ERROR);
+ return (offset);
+}
+
+uint64_t
+File::get_file_size()
+{
+ seek(0, kSeekEnd);
+ uint64_t size = tell();
+ os_log(("File::get_file_size: fd=%d, size=%lld", m_fd, size));
+ return (size);
+}
+
+void
+File::truncate(uint64_t newsize)
+{
+ os_log(("File::truncate: fd=%d, size=%lld", m_fd, newsize));
+ if (ftruncate(m_fd, newsize))
+ throw Exception(HAM_IO_ERROR);
+}
+
+void
+File::create(const char *filename, uint32_t mode)
+{
+ int osflags = O_CREAT | O_RDWR | O_TRUNC;
+#if HAVE_O_NOATIME
+ osflags |= O_NOATIME;
+#endif
+
+ ham_fd_t fd = ::open(filename, osflags, mode ? mode : 0644);
+ if (fd < 0) {
+ ham_log(("creating file %s failed with status %u (%s)", filename,
+ errno, strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ /* lock the file - this is default behaviour since 1.1.0 */
+ lock_exclusive(fd, true);
+
+ /* enable O_LARGEFILE support */
+ enable_largefile(fd);
+
+ m_fd = fd;
+}
+
+void
+File::flush()
+{
+ os_log(("File::flush: fd=%d", m_fd));
+ /* unlike fsync(), fdatasync() does not flush the metadata unless
+ * it's really required. it's therefore a lot faster. */
+#if HAVE_FDATASYNC && !__APPLE__
+ if (fdatasync(m_fd) == -1) {
+#else
+ if (fsync(m_fd) == -1) {
+#endif
+ ham_log(("fdatasync failed with status %u (%s)",
+ errno, strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+}
+
+void
+File::open(const char *filename, bool read_only)
+{
+ int osflags = 0;
+
+ if (read_only)
+ osflags |= O_RDONLY;
+ else
+ osflags |= O_RDWR;
+#if HAVE_O_NOATIME
+ osflags |= O_NOATIME;
+#endif
+
+ ham_fd_t fd = ::open(filename, osflags);
+ if (fd < 0) {
+ ham_log(("opening file %s failed with status %u (%s)", filename,
+ errno, strerror(errno)));
+ throw Exception(errno == ENOENT ? HAM_FILE_NOT_FOUND : HAM_IO_ERROR);
+ }
+
+ /* lock the file - this is default behaviour since 1.1.0 */
+ lock_exclusive(fd, true);
+
+ /* enable O_LARGEFILE support */
+ enable_largefile(fd);
+
+ m_fd = fd;
+}
+
+void
+File::close()
+{
+ if (m_fd != HAM_INVALID_FD) {
+ // on posix, we most likely don't want to close descriptors 0 and 1
+ ham_assert(m_fd != 0 && m_fd != 1);
+
+ // unlock the file - this is default behaviour since 1.1.0
+ lock_exclusive(m_fd, false);
+
+ // now close the descriptor
+ if (::close(m_fd) == -1)
+ throw Exception(HAM_IO_ERROR);
+
+ m_fd = HAM_INVALID_FD;
+ }
+}
+
+void
+Socket::connect(const char *hostname, uint16_t port, uint32_t timeout_sec)
+{
+ ham_socket_t s = ::socket(AF_INET, SOCK_STREAM, 0);
+ if (s < 0) {
+ ham_log(("failed creating socket: %s", strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ struct hostent *server = ::gethostbyname(hostname);
+ if (!server) {
+ ham_log(("unable to resolve hostname %s: %s", hostname,
+ hstrerror(h_errno)));
+ ::close(s);
+ throw Exception(HAM_NETWORK_ERROR);
+ }
+
+ struct sockaddr_in addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ memcpy(&addr.sin_addr.s_addr, server->h_addr, server->h_length);
+ addr.sin_port = htons(port);
+ if (::connect(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ ham_log(("unable to connect to %s:%d: %s", hostname, (int)port,
+ strerror(errno)));
+ ::close(s);
+ throw Exception(HAM_NETWORK_ERROR);
+ }
+
+ if (timeout_sec) {
+ struct timeval tv;
+ tv.tv_sec = timeout_sec;
+ tv.tv_usec = 0;
+ if (::setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)) < 0) {
+ ham_log(("unable to set socket timeout to %d sec: %s", timeout_sec,
+ strerror(errno)));
+ // fall through, this is not critical
+ }
+ }
+
+ m_socket = s;
+}
+
+void
+Socket::send(const uint8_t *data, size_t len)
+{
+ os_write(m_socket, data, len);
+}
+
+void
+Socket::recv(uint8_t *data, size_t len)
+{
+ os_read(m_socket, data, len);
+}
+
+void
+Socket::close()
+{
+ if (m_socket != HAM_INVALID_FD) {
+ if (::close(m_socket) == -1)
+ throw Exception(HAM_IO_ERROR);
+ m_socket = HAM_INVALID_FD;
+ }
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1os/os_win32.cc b/plugins/Dbx_kv/src/hamsterdb/src/1os/os_win32.cc
new file mode 100644
index 0000000000..ac51a4a7b7
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1os/os_win32.cc
@@ -0,0 +1,542 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <winsock2.h>
+#include <windows.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <assert.h>
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1os/file.h"
+#include "1os/socket.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+static const char *
+DisplayError(char* buf, uint32_t buflen, DWORD errorcode)
+{
+ size_t len;
+
+ buf[0] = 0;
+ FormatMessageA(/* FORMAT_MESSAGE_ALLOCATE_BUFFER | */
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL, errorcode,
+ MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
+ (LPSTR)buf, buflen, NULL);
+ buf[buflen - 1] = 0;
+
+  /* strip trailing whitespace and newlines */
+ for (len = strlen(buf); len-- > 0; ) {
+ if (!isspace(buf[len]))
+ break;
+ buf[len] = 0;
+ }
+
+ return (buf);
+}
+
+/*
+ * MS says:
+ *
+ * Security Alert
+ *
+ * Using the MultiByteToWideChar function incorrectly can compromise the
+ * security of your application. Calling this function can easily cause a
+ * buffer overrun because the size of the input buffer indicated by
+ * lpMultiByteStr equals the number of bytes in the string, while the size of
+ * the output buffer indicated by lpWideCharStr equals the number of WCHAR
+ * values.
+ *
+ * To avoid a buffer overrun, your application must specify a buffer size
+ * appropriate for the data type the buffer receives. For more information, see
+ * Security Considerations: International Features.
+ */
+static void
+utf8_string(const char *filename, WCHAR *wfilename, int wlen)
+{
+ MultiByteToWideChar(CP_ACP, 0, filename, -1, wfilename, wlen);
+}
+
+static int
+calc_wlen4str(const char *str)
+{
+ // Since we call MultiByteToWideChar with an input length of -1, the
+ // output will include the wchar NUL sentinel as well, so count it
+ return (int)(strlen(str) + 1);
+}
+
+size_t
+File::get_granularity()
+{
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return ((size_t)info.dwAllocationGranularity);
+}
+
+void
+File::set_posix_advice(int advice)
+{
+ // Only available for posix platforms
+}
+
+void
+File::mmap(uint64_t position, size_t size, bool readonly, uint8_t **buffer)
+{
+ ham_status_t st;
+ DWORD protect = (readonly ? PAGE_READONLY : PAGE_WRITECOPY);
+ DWORD access = FILE_MAP_COPY;
+ LARGE_INTEGER i;
+ i.QuadPart = position;
+
+ m_mmaph = CreateFileMapping(m_fd, 0, protect, 0, 0, 0);
+ if (!m_mmaph) {
+ char buf[256];
+ *buffer = 0;
+ st = (ham_status_t)GetLastError();
+ ham_log(("CreateFileMapping failed with OS status %u (%s)",
+ st, DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ *buffer = (uint8_t *)MapViewOfFile(m_mmaph, access, i.HighPart, i.LowPart,
+ (SIZE_T)size);
+ if (!*buffer) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ /* make sure to release the mapping */
+ (void)CloseHandle(m_mmaph);
+ m_mmaph = HAM_INVALID_FD;
+ ham_log(("MapViewOfFile failed with OS status %u (%s)",
+ st, DisplayError(buf, sizeof(buf), st)));
+ if (st == ERROR_NOT_ENOUGH_QUOTA) // not enough resources - fallback to r/w
+ throw Exception(HAM_LIMITS_REACHED);
+ throw Exception(HAM_IO_ERROR);
+ }
+}
+
+void
+File::munmap(void *buffer, size_t size)
+{
+ ham_status_t st;
+
+ if (!UnmapViewOfFile(buffer)) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ ham_log(("UnMapViewOfFile failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ if (m_mmaph != HAM_INVALID_FD) {
+ if (!CloseHandle(m_mmaph)) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ ham_log(("CloseHandle failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ }
+
+ m_mmaph = HAM_INVALID_FD;
+}
+
+void
+File::pread(uint64_t addr, void *buffer, size_t len)
+{
+ ham_status_t st;
+ OVERLAPPED ov = { 0 };
+ ov.Offset = (DWORD)addr;
+ ov.OffsetHigh = addr >> 32;
+ DWORD read;
+ if (!::ReadFile(m_fd, buffer, (DWORD)len, &read, &ov)) {
+ if (GetLastError() != ERROR_IO_PENDING) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ ham_log(("ReadFile failed with OS status %u (%s)",
+ st, DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ if (!::GetOverlappedResult(m_fd, &ov, &read, TRUE)) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ ham_log(("GetOverlappedResult failed with OS status %u (%s)",
+ st, DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ }
+
+ if (read != len)
+ throw Exception(HAM_IO_ERROR);
+}
+
+void
+File::pwrite(uint64_t addr, const void *buffer, size_t len)
+{
+ ham_status_t st;
+ OVERLAPPED ov = { 0 };
+ ov.Offset = (DWORD)addr;
+ ov.OffsetHigh = addr >> 32;
+ DWORD written;
+ if (!::WriteFile(m_fd, buffer, (DWORD)len, &written, &ov)) {
+ if (GetLastError() != ERROR_IO_PENDING) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ ham_log(("WriteFile failed with OS status %u (%s)",
+ st, DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ if (!::GetOverlappedResult(m_fd, &ov, &written, TRUE)) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ ham_log(("GetOverlappedResult failed with OS status %u (%s)",
+ st, DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ }
+
+ if (written != len)
+ throw Exception(HAM_IO_ERROR);
+}
+
+void
+File::write(const void *buffer, size_t len)
+{
+ ham_status_t st;
+ DWORD written = 0;
+
+ if (!WriteFile(m_fd, buffer, (DWORD)len, &written, 0)) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ ham_log(("WriteFile failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ if (written != len)
+ throw Exception(HAM_IO_ERROR);
+}
+
+#ifndef INVALID_SET_FILE_POINTER
+# define INVALID_SET_FILE_POINTER ((DWORD)-1)
+#endif
+
+void
+File::seek(uint64_t offset, int whence)
+{
+ DWORD st;
+ LARGE_INTEGER i;
+ i.QuadPart = offset;
+
+ i.LowPart = ::SetFilePointer(m_fd, i.LowPart, &i.HighPart, whence);
+ if (i.LowPart == INVALID_SET_FILE_POINTER &&
+ (st = GetLastError())!=NO_ERROR) {
+ char buf[256];
+ ham_log(("SetFilePointer failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+}
+
+uint64_t
+File::tell()
+{
+ DWORD st;
+ LARGE_INTEGER i;
+ i.QuadPart = 0;
+
+ i.LowPart = SetFilePointer(m_fd, i.LowPart, &i.HighPart, kSeekCur);
+ if (i.LowPart == INVALID_SET_FILE_POINTER &&
+ (st = GetLastError()) != NO_ERROR) {
+ char buf[256];
+ ham_log(("SetFilePointer failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ return ((size_t)i.QuadPart);
+}
+
+#ifndef INVALID_FILE_SIZE
+# define INVALID_FILE_SIZE ((DWORD)-1)
+#endif
+
+uint64_t
+File::get_file_size()
+{
+ ham_status_t st;
+ LARGE_INTEGER i;
+ i.QuadPart = 0;
+ i.LowPart = GetFileSize(m_fd, (LPDWORD)&i.HighPart);
+
+ if (i.LowPart == INVALID_FILE_SIZE && (st = GetLastError()) != NO_ERROR) {
+ char buf[256];
+ ham_log(("GetFileSize failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ return ((size_t)i.QuadPart);
+}
+
+void
+File::truncate(uint64_t newsize)
+{
+ File::seek(newsize, kSeekSet);
+
+ if (!SetEndOfFile(m_fd)) {
+ char buf[256];
+ ham_status_t st = (ham_status_t)GetLastError();
+ ham_log(("SetEndOfFile failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+}
+
+void
+File::create(const char *filename, uint32_t mode)
+{
+ ham_status_t st;
+ DWORD share = 0; /* 1.1.0: default behaviour is exclusive locking */
+ DWORD access = GENERIC_READ | GENERIC_WRITE;
+ ham_fd_t fd;
+
+#ifdef UNICODE
+ int fnameWlen = calc_wlen4str(filename);
+ WCHAR *wfilename = (WCHAR *)malloc(fnameWlen * sizeof(wfilename[0]));
+ if (!wfilename)
+ throw Exception(HAM_OUT_OF_MEMORY);
+
+ /* translate ASCII filename to unicode */
+ utf8_string(filename, wfilename, fnameWlen);
+ fd = (ham_fd_t)CreateFileW(wfilename, access,
+ share, NULL, CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED, 0);
+ free(wfilename);
+#else
+ fd = (ham_fd_t)CreateFileA(filename, access,
+ share, NULL, CREATE_ALWAYS,
+ FILE_ATTRIBUTE_NORMAL | FILE_ATTRIBUTE_NOT_CONTENT_INDEXED, 0);
+#endif
+
+ if (fd == INVALID_HANDLE_VALUE) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ if (st == ERROR_SHARING_VIOLATION)
+ throw Exception(HAM_WOULD_BLOCK);
+ ham_log(("CreateFile(%s, %x, %x, ...) (create) failed with OS status "
+ "%u (%s)", filename, access, share, st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ m_fd = fd;
+}
+
+void
+File::flush()
+{
+ ham_status_t st;
+
+ if (!FlushFileBuffers(m_fd)) {
+ char buf[256];
+ st = (ham_status_t)GetLastError();
+ ham_log(("FlushFileBuffers failed with OS status %u (%s)",
+ st, DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+}
+
+void
+File::open(const char *filename, bool read_only)
+{
+ ham_status_t st;
+ DWORD share = 0; /* 1.1.0: default behaviour is exclusive locking */
+ DWORD access = read_only
+ ? GENERIC_READ
+ : (GENERIC_READ | GENERIC_WRITE);
+ DWORD dispo = OPEN_EXISTING;
+ DWORD osflags = 0;
+ ham_fd_t fd;
+
+#ifdef UNICODE
+ {
+ int fnameWlen = calc_wlen4str(filename);
+ WCHAR *wfilename = (WCHAR *)malloc(fnameWlen * sizeof(wfilename[0]));
+ if (!wfilename)
+ throw Exception(HAM_OUT_OF_MEMORY);
+
+ /* translate ASCII filename to unicode */
+ utf8_string(filename, wfilename, fnameWlen);
+ fd = (ham_fd_t)CreateFileW(wfilename, access, share, NULL,
+ dispo, osflags, 0);
+ free(wfilename);
+ }
+#else
+ fd = (ham_fd_t)CreateFileA(filename, access, share, NULL,
+ dispo, osflags, 0);
+#endif
+
+ if (fd == INVALID_HANDLE_VALUE) {
+ char buf[256];
+ fd = HAM_INVALID_FD;
+ st = (ham_status_t)GetLastError();
+ ham_log(("CreateFile(%s, %x, %x, ...) (open) failed with OS status "
+ "%u (%s)", filename, access, share,
+ st, DisplayError(buf, sizeof(buf), st)));
+ if (st == ERROR_SHARING_VIOLATION)
+ throw Exception(HAM_WOULD_BLOCK);
+ throw Exception(st == ERROR_FILE_NOT_FOUND
+ ? HAM_FILE_NOT_FOUND
+ : HAM_IO_ERROR);
+ }
+
+ m_fd = fd;
+}
+
+void
+File::close()
+{
+ if (m_fd != HAM_INVALID_FD) {
+ if (!CloseHandle((HANDLE)m_fd)) {
+ char buf[256];
+ ham_status_t st = (ham_status_t)GetLastError();
+ ham_log(("CloseHandle failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ m_fd = HAM_INVALID_FD;
+ }
+
+ if (m_mmaph != HAM_INVALID_FD) {
+ if (!CloseHandle((HANDLE)m_mmaph)) {
+ char buf[256];
+ ham_status_t st = (ham_status_t)GetLastError();
+ ham_log(("CloseHandle failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ m_mmaph = HAM_INVALID_FD;
+ }
+}
+
+void
+Socket::connect(const char *hostname, uint16_t port, uint32_t timeout_sec)
+{
+ WORD sockVersion = MAKEWORD(1, 1);
+ WSADATA wsaData;
+ WSAStartup(sockVersion, &wsaData);
+
+ ham_socket_t s = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (s < 0) {
+ ham_log(("failed creating socket: %s", strerror(errno)));
+ throw Exception(HAM_IO_ERROR);
+ }
+
+ LPHOSTENT server = ::gethostbyname(hostname);
+ if (!server) {
+ ham_log(("unable to resolve hostname %s", hostname));
+ ::closesocket(s);
+ throw Exception(HAM_NETWORK_ERROR);
+ }
+
+ SOCKADDR_IN addr;
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr = *((LPIN_ADDR)*server->h_addr_list);
+ addr.sin_port = htons(port);
+ if (::connect(s, (LPSOCKADDR)&addr, sizeof(addr)) < 0) {
+ ham_log(("unable to connect to %s:%d: %s", hostname, (int)port,
+ strerror(errno)));
+ ::closesocket(s);
+ throw Exception(HAM_NETWORK_ERROR);
+ }
+
+ if (timeout_sec) {
+ struct timeval tv;
+ tv.tv_sec = timeout_sec;
+ tv.tv_usec = 0;
+ if (::setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)) < 0) {
+ char buf[256];
+ ham_log(("unable to set socket timeout to %u sec: %u/%s", timeout_sec,
+ WSAGetLastError(), DisplayError(buf, sizeof(buf),
+ WSAGetLastError())));
+ // fall through, this is not critical
+ }
+ }
+
+ m_socket = s;
+}
+
+void
+Socket::send(const uint8_t *data, size_t len)
+{
+ size_t sent = 0;
+ char buf[256];
+ ham_status_t st;
+
+ while (sent != len) {
+ int s = ::send(m_socket, (const char *)(data + sent), len - sent, 0);
+ if (s <= 0) {
+ st = (ham_status_t)GetLastError();
+ ham_log(("send failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ sent += s;
+ }
+}
+
+void
+Socket::recv(uint8_t *data, size_t len)
+{
+ size_t read = 0;
+ char buf[256];
+ ham_status_t st;
+
+ while (read != len) {
+ int r = ::recv(m_socket, (char *)(data + read), len - read, 0);
+ if (r <= 0) {
+ st = (ham_status_t)GetLastError();
+ ham_log(("recv failed with OS status %u (%s)", st,
+ DisplayError(buf, sizeof(buf), st)));
+ throw Exception(HAM_IO_ERROR);
+ }
+ read += r;
+ }
+}
+
+void
+Socket::close()
+{
+ if (m_socket != HAM_INVALID_FD) {
+ if (::closesocket(m_socket) == -1)
+ throw Exception(HAM_IO_ERROR);
+ m_socket = HAM_INVALID_FD;
+ }
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1os/socket.h b/plugins/Dbx_kv/src/hamsterdb/src/1os/socket.h
new file mode 100644
index 0000000000..0acdfdd14e
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1os/socket.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A simple wrapper around a TCP socket handle. Throws exceptions in
+ * case of errors.
+ *
+ * @exception_safe: basic
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_SOCKET_H
+#define HAM_SOCKET_H
+
+#include "0root/root.h"
+
+#include <stdio.h>
+#include <limits.h>
+
+#include "ham/types.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1os/os.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Socket
+{
+ public:
+ // Constructor creates an empty socket
+ Socket()
+ : m_socket(HAM_INVALID_FD) {
+ }
+
+ // Destructor closes the socket
+ ~Socket() {
+ close();
+ }
+
+ // Connects to a remote host
+ void connect(const char *hostname, uint16_t port, uint32_t timeout_sec);
+
+ // Sends data to the connected server
+ void send(const uint8_t *data, size_t len);
+
+ // Receives data from the connected server; blocking!
+ void recv(uint8_t *data, size_t len);
+
+ // Closes the connection; no problem if socket was already closed
+ void close();
+
+ private:
+ ham_socket_t m_socket;
+};
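+
+// Usage sketch (illustrative only; host, port, timeout and payload sizes
+// are made up, and each call below may throw an Exception on errors):
+//
+//     Socket s;
+//     s.connect("localhost", 8080, 5);   // 5 second receive timeout
+//     uint8_t request[4] = { 0, 1, 2, 3 };
+//     s.send(request, sizeof(request));
+//     uint8_t reply[16];
+//     s.recv(reply, sizeof(reply));      // blocks until 16 bytes were received
+//     s.close();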
+
+} // namespace hamsterdb
+
+#endif /* HAM_SOCKET_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/1rb/rb.h b/plugins/Dbx_kv/src/hamsterdb/src/1rb/rb.h
new file mode 100644
index 0000000000..fcf0c135d5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/1rb/rb.h
@@ -0,0 +1,977 @@
+/*-
+ *******************************************************************************
+ *
+ * cpp macro implementation of left-leaning 2-3 red-black trees. Parent
+ * pointers are not used, and color bits are stored in the least significant
+ * bit of right-child pointers (if RB_COMPACT is defined), thus making node
+ * linkage as compact as is possible for red-black trees.
+ *
+ * Usage:
+ *
+ * #include <stdint.h>
+ * #include <stdbool.h>
+ * #define NDEBUG // (Optional, see assert(3).)
+ * #include <assert.h>
+ * #define RB_COMPACT // (Optional, embed color bits in right-child pointers.)
+ * #include <rb.h>
+ * ...
+ *
+ *******************************************************************************
+ */
+
+#ifndef RB_H_
+#define RB_H_
+
+#include "0root/root.h"
+
+#ifndef HAM_OS_WIN32
+# include <stdint.h>
+# include <sys/cdefs.h>
+#endif
+#include <assert.h>
+
+#ifdef RB_COMPACT
+/* Node structure. */
+#define rb_node(a_type) \
+struct { \
+ a_type *rbn_left; \
+ a_type *rbn_right_red; \
+}
+#else
+#define rb_node(a_type) \
+struct { \
+ a_type *rbn_left; \
+ a_type *rbn_right; \
+ bool rbn_red; \
+}
+#endif
+
+/* Root structure. */
+#define rbt(a_type) \
+struct { \
+ a_type *rbt_root; \
+ a_type rbt_nil; \
+}
+
+/* Left accessors. */
+#define rbtn_left_get(a_type, a_field, a_node) \
+ ((a_node)->a_field.rbn_left)
+#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \
+ (a_node)->a_field.rbn_left = a_left; \
+} while (0)
+
+#ifdef RB_COMPACT
+/* Right accessors. */
+#define rbtn_right_get(a_type, a_field, a_node) \
+ ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \
+ & ((ssize_t)-2)))
+#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
+ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \
+ | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \
+} while (0)
+
+/* Color accessors. */
+#define rbtn_red_get(a_type, a_field, a_node) \
+ ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \
+ & ((size_t)1)))
+#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
+ (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \
+ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \
+ | ((ssize_t)a_red)); \
+} while (0)
+#define rbtn_red_set(a_type, a_field, a_node) do { \
+ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \
+ (a_node)->a_field.rbn_right_red) | ((size_t)1)); \
+} while (0)
+#define rbtn_black_set(a_type, a_field, a_node) do { \
+ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \
+ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \
+} while (0)
+#else
+/* Right accessors. */
+#define rbtn_right_get(a_type, a_field, a_node) \
+ ((a_node)->a_field.rbn_right)
+#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \
+ (a_node)->a_field.rbn_right = a_right; \
+} while (0)
+
+/* Color accessors. */
+#define rbtn_red_get(a_type, a_field, a_node) \
+ ((a_node)->a_field.rbn_red)
+#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \
+ (a_node)->a_field.rbn_red = (a_red); \
+} while (0)
+#define rbtn_red_set(a_type, a_field, a_node) do { \
+ (a_node)->a_field.rbn_red = true; \
+} while (0)
+#define rbtn_black_set(a_type, a_field, a_node) do { \
+ (a_node)->a_field.rbn_red = false; \
+} while (0)
+#endif
+
+/* Node initializer. */
+#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \
+ rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
+ rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \
+ rbtn_red_set(a_type, a_field, (a_node)); \
+} while (0)
+
+/* Tree initializer. */
+#define rb_new(a_type, a_field, a_rbt) do { \
+ (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \
+ rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \
+ rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \
+} while (0)
+
+/* Internal utility macros. */
+#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \
+ (r_node) = (a_root); \
+ if ((r_node) != &(a_rbt)->rbt_nil) { \
+ for (; \
+ rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\
+ (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \
+ } \
+ } \
+} while (0)
+
+#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \
+ (r_node) = (a_root); \
+ if ((r_node) != &(a_rbt)->rbt_nil) { \
+ for (; rbtn_right_get(a_type, a_field, (r_node)) != \
+ &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \
+ (r_node))) { \
+ } \
+ } \
+} while (0)
+
+#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \
+ (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \
+ rbtn_right_set(a_type, a_field, (a_node), \
+ rbtn_left_get(a_type, a_field, (r_node))); \
+ rbtn_left_set(a_type, a_field, (r_node), (a_node)); \
+} while (0)
+
+#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \
+ (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \
+ rbtn_left_set(a_type, a_field, (a_node), \
+ rbtn_right_get(a_type, a_field, (r_node))); \
+ rbtn_right_set(a_type, a_field, (r_node), (a_node)); \
+} while (0)
+
+/*
+ * The rb_proto() macro generates function prototypes that correspond to the
+ * functions generated by an equivalently parameterized call to rb_gen().
+ */
+
+#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
+a_attr void \
+a_prefix##new(a_rbt_type *rbtree); \
+a_attr a_type * \
+a_prefix##first(a_rbt_type *rbtree); \
+a_attr a_type * \
+a_prefix##last(a_rbt_type *rbtree); \
+a_attr a_type * \
+a_prefix##next(a_rbt_type *rbtree, a_type *node); \
+a_attr a_type * \
+a_prefix##prev(a_rbt_type *rbtree, a_type *node); \
+a_attr a_type * \
+a_prefix##search(a_rbt_type *rbtree, a_type *key); \
+a_attr a_type * \
+a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \
+a_attr a_type * \
+a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \
+a_attr void \
+a_prefix##insert(a_rbt_type *rbtree, a_type *node); \
+a_attr void \
+a_prefix##remove(a_rbt_type *rbtree, a_type *node);/* \
+a_attr a_type * \
+a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
+ a_rbt_type *, a_type *, void *), void *arg); \
+a_attr a_type * \
+a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg);*/
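+
+/*
+ * For example (illustrative only, matching the ex_* setup documented below),
+ * a header could emit the prototypes of the generated functions with:
+ *
+ *   rb_proto(static, ex_, ex_t, ex_node_t)
+ */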
+
+/*
+ * The rb_gen() macro generates a type-specific red-black tree implementation,
+ * based on the above cpp macros.
+ *
+ * Arguments:
+ *
+ * a_attr : Function attribute for generated functions (ex: static).
+ * a_prefix : Prefix for generated functions (ex: ex_).
+ * a_rb_type : Type for red-black tree data structure (ex: ex_t).
+ * a_type : Type for red-black tree node data structure (ex: ex_node_t).
+ * a_field : Name of red-black tree node linkage (ex: ex_link).
+ * a_cmp : Node comparison function name, with the following prototype:
+ * int (a_cmp *)(a_type *a_node, a_type *a_other);
+ * ^^^^^^
+ * or a_key
+ * Interpretation of comparison function return values:
+ * -1 : a_node < a_other
+ * 0 : a_node == a_other
+ * 1 : a_node > a_other
+ * In all cases, the a_node or a_key macro argument is the first
+ * argument to the comparison function, which makes it possible
+ * to write comparison functions that treat the first argument
+ * specially.
+ *
+ * Assuming the following setup:
+ *
+ * typedef struct ex_node_s ex_node_t;
+ * struct ex_node_s {
+ * rb_node(ex_node_t) ex_link;
+ * };
+ * typedef rbt(ex_node_t) ex_t;
+ * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp)
+ *
+ * The following API is generated:
+ *
+ * static void
+ * ex_new(ex_t *tree);
+ * Description: Initialize a red-black tree structure.
+ * Args:
+ * tree: Pointer to an uninitialized red-black tree object.
+ *
+ * static ex_node_t *
+ * ex_first(ex_t *tree);
+ * static ex_node_t *
+ * ex_last(ex_t *tree);
+ * Description: Get the first/last node in tree.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * Ret: First/last node in tree, or NULL if tree is empty.
+ *
+ * static ex_node_t *
+ * ex_next(ex_t *tree, ex_node_t *node);
+ * static ex_node_t *
+ * ex_prev(ex_t *tree, ex_node_t *node);
+ * Description: Get node's successor/predecessor.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * node: A node in tree.
+ * Ret: node's successor/predecessor in tree, or NULL if node is
+ * last/first.
+ *
+ * static ex_node_t *
+ * ex_search(ex_t *tree, ex_node_t *key);
+ * Description: Search for node that matches key.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * key : Search key.
+ * Ret: Node in tree that matches key, or NULL if no match.
+ *
+ * static ex_node_t *
+ * ex_nsearch(ex_t *tree, ex_node_t *key);
+ * static ex_node_t *
+ * ex_psearch(ex_t *tree, ex_node_t *key);
+ * Description: Search for node that matches key. If no match is found,
+ * return what would be key's successor/predecessor, were
+ * key in tree.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * key : Search key.
+ * Ret: Node in tree that matches key, or if no match, hypothetical node's
+ * successor/predecessor (NULL if no successor/predecessor).
+ *
+ * static void
+ * ex_insert(ex_t *tree, ex_node_t *node);
+ * Description: Insert node into tree.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * node: Node to be inserted into tree.
+ *
+ * static void
+ * ex_remove(ex_t *tree, ex_node_t *node);
+ * Description: Remove node from tree.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * node: Node in tree to be removed.
+ *
+ * static ex_node_t *
+ * ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
+ * ex_node_t *, void *), void *arg);
+ * static ex_node_t *
+ * ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
+ * ex_node_t *, void *), void *arg);
+ * Description: Iterate forward/backward over tree, starting at node. If
+ * tree is modified, iteration must be immediately
+ * terminated by the callback function that causes the
+ * modification.
+ * Args:
+ * tree : Pointer to an initialized red-black tree object.
+ * start: Node at which to start iteration, or NULL to start at
+ * first/last node.
+ * cb : Callback function, which is called for each node during
+ * iteration. Under normal circumstances the callback function
+ * should return NULL, which causes iteration to continue. If a
+ * callback function returns non-NULL, iteration is immediately
+ * terminated and the non-NULL return value is returned by the
+ * iterator. This is useful for re-starting iteration after
+ * modifying tree.
+ * arg : Opaque pointer passed to cb().
+ * Ret: NULL if iteration completed, or the non-NULL callback return value
+ * that caused termination of the iteration.
+ */
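+/*
+ * Illustrative comparator and calls for the setup above (this assumes that
+ * ex_node_s additionally carries an integer "key" member, which is not part
+ * of the documented structure):
+ *
+ *   static int
+ *   ex_cmp(ex_node_t *a, ex_node_t *b) {
+ *     return (a->key < b->key ? -1 : (a->key > b->key ? 1 : 0));
+ *   }
+ *
+ *   ex_t tree;
+ *   ex_new(&tree);
+ *   ex_insert(&tree, node);                    // node: initialized ex_node_t *
+ *   ex_node_t *match = ex_search(&tree, key);  // key: ex_node_t * used as probe
+ *   ex_remove(&tree, node);
+ */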
+#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \
+a_attr void \
+a_prefix##new(a_rbt_type *rbtree) { \
+ rb_new(a_type, a_field, rbtree); \
+} \
+a_attr a_type * \
+a_prefix##first(a_rbt_type *rbtree) { \
+ a_type *ret; \
+ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = NULL; \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##last(a_rbt_type *rbtree) { \
+ a_type *ret; \
+ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = NULL; \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##next(a_rbt_type *rbtree, a_type *node) { \
+ a_type *ret; \
+ if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
+ rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \
+ a_field, node), ret); \
+ } else { \
+ a_type *tnode = rbtree->rbt_root; \
+ assert(tnode != &rbtree->rbt_nil); \
+ ret = &rbtree->rbt_nil; \
+ while (true) { \
+ int cmp = (a_cmp)(node, tnode); \
+ if (cmp < 0) { \
+ ret = tnode; \
+ tnode = rbtn_left_get(a_type, a_field, tnode); \
+ } else if (cmp > 0) { \
+ tnode = rbtn_right_get(a_type, a_field, tnode); \
+ } else { \
+ break; \
+ } \
+ assert(tnode != &rbtree->rbt_nil); \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \
+ a_type *ret; \
+ if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \
+ rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \
+ a_field, node), ret); \
+ } else { \
+ a_type *tnode = rbtree->rbt_root; \
+ assert(tnode != &rbtree->rbt_nil); \
+ ret = &rbtree->rbt_nil; \
+ while (true) { \
+ int cmp = (a_cmp)(node, tnode); \
+ if (cmp < 0) { \
+ tnode = rbtn_left_get(a_type, a_field, tnode); \
+ } else if (cmp > 0) { \
+ ret = tnode; \
+ tnode = rbtn_right_get(a_type, a_field, tnode); \
+ } else { \
+ break; \
+ } \
+ assert(tnode != &rbtree->rbt_nil); \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##search(a_rbt_type *rbtree, a_type *key) { \
+ a_type *ret; \
+ int cmp; \
+ ret = rbtree->rbt_root; \
+ while (ret != &rbtree->rbt_nil \
+ && (cmp = (a_cmp)(key, ret)) != 0) { \
+ if (cmp < 0) { \
+ ret = rbtn_left_get(a_type, a_field, ret); \
+ } else { \
+ ret = rbtn_right_get(a_type, a_field, ret); \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \
+ a_type *ret; \
+ a_type *tnode = rbtree->rbt_root; \
+ ret = &rbtree->rbt_nil; \
+ while (tnode != &rbtree->rbt_nil) { \
+ int cmp = (a_cmp)(key, tnode); \
+ if (cmp < 0) { \
+ ret = tnode; \
+ tnode = rbtn_left_get(a_type, a_field, tnode); \
+ } else if (cmp > 0) { \
+ tnode = rbtn_right_get(a_type, a_field, tnode); \
+ } else { \
+ ret = tnode; \
+ break; \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \
+ a_type *ret; \
+ a_type *tnode = rbtree->rbt_root; \
+ ret = &rbtree->rbt_nil; \
+ while (tnode != &rbtree->rbt_nil) { \
+ int cmp = (a_cmp)(key, tnode); \
+ if (cmp < 0) { \
+ tnode = rbtn_left_get(a_type, a_field, tnode); \
+ } else if (cmp > 0) { \
+ ret = tnode; \
+ tnode = rbtn_right_get(a_type, a_field, tnode); \
+ } else { \
+ ret = tnode; \
+ break; \
+ } \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = (NULL); \
+ } \
+ return (ret); \
+} \
+a_attr void \
+a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
+ struct { \
+ a_type *node; \
+ int cmp; \
+ } path[sizeof(void *) << 4], *pathp; \
+ rbt_node_new(a_type, a_field, rbtree, node); \
+ /* Wind. */ \
+ path->node = rbtree->rbt_root; \
+ for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
+ int cmp = pathp->cmp = a_cmp(node, pathp->node); \
+ assert(cmp != 0); \
+ if (cmp < 0) { \
+ pathp[1].node = rbtn_left_get(a_type, a_field, \
+ pathp->node); \
+ } else { \
+ pathp[1].node = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ } \
+ } \
+ pathp->node = node; \
+ /* Unwind. */ \
+ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
+ a_type *cnode = pathp->node; \
+ if (pathp->cmp < 0) { \
+ a_type *left = pathp[1].node; \
+ rbtn_left_set(a_type, a_field, cnode, left); \
+ if (rbtn_red_get(a_type, a_field, left)) { \
+ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+ if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ /* Fix up 4-node. */ \
+ a_type *tnode; \
+ rbtn_black_set(a_type, a_field, leftleft); \
+ rbtn_rotate_right(a_type, a_field, cnode, tnode); \
+ cnode = tnode; \
+ } \
+ } else { \
+ return; \
+ } \
+ } else { \
+ a_type *right = pathp[1].node; \
+ rbtn_right_set(a_type, a_field, cnode, right); \
+ if (rbtn_red_get(a_type, a_field, right)) { \
+ a_type *left = rbtn_left_get(a_type, a_field, cnode); \
+ if (rbtn_red_get(a_type, a_field, left)) { \
+ /* Split 4-node. */ \
+ rbtn_black_set(a_type, a_field, left); \
+ rbtn_black_set(a_type, a_field, right); \
+ rbtn_red_set(a_type, a_field, cnode); \
+ } else { \
+ /* Lean left. */ \
+ a_type *tnode; \
+ bool tred = rbtn_red_get(a_type, a_field, cnode); \
+ rbtn_rotate_left(a_type, a_field, cnode, tnode); \
+ rbtn_color_set(a_type, a_field, tnode, tred); \
+ rbtn_red_set(a_type, a_field, cnode); \
+ cnode = tnode; \
+ } \
+ } else { \
+ return; \
+ } \
+ } \
+ pathp->node = cnode; \
+ } \
+ /* Set root, and make it black. */ \
+ rbtree->rbt_root = path->node; \
+ rbtn_black_set(a_type, a_field, rbtree->rbt_root); \
+} \
+a_attr void \
+a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
+ struct { \
+ a_type *node; \
+ int cmp; \
+ } *pathp, *nodep, path[sizeof(void *) << 4]; \
+ /* Wind. */ \
+ nodep = NULL; /* Silence compiler warning. */ \
+ path->node = rbtree->rbt_root; \
+ for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \
+ int cmp = pathp->cmp = a_cmp(node, pathp->node); \
+ if (cmp < 0) { \
+ pathp[1].node = rbtn_left_get(a_type, a_field, \
+ pathp->node); \
+ } else { \
+ pathp[1].node = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ if (cmp == 0) { \
+ /* Find node's successor, in preparation for swap. */ \
+ pathp->cmp = 1; \
+ nodep = pathp; \
+ for (pathp++; pathp->node != &rbtree->rbt_nil; \
+ pathp++) { \
+ pathp->cmp = -1; \
+ pathp[1].node = rbtn_left_get(a_type, a_field, \
+ pathp->node); \
+ } \
+ break; \
+ } \
+ } \
+ } \
+ assert(nodep->node == node); \
+ pathp--; \
+ if (pathp->node != node) { \
+ /* Swap node with its successor. */ \
+ bool tred = rbtn_red_get(a_type, a_field, pathp->node); \
+ rbtn_color_set(a_type, a_field, pathp->node, \
+ rbtn_red_get(a_type, a_field, node)); \
+ rbtn_left_set(a_type, a_field, pathp->node, \
+ rbtn_left_get(a_type, a_field, node)); \
+ /* If node's successor is its right child, the following code */\
+ /* will do the wrong thing for the right child pointer. */\
+ /* However, it doesn't matter, because the pointer will be */\
+ /* properly set when the successor is pruned. */\
+ rbtn_right_set(a_type, a_field, pathp->node, \
+ rbtn_right_get(a_type, a_field, node)); \
+ rbtn_color_set(a_type, a_field, node, tred); \
+ /* The pruned leaf node's child pointers are never accessed */\
+ /* again, so don't bother setting them to nil. */\
+ nodep->node = pathp->node; \
+ pathp->node = node; \
+ if (nodep == path) { \
+ rbtree->rbt_root = nodep->node; \
+ } else { \
+ if (nodep[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, nodep[-1].node, \
+ nodep->node); \
+ } else { \
+ rbtn_right_set(a_type, a_field, nodep[-1].node, \
+ nodep->node); \
+ } \
+ } \
+ } else { \
+ a_type *left = rbtn_left_get(a_type, a_field, node); \
+ if (left != &rbtree->rbt_nil) { \
+ /* node has no successor, but it has a left child. */\
+ /* Splice node out, without losing the left child. */\
+ assert(rbtn_red_get(a_type, a_field, node) == false); \
+ assert(rbtn_red_get(a_type, a_field, left)); \
+ rbtn_black_set(a_type, a_field, left); \
+ if (pathp == path) { \
+ rbtree->rbt_root = left; \
+ } else { \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ left); \
+ } else { \
+ rbtn_right_set(a_type, a_field, pathp[-1].node, \
+ left); \
+ } \
+ } \
+ return; \
+ } else if (pathp == path) { \
+ /* The tree only contained one node. */ \
+ rbtree->rbt_root = &rbtree->rbt_nil; \
+ return; \
+ } \
+ } \
+ if (rbtn_red_get(a_type, a_field, pathp->node)) { \
+ /* Prune red node, which requires no fixup. */ \
+ assert(pathp[-1].cmp < 0); \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ &rbtree->rbt_nil); \
+ return; \
+ } \
+ /* The node to be pruned is black, so unwind until balance is */\
+ /* restored. */\
+ pathp->node = &rbtree->rbt_nil; \
+ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
+ assert(pathp->cmp != 0); \
+ if (pathp->cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp->node, \
+ pathp[1].node); \
+ assert(rbtn_red_get(a_type, a_field, pathp[1].node) \
+ == false); \
+ if (rbtn_red_get(a_type, a_field, pathp->node)) { \
+ a_type *right = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ a_type *rightleft = rbtn_left_get(a_type, a_field, \
+ right); \
+ a_type *tnode; \
+ if (rbtn_red_get(a_type, a_field, rightleft)) { \
+ /* In the following diagrams, ||, //, and \\ */\
+ /* indicate the path to the removed node. */\
+ /* */\
+ /* || */\
+ /* pathp(r) */\
+ /* // \ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (r) */\
+ /* */\
+ rbtn_black_set(a_type, a_field, pathp->node); \
+ rbtn_rotate_right(a_type, a_field, right, tnode); \
+ rbtn_right_set(a_type, a_field, pathp->node, tnode);\
+ rbtn_rotate_left(a_type, a_field, pathp->node, \
+ tnode); \
+ } else { \
+ /* || */\
+ /* pathp(r) */\
+ /* // \ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (b) */\
+ /* */\
+ rbtn_rotate_left(a_type, a_field, pathp->node, \
+ tnode); \
+ } \
+ /* Balance restored, but rotation modified subtree */\
+ /* root. */\
+ assert((uintptr_t)pathp > (uintptr_t)path); \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } \
+ return; \
+ } else { \
+ a_type *right = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ a_type *rightleft = rbtn_left_get(a_type, a_field, \
+ right); \
+ if (rbtn_red_get(a_type, a_field, rightleft)) { \
+ /* || */\
+ /* pathp(b) */\
+ /* // \ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (r) */\
+ a_type *tnode; \
+ rbtn_black_set(a_type, a_field, rightleft); \
+ rbtn_rotate_right(a_type, a_field, right, tnode); \
+ rbtn_right_set(a_type, a_field, pathp->node, tnode);\
+ rbtn_rotate_left(a_type, a_field, pathp->node, \
+ tnode); \
+ /* Balance restored, but rotation modified */\
+                    /* subtree root, which may actually be the tree */\
+ /* root. */\
+ if (pathp == path) { \
+ /* Set root. */ \
+ rbtree->rbt_root = tnode; \
+ } else { \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, \
+ pathp[-1].node, tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, \
+ pathp[-1].node, tnode); \
+ } \
+ } \
+ return; \
+ } else { \
+ /* || */\
+ /* pathp(b) */\
+ /* // \ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (b) */\
+ a_type *tnode; \
+ rbtn_red_set(a_type, a_field, pathp->node); \
+ rbtn_rotate_left(a_type, a_field, pathp->node, \
+ tnode); \
+ pathp->node = tnode; \
+ } \
+ } \
+ } else { \
+ a_type *left; \
+ rbtn_right_set(a_type, a_field, pathp->node, \
+ pathp[1].node); \
+ left = rbtn_left_get(a_type, a_field, pathp->node); \
+ if (rbtn_red_get(a_type, a_field, left)) { \
+ a_type *tnode; \
+ a_type *leftright = rbtn_right_get(a_type, a_field, \
+ left); \
+ a_type *leftrightleft = rbtn_left_get(a_type, a_field, \
+ leftright); \
+ if (rbtn_red_get(a_type, a_field, leftrightleft)) { \
+ /* || */\
+ /* pathp(b) */\
+ /* / \\ */\
+ /* (r) (b) */\
+ /* \ */\
+ /* (b) */\
+ /* / */\
+ /* (r) */\
+ a_type *unode; \
+ rbtn_black_set(a_type, a_field, leftrightleft); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ unode); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ tnode); \
+ rbtn_right_set(a_type, a_field, unode, tnode); \
+ rbtn_rotate_left(a_type, a_field, unode, tnode); \
+ } else { \
+ /* || */\
+ /* pathp(b) */\
+ /* / \\ */\
+ /* (r) (b) */\
+ /* \ */\
+ /* (b) */\
+ /* / */\
+ /* (b) */\
+ assert(leftright != &rbtree->rbt_nil); \
+ rbtn_red_set(a_type, a_field, leftright); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ tnode); \
+ rbtn_black_set(a_type, a_field, tnode); \
+ } \
+ /* Balance restored, but rotation modified subtree */\
+ /* root, which may actually be the tree root. */\
+ if (pathp == path) { \
+ /* Set root. */ \
+ rbtree->rbt_root = tnode; \
+ } else { \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } \
+ } \
+ return; \
+ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \
+ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+ if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ /* || */\
+ /* pathp(r) */\
+ /* / \\ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (r) */\
+ a_type *tnode; \
+ rbtn_black_set(a_type, a_field, pathp->node); \
+ rbtn_red_set(a_type, a_field, left); \
+ rbtn_black_set(a_type, a_field, leftleft); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ tnode); \
+ /* Balance restored, but rotation modified */\
+ /* subtree root. */\
+ assert((uintptr_t)pathp > (uintptr_t)path); \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, pathp[-1].node, \
+ tnode); \
+ } \
+ return; \
+ } else { \
+ /* || */\
+ /* pathp(r) */\
+ /* / \\ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (b) */\
+ rbtn_red_set(a_type, a_field, left); \
+ rbtn_black_set(a_type, a_field, pathp->node); \
+ /* Balance restored. */ \
+ return; \
+ } \
+ } else { \
+ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+ if (rbtn_red_get(a_type, a_field, leftleft)) { \
+ /* || */\
+ /* pathp(b) */\
+ /* / \\ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (r) */\
+ a_type *tnode; \
+ rbtn_black_set(a_type, a_field, leftleft); \
+ rbtn_rotate_right(a_type, a_field, pathp->node, \
+ tnode); \
+ /* Balance restored, but rotation modified */\
+ /* subtree root, which may actually be the tree */\
+ /* root. */\
+ if (pathp == path) { \
+ /* Set root. */ \
+ rbtree->rbt_root = tnode; \
+ } else { \
+ if (pathp[-1].cmp < 0) { \
+ rbtn_left_set(a_type, a_field, \
+ pathp[-1].node, tnode); \
+ } else { \
+ rbtn_right_set(a_type, a_field, \
+ pathp[-1].node, tnode); \
+ } \
+ } \
+ return; \
+ } else { \
+ /* || */\
+ /* pathp(b) */\
+ /* / \\ */\
+ /* (b) (b) */\
+ /* / */\
+ /* (b) */\
+ rbtn_red_set(a_type, a_field, left); \
+ } \
+ } \
+ } \
+ } \
+ /* Set root. */ \
+ rbtree->rbt_root = path->node; \
+ assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \
+}/* \
+a_attr a_type * \
+a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
+ if (node == &rbtree->rbt_nil) { \
+ return (&rbtree->rbt_nil); \
+ } else { \
+ a_type *ret; \
+ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \
+ a_field, node), cb, arg)) != &rbtree->rbt_nil \
+ || (ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg)); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
+ int cmp = a_cmp(start, node); \
+ if (cmp < 0) { \
+ a_type *ret; \
+ if ((ret = a_prefix##iter_start(rbtree, start, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)) != \
+ &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg)); \
+ } else if (cmp > 0) { \
+ return (a_prefix##iter_start(rbtree, start, \
+ rbtn_right_get(a_type, a_field, node), cb, arg)); \
+ } else { \
+ a_type *ret; \
+ if ((ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \
+ a_field, node), cb, arg)); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \
+ a_rbt_type *, a_type *, void *), void *arg) { \
+ a_type *ret; \
+ if (start != NULL) { \
+ ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \
+ cb, arg); \
+ } else { \
+ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = NULL; \
+ } \
+ return (ret); \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
+ if (node == &rbtree->rbt_nil) { \
+ return (&rbtree->rbt_nil); \
+ } else { \
+ a_type *ret; \
+ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_right_get(a_type, a_field, node), cb, arg)) != \
+ &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \
+ a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \
+ void *arg) { \
+ int cmp = a_cmp(start, node); \
+ if (cmp > 0) { \
+ a_type *ret; \
+ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \
+ rbtn_right_get(a_type, a_field, node), cb, arg)) != \
+ &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ } else if (cmp < 0) { \
+ return (a_prefix##reverse_iter_start(rbtree, start, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ } else { \
+ a_type *ret; \
+ if ((ret = cb(rbtree, node, arg)) != NULL) { \
+ return (ret); \
+ } \
+ return (a_prefix##reverse_iter_recurse(rbtree, \
+ rbtn_left_get(a_type, a_field, node), cb, arg)); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \
+ a_type *ret; \
+ if (start != NULL) { \
+ ret = a_prefix##reverse_iter_start(rbtree, start, \
+ rbtree->rbt_root, cb, arg); \
+ } else { \
+ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \
+ cb, arg); \
+ } \
+ if (ret == &rbtree->rbt_nil) { \
+ ret = NULL; \
+ } \
+ return (ret); \
+}*/
+
+#endif /* RB_H_ */
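The remove() body above (like the insert fixup preceding it) is only emitted once the rb_gen() wrapper macro from the top of this header is instantiated. A minimal, hypothetical instantiation is sketched below; it is not part of the patch. The ex_* type, prefix and comparator are made up, and it assumes the rb_node()/rb_tree()/rb_gen() macros defined earlier in this file.

#include "1rb/rb.h"

typedef struct ex_node_s ex_node_t;
struct ex_node_s {
  int key;
  rb_node(ex_node_t) link;            /* embedded left/right/color links */
};
typedef rb_tree(ex_node_t) ex_tree_t; /* root pointer plus nil sentinel */

static int
ex_cmp(ex_node_t *a, ex_node_t *b) {
  return (a->key < b->key) ? -1 : (a->key > b->key ? 1 : 0);
}

/* Emits static ex_new(), ex_insert(), ex_remove(), ... for ex_tree_t. */
rb_gen(static, ex_, ex_tree_t, ex_node_t, link, ex_cmp)

static void
ex_example(void) {
  ex_tree_t tree;
  ex_node_t a = { 1 }, b = { 2 };
  ex_new(&tree);                      /* initialize root and nil sentinel */
  ex_insert(&tree, &a);
  ex_insert(&tree, &b);
  ex_remove(&tree, &a);
}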
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2config/db_config.h b/plugins/Dbx_kv/src/hamsterdb/src/2config/db_config.h
new file mode 100644
index 0000000000..77f63944ef
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2config/db_config.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The configuration settings of a Database.
+ *
+ * @exception_safe nothrow
+ * @thread_safe no
+ */
+
+#ifndef HAM_DB_CONFIG_H
+#define HAM_DB_CONFIG_H
+
+#include "0root/root.h"
+
+#include <ham/types.h>
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct DatabaseConfiguration
+{
+ // Constructor initializes with default values
+ DatabaseConfiguration()
+ : db_name(0), flags(0), key_type(HAM_TYPE_BINARY),
+ key_size(HAM_KEY_SIZE_UNLIMITED), record_size(HAM_RECORD_SIZE_UNLIMITED),
+ key_compressor(0), record_compressor(0) {
+ }
+
+ // the database name
+ uint16_t db_name;
+
+ // the database flags
+ uint32_t flags;
+
+ // the key type
+ int key_type;
+
+ // the key size (if specified)
+ size_t key_size;
+
+ // the record size (if specified)
+ size_t record_size;
+
+ // the algorithm for key compression
+ int key_compressor;
+
+ // the algorithm for record compression
+ int record_compressor;
+
+};
+
+} // namespace hamsterdb
+
+#endif // HAM_DB_CONFIG_H
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2config/env_config.h b/plugins/Dbx_kv/src/hamsterdb/src/2config/env_config.h
new file mode 100644
index 0000000000..9db5de4771
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2config/env_config.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The configuration settings of an Environment.
+ *
+ * @exception_safe nothrow
+ * @thread_safe no
+ */
+
+#ifndef HAM_ENV_CONFIG_H
+#define HAM_ENV_CONFIG_H
+
+#include "0root/root.h"
+
+#include <string>
+#include <limits>
+
+#include <ham/hamsterdb.h>
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+#undef max
+
+namespace hamsterdb {
+
+struct EnvironmentConfiguration
+{
+ // Constructor initializes with default values
+ EnvironmentConfiguration()
+ : flags(0), file_mode(0644), max_databases(0),
+ page_size_bytes(HAM_DEFAULT_PAGE_SIZE),
+ cache_size_bytes(HAM_DEFAULT_CACHE_SIZE),
+ file_size_limit_bytes(std::numeric_limits<size_t>::max()),
+ remote_timeout_sec(0), journal_compressor(0),
+ is_encryption_enabled(false), journal_switch_threshold(0),
+ posix_advice(HAM_POSIX_FADVICE_NORMAL) {
+ }
+
+ // the environment's flags
+ uint32_t flags;
+
+ // the file mode
+ int file_mode;
+
+ // the number of databases
+ int max_databases;
+
+ // the page size (in bytes)
+ size_t page_size_bytes;
+
+ // the cache size (in bytes)
+ uint64_t cache_size_bytes;
+
+ // the file size limit (in bytes)
+ size_t file_size_limit_bytes;
+
+ // the remote timeout (in seconds)
+ size_t remote_timeout_sec;
+
+ // the path (or remote location)
+ std::string filename;
+
+ // the path of the logfile
+ std::string log_filename;
+
+ // the algorithm for journal compression
+ int journal_compressor;
+
+ // true if AES encryption is enabled
+ bool is_encryption_enabled;
+
+ // the AES encryption key
+ uint8_t encryption_key[16];
+
+ // threshold for switching journal files
+ size_t journal_switch_threshold;
+
+ // parameter for posix_fadvise()
+ int posix_advice;
+};
+
+} // namespace hamsterdb
+
+#endif // HAM_ENV_CONFIG_H
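Since EnvironmentConfiguration is plain data whose constructor supplies defaults, callers simply overwrite the fields they care about before handing the object to lower layers (see the Device classes below). A hypothetical sketch, not part of the patch; the 16 KB page size is an arbitrary example value:

#include "2config/env_config.h"

hamsterdb::EnvironmentConfiguration
make_example_config(const char *path)
{
  hamsterdb::EnvironmentConfiguration config;  // constructor fills in defaults
  config.filename = path;                      // file to create or open
  config.page_size_bytes = 16 * 1024;          // override HAM_DEFAULT_PAGE_SIZE
  config.flags = 0;                            // e.g. HAM_IN_MEMORY, HAM_READ_ONLY
  return config;
}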
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2device/device.h b/plugins/Dbx_kv/src/hamsterdb/src/2device/device.h
new file mode 100644
index 0000000000..7550fad06a
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2device/device.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Device management; a device encapsulates the physical device, either a
+ * file or memory chunks (for in-memory-databases)
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_DEVICE_H
+#define HAM_DEVICE_H
+
+#include "0root/root.h"
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "2config/env_config.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Page;
+
+class Device {
+ public:
+ // Constructor
+ Device(const EnvironmentConfiguration &config)
+ : m_config(config) {
+ }
+
+ // virtual destructor
+ virtual ~Device() {
+ }
+
+ // Returns the current page size
+ size_t page_size() const {
+ return (m_config.page_size_bytes);
+ }
+
+ // Create a new device - called in ham_env_create
+ virtual void create() = 0;
+
+ // Opens an existing device - called in ham_env_open
+ virtual void open() = 0;
+
+ // Returns true if the device is open
+ virtual bool is_open() = 0;
+
+ // Closes the device - called in ham_env_close
+ virtual void close() = 0;
+
+ // Flushes the device - called in ham_env_flush
+ virtual void flush() = 0;
+
+ // Truncate/resize the device
+ virtual void truncate(uint64_t new_size) = 0;
+
+ // Returns the current file/storage size
+ virtual uint64_t file_size() = 0;
+
+ // Seek position in a file
+ virtual void seek(uint64_t offset, int whence) = 0;
+
+ // Tell the position in a file
+ virtual uint64_t tell() = 0;
+
+ // Reads from the device; this function does not use mmap
+ virtual void read(uint64_t offset, void *buffer, size_t len) = 0;
+
+ // Writes to the device; this function does not use mmap
+ virtual void write(uint64_t offset, void *buffer, size_t len) = 0;
+
+ // Allocate storage from this device; this function
+ // will *NOT* use mmap. returns the offset of the allocated storage.
+ virtual uint64_t alloc(size_t len) = 0;
+
+ // Reads a page from the device; this function CAN use mmap
+ virtual void read_page(Page *page, uint64_t address) = 0;
+
+ // Writes a page to the device
+ virtual void write_page(Page *page) = 0;
+
+ // Allocate storage for a page from this device; this function
+ // can use mmap if available
+ virtual void alloc_page(Page *page) = 0;
+
+ // Frees a page on the device.
+ // The caller is responsible for flushing the page; the @ref free_page
+ // function will assert that the page is not dirty.
+ virtual void free_page(Page *page) = 0;
+
+ // Returns true if the specified range is in mapped memory
+ virtual bool is_mapped(uint64_t file_offset, size_t size) const = 0;
+
+ protected:
+ // the Environment configuration settings
+ const EnvironmentConfiguration &m_config;
+
+ friend class DeviceTest;
+ friend class InMemoryDeviceTest;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_DEVICE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2device/device_disk.h b/plugins/Dbx_kv/src/hamsterdb/src/2device/device_disk.h
new file mode 100644
index 0000000000..1bd62a904e
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2device/device_disk.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Device-implementation for disk-based files. Exception safety is "strong"
+ * for most operations, but currently it's possible that the Page is modified
+ * if DiskDevice::read_page fails in the middle.
+ *
+ * @exception_safe: basic/strong
+ * @thread_safe: no
+ */
+
+#ifndef HAM_DEVICE_DISK_H
+#define HAM_DEVICE_DISK_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1os/file.h"
+#include "1mem/mem.h"
+#include "2device/device.h"
+#include "2page/page.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/*
+ * a File-based device
+ */
+class DiskDevice : public Device {
+ struct State {
+ // the database file
+ File file;
+
+    // pointer to the mmapped data
+ uint8_t *mmapptr;
+
+ // the size of mmapptr as used in mmap
+ uint64_t mapped_size;
+
+ // the (cached) size of the file
+ uint64_t file_size;
+ };
+
+ public:
+ DiskDevice(const EnvironmentConfiguration &config)
+ : Device(config) {
+ State state;
+ state.mmapptr = 0;
+ state.mapped_size = 0;
+ state.file_size = 0;
+ std::swap(m_state, state);
+ }
+
+ // Create a new device
+ virtual void create() {
+ File file;
+ file.create(m_config.filename.c_str(), m_config.file_mode);
+ file.set_posix_advice(m_config.posix_advice);
+ m_state.file = file;
+ }
+
+ // opens an existing device
+ //
+  // tries to map the file; if that fails, falls back to read/write
+ virtual void open() {
+ bool read_only = (m_config.flags & HAM_READ_ONLY) != 0;
+
+ State state = m_state;
+ state.file.open(m_config.filename.c_str(), read_only);
+ state.file.set_posix_advice(m_config.posix_advice);
+
+ // the file size which backs the mapped ptr
+ state.file_size = state.file.get_file_size();
+
+ if (m_config.flags & HAM_DISABLE_MMAP) {
+ std::swap(m_state, state);
+ return;
+ }
+
+ // make sure we do not exceed the "real" size of the file, otherwise
+ // we crash when accessing memory which exceeds the mapping (at least
+ // on Win32)
+ size_t granularity = File::get_granularity();
+ if (state.file_size == 0 || state.file_size % granularity) {
+ std::swap(m_state, state);
+ return;
+ }
+
+ state.mapped_size = state.file_size;
+ state.file.mmap(0, state.mapped_size, read_only, &state.mmapptr);
+ std::swap(m_state, state);
+ }
+
+ // returns true if the device is open
+ virtual bool is_open() {
+ return (m_state.file.is_open());
+ }
+
+ // closes the device
+ virtual void close() {
+ State state = m_state;
+ if (state.mmapptr)
+ state.file.munmap(state.mmapptr, state.mapped_size);
+ state.file.close();
+
+ std::swap(m_state, state);
+ }
+
+ // flushes the device
+ virtual void flush() {
+ m_state.file.flush();
+ }
+
+ // truncate/resize the device
+ virtual void truncate(uint64_t new_file_size) {
+ if (new_file_size > m_config.file_size_limit_bytes)
+ throw Exception(HAM_LIMITS_REACHED);
+ m_state.file.truncate(new_file_size);
+ m_state.file_size = new_file_size;
+ }
+
+ // get the current file/storage size
+ virtual uint64_t file_size() {
+ ham_assert(m_state.file_size == m_state.file.get_file_size());
+ return (m_state.file_size);
+ }
+
+ // seek to a position in a file
+ virtual void seek(uint64_t offset, int whence) {
+ m_state.file.seek(offset, whence);
+ }
+
+ // tell the position in a file
+ virtual uint64_t tell() {
+ return (m_state.file.tell());
+ }
+
+ // reads from the device; this function does NOT use mmap
+ virtual void read(uint64_t offset, void *buffer, size_t len) {
+ m_state.file.pread(offset, buffer, len);
+ }
+
+  // writes to the device; this function does not use mmap and is
+  // responsible for making sure the data is run through the file
+  // filters
+ virtual void write(uint64_t offset, void *buffer, size_t len) {
+ m_state.file.pwrite(offset, buffer, len);
+ }
+
+ // allocate storage from this device; this function
+ // will *NOT* return mmapped memory
+ virtual uint64_t alloc(size_t len) {
+ uint64_t address = m_state.file_size;
+ truncate(address + len);
+ return ((uint64_t)address);
+ }
+
+ // reads a page from the device; this function CAN return a
+ // pointer to mmapped memory
+ virtual void read_page(Page *page, uint64_t address) {
+ // if this page is in the mapped area: return a pointer into that area.
+ // otherwise fall back to read/write.
+ if (address < m_state.mapped_size && m_state.mmapptr != 0) {
+ // ok, this page is mapped. If the Page object has a memory buffer
+ // then free it; afterwards return a pointer into the mapped memory
+ page->free_buffer();
+ // the following line will not throw a C++ exception, but can
+ // raise a signal. If that's the case then we don't catch it because
+ // something is seriously wrong and proper recovery is not possible.
+ page->assign_mapped_buffer(&m_state.mmapptr[address], address);
+ return;
+ }
+
+ // this page is not in the mapped area; allocate a buffer
+ if (page->get_data() == 0) {
+ // note that |p| will not leak if file.pread() throws; |p| is stored
+ // in the |page| object and will be cleaned up by the caller in
+ // case of an exception.
+ uint8_t *p = Memory::allocate<uint8_t>(m_config.page_size_bytes);
+ page->assign_allocated_buffer(p, address);
+ }
+
+ m_state.file.pread(address, page->get_data(), m_config.page_size_bytes);
+ }
+
+ // writes a page to the device
+ virtual void write_page(Page *page) {
+ write(page->get_address(), page->get_data(), m_config.page_size_bytes);
+ }
+
+ // Allocates storage for a page from this device; this function
+ // will *NOT* return mmapped memory
+ virtual void alloc_page(Page *page) {
+ uint64_t address = m_state.file_size;
+
+ truncate(address + m_config.page_size_bytes);
+ page->set_address(address);
+
+ // allocate a memory buffer
+ uint8_t *p = Memory::allocate<uint8_t>(m_config.page_size_bytes);
+ page->assign_allocated_buffer(p, address);
+ }
+
+ // Frees a page on the device; plays counterpoint to |alloc_page|
+ virtual void free_page(Page *page) {
+ ham_assert(page->get_data() != 0);
+ page->free_buffer();
+ }
+
+ // Returns true if the specified range is in mapped memory
+ virtual bool is_mapped(uint64_t file_offset, size_t size) const {
+ return (file_offset + size <= m_state.mapped_size);
+ }
+
+ private:
+ State m_state;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_DEVICE_DISK_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2device/device_factory.h b/plugins/Dbx_kv/src/hamsterdb/src/2device/device_factory.h
new file mode 100644
index 0000000000..7cde29d5af
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2device/device_factory.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A factory for Device objects
+ *
+ * @exception_safe: strong
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_DEVICE_FACTORY_H
+#define HAM_DEVICE_FACTORY_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "2config/env_config.h"
+#include "2device/device_disk.h"
+#include "2device/device_inmem.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct DeviceFactory {
+ // creates a new Device instance depending on the flags
+ static Device *create(const EnvironmentConfiguration &config) {
+ if (config.flags & HAM_IN_MEMORY)
+ return (new InMemoryDevice(config));
+ else
+ return (new DiskDevice(config));
+ }
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_DEVICE_FACTORY_H */
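As a hypothetical usage sketch (not part of the patch), an environment-level caller lets the factory pick the concrete Device and then creates or opens it; the real call sites in hamsterdb's Environment code are more involved:

#include "2config/env_config.h"
#include "2device/device_factory.h"

using namespace hamsterdb;

static Device *
setup_device(const EnvironmentConfiguration &config, bool create_new)
{
  // HAM_IN_MEMORY in config.flags selects InMemoryDevice, otherwise DiskDevice
  Device *device = DeviceFactory::create(config);
  if (create_new)
    device->create();   // ham_env_create path
  else
    device->open();     // ham_env_open path; throws for in-memory devices
  return device;
}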
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2device/device_inmem.h b/plugins/Dbx_kv/src/hamsterdb/src/2device/device_inmem.h
new file mode 100644
index 0000000000..3e2055148b
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2device/device_inmem.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: strong
+ * @thread_safe: no
+ */
+
+#ifndef HAM_DEVICE_INMEM_H
+#define HAM_DEVICE_INMEM_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1mem/mem.h"
+#include "2device/device.h"
+#include "2page/page.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/*
+ * an In-Memory device
+ */
+class InMemoryDevice : public Device {
+ struct State {
+ // flag whether this device was "opened" or is uninitialized
+ bool is_open;
+
+ // the allocated bytes
+ uint64_t allocated_size;
+ };
+
+ public:
+ // constructor
+ InMemoryDevice(const EnvironmentConfiguration &config)
+ : Device(config) {
+ State state;
+ state.is_open = false;
+ state.allocated_size = 0;
+ std::swap(m_state, state);
+ }
+
+ // Create a new device
+ virtual void create() {
+ m_state.is_open = true;
+ }
+
+ // opens an existing device
+ virtual void open() {
+ ham_assert(!"can't open an in-memory-device");
+ throw Exception(HAM_NOT_IMPLEMENTED);
+ }
+
+ // returns true if the device is open
+ virtual bool is_open() {
+ return (m_state.is_open);
+ }
+
+ // closes the device
+ virtual void close() {
+ ham_assert(m_state.is_open);
+ m_state.is_open = false;
+ }
+
+ // flushes the device
+ virtual void flush() {
+ }
+
+ // truncate/resize the device
+ virtual void truncate(uint64_t newsize) {
+ }
+
+ // get the current file/storage size
+ virtual uint64_t file_size() {
+ ham_assert(!"this operation is not possible for in-memory-databases");
+ throw Exception(HAM_NOT_IMPLEMENTED);
+ }
+
+ // seek position in a file
+ virtual void seek(uint64_t offset, int whence) {
+ ham_assert(!"can't seek in an in-memory-device");
+ throw Exception(HAM_NOT_IMPLEMENTED);
+ }
+
+ // tell the position in a file
+ virtual uint64_t tell() {
+ ham_assert(!"can't tell in an in-memory-device");
+ throw Exception(HAM_NOT_IMPLEMENTED);
+ }
+
+ // reads from the device; this function does not use mmap
+ virtual void read(uint64_t offset, void *buffer, size_t len) {
+ ham_assert(!"operation is not possible for in-memory-databases");
+ throw Exception(HAM_NOT_IMPLEMENTED);
+ }
+
+ // writes to the device
+ virtual void write(uint64_t offset, void *buffer, size_t len) {
+ ham_assert(!"operation is not possible for in-memory-databases");
+ throw Exception(HAM_NOT_IMPLEMENTED);
+ }
+
+ // reads a page from the device
+ virtual void read_page(Page *page, uint64_t address) {
+ ham_assert(!"operation is not possible for in-memory-databases");
+ throw Exception(HAM_NOT_IMPLEMENTED);
+ }
+
+ // writes a page to the device
+ virtual void write_page(Page *page) {
+ }
+
+ // allocate storage from this device; this function
+ // will *NOT* use mmap.
+ virtual uint64_t alloc(size_t size) {
+ if (m_state.allocated_size + size > m_config.file_size_limit_bytes)
+ throw Exception(HAM_LIMITS_REACHED);
+
+ uint64_t retval = (uint64_t)Memory::allocate<uint8_t>(size);
+ m_state.allocated_size += size;
+ return (retval);
+ }
+
+ // allocate storage for a page from this device
+ virtual void alloc_page(Page *page) {
+ ham_assert(page->get_data() == 0);
+
+ size_t page_size = m_config.page_size_bytes;
+ if (m_state.allocated_size + page_size > m_config.file_size_limit_bytes)
+ throw Exception(HAM_LIMITS_REACHED);
+
+ uint8_t *p = Memory::allocate<uint8_t>(page_size);
+ page->assign_allocated_buffer(p, (uint64_t)PTR_TO_U64(p));
+
+ m_state.allocated_size += page_size;
+ }
+
+ // frees a page on the device; plays counterpoint to @ref alloc_page
+ virtual void free_page(Page *page) {
+ page->free_buffer();
+
+ ham_assert(m_state.allocated_size >= m_config.page_size_bytes);
+ m_state.allocated_size -= m_config.page_size_bytes;
+ }
+
+ // Returns true if the specified range is in mapped memory
+ virtual bool is_mapped(uint64_t file_offset, size_t size) const {
+ return (false);
+ }
+
+ // releases a chunk of memory previously allocated with alloc()
+ void release(void *ptr, size_t size) {
+ Memory::release(ptr);
+ ham_assert(m_state.allocated_size >= size);
+ m_state.allocated_size -= size;
+ }
+
+ private:
+ State m_state;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_DEVICE_INMEM_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2lsn_manager/lsn_manager.h b/plugins/Dbx_kv/src/hamsterdb/src/2lsn_manager/lsn_manager.h
new file mode 100644
index 0000000000..3a6be50d44
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2lsn_manager/lsn_manager.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Manager for the log sequence number (lsn)
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_LSN_MANAGER_H
+#define HAM_LSN_MANAGER_H
+
+#include "0root/root.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class LsnManager
+{
+ public:
+ // Constructor
+ LsnManager()
+ : m_state(1) {
+ }
+
+ // Returns the next lsn
+ uint64_t next() {
+ return (m_state++);
+ }
+
+ private:
+ friend struct LsnManagerTest;
+
+ // the actual lsn
+ uint64_t m_state;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_LSN_MANAGER_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2lsn_manager/lsn_manager_test.h b/plugins/Dbx_kv/src/hamsterdb/src/2lsn_manager/lsn_manager_test.h
new file mode 100644
index 0000000000..59197a66cd
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2lsn_manager/lsn_manager_test.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Test gateway for LsnManager
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_LSN_MANAGER_TEST_H
+#define HAM_LSN_MANAGER_TEST_H
+
+#include "0root/root.h"
+
+#include "2lsn_manager/lsn_manager.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct LsnManagerTest
+{
+ // Constructor
+ LsnManagerTest(LsnManager *lsn_manager)
+ : m_state(lsn_manager->m_state) {
+ }
+
+ // Returns the current lsn
+ uint64_t lsn() const {
+ return (m_state);
+ }
+
+ uint64_t &m_state;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_LSN_MANAGER_TEST_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2page/page.cc b/plugins/Dbx_kv/src/hamsterdb/src/2page/page.cc
new file mode 100644
index 0000000000..64558e9370
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2page/page.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+
+#include "1base/error.h"
+#include "1os/os.h"
+#include "2page/page.h"
+#include "2device/device.h"
+#include "3btree/btree_node_proxy.h"
+
+namespace hamsterdb {
+
+uint64_t Page::ms_page_count_flushed = 0;
+
+Page::Page(Device *device, LocalDatabase *db)
+ : m_device(device), m_db(db), m_address(0), m_is_allocated(false),
+ m_is_without_header(false), m_is_dirty(false), m_cursor_list(0),
+ m_node_proxy(0), m_data(0)
+{
+ memset(&m_prev[0], 0, sizeof(m_prev));
+ memset(&m_next[0], 0, sizeof(m_next));
+}
+
+Page::~Page()
+{
+ ham_assert(m_cursor_list == 0);
+
+#ifdef HAM_ENABLE_HELGRIND
+ // safely unlock the mutex
+ m_mutex.try_lock();
+#endif
+ m_mutex.unlock();
+
+ if (m_node_proxy) {
+ delete m_node_proxy;
+ m_node_proxy = 0;
+ }
+
+ if (m_data != 0)
+ m_device->free_page(this);
+}
+
+void
+Page::alloc(uint32_t type, uint32_t flags)
+{
+ m_device->alloc_page(this);
+
+ if (flags & kInitializeWithZeroes) {
+ size_t page_size = m_device->page_size();
+ memset(get_raw_payload(), 0, page_size);
+ }
+
+ if (type)
+ set_type(type);
+}
+
+void
+Page::fetch(uint64_t address)
+{
+ m_device->read_page(this, address);
+ set_address(address);
+}
+
+void
+Page::flush()
+{
+ if (is_dirty()) {
+ m_device->write_page(this);
+ set_dirty(false);
+ ms_page_count_flushed++;
+ }
+}
+
+void
+Page::free_buffer()
+{
+ if (m_node_proxy) {
+ delete m_node_proxy;
+ m_node_proxy = 0;
+ }
+
+ if (m_is_allocated)
+ Memory::release(m_data);
+ m_data = 0;
+}
+
+} // namespace hamsterdb
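A hypothetical sketch (not part of the patch) of the page lifecycle implemented above: allocate backing storage through the device, mark the page dirty after modifying it, then flush it; the destructor only releases the buffer and never writes it.

#include "2page/page.h"
#include "2device/device.h"

using namespace hamsterdb;

static void
page_lifecycle_example(Device *device)
{
  Page *page = new Page(device);                 // no Database attached
  page->alloc(Page::kTypeBlob, Page::kInitializeWithZeroes);

  ::memset(page->get_payload(), 0xff, 16);       // modify some payload bytes
  page->set_dirty(true);

  page->flush();                                 // writes only if dirty
  delete page;                                   // frees the buffer, no flush
}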
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2page/page.h b/plugins/Dbx_kv/src/hamsterdb/src/2page/page.h
new file mode 100644
index 0000000000..f68edc474b
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2page/page.h
@@ -0,0 +1,435 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: strong
+ * @thread_safe: no
+ */
+
+#ifndef HAM_PAGE_H
+#define HAM_PAGE_H
+
+#include <string.h>
+#include <boost/atomic.hpp>
+
+#include "1base/error.h"
+#include "1base/spinlock.h"
+#include "1mem/mem.h"
+
+namespace hamsterdb {
+
+class Device;
+class BtreeCursor;
+class BtreeNodeProxy;
+class LocalDatabase;
+
+#include "1base/packstart.h"
+
+/*
+ * This header is only available if the (non-persistent) flag
+ * kNpersNoHeader is not set! Blob pages do not have this header.
+ */
+typedef HAM_PACK_0 struct HAM_PACK_1 PPageHeader {
+ // flags of this page - currently only used for the Page::kType* codes
+ uint32_t flags;
+
+ // reserved
+ uint32_t reserved;
+
+ // the lsn of the last operation
+ uint64_t lsn;
+
+ // the persistent data blob
+ uint8_t payload[1];
+
+} HAM_PACK_2 PPageHeader;
+
+#include "1base/packstop.h"
+
+#include "1base/packstart.h"
+
+/*
+ * A union combining the page header and a pointer to the raw page data.
+ *
+ * This structure definition is present outside of @ref Page scope
+ * to allow compile-time OFFSETOF macros to correctly judge the size,
+ * depending on platform and compiler settings.
+ */
+typedef HAM_PACK_0 union HAM_PACK_1 PPageData {
+ // the persistent header
+ struct PPageHeader header;
+
+ // a char pointer to the allocated storage on disk
+ uint8_t payload[1];
+
+} HAM_PACK_2 PPageData;
+
+#include "1base/packstop.h"
+
+/*
+ * The Page class
+ *
+ * Each Page instance is a node in several linked lists.
+ * In order to avoid multiple memory allocations, the previous/next pointers
+ * are part of the Page class (m_prev and m_next). Both fields are arrays
+ * of pointers and can be used e.g. with m_prev[Page::kListBucket] etc.
+ * (or with the methods defined below).
+ */
+class Page {
+ public:
+ // Misc. enums
+ enum {
+ // sizeof the persistent page header
+ kSizeofPersistentHeader = sizeof(PPageHeader) - 1,
+
+ // instruct Page::alloc() to reset the page with zeroes
+ kInitializeWithZeroes,
+ };
+
+ // The various linked lists (indices in m_prev, m_next)
+ enum {
+ // list of all cached pages
+ kListCache = 0,
+
+ // list of all pages in a changeset
+ kListChangeset = 1,
+
+ // a bucket in the hash table of the cache
+ kListBucket = 2,
+
+ // array limit
+ kListMax = 3
+ };
+
+ // non-persistent page flags
+ enum {
+ // page->m_data was allocated with malloc, not mmap
+ kNpersMalloc = 1,
+
+ // page has no header (i.e. it's part of a large blob)
+ kNpersNoHeader = 2
+ };
+
+ // Page types
+ //
+ // When large BLOBs span multiple pages, only their initial page
+  // will have a valid type code; subsequent pages of this blob will store
+  // the data as-is, so as to provide one continuous storage space
+ enum {
+ // unidentified db page type
+ kTypeUnknown = 0x00000000,
+
+ // the header page: this is the first page in the environment (offset 0)
+ kTypeHeader = 0x10000000,
+
+ // a B+tree root page
+ kTypeBroot = 0x20000000,
+
+ // a B+tree node page
+ kTypeBindex = 0x30000000,
+
+ // a page storing the state of the PageManager
+ kTypePageManager = 0x40000000,
+
+ // a page which stores blobs
+ kTypeBlob = 0x50000000
+ };
+
+ // Default constructor
+ Page(Device *device, LocalDatabase *db = 0);
+
+ // Destructor - releases allocated memory and resources, but neither
+ // flushes dirty pages to disk nor moves them to the freelist!
+ // Asserts that no cursors are attached.
+ ~Page();
+
+ // Returns the size of the usable persistent payload of a page
+ // (page_size minus the overhead of the page header)
+ static uint32_t usable_page_size(uint32_t raw_page_size) {
+ return (raw_page_size - Page::kSizeofPersistentHeader);
+ }
+
+
+ // Returns the database which manages this page; can be NULL if this
+ // page belongs to the Environment (i.e. for freelist-pages)
+ LocalDatabase *get_db() {
+ return (m_db);
+ }
+
+ // Sets the database to which this Page belongs
+ void set_db(LocalDatabase *db) {
+ m_db = db;
+ }
+
+ // Returns the spinlock
+ Spinlock &mutex() {
+ return (m_mutex);
+ }
+
+ // Returns the device
+ Device *device() {
+ return (m_device);
+ }
+
+ // Returns true if this is the header page of the Environment
+ bool is_header() const {
+ return (m_address == 0);
+ }
+
+ // Returns the address of this page
+ uint64_t get_address() const {
+ return (m_address);
+ }
+
+ // Sets the address of this page
+ void set_address(uint64_t address) {
+ m_address = address;
+ }
+
+ // Returns true if this page is dirty (and needs to be flushed to disk)
+ bool is_dirty() const {
+ return (m_is_dirty);
+ }
+
+ // Sets this page dirty/not dirty
+ void set_dirty(bool dirty) {
+ m_is_dirty = dirty;
+ }
+
+ // Returns true if the page's buffer was allocated with malloc
+ bool is_allocated() const {
+ return (m_is_allocated);
+ }
+
+ // Returns true if the page has no persistent header
+ bool is_without_header() const {
+ return (m_is_without_header);
+ }
+
+ // Sets a flag whether the page has no persistent header
+ void set_without_header(bool without_header) {
+ m_is_without_header = without_header;
+ }
+
+ // Assign a buffer which was allocated with malloc()
+ void assign_allocated_buffer(void *buffer, uint64_t address) {
+ m_data = (PPageData *)buffer;
+ m_is_allocated = true;
+ m_address = address;
+ }
+
+ // Assign a buffer from mmapped storage
+ void assign_mapped_buffer(void *buffer, uint64_t address) {
+ m_data = (PPageData *)buffer;
+ m_is_allocated = false;
+ m_address = address;
+ }
+
+ // Free resources associated with the buffer
+ void free_buffer();
+
+ // Returns the linked list of coupled cursors (can be NULL)
+ BtreeCursor *cursor_list() {
+ return (m_cursor_list);
+ }
+
+ // Sets the (head of the) linked list of cursors
+ void set_cursor_list(BtreeCursor *cursor) {
+ m_cursor_list = cursor;
+ }
+
+ // Returns the page's type (kType*)
+ uint32_t get_type() const {
+ return (m_data->header.flags);
+ }
+
+ // Sets the page's type (kType*)
+ void set_type(uint32_t type) {
+ m_data->header.flags = type;
+ }
+
+ // Returns the lsn of the last modification
+ uint64_t get_lsn() const {
+ return (m_data->header.lsn);
+ }
+
+ // Sets the lsn of the last modification
+ void set_lsn(uint64_t lsn) {
+ m_data->header.lsn = lsn;
+ }
+
+ // Sets the pointer to the persistent data
+ void set_data(PPageData *data) {
+ m_data = data;
+ }
+
+ // Returns the pointer to the persistent data
+ PPageData *get_data() {
+ return (m_data);
+ }
+
+ // Returns the persistent payload (after the header!)
+ uint8_t *get_payload() {
+ return (m_data->header.payload);
+ }
+
+ // Returns the persistent payload (after the header!)
+ const uint8_t *get_payload() const {
+ return (m_data->header.payload);
+ }
+
+ // Returns the persistent payload (including the header!)
+ uint8_t *get_raw_payload() {
+ return (m_data->payload);
+ }
+
+ // Returns the persistent payload (including the header!)
+ const uint8_t *get_raw_payload() const {
+ return (m_data->payload);
+ }
+
+ // Allocates a new page from the device
+ // |flags|: either 0 or kInitializeWithZeroes
+ void alloc(uint32_t type, uint32_t flags = 0);
+
+ // Reads a page from the device
+ void fetch(uint64_t address);
+
+ // Writes the page to the device
+ void flush();
+
+ // Returns true if this page is in a linked list
+ bool is_in_list(Page *list_head, int list) {
+ if (get_next(list) != 0)
+ return (true);
+ if (get_previous(list) != 0)
+ return (true);
+ return (list_head == this);
+ }
+
+ // Inserts this page at the beginning of a list and returns the
+ // new head of the list
+ Page *list_insert(Page *list_head, int list) {
+ set_next(list, 0);
+ set_previous(list, 0);
+
+ if (!list_head)
+ return (this);
+
+ set_next(list, list_head);
+ list_head->set_previous(list, this);
+ return (this);
+ }
+
+ // Removes this page from a list and returns the new head of the list
+ Page *list_remove(Page *list_head, int list) {
+ Page *n, *p;
+
+ if (this == list_head) {
+ n = get_next(list);
+ if (n)
+ n->set_previous(list, 0);
+ set_next(list, 0);
+ set_previous(list, 0);
+ return (n);
+ }
+
+ n = get_next(list);
+ p = get_previous(list);
+ if (p)
+ p->set_next(list, n);
+ if (n)
+ n->set_previous(list, p);
+ set_next(list, 0);
+ set_previous(list, 0);
+ return (list_head);
+ }
+
+ // Returns the next page in a linked list
+ Page *get_next(int list) {
+ return (m_next[list]);
+ }
+
+ // Returns the previous page of a linked list
+ Page *get_previous(int list) {
+ return (m_prev[list]);
+ }
+
+ // Returns the cached BtreeNodeProxy
+ BtreeNodeProxy *get_node_proxy() {
+ return (m_node_proxy);
+ }
+
+ // Sets the cached BtreeNodeProxy
+ void set_node_proxy(BtreeNodeProxy *proxy) {
+ m_node_proxy = proxy;
+ }
+
+ // tracks number of flushed pages
+ static uint64_t ms_page_count_flushed;
+
+ private:
+ friend class PageCollection;
+
+ // Sets the previous page of a linked list
+ void set_previous(int list, Page *other) {
+ m_prev[list] = other;
+ }
+
+ // Sets the next page in a linked list
+ void set_next(int list, Page *other) {
+ m_next[list] = other;
+ }
+
+ // the Device for allocating storage
+ Device *m_device;
+
+ // the Database handle (can be NULL)
+ LocalDatabase *m_db;
+
+ // The spinlock is locked if the page is in use or written to disk
+ Spinlock m_mutex;
+
+ // address of this page
+ uint64_t m_address;
+
+ // Page buffer was allocated with malloc() (if not then it was mapped
+ // with mmap)
+ bool m_is_allocated;
+
+ // Page does not have a persistent header
+ bool m_is_without_header;
+
+ // is this page dirty and needs to be flushed to disk?
+ bool m_is_dirty;
+
+ // linked list of all cursors which point to that page
+ BtreeCursor *m_cursor_list;
+
+ // linked lists of pages - see comments above
+ Page *m_prev[Page::kListMax];
+ Page *m_next[Page::kListMax];
+
+ // the cached BtreeNodeProxy object
+ BtreeNodeProxy *m_node_proxy;
+
+ // the persistent data of this page
+ PPageData *m_data;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_PAGE_H */
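A hypothetical sketch (not part of the patch) of driving the intrusive lists described in the class comment from the outside; the cache and changeset code do essentially this with the kList* indices:

#include "2page/page.h"

using namespace hamsterdb;

// Insert |page| at the front of a hash-bucket list; returns the new head.
static Page *
bucket_insert(Page *head, Page *page)
{
  return page->list_insert(head, Page::kListBucket);
}

// Remove |page| from the bucket list if it is linked; returns the new head.
static Page *
bucket_remove(Page *head, Page *page)
{
  if (page->is_in_list(head, Page::kListBucket))
    return page->list_remove(head, Page::kListBucket);
  return head;
}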
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2page/page_collection.h b/plugins/Dbx_kv/src/hamsterdb/src/2page/page_collection.h
new file mode 100644
index 0000000000..b396c78165
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2page/page_collection.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: strong
+ * @thread_safe: no
+ */
+
+#ifndef HAM_PAGE_COLLECTION_H
+#define HAM_PAGE_COLLECTION_H
+
+#include <string.h>
+
+#include <boost/atomic.hpp>
+
+#include "1mem/mem.h"
+#include "2page/page.h"
+
+namespace hamsterdb {
+
+/*
+ * The PageCollection class
+ */
+class PageCollection {
+ public:
+ // Default constructor
+ PageCollection(int list_id)
+ : m_head(0), m_tail(0), m_size(0), m_id(list_id) {
+ }
+
+ // Destructor
+ ~PageCollection() {
+ clear();
+ }
+
+ bool is_empty() const {
+ return (m_size == 0);
+ }
+
+ int size() const {
+ return (m_size);
+ }
+
+ // Atomically applies the |visitor()| to each page
+ template<typename Visitor>
+ void for_each(Visitor &visitor) {
+ for (Page *p = m_head; p != 0; p = p->get_next(m_id)) {
+ if (!visitor(p))
+ break;
+ }
+ }
+
+ // Atomically applies the |visitor()| to each page; starts at the tail
+ template<typename Visitor>
+ void for_each_reverse(Visitor &visitor) {
+ for (Page *p = m_tail; p != 0; p = p->get_previous(m_id)) {
+ if (!visitor(p))
+ break;
+ }
+ }
+
+ // Same as |for_each()|, but removes the page if |visitor()| returns true
+ template<typename Visitor>
+ void extract(Visitor &visitor) {
+ Page *page = m_head;
+ while (page) {
+ Page *next = page->get_next(m_id);
+ if (visitor(page)) {
+ del_impl(page);
+ }
+ page = next;
+ }
+ }
+
+ // Clears the collection.
+ void clear() {
+ Page *page = m_head;
+ while (page) {
+ Page *next = page->get_next(m_id);
+ del_impl(page);
+ page = next;
+ }
+
+ ham_assert(m_head == 0);
+ ham_assert(m_tail == 0);
+ ham_assert(m_size == 0);
+ }
+
+ // Returns the head
+ Page *head() const {
+ return (m_head);
+ }
+
+ // Returns the tail
+ Page *tail() const {
+ return (m_tail);
+ }
+
+ // Returns a page from the collection
+ Page *get(uint64_t address) const {
+ for (Page *p = m_head; p != 0; p = p->get_next(m_id)) {
+ if (p->get_address() == address)
+ return (p);
+ }
+ return (0);
+ }
+
+ // Removes a page from the collection. Returns true if the page was removed,
+ // otherwise false (if the page was not in the list)
+ bool del(Page *page) {
+ if (has(page)) {
+ del_impl(page);
+ return (true);
+ }
+ return (false);
+ }
+
+ // Adds a new page at the head of the list. Returns true if the page was
+ // added, otherwise false (that's the case if the page is already part of
+ // the list)
+ bool put(Page *page) {
+ if (!has(page)) {
+ m_head = page->list_insert(m_head, m_id);
+ if (!m_tail)
+ m_tail = page;
+ ++m_size;
+ return (true);
+ }
+ return (false);
+ }
+
+ // Returns true if a page with the |address| is already stored.
+ bool has(uint64_t address) const {
+ return (get(address) != 0);
+ }
+
+ // Returns true if the |page| is already stored. This is much faster
+ // than has(uint64_t address).
+ bool has(Page *page) const {
+ return (page->is_in_list(m_head, m_id));
+ }
+
+ private:
+ void del_impl(Page *page) {
+ // First update the tail because Page::list_remove() will change the
+ // pointers!
+ if (m_tail == page)
+ m_tail = page->get_previous(m_id);
+ m_head = page->list_remove(m_head, m_id);
+ ham_assert(m_size > 0);
+ --m_size;
+ }
+
+ // The head of the linked list
+ Page *m_head;
+
+ // The tail of the linked list
+ Page *m_tail;
+
+ // Number of elements in the list
+ int m_size;
+
+ // The list ID
+ int m_id;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_PAGE_COLLECTION_H */
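A hypothetical sketch (not part of the patch) of the visitor interface: for_each() calls visitor(page) for every element and stops as soon as the visitor returns false. For example, flushing every dirty page in a collection:

#include "2page/page.h"
#include "2page/page_collection.h"

using namespace hamsterdb;

struct FlushDirtyPages {
  bool operator()(Page *page) {
    if (page->is_dirty())
      page->flush();      // writes the page and clears the dirty flag
    return true;          // keep iterating over the whole collection
  }
};

static void
flush_collection(PageCollection &pages)
{
  FlushDirtyPages visitor;
  pages.for_each(visitor);
}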
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/Makefile.am b/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/Makefile.am
new file mode 100644
index 0000000000..b5c5c881f4
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/Makefile.am
@@ -0,0 +1,15 @@
+
+AM_CPPFLAGS = -DHAM_ENABLE_REMOTE -I$(top_builddir)/include
+
+# INCLUDES =
+
+noinst_LTLIBRARIES = libprotocol.la
+
+nodist_libprotocol_la_SOURCES = messages.pb.cc
+libprotocol_la_SOURCES = protocol.h
+libprotocol_la_LIBADD = -lprotobuf
+
+EXTRA_DIST = messages.proto
+messages.pb.cc proto: $(srcdir)/messages.proto
+ protoc $(srcdir)/messages.proto --cpp_out=.
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/Makefile.in b/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/Makefile.in
new file mode 100644
index 0000000000..e198a11d7d
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/Makefile.in
@@ -0,0 +1,627 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/2protobuf
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
+ $(top_srcdir)/depcomp
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/ax_cxx_gcc_abi_demangle.m4 \
+ $(top_srcdir)/m4/boost.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+libprotocol_la_DEPENDENCIES =
+am_libprotocol_la_OBJECTS =
+nodist_libprotocol_la_OBJECTS = messages.pb.lo
+libprotocol_la_OBJECTS = $(am_libprotocol_la_OBJECTS) \
+ $(nodist_libprotocol_la_OBJECTS)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp = $(SHELL) $(top_srcdir)/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+CXXCOMPILE = $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS)
+LTCXXCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CXXFLAGS) $(CXXFLAGS)
+AM_V_CXX = $(am__v_CXX_@AM_V@)
+am__v_CXX_ = $(am__v_CXX_@AM_DEFAULT_V@)
+am__v_CXX_0 = @echo " CXX " $@;
+am__v_CXX_1 =
+CXXLD = $(CXX)
+CXXLINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
+ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CXXLD = $(am__v_CXXLD_@AM_V@)
+am__v_CXXLD_ = $(am__v_CXXLD_@AM_DEFAULT_V@)
+am__v_CXXLD_0 = @echo " CXXLD " $@;
+am__v_CXXLD_1 =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(libprotocol_la_SOURCES) $(nodist_libprotocol_la_SOURCES)
+DIST_SOURCES = $(libprotocol_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CHRONO_LDFLAGS = @BOOST_CHRONO_LDFLAGS@
+BOOST_CHRONO_LDPATH = @BOOST_CHRONO_LDPATH@
+BOOST_CHRONO_LIBS = @BOOST_CHRONO_LIBS@
+BOOST_CPPFLAGS = @BOOST_CPPFLAGS@
+BOOST_FILESYSTEM_LDFLAGS = @BOOST_FILESYSTEM_LDFLAGS@
+BOOST_FILESYSTEM_LDPATH = @BOOST_FILESYSTEM_LDPATH@
+BOOST_FILESYSTEM_LIBS = @BOOST_FILESYSTEM_LIBS@
+BOOST_LDPATH = @BOOST_LDPATH@
+BOOST_ROOT = @BOOST_ROOT@
+BOOST_SYSTEM_LDFLAGS = @BOOST_SYSTEM_LDFLAGS@
+BOOST_SYSTEM_LDPATH = @BOOST_SYSTEM_LDPATH@
+BOOST_SYSTEM_LIBS = @BOOST_SYSTEM_LIBS@
+BOOST_THREAD_LDFLAGS = @BOOST_THREAD_LDFLAGS@
+BOOST_THREAD_LDPATH = @BOOST_THREAD_LDPATH@
+BOOST_THREAD_LIBS = @BOOST_THREAD_LIBS@
+BOOST_THREAD_WIN32_LDFLAGS = @BOOST_THREAD_WIN32_LDFLAGS@
+BOOST_THREAD_WIN32_LDPATH = @BOOST_THREAD_WIN32_LDPATH@
+BOOST_THREAD_WIN32_LIBS = @BOOST_THREAD_WIN32_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JDK_INCLUDE = @JDK_INCLUDE@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+AM_CPPFLAGS = -DHAM_ENABLE_REMOTE -I$(top_builddir)/include
+
+# INCLUDES =
+noinst_LTLIBRARIES = libprotocol.la
+nodist_libprotocol_la_SOURCES = messages.pb.cc
+libprotocol_la_SOURCES = protocol.h
+libprotocol_la_LIBADD = -lprotobuf
+EXTRA_DIST = messages.proto
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .cc .lo .o .obj
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/2protobuf/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/2protobuf/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; \
+ locs=`for p in $$list; do echo $$p; done | \
+ sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \
+ sort -u`; \
+ test -z "$$locs" || { \
+ echo rm -f $${locs}; \
+ rm -f $${locs}; \
+ }
+
+libprotocol.la: $(libprotocol_la_OBJECTS) $(libprotocol_la_DEPENDENCIES) $(EXTRA_libprotocol_la_DEPENDENCIES)
+ $(AM_V_CXXLD)$(CXXLINK) $(libprotocol_la_OBJECTS) $(libprotocol_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/messages.pb.Plo@am__quote@
+
+.cc.o:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.o$$||'`;\
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ $<
+
+.cc.obj:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.obj$$||'`;\
+@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ `$(CYGPATH_W) '$<'` &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Po
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(CXXCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.cc.lo:
+@am__fastdepCXX_TRUE@ $(AM_V_CXX)depbase=`echo $@ | sed 's|[^/]*$$|$(DEPDIR)/&|;s|\.lo$$||'`;\
+@am__fastdepCXX_TRUE@ $(LTCXXCOMPILE) -MT $@ -MD -MP -MF $$depbase.Tpo -c -o $@ $< &&\
+@am__fastdepCXX_TRUE@ $(am__mv) $$depbase.Tpo $$depbase.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ $(AM_V_CXX)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(AM_V_CXX@am__nodep@)$(LTCXXCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-noinstLTLIBRARIES cscopelist-am ctags \
+ ctags-am distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am
+
+messages.pb.cc proto: $(srcdir)/messages.proto
+ protoc $(srcdir)/messages.proto --cpp_out=.
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/messages.proto b/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/messages.proto
new file mode 100644
index 0000000000..f8ec8fdcb6
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/messages.proto
@@ -0,0 +1,457 @@
+
+
+package hamsterdb;
+
+option optimize_for = LITE_RUNTIME;
+
+message ProtoWrapper {
+ enum Type {
+ CONNECT_REQUEST = 10;
+ CONNECT_REPLY = 11;
+ DISCONNECT_REQUEST = 12;
+ DISCONNECT_REPLY = 13;
+ ENV_RENAME_REQUEST = 20;
+ ENV_RENAME_REPLY = 21;
+ ENV_GET_PARAMETERS_REQUEST = 30;
+ ENV_GET_PARAMETERS_REPLY = 31;
+ ENV_GET_DATABASE_NAMES_REQUEST = 40;
+ ENV_GET_DATABASE_NAMES_REPLY = 41;
+ ENV_FLUSH_REQUEST = 50;
+ ENV_FLUSH_REPLY = 51;
+ ENV_CREATE_DB_REQUEST = 60;
+ ENV_CREATE_DB_REPLY = 61;
+ ENV_OPEN_DB_REQUEST = 70;
+ ENV_OPEN_DB_REPLY = 71;
+ ENV_ERASE_DB_REQUEST = 80;
+ ENV_ERASE_DB_REPLY = 81;
+ DB_CLOSE_REQUEST = 90;
+ DB_CLOSE_REPLY = 91;
+ DB_GET_PARAMETERS_REQUEST = 100;
+ DB_GET_PARAMETERS_REPLY = 101;
+ // DB_FLUSH_REQUEST = 110;
+ // DB_FLUSH_REPLY = 111;
+ TXN_BEGIN_REQUEST = 120;
+ TXN_BEGIN_REPLY = 121;
+ TXN_COMMIT_REQUEST = 130;
+ TXN_COMMIT_REPLY = 131;
+ TXN_ABORT_REQUEST = 140;
+ TXN_ABORT_REPLY = 141;
+ DB_CHECK_INTEGRITY_REQUEST = 150;
+ DB_CHECK_INTEGRITY_REPLY = 151;
+ DB_GET_KEY_COUNT_REQUEST = 160;
+ DB_GET_KEY_COUNT_REPLY = 161;
+ DB_INSERT_REQUEST = 170;
+ DB_INSERT_REPLY = 171;
+ DB_ERASE_REQUEST = 180;
+ DB_ERASE_REPLY = 181;
+ DB_FIND_REQUEST = 190;
+ DB_FIND_REPLY = 191;
+ CURSOR_CREATE_REQUEST = 200;
+ CURSOR_CREATE_REPLY = 201;
+ CURSOR_CLONE_REQUEST = 210;
+ CURSOR_CLONE_REPLY = 211;
+ CURSOR_CLOSE_REQUEST = 220;
+ CURSOR_CLOSE_REPLY = 221;
+ CURSOR_INSERT_REQUEST = 230;
+ CURSOR_INSERT_REPLY = 231;
+ CURSOR_ERASE_REQUEST = 240;
+ CURSOR_ERASE_REPLY = 241;
+ CURSOR_GET_RECORD_COUNT_REQUEST = 260;
+ CURSOR_GET_RECORD_COUNT_REPLY = 261;
+ CURSOR_GET_DUPLICATE_POSITION_REQUEST = 262;
+ CURSOR_GET_DUPLICATE_POSITION_REPLY = 263;
+ CURSOR_GET_RECORD_SIZE_REQUEST = 264;
+ CURSOR_GET_RECORD_SIZE_REPLY = 265;
+ CURSOR_OVERWRITE_REQUEST = 270;
+ CURSOR_OVERWRITE_REPLY = 271;
+ CURSOR_MOVE_REQUEST = 280;
+ CURSOR_MOVE_REPLY = 281;
+ }
+
+ required Type type = 1;
+
+ optional ConnectRequest connect_request = 10;
+ optional ConnectReply connect_reply = 11;
+ optional DisconnectRequest disconnect_request = 12;
+ optional DisconnectReply disconnect_reply = 13;
+ optional EnvRenameRequest env_rename_request = 20;
+ optional EnvRenameReply env_rename_reply = 21;
+ optional EnvGetParametersRequest env_get_parameters_request = 30;
+ optional EnvGetParametersReply env_get_parameters_reply = 31;
+ optional EnvGetDatabaseNamesRequest env_get_database_names_request = 40;
+ optional EnvGetDatabaseNamesReply env_get_database_names_reply = 41;
+ optional EnvFlushRequest env_flush_request = 50;
+ optional EnvFlushReply env_flush_reply = 51;
+ optional EnvCreateDbRequest env_create_db_request = 60;
+ optional EnvCreateDbReply env_create_db_reply = 61;
+ optional EnvOpenDbRequest env_open_db_request = 70;
+ optional EnvOpenDbReply env_open_db_reply = 71;
+ optional EnvEraseDbRequest env_erase_db_request = 80;
+ optional EnvEraseDbReply env_erase_db_reply = 81;
+ optional DbCloseRequest db_close_request = 90;
+ optional DbCloseReply db_close_reply = 91;
+ optional DbGetParametersRequest db_get_parameters_request = 100;
+ optional DbGetParametersReply db_get_parameters_reply = 101;
+ optional TxnBeginRequest txn_begin_request = 120;
+ optional TxnBeginReply txn_begin_reply = 121;
+ optional TxnCommitRequest txn_commit_request = 130;
+ optional TxnCommitReply txn_commit_reply = 131;
+ optional TxnAbortRequest txn_abort_request = 140;
+ optional TxnAbortReply txn_abort_reply = 141;
+ optional DbCheckIntegrityRequest db_check_integrity_request = 150;
+ optional DbCheckIntegrityReply db_check_integrity_reply = 151;
+ optional DbCountRequest db_count_request = 160;
+ optional DbCountReply db_count_reply = 161;
+ optional DbInsertRequest db_insert_request = 170;
+ optional DbInsertReply db_insert_reply = 171;
+ optional DbEraseRequest db_erase_request = 180;
+ optional DbEraseReply db_erase_reply = 181;
+ optional DbFindRequest db_find_request = 190;
+ optional DbFindReply db_find_reply = 191;
+ optional CursorCreateRequest cursor_create_request = 200;
+ optional CursorCreateReply cursor_create_reply = 201;
+ optional CursorCloneRequest cursor_clone_request = 210;
+ optional CursorCloneReply cursor_clone_reply = 211;
+ optional CursorCloseRequest cursor_close_request = 220;
+ optional CursorCloseReply cursor_close_reply = 221;
+ optional CursorInsertRequest cursor_insert_request = 230;
+ optional CursorInsertReply cursor_insert_reply = 231;
+ optional CursorEraseRequest cursor_erase_request = 240;
+ optional CursorEraseReply cursor_erase_reply = 241;
+ optional CursorGetRecordCountRequest cursor_get_record_count_request = 260;
+ optional CursorGetRecordCountReply cursor_get_record_count_reply = 261;
+ optional CursorGetDuplicatePositionRequest cursor_get_duplicate_position_request = 262;
+ optional CursorGetDuplicatePositionReply cursor_get_duplicate_position_reply = 263;
+ optional CursorGetRecordSizeRequest cursor_get_record_size_request = 264;
+ optional CursorGetRecordSizeReply cursor_get_record_size_reply = 265;
+ optional CursorOverwriteRequest cursor_overwrite_request = 270;
+ optional CursorOverwriteReply cursor_overwrite_reply = 271;
+ optional CursorMoveRequest cursor_move_request = 280;
+ optional CursorMoveReply cursor_move_reply = 281;
+}
+
+message ConnectRequest {
+ required string path = 1;
+}
+
+message ConnectReply {
+ required sint32 status = 1;
+ optional uint32 env_flags = 2;
+ optional uint64 env_handle = 3;
+}
+
+message DisconnectRequest {
+ required uint64 env_handle = 1;
+}
+
+message DisconnectReply {
+ required sint32 status = 1;
+}
+
+message EnvGetParametersRequest {
+ required uint64 env_handle = 1;
+ repeated uint32 names = 2;
+}
+
+message EnvGetParametersReply {
+ required sint32 status = 1;
+ optional uint32 cache_size = 2;
+ optional uint32 page_size = 3;
+ optional uint32 max_env_databases = 4;
+ optional uint32 flags = 5;
+ optional uint32 filemode = 6;
+ optional string filename = 7;
+};
+
+message EnvGetDatabaseNamesRequest {
+ required uint64 env_handle = 1;
+}
+
+message EnvGetDatabaseNamesReply {
+ required sint32 status = 1;
+ repeated uint32 names = 2;
+}
+
+message EnvRenameRequest {
+ required uint64 env_handle = 1;
+ required uint32 oldname = 2;
+ required uint32 newname = 3;
+ required uint32 flags = 4;
+}
+
+message EnvRenameReply {
+ required sint32 status = 1;
+};
+
+message EnvFlushRequest {
+ required uint64 env_handle = 1;
+ required uint32 flags = 2;
+}
+
+message EnvFlushReply {
+ required sint32 status = 1;
+};
+
+message EnvCreateDbRequest {
+ required uint64 env_handle = 1;
+ required uint32 dbname = 2;
+ required uint32 flags = 3;
+ repeated uint32 param_names = 4;
+ repeated uint64 param_values = 5;
+}
+
+message EnvCreateDbReply {
+ required sint32 status = 1;
+ optional uint64 db_handle = 2;
+ optional uint32 db_flags = 3;
+};
+
+message EnvOpenDbRequest {
+ required uint64 env_handle = 1;
+ required uint32 dbname = 2;
+ required uint32 flags = 3;
+ repeated uint32 param_names = 4;
+ repeated uint64 param_values = 5;
+}
+
+message EnvOpenDbReply {
+ required sint32 status = 1;
+ optional uint64 db_handle = 2;
+ optional uint32 db_flags = 3;
+};
+
+message EnvEraseDbRequest {
+ required uint64 env_handle = 1;
+ required uint32 name = 2;
+ required uint32 flags = 3;
+}
+
+message EnvEraseDbReply {
+ required sint32 status = 1;
+};
+
+message DbCloseRequest {
+ required uint64 db_handle = 1;
+ required uint32 flags = 2;
+}
+
+message DbCloseReply {
+ required sint32 status = 1;
+};
+
+message DbGetParametersRequest {
+ required uint64 db_handle = 1;
+ repeated uint32 names = 2;
+}
+
+message DbGetParametersReply {
+ required sint32 status = 1;
+ optional uint32 max_env_databases = 2;
+ optional uint32 flags = 3;
+ optional uint32 key_size = 4;
+ optional uint32 dbname = 5;
+ optional uint32 keys_per_page = 6;
+ optional uint32 key_type = 7;
+ optional uint32 record_size = 8;
+};
+
+message TxnBeginRequest {
+ required uint64 env_handle = 1;
+ required uint32 flags = 2;
+ optional string name = 3;
+}
+
+message TxnBeginReply {
+ required sint32 status = 1;
+ required uint64 txn_handle = 2;
+};
+
+message TxnCommitRequest {
+ required uint64 txn_handle = 1;
+ required uint32 flags = 2;
+}
+
+message TxnCommitReply {
+ required sint32 status = 1;
+};
+
+message TxnAbortRequest {
+ required uint64 txn_handle = 1;
+ required uint32 flags = 2;
+}
+
+message TxnAbortReply {
+ required sint32 status = 1;
+};
+
+message DbCheckIntegrityRequest {
+ required uint64 db_handle = 1;
+ required uint32 flags = 2;
+}
+
+message DbCheckIntegrityReply {
+ required sint32 status = 1;
+};
+
+message DbCountRequest {
+ required uint64 db_handle = 1;
+ required uint64 txn_handle = 2;
+ required bool distinct = 3;
+};
+
+message DbCountReply {
+ required sint32 status = 1;
+ required uint64 keycount = 2;
+};
+
+message Key {
+ optional bytes data = 1;
+ required uint32 flags = 2;
+ required uint32 intflags = 3;
+}
+
+message Record {
+ optional bytes data = 1;
+ required uint32 flags = 2;
+ required uint32 partial_offset = 3;
+ required uint32 partial_size = 4;
+}
+
+message DbInsertRequest {
+ required uint64 db_handle = 1;
+ required uint64 txn_handle = 2;
+ optional Key key = 3;
+ optional Record record = 4;
+ required uint32 flags = 5;
+};
+
+message DbInsertReply {
+ required sint32 status = 1;
+ optional Key key = 2;
+};
+
+message DbEraseRequest {
+ required uint64 db_handle = 1;
+ required uint64 txn_handle = 2;
+ required Key key = 3;
+ required uint32 flags = 4;
+};
+
+message DbEraseReply {
+ required sint32 status = 1;
+};
+
+message DbFindRequest {
+ required uint64 db_handle = 1;
+ required uint64 txn_handle = 2;
+ required uint64 cursor_handle = 3;
+ required Key key = 4;
+ optional Record record = 5;
+ required uint32 flags = 6;
+};
+
+message DbFindReply {
+ required sint32 status = 1;
+ required Record record = 2;
+ optional Key key = 3;
+};
+
+message CursorCreateRequest {
+ required uint64 db_handle = 1;
+ required uint64 txn_handle = 2;
+ required uint32 flags = 3;
+};
+
+message CursorCreateReply {
+ required sint32 status = 1;
+ required uint64 cursor_handle = 2;
+};
+
+message CursorCloneRequest {
+ required uint64 cursor_handle = 1;
+};
+
+message CursorCloneReply {
+ required sint32 status = 1;
+ required uint64 cursor_handle = 2;
+};
+
+message CursorCloseRequest {
+ required uint64 cursor_handle = 1;
+};
+
+message CursorCloseReply {
+ required sint32 status = 1;
+};
+
+message CursorInsertRequest {
+ required uint64 cursor_handle = 1;
+ optional Key key = 2;
+ optional Record record = 3;
+ required uint32 flags = 4;
+};
+
+message CursorInsertReply {
+ required sint32 status = 1;
+ optional Key key = 2;
+};
+
+message CursorEraseRequest {
+ required uint64 cursor_handle = 1;
+ required uint32 flags = 2;
+};
+
+message CursorEraseReply {
+ required sint32 status = 1;
+};
+
+message CursorGetRecordCountRequest {
+ required uint64 cursor_handle = 1;
+ required uint32 flags = 2;
+};
+
+message CursorGetRecordCountReply {
+ required sint32 status = 1;
+ required uint32 count = 2;
+};
+
+message CursorGetRecordSizeRequest {
+ required uint64 cursor_handle = 1;
+};
+
+message CursorGetRecordSizeReply {
+ required sint32 status = 1;
+ required uint64 size = 2;
+};
+
+message CursorGetDuplicatePositionRequest {
+ required uint64 cursor_handle = 1;
+};
+
+message CursorGetDuplicatePositionReply {
+ required sint32 status = 1;
+ required uint32 position = 2;
+};
+
+message CursorOverwriteRequest {
+ required uint64 cursor_handle = 1;
+ required Record record = 2;
+ required uint32 flags = 3;
+};
+
+message CursorOverwriteReply {
+ required sint32 status = 1;
+};
+
+message CursorMoveRequest {
+ required uint64 cursor_handle = 1;
+ optional Key key = 2;
+ optional Record record = 3;
+ required uint32 flags = 4;
+};
+
+message CursorMoveReply {
+ required sint32 status = 1;
+ optional Key key = 2;
+ optional Record record = 3;
+};
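The proto2 messages above are compiled by protoc into messages.pb.h/messages.pb.cc (see the `messages.pb.cc proto:` rule in the 2protobuf Makefile.am at the top of this patch); the Protocol wrapper in the next file derives from the generated ProtoWrapper class. A minimal sketch (not part of the patch) of the generated C++ API, assuming the generated header is on the include path; the database path is purely illustrative:

    #include <string>
    #include "messages.pb.h"

    static std::string make_connect_request() {
      // protoc generates the nested Type enum and per-field accessors
      hamsterdb::ProtoWrapper wrapper;
      wrapper.set_type(hamsterdb::ProtoWrapper::CONNECT_REQUEST);
      wrapper.mutable_connect_request()->set_path("/tmp/env.db");  // illustrative
      std::string payload;
      wrapper.SerializeToString(&payload);  // available with LITE_RUNTIME as well
      return payload;
    }
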
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/protocol.h b/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/protocol.h
new file mode 100644
index 0000000000..8a2ab9d49f
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2protobuf/protocol.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Abstraction layer for the remote protocol
+ *
+ * @exception_safe: no
+ * @thread_safe: no
+ */
+
+#ifndef HAM_PROTOCOL_H
+#define HAM_PROTOCOL_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1mem/mem.h"
+#include "1base/error.h"
+#include "1base/dynamic_array.h"
+#include "2protobuf/messages.pb.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+/** a magic and version indicator for the remote protocol */
+#define HAM_TRANSFER_MAGIC_V1 (('h'<<24)|('a'<<16)|('m'<<8)|'1')
+
+/**
+ * the Protocol class maps a single message that is exchanged between
+ * client and server
+ */
+class Protocol : public hamsterdb::ProtoWrapper
+{
+ public:
+ Protocol() { }
+
+ /** constructor - assigns a type */
+ Protocol(hamsterdb::ProtoWrapper_Type type) {
+ set_type(type);
+ }
+
+ /** helper function which copies a ham_key_t into a ProtoBuf key */
+ static void assign_key(hamsterdb::Key *protokey, ham_key_t *hamkey,
+ bool deep_copy = true) {
+ if (deep_copy)
+ protokey->set_data(hamkey->data, hamkey->size);
+ protokey->set_flags(hamkey->flags);
+ protokey->set_intflags(hamkey->_flags);
+ }
+
+ /** helper function which copies a ham_record_t into a ProtoBuf record */
+ static void assign_record(hamsterdb::Record *protorec,
+ ham_record_t *hamrec, bool deep_copy = true) {
+ if (deep_copy)
+ protorec->set_data(hamrec->data, hamrec->size);
+ protorec->set_flags(hamrec->flags);
+ protorec->set_partial_offset(hamrec->partial_offset);
+ protorec->set_partial_size(hamrec->partial_size);
+ }
+
+ /**
+ * Factory function; creates a new Protocol structure from a serialized
+ * buffer
+ */
+ static Protocol *unpack(const uint8_t *buf, uint32_t size) {
+ if (*(uint32_t *)&buf[0] != HAM_TRANSFER_MAGIC_V1) {
+ ham_trace(("invalid protocol version"));
+ return (0);
+ }
+
+ Protocol *p = new Protocol;
+ if (!p->ParseFromArray(buf + 8, size - 8)) {
+ delete p;
+ return (0);
+ }
+ return (p);
+ }
+
+ /*
+ * Packs the Protocol structure into a memory buffer and returns
+ * a pointer to the buffer and the buffer size
+ */
+ bool pack(uint8_t **data, uint32_t *size) {
+ uint32_t packed_size = ByteSize();
+ /* we need 8 more bytes for magic and size */
+ uint8_t *p = Memory::allocate<uint8_t>(packed_size + 8);
+ if (!p)
+ return (false);
+
+ /* write the magic and the payload size of the packed structure */
+ *(uint32_t *)&p[0] = HAM_TRANSFER_MAGIC_V1;
+ *(uint32_t *)&p[4] = packed_size;
+
+ /* now write the packed structure */
+ if (!SerializeToArray(&p[8], packed_size)) {
+ Memory::release(p);
+ return (false);
+ }
+
+ *data = p;
+ *size = packed_size + 8;
+ return (true);
+ }
+
+ /*
+ * Packs the Protocol structure into a ByteArray
+ */
+ bool pack(ByteArray *barray) {
+ uint32_t packed_size = ByteSize();
+ /* we need 8 more bytes for magic and size */
+ uint8_t *p = (uint8_t *)barray->resize(packed_size + 8);
+ if (!p)
+ return (false);
+
+ /* write the magic and the payload size of the packed structure */
+ *(uint32_t *)&p[0] = HAM_TRANSFER_MAGIC_V1;
+ *(uint32_t *)&p[4] = packed_size;
+
+ /* now write the packed structure */
+ return (SerializeToArray(&p[8], packed_size));
+ }
+
+ /**
+ * shutdown/free globally allocated memory
+ */
+ static void shutdown() {
+ google::protobuf::ShutdownProtobufLibrary();
+ }
+};
+
+#endif /* HAM_PROTOCOL_H */
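protocol.h above frames every message with an 8-byte prefix: the 32-bit HAM_TRANSFER_MAGIC_V1 value followed by the 32-bit payload size, with the protobuf-serialized body starting at offset 8. A minimal round-trip sketch (not part of the patch), assuming the generated protobuf code is linked; the path is again illustrative:

    #include "2protobuf/protocol.h"

    static void roundtrip_example() {
      Protocol request(hamsterdb::ProtoWrapper::CONNECT_REQUEST);
      request.mutable_connect_request()->set_path("/tmp/env.db");  // illustrative

      uint8_t *data;
      uint32_t size;
      if (request.pack(&data, &size)) {       // writes magic, payload size, payload
        Protocol *parsed = Protocol::unpack(data, size);
        if (parsed) {
          // a server would dispatch on parsed->type() here
          delete parsed;
        }
        Memory::release(data);                // pack() allocated via Memory
      }
    }
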
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/Makefile.am b/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/Makefile.am
new file mode 100644
index 0000000000..cf5a3fb3ec
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/Makefile.am
@@ -0,0 +1,5 @@
+
+EXTRA_DIST = messages.h messages.proto
+
+gen proto:
+ cat messages.proto | ../../bin/genserializer.pl > messages.h
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/Makefile.in b/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/Makefile.in
new file mode 100644
index 0000000000..da966a01d3
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/Makefile.in
@@ -0,0 +1,451 @@
+# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = src/2protoserde
+DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/ax_cxx_gcc_abi_demangle.m4 \
+ $(top_srcdir)/m4/boost.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CHRONO_LDFLAGS = @BOOST_CHRONO_LDFLAGS@
+BOOST_CHRONO_LDPATH = @BOOST_CHRONO_LDPATH@
+BOOST_CHRONO_LIBS = @BOOST_CHRONO_LIBS@
+BOOST_CPPFLAGS = @BOOST_CPPFLAGS@
+BOOST_FILESYSTEM_LDFLAGS = @BOOST_FILESYSTEM_LDFLAGS@
+BOOST_FILESYSTEM_LDPATH = @BOOST_FILESYSTEM_LDPATH@
+BOOST_FILESYSTEM_LIBS = @BOOST_FILESYSTEM_LIBS@
+BOOST_LDPATH = @BOOST_LDPATH@
+BOOST_ROOT = @BOOST_ROOT@
+BOOST_SYSTEM_LDFLAGS = @BOOST_SYSTEM_LDFLAGS@
+BOOST_SYSTEM_LDPATH = @BOOST_SYSTEM_LDPATH@
+BOOST_SYSTEM_LIBS = @BOOST_SYSTEM_LIBS@
+BOOST_THREAD_LDFLAGS = @BOOST_THREAD_LDFLAGS@
+BOOST_THREAD_LDPATH = @BOOST_THREAD_LDPATH@
+BOOST_THREAD_LIBS = @BOOST_THREAD_LIBS@
+BOOST_THREAD_WIN32_LDFLAGS = @BOOST_THREAD_WIN32_LDFLAGS@
+BOOST_THREAD_WIN32_LDPATH = @BOOST_THREAD_WIN32_LDPATH@
+BOOST_THREAD_WIN32_LIBS = @BOOST_THREAD_WIN32_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+JDK_INCLUDE = @JDK_INCLUDE@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+EXTRA_DIST = messages.h messages.proto
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/2protoserde/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign src/2protoserde/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+tags TAGS:
+
+ctags CTAGS:
+
+cscope cscopelist:
+
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ cscopelist-am ctags-am distclean distclean-generic \
+ distclean-libtool distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags-am uninstall uninstall-am
+
+
+gen proto:
+ cat messages.proto | ../../bin/genserializer.pl > messages.h
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
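The messages.h added below is the output of that genserializer.pl rule: it replaces protobuf with hand-rolled, fixed-width serialization (byte arrays padded to 32-bit boundaries) for the same request/reply types. A minimal usage sketch (not part of the patch), assuming a caller-provided buffer:

    #include <vector>
    #include "2protoserde/messages.h"

    static void serde_example() {
      hamsterdb::SerializedTxnBeginRequest request;
      request.env_handle = 1;   // Serialized_Base converts from the plain integer
      request.flags = 0;

      int remaining = (int)request.get_size();
      std::vector<unsigned char> buffer(remaining);
      unsigned char *ptr = &buffer[0];
      request.serialize(&ptr, &remaining);    // advances ptr; remaining drops to 0

      hamsterdb::SerializedTxnBeginRequest parsed;
      ptr = &buffer[0];
      remaining = (int)buffer.size();
      parsed.deserialize(&ptr, &remaining);   // parsed.env_handle.value == 1
    }
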
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/messages.h b/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/messages.h
new file mode 100644
index 0000000000..38d091dd8f
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/messages.h
@@ -0,0 +1,1839 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_MESSAGES_H
+#define HAM_MESSAGES_H
+
+#include "0root/root.h"
+
+#include <assert.h>
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+/** a magic and version indicator for the remote protocol */
+#define HAM_TRANSFER_MAGIC_V2 (('h'<<24)|('a'<<16)|('m'<<8)|'2')
+
+namespace hamsterdb {
+
+enum {
+ kTxnBeginRequest,
+ kTxnBeginReply,
+ kTxnCommitRequest,
+ kTxnCommitReply,
+ kTxnAbortRequest,
+ kTxnAbortReply,
+ kDbGetKeyCountRequest,
+ kDbGetKeyCountReply,
+ kDbInsertRequest,
+ kDbInsertReply,
+ kDbEraseRequest,
+ kDbEraseReply,
+ kDbFindRequest,
+ kDbFindReply,
+ kCursorCreateRequest,
+ kCursorCreateReply,
+ kCursorCloneRequest,
+ kCursorCloneReply,
+ kCursorCloseRequest,
+ kCursorCloseReply,
+ kCursorInsertRequest,
+ kCursorInsertReply,
+ kCursorEraseRequest,
+ kCursorEraseReply,
+ kCursorGetRecordCountRequest,
+ kCursorGetRecordCountReply,
+ kCursorGetRecordSizeRequest,
+ kCursorGetRecordSizeReply,
+ kCursorGetDuplicatePositionRequest,
+ kCursorGetDuplicatePositionReply,
+ kCursorOverwriteRequest,
+ kCursorOverwriteReply,
+ kCursorMoveRequest,
+ kCursorMoveReply
+};
+
+template<typename Ex, typename In>
+struct Serialized_Base {
+ Ex value;
+
+ Serialized_Base() {
+ clear();
+ }
+
+ Serialized_Base(const Ex &t)
+ : value((In)t) {
+ }
+
+ operator Ex() {
+ return (value);
+ }
+
+ void clear() {
+ value = (Ex)0;
+ }
+
+ size_t get_size() const {
+ return (sizeof(In));
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ *(In *)*pptr = (In)value;
+ *pptr += sizeof(In);
+ *psize -= sizeof(In);
+ assert(*psize >= 0);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ value = (Ex) *(In *)*pptr;
+ *pptr += sizeof(In);
+ *psize -= sizeof(In);
+ assert(*psize >= 0);
+ }
+};
+
+struct SerializedBytes {
+ uint8_t *value;
+ uint32_t size;
+
+ SerializedBytes() {
+ clear();
+ }
+
+ size_t align(size_t s) const {
+ if (s % 4) return (s + 4 - (s % 4));
+ return (s);
+ }
+
+ void clear() {
+ value = 0; size = 0;
+ }
+
+ size_t get_size() const {
+ return (sizeof(uint32_t) + align(size)); // align to 32bits
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ *(uint32_t *)*pptr = size;
+ *pptr += sizeof(uint32_t);
+ *psize -= sizeof(uint32_t);
+ if (size) {
+ memcpy(*pptr, value, size);
+ *pptr += align(size); // align to 32bits
+ *psize -= align(size);
+ assert(*psize >= 0);
+ }
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ size = *(uint32_t *)*pptr;
+ *pptr += sizeof(uint32_t);
+ *psize -= sizeof(uint32_t);
+ if (size) {
+ value = *pptr;
+ *pptr += align(size); // align to 32bits
+ *psize -= align(size);
+ assert(*psize >= 0);
+ }
+ else
+ value = 0;
+ }
+};
+
+typedef Serialized_Base<bool, uint32_t> SerializedBool;
+typedef Serialized_Base<uint8_t, uint32_t> SerializedUint8;
+typedef Serialized_Base<uint16_t, uint32_t> SerializedUint16;
+typedef Serialized_Base<uint32_t, uint32_t> SerializedUint32;
+typedef Serialized_Base<int8_t, int32_t> SerializedSint8;
+typedef Serialized_Base<int16_t, int32_t> SerializedSint16;
+typedef Serialized_Base<int32_t, int32_t> SerializedSint32;
+typedef Serialized_Base<uint64_t, uint64_t> SerializedUint64;
+typedef Serialized_Base<int64_t, int64_t> SerializedSint64;
+
+
+struct SerializedKey {
+ SerializedBool has_data;
+ SerializedBytes data;
+ SerializedUint32 flags;
+ SerializedUint32 intflags;
+
+ SerializedKey() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ has_data.get_size() +
+ (has_data.value ? data.get_size() : 0) +
+ flags.get_size() +
+ intflags.get_size() +
+ 0);
+ }
+
+ void clear() {
+ has_data = false;
+ data.clear();
+ flags.clear();
+ intflags.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ has_data.serialize(pptr, psize);
+ if (has_data.value) data.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ intflags.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ has_data.deserialize(pptr, psize);
+ if (has_data.value) data.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ intflags.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedRecord {
+ SerializedBool has_data;
+ SerializedBytes data;
+ SerializedUint32 flags;
+ SerializedUint32 partial_offset;
+ SerializedUint32 partial_size;
+
+ SerializedRecord() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ has_data.get_size() +
+ (has_data.value ? data.get_size() : 0) +
+ flags.get_size() +
+ partial_offset.get_size() +
+ partial_size.get_size() +
+ 0);
+ }
+
+ void clear() {
+ has_data = false;
+ data.clear();
+ flags.clear();
+ partial_offset.clear();
+ partial_size.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ has_data.serialize(pptr, psize);
+ if (has_data.value) data.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ partial_offset.serialize(pptr, psize);
+ partial_size.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ has_data.deserialize(pptr, psize);
+ if (has_data.value) data.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ partial_offset.deserialize(pptr, psize);
+ partial_size.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedConnectRequest {
+ SerializedBytes path;
+
+ SerializedConnectRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ path.get_size() +
+ 0);
+ }
+
+ void clear() {
+ path.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ path.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ path.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedConnectReply {
+ SerializedSint32 status;
+ SerializedUint32 env_flags;
+ SerializedUint64 env_handle;
+
+ SerializedConnectReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ env_flags.get_size() +
+ env_handle.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ env_flags.clear();
+ env_handle.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ env_flags.serialize(pptr, psize);
+ env_handle.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ env_flags.deserialize(pptr, psize);
+ env_handle.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedTxnBeginRequest {
+ SerializedUint64 env_handle;
+ SerializedUint32 flags;
+ SerializedBytes name;
+
+ SerializedTxnBeginRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ env_handle.get_size() +
+ flags.get_size() +
+ name.get_size() +
+ 0);
+ }
+
+ void clear() {
+ env_handle.clear();
+ flags.clear();
+ name.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ env_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ name.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ env_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ name.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedTxnBeginReply {
+ SerializedSint32 status;
+ SerializedUint64 txn_handle;
+
+ SerializedTxnBeginReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ txn_handle.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ txn_handle.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ txn_handle.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ txn_handle.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedTxnCommitRequest {
+ SerializedUint64 txn_handle;
+ SerializedUint32 flags;
+
+ SerializedTxnCommitRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ txn_handle.get_size() +
+ flags.get_size() +
+ 0);
+ }
+
+ void clear() {
+ txn_handle.clear();
+ flags.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ txn_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ txn_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedTxnCommitReply {
+ SerializedSint32 status;
+
+ SerializedTxnCommitReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedTxnAbortRequest {
+ SerializedUint64 txn_handle;
+ SerializedUint32 flags;
+
+ SerializedTxnAbortRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ txn_handle.get_size() +
+ flags.get_size() +
+ 0);
+ }
+
+ void clear() {
+ txn_handle.clear();
+ flags.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ txn_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ txn_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedTxnAbortReply {
+ SerializedSint32 status;
+
+ SerializedTxnAbortReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedDbGetKeyCountRequest {
+ SerializedUint64 db_handle;
+ SerializedUint64 txn_handle;
+ SerializedBool distinct;
+
+ SerializedDbGetKeyCountRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ db_handle.get_size() +
+ txn_handle.get_size() +
+ distinct.get_size() +
+ 0);
+ }
+
+ void clear() {
+ db_handle.clear();
+ txn_handle.clear();
+ distinct.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ db_handle.serialize(pptr, psize);
+ txn_handle.serialize(pptr, psize);
+ distinct.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ db_handle.deserialize(pptr, psize);
+ txn_handle.deserialize(pptr, psize);
+ distinct.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedDbGetKeyCountReply {
+ SerializedSint32 status;
+ SerializedUint64 keycount;
+
+ SerializedDbGetKeyCountReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ keycount.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ keycount.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ keycount.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ keycount.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedDbInsertRequest {
+ SerializedUint64 db_handle;
+ SerializedUint64 txn_handle;
+ SerializedUint32 flags;
+ SerializedBool has_key;
+ SerializedKey key;
+ SerializedBool has_record;
+ SerializedRecord record;
+
+ SerializedDbInsertRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ db_handle.get_size() +
+ txn_handle.get_size() +
+ flags.get_size() +
+ has_key.get_size() +
+ (has_key.value ? key.get_size() : 0) +
+ has_record.get_size() +
+ (has_record.value ? record.get_size() : 0) +
+ 0);
+ }
+
+ void clear() {
+ db_handle.clear();
+ txn_handle.clear();
+ flags.clear();
+ has_key = false;
+ key.clear();
+ has_record = false;
+ record.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ db_handle.serialize(pptr, psize);
+ txn_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ has_key.serialize(pptr, psize);
+ if (has_key.value) key.serialize(pptr, psize);
+ has_record.serialize(pptr, psize);
+ if (has_record.value) record.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ db_handle.deserialize(pptr, psize);
+ txn_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ has_key.deserialize(pptr, psize);
+ if (has_key.value) key.deserialize(pptr, psize);
+ has_record.deserialize(pptr, psize);
+ if (has_record.value) record.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedDbInsertReply {
+ SerializedSint32 status;
+ SerializedBool has_key;
+ SerializedKey key;
+
+ SerializedDbInsertReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ has_key.get_size() +
+ (has_key.value ? key.get_size() : 0) +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ has_key = false;
+ key.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ has_key.serialize(pptr, psize);
+ if (has_key.value) key.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ has_key.deserialize(pptr, psize);
+ if (has_key.value) key.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedDbEraseRequest {
+ SerializedUint64 db_handle;
+ SerializedUint64 txn_handle;
+ SerializedKey key;
+ SerializedUint32 flags;
+
+ SerializedDbEraseRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ db_handle.get_size() +
+ txn_handle.get_size() +
+ key.get_size() +
+ flags.get_size() +
+ 0);
+ }
+
+ void clear() {
+ db_handle.clear();
+ txn_handle.clear();
+ key.clear();
+ flags.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ db_handle.serialize(pptr, psize);
+ txn_handle.serialize(pptr, psize);
+ key.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ db_handle.deserialize(pptr, psize);
+ txn_handle.deserialize(pptr, psize);
+ key.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedDbEraseReply {
+ SerializedSint32 status;
+
+ SerializedDbEraseReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedDbFindRequest {
+ SerializedUint64 db_handle;
+ SerializedUint64 txn_handle;
+ SerializedUint64 cursor_handle;
+ SerializedUint32 flags;
+ SerializedKey key;
+ SerializedBool has_record;
+ SerializedRecord record;
+
+ SerializedDbFindRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ db_handle.get_size() +
+ txn_handle.get_size() +
+ cursor_handle.get_size() +
+ flags.get_size() +
+ key.get_size() +
+ has_record.get_size() +
+ (has_record.value ? record.get_size() : 0) +
+ 0);
+ }
+
+ void clear() {
+ db_handle.clear();
+ txn_handle.clear();
+ cursor_handle.clear();
+ flags.clear();
+ key.clear();
+ has_record = false;
+ record.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ db_handle.serialize(pptr, psize);
+ txn_handle.serialize(pptr, psize);
+ cursor_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ key.serialize(pptr, psize);
+ has_record.serialize(pptr, psize);
+ if (has_record.value) record.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ db_handle.deserialize(pptr, psize);
+ txn_handle.deserialize(pptr, psize);
+ cursor_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ key.deserialize(pptr, psize);
+ has_record.deserialize(pptr, psize);
+ if (has_record.value) record.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedDbFindReply {
+ SerializedSint32 status;
+ SerializedBool has_key;
+ SerializedKey key;
+ SerializedBool has_record;
+ SerializedRecord record;
+
+ SerializedDbFindReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ has_key.get_size() +
+ (has_key.value ? key.get_size() : 0) +
+ has_record.get_size() +
+ (has_record.value ? record.get_size() : 0) +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ has_key = false;
+ key.clear();
+ has_record = false;
+ record.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ has_key.serialize(pptr, psize);
+ if (has_key.value) key.serialize(pptr, psize);
+ has_record.serialize(pptr, psize);
+ if (has_record.value) record.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ has_key.deserialize(pptr, psize);
+ if (has_key.value) key.deserialize(pptr, psize);
+ has_record.deserialize(pptr, psize);
+ if (has_record.value) record.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorCreateRequest {
+ SerializedUint64 db_handle;
+ SerializedUint64 txn_handle;
+ SerializedUint32 flags;
+
+ SerializedCursorCreateRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ db_handle.get_size() +
+ txn_handle.get_size() +
+ flags.get_size() +
+ 0);
+ }
+
+ void clear() {
+ db_handle.clear();
+ txn_handle.clear();
+ flags.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ db_handle.serialize(pptr, psize);
+ txn_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ db_handle.deserialize(pptr, psize);
+ txn_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorCreateReply {
+ SerializedSint32 status;
+ SerializedUint64 cursor_handle;
+
+ SerializedCursorCreateReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ cursor_handle.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ cursor_handle.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ cursor_handle.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ cursor_handle.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorCloneRequest {
+ SerializedUint64 cursor_handle;
+
+ SerializedCursorCloneRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorCloneReply {
+ SerializedSint32 status;
+ SerializedUint64 cursor_handle;
+
+ SerializedCursorCloneReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ cursor_handle.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ cursor_handle.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ cursor_handle.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ cursor_handle.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorCloseRequest {
+ SerializedUint64 cursor_handle;
+
+ SerializedCursorCloseRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorCloseReply {
+ SerializedSint32 status;
+
+ SerializedCursorCloseReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorInsertRequest {
+ SerializedUint64 cursor_handle;
+ SerializedUint32 flags;
+ SerializedBool has_key;
+ SerializedKey key;
+ SerializedBool has_record;
+ SerializedRecord record;
+
+ SerializedCursorInsertRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ flags.get_size() +
+ has_key.get_size() +
+ (has_key.value ? key.get_size() : 0) +
+ has_record.get_size() +
+ (has_record.value ? record.get_size() : 0) +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ flags.clear();
+ has_key = false;
+ key.clear();
+ has_record = false;
+ record.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ has_key.serialize(pptr, psize);
+ if (has_key.value) key.serialize(pptr, psize);
+ has_record.serialize(pptr, psize);
+ if (has_record.value) record.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ has_key.deserialize(pptr, psize);
+ if (has_key.value) key.deserialize(pptr, psize);
+ has_record.deserialize(pptr, psize);
+ if (has_record.value) record.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorInsertReply {
+ SerializedSint32 status;
+ SerializedBool has_key;
+ SerializedKey key;
+
+ SerializedCursorInsertReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ has_key.get_size() +
+ (has_key.value ? key.get_size() : 0) +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ has_key = false;
+ key.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ has_key.serialize(pptr, psize);
+ if (has_key.value) key.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ has_key.deserialize(pptr, psize);
+ if (has_key.value) key.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorEraseRequest {
+ SerializedUint64 cursor_handle;
+ SerializedUint32 flags;
+
+ SerializedCursorEraseRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ flags.get_size() +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ flags.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorEraseReply {
+ SerializedSint32 status;
+
+ SerializedCursorEraseReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorGetRecordCountRequest {
+ SerializedUint64 cursor_handle;
+ SerializedUint32 flags;
+
+ SerializedCursorGetRecordCountRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ flags.get_size() +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ flags.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorGetRecordCountReply {
+ SerializedSint32 status;
+ SerializedUint32 count;
+
+ SerializedCursorGetRecordCountReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ count.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ count.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ count.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ count.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorGetRecordSizeRequest {
+ SerializedUint64 cursor_handle;
+
+ SerializedCursorGetRecordSizeRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorGetRecordSizeReply {
+ SerializedSint32 status;
+ SerializedUint64 size;
+
+ SerializedCursorGetRecordSizeReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ size.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ size.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ size.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ size.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorGetDuplicatePositionRequest {
+ SerializedUint64 cursor_handle;
+
+ SerializedCursorGetDuplicatePositionRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorGetDuplicatePositionReply {
+ SerializedSint32 status;
+ SerializedUint32 position;
+
+ SerializedCursorGetDuplicatePositionReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ position.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ position.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ position.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ position.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorOverwriteRequest {
+ SerializedUint64 cursor_handle;
+ SerializedRecord record;
+ SerializedUint32 flags;
+
+ SerializedCursorOverwriteRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ record.get_size() +
+ flags.get_size() +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ record.clear();
+ flags.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ record.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ record.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorOverwriteReply {
+ SerializedSint32 status;
+
+ SerializedCursorOverwriteReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorMoveRequest {
+ SerializedUint64 cursor_handle;
+ SerializedUint32 flags;
+ SerializedBool has_key;
+ SerializedKey key;
+ SerializedBool has_record;
+ SerializedRecord record;
+
+ SerializedCursorMoveRequest() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ cursor_handle.get_size() +
+ flags.get_size() +
+ has_key.get_size() +
+ (has_key.value ? key.get_size() : 0) +
+ has_record.get_size() +
+ (has_record.value ? record.get_size() : 0) +
+ 0);
+ }
+
+ void clear() {
+ cursor_handle.clear();
+ flags.clear();
+ has_key = false;
+ key.clear();
+ has_record = false;
+ record.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ cursor_handle.serialize(pptr, psize);
+ flags.serialize(pptr, psize);
+ has_key.serialize(pptr, psize);
+ if (has_key.value) key.serialize(pptr, psize);
+ has_record.serialize(pptr, psize);
+ if (has_record.value) record.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ cursor_handle.deserialize(pptr, psize);
+ flags.deserialize(pptr, psize);
+ has_key.deserialize(pptr, psize);
+ if (has_key.value) key.deserialize(pptr, psize);
+ has_record.deserialize(pptr, psize);
+ if (has_record.value) record.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedCursorMoveReply {
+ SerializedSint32 status;
+ SerializedKey key;
+ SerializedRecord record;
+
+ SerializedCursorMoveReply() {
+ clear();
+ }
+
+ size_t get_size() const {
+ return (
+ status.get_size() +
+ key.get_size() +
+ record.get_size() +
+ 0);
+ }
+
+ void clear() {
+ status.clear();
+ key.clear();
+ record.clear();
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ status.serialize(pptr, psize);
+ key.serialize(pptr, psize);
+ record.serialize(pptr, psize);
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ status.deserialize(pptr, psize);
+ key.deserialize(pptr, psize);
+ record.deserialize(pptr, psize);
+ }
+};
+
+struct SerializedWrapper {
+ SerializedUint32 magic;
+ SerializedUint32 size;
+ SerializedUint32 id;
+ SerializedTxnBeginRequest txn_begin_request;
+ SerializedTxnBeginReply txn_begin_reply;
+ SerializedTxnCommitRequest txn_commit_request;
+ SerializedTxnCommitReply txn_commit_reply;
+ SerializedTxnAbortRequest txn_abort_request;
+ SerializedTxnAbortReply txn_abort_reply;
+ SerializedDbGetKeyCountRequest db_count_request;
+ SerializedDbGetKeyCountReply db_count_reply;
+ SerializedDbInsertRequest db_insert_request;
+ SerializedDbInsertReply db_insert_reply;
+ SerializedDbEraseRequest db_erase_request;
+ SerializedDbEraseReply db_erase_reply;
+ SerializedDbFindRequest db_find_request;
+ SerializedDbFindReply db_find_reply;
+ SerializedCursorCreateRequest cursor_create_request;
+ SerializedCursorCreateReply cursor_create_reply;
+ SerializedCursorCloneRequest cursor_clone_request;
+ SerializedCursorCloneReply cursor_clone_reply;
+ SerializedCursorCloseRequest cursor_close_request;
+ SerializedCursorCloseReply cursor_close_reply;
+ SerializedCursorInsertRequest cursor_insert_request;
+ SerializedCursorInsertReply cursor_insert_reply;
+ SerializedCursorEraseRequest cursor_erase_request;
+ SerializedCursorEraseReply cursor_erase_reply;
+ SerializedCursorGetRecordCountRequest cursor_get_record_count_request;
+ SerializedCursorGetRecordCountReply cursor_get_record_count_reply;
+ SerializedCursorGetRecordSizeRequest cursor_get_record_size_request;
+ SerializedCursorGetRecordSizeReply cursor_get_record_size_reply;
+ SerializedCursorGetDuplicatePositionRequest cursor_get_duplicate_position_request;
+ SerializedCursorGetDuplicatePositionReply cursor_get_duplicate_position_reply;
+ SerializedCursorOverwriteRequest cursor_overwrite_request;
+ SerializedCursorOverwriteReply cursor_overwrite_reply;
+ SerializedCursorMoveRequest cursor_move_request;
+ SerializedCursorMoveReply cursor_move_reply;
+
+ SerializedWrapper() {
+ clear();
+ }
+
+ // The methods in here have a custom implementation; otherwise the generator
+ // would emit a bool for each "optional" field, which would unnecessarily
+ // increase the structure size.
+ void clear() {
+ magic = 0;
+ size = 0;
+ id = 0;
+ }
+
+ size_t get_size() const {
+ size_t s = magic.get_size() + size.get_size() + id.get_size();
+ switch (id.value) {
+ case kTxnBeginRequest:
+ return (s + txn_begin_request.get_size());
+ case kTxnBeginReply:
+ return (s + txn_begin_reply.get_size());
+ case kTxnCommitRequest:
+ return (s + txn_commit_request.get_size());
+ case kTxnCommitReply:
+ return (s + txn_commit_reply.get_size());
+ case kTxnAbortRequest:
+ return (s + txn_abort_request.get_size());
+ case kTxnAbortReply:
+ return (s + txn_abort_reply.get_size());
+ case kDbGetKeyCountRequest:
+ return (s + db_count_request.get_size());
+ case kDbGetKeyCountReply:
+ return (s + db_count_reply.get_size());
+ case kDbInsertRequest:
+ return (s + db_insert_request.get_size());
+ case kDbInsertReply:
+ return (s + db_insert_reply.get_size());
+ case kDbEraseRequest:
+ return (s + db_erase_request.get_size());
+ case kDbEraseReply:
+ return (s + db_erase_reply.get_size());
+ case kDbFindRequest:
+ return (s + db_find_request.get_size());
+ case kDbFindReply:
+ return (s + db_find_reply.get_size());
+ case kCursorCreateRequest:
+ return (s + cursor_create_request.get_size());
+ case kCursorCreateReply:
+ return (s + cursor_create_reply.get_size());
+ case kCursorCloneRequest:
+ return (s + cursor_clone_request.get_size());
+ case kCursorCloneReply:
+ return (s + cursor_clone_reply.get_size());
+ case kCursorCloseRequest:
+ return (s + cursor_close_request.get_size());
+ case kCursorCloseReply:
+ return (s + cursor_close_reply.get_size());
+ case kCursorInsertRequest:
+ return (s + cursor_insert_request.get_size());
+ case kCursorInsertReply:
+ return (s + cursor_insert_reply.get_size());
+ case kCursorEraseRequest:
+ return (s + cursor_erase_request.get_size());
+ case kCursorEraseReply:
+ return (s + cursor_erase_reply.get_size());
+ case kCursorGetRecordCountRequest:
+ return (s + cursor_get_record_count_request.get_size());
+ case kCursorGetRecordCountReply:
+ return (s + cursor_get_record_count_reply.get_size());
+ case kCursorGetRecordSizeRequest:
+ return (s + cursor_get_record_size_request.get_size());
+ case kCursorGetRecordSizeReply:
+ return (s + cursor_get_record_size_reply.get_size());
+ case kCursorGetDuplicatePositionRequest:
+ return (s + cursor_get_duplicate_position_request.get_size());
+ case kCursorGetDuplicatePositionReply:
+ return (s + cursor_get_duplicate_position_reply.get_size());
+ case kCursorOverwriteRequest:
+ return (s + cursor_overwrite_request.get_size());
+ case kCursorOverwriteReply:
+ return (s + cursor_overwrite_reply.get_size());
+ case kCursorMoveRequest:
+ return (s + cursor_move_request.get_size());
+ case kCursorMoveReply:
+ return (s + cursor_move_reply.get_size());
+ default:
+ assert(!"shouldn't be here");
+ return (0);
+ }
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ magic.serialize(pptr, psize);
+ size.serialize(pptr, psize);
+ id.serialize(pptr, psize);
+
+ switch (id.value) {
+ case kTxnBeginRequest:
+ txn_begin_request.serialize(pptr, psize);
+ break;
+ case kTxnBeginReply:
+ txn_begin_reply.serialize(pptr, psize);
+ break;
+ case kTxnCommitRequest:
+ txn_commit_request.serialize(pptr, psize);
+ break;
+ case kTxnCommitReply:
+ txn_commit_reply.serialize(pptr, psize);
+ break;
+ case kTxnAbortRequest:
+ txn_abort_request.serialize(pptr, psize);
+ break;
+ case kTxnAbortReply:
+ txn_abort_reply.serialize(pptr, psize);
+ break;
+ case kDbGetKeyCountRequest:
+ db_count_request.serialize(pptr, psize);
+ break;
+ case kDbGetKeyCountReply:
+ db_count_reply.serialize(pptr, psize);
+ break;
+ case kDbInsertRequest:
+ db_insert_request.serialize(pptr, psize);
+ break;
+ case kDbInsertReply:
+ db_insert_reply.serialize(pptr, psize);
+ break;
+ case kDbEraseRequest:
+ db_erase_request.serialize(pptr, psize);
+ break;
+ case kDbEraseReply:
+ db_erase_reply.serialize(pptr, psize);
+ break;
+ case kDbFindRequest:
+ db_find_request.serialize(pptr, psize);
+ break;
+ case kDbFindReply:
+ db_find_reply.serialize(pptr, psize);
+ break;
+ case kCursorCreateRequest:
+ cursor_create_request.serialize(pptr, psize);
+ break;
+ case kCursorCreateReply:
+ cursor_create_reply.serialize(pptr, psize);
+ break;
+ case kCursorCloneRequest:
+ cursor_clone_request.serialize(pptr, psize);
+ break;
+ case kCursorCloneReply:
+ cursor_clone_reply.serialize(pptr, psize);
+ break;
+ case kCursorCloseRequest:
+ cursor_close_request.serialize(pptr, psize);
+ break;
+ case kCursorCloseReply:
+ cursor_close_reply.serialize(pptr, psize);
+ break;
+ case kCursorInsertRequest:
+ cursor_insert_request.serialize(pptr, psize);
+ break;
+ case kCursorInsertReply:
+ cursor_insert_reply.serialize(pptr, psize);
+ break;
+ case kCursorEraseRequest:
+ cursor_erase_request.serialize(pptr, psize);
+ break;
+ case kCursorEraseReply:
+ cursor_erase_reply.serialize(pptr, psize);
+ break;
+ case kCursorGetRecordCountRequest:
+ cursor_get_record_count_request.serialize(pptr, psize);
+ break;
+ case kCursorGetRecordCountReply:
+ cursor_get_record_count_reply.serialize(pptr, psize);
+ break;
+ case kCursorGetRecordSizeRequest:
+ cursor_get_record_size_request.serialize(pptr, psize);
+ break;
+ case kCursorGetRecordSizeReply:
+ cursor_get_record_size_reply.serialize(pptr, psize);
+ break;
+ case kCursorGetDuplicatePositionRequest:
+ cursor_get_duplicate_position_request.serialize(pptr, psize);
+ break;
+ case kCursorGetDuplicatePositionReply:
+ cursor_get_duplicate_position_reply.serialize(pptr, psize);
+ break;
+ case kCursorOverwriteRequest:
+ cursor_overwrite_request.serialize(pptr, psize);
+ break;
+ case kCursorOverwriteReply:
+ cursor_overwrite_reply.serialize(pptr, psize);
+ break;
+ case kCursorMoveRequest:
+ cursor_move_request.serialize(pptr, psize);
+ break;
+ case kCursorMoveReply:
+ cursor_move_reply.serialize(pptr, psize);
+ break;
+ default:
+ assert(!"shouldn't be here");
+ }
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ magic.deserialize(pptr, psize);
+ size.deserialize(pptr, psize);
+ id.deserialize(pptr, psize);
+
+ switch (id.value) {
+ case kTxnBeginRequest:
+ txn_begin_request.deserialize(pptr, psize);
+ break;
+ case kTxnBeginReply:
+ txn_begin_reply.deserialize(pptr, psize);
+ break;
+ case kTxnCommitRequest:
+ txn_commit_request.deserialize(pptr, psize);
+ break;
+ case kTxnCommitReply:
+ txn_commit_reply.deserialize(pptr, psize);
+ break;
+ case kTxnAbortRequest:
+ txn_abort_request.deserialize(pptr, psize);
+ break;
+ case kTxnAbortReply:
+ txn_abort_reply.deserialize(pptr, psize);
+ break;
+ case kDbGetKeyCountRequest:
+ db_count_request.deserialize(pptr, psize);
+ break;
+ case kDbGetKeyCountReply:
+ db_count_reply.deserialize(pptr, psize);
+ break;
+ case kDbInsertRequest:
+ db_insert_request.deserialize(pptr, psize);
+ break;
+ case kDbInsertReply:
+ db_insert_reply.deserialize(pptr, psize);
+ break;
+ case kDbEraseRequest:
+ db_erase_request.deserialize(pptr, psize);
+ break;
+ case kDbEraseReply:
+ db_erase_reply.deserialize(pptr, psize);
+ break;
+ case kDbFindRequest:
+ db_find_request.deserialize(pptr, psize);
+ break;
+ case kDbFindReply:
+ db_find_reply.deserialize(pptr, psize);
+ break;
+ case kCursorCreateRequest:
+ cursor_create_request.deserialize(pptr, psize);
+ break;
+ case kCursorCreateReply:
+ cursor_create_reply.deserialize(pptr, psize);
+ break;
+ case kCursorCloneRequest:
+ cursor_clone_request.deserialize(pptr, psize);
+ break;
+ case kCursorCloneReply:
+ cursor_clone_reply.deserialize(pptr, psize);
+ break;
+ case kCursorCloseRequest:
+ cursor_close_request.deserialize(pptr, psize);
+ break;
+ case kCursorCloseReply:
+ cursor_close_reply.deserialize(pptr, psize);
+ break;
+ case kCursorInsertRequest:
+ cursor_insert_request.deserialize(pptr, psize);
+ break;
+ case kCursorInsertReply:
+ cursor_insert_reply.deserialize(pptr, psize);
+ break;
+ case kCursorEraseRequest:
+ cursor_erase_request.deserialize(pptr, psize);
+ break;
+ case kCursorEraseReply:
+ cursor_erase_reply.deserialize(pptr, psize);
+ break;
+ case kCursorGetRecordCountRequest:
+ cursor_get_record_count_request.deserialize(pptr, psize);
+ break;
+ case kCursorGetRecordCountReply:
+ cursor_get_record_count_reply.deserialize(pptr, psize);
+ break;
+ case kCursorGetRecordSizeRequest:
+ cursor_get_record_size_request.deserialize(pptr, psize);
+ break;
+ case kCursorGetRecordSizeReply:
+ cursor_get_record_size_reply.deserialize(pptr, psize);
+ break;
+ case kCursorGetDuplicatePositionRequest:
+ cursor_get_duplicate_position_request.deserialize(pptr, psize);
+ break;
+ case kCursorGetDuplicatePositionReply:
+ cursor_get_duplicate_position_reply.deserialize(pptr, psize);
+ break;
+ case kCursorOverwriteRequest:
+ cursor_overwrite_request.deserialize(pptr, psize);
+ break;
+ case kCursorOverwriteReply:
+ cursor_overwrite_reply.deserialize(pptr, psize);
+ break;
+ case kCursorMoveRequest:
+ cursor_move_request.deserialize(pptr, psize);
+ break;
+ case kCursorMoveReply:
+ cursor_move_reply.deserialize(pptr, psize);
+ break;
+ default:
+ assert(!"shouldn't be here");
+ }
+ }
+};
+
+
+} // namespace hamsterdb
+#endif // HAM_MESSAGES_H
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/messages.proto b/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/messages.proto
new file mode 100644
index 0000000000..cbd68bf655
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2protoserde/messages.proto
@@ -0,0 +1,646 @@
+SET_OPTION(prefix, Serialized)
+
+PROLOGUE_BEGIN
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_MESSAGES_H
+#define HAM_MESSAGES_H
+
+#include "0root/root.h"
+
+#include <assert.h>
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+/** a magic and version indicator for the remote protocol */
+#define HAM_TRANSFER_MAGIC_V2 (('h'<<24)|('a'<<16)|('m'<<8)|'2')
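+/* (the constant above evaluates to 0x68616d32, i.e. the characters
+ * 'h', 'a', 'm', '2' packed into a single 32-bit word) */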
+
+namespace hamsterdb {
+
+enum {
+ kTxnBeginRequest,
+ kTxnBeginReply,
+ kTxnCommitRequest,
+ kTxnCommitReply,
+ kTxnAbortRequest,
+ kTxnAbortReply,
+ kDbGetKeyCountRequest,
+ kDbGetKeyCountReply,
+ kDbInsertRequest,
+ kDbInsertReply,
+ kDbEraseRequest,
+ kDbEraseReply,
+ kDbFindRequest,
+ kDbFindReply,
+ kCursorCreateRequest,
+ kCursorCreateReply,
+ kCursorCloneRequest,
+ kCursorCloneReply,
+ kCursorCloseRequest,
+ kCursorCloseReply,
+ kCursorInsertRequest,
+ kCursorInsertReply,
+ kCursorEraseRequest,
+ kCursorEraseReply,
+ kCursorGetRecordCountRequest,
+ kCursorGetRecordCountReply,
+ kCursorGetRecordSizeRequest,
+ kCursorGetRecordSizeReply,
+ kCursorGetDuplicatePositionRequest,
+ kCursorGetDuplicatePositionReply,
+ kCursorOverwriteRequest,
+ kCursorOverwriteReply,
+ kCursorMoveRequest,
+ kCursorMoveReply
+};
+
+PROLOGUE_END
+
+MESSAGE_BEGIN(Key)
+ optional bytes data;
+ uint32 flags;
+ uint32 intflags;
+MESSAGE_END
+
+MESSAGE_BEGIN(Record)
+ optional bytes data;
+ uint32 flags;
+ uint32 partial_offset;
+ uint32 partial_size;
+MESSAGE_END
+
+MESSAGE_BEGIN(ConnectRequest)
+ bytes path;
+MESSAGE_END
+
+MESSAGE_BEGIN(ConnectReply)
+ sint32 status;
+ uint32 env_flags;
+ uint64 env_handle;
+MESSAGE_END
+
+MESSAGE_BEGIN(TxnBeginRequest)
+ uint64 env_handle;
+ uint32 flags;
+ bytes name;
+MESSAGE_END
+
+MESSAGE_BEGIN(TxnBeginReply)
+ sint32 status;
+ uint64 txn_handle;
+MESSAGE_END
+
+MESSAGE_BEGIN(TxnCommitRequest)
+ uint64 txn_handle;
+ uint32 flags;
+MESSAGE_END
+
+MESSAGE_BEGIN(TxnCommitReply)
+ sint32 status;
+MESSAGE_END
+
+MESSAGE_BEGIN(TxnAbortRequest)
+ uint64 txn_handle;
+ uint32 flags;
+MESSAGE_END
+
+MESSAGE_BEGIN(TxnAbortReply)
+ sint32 status;
+MESSAGE_END
+
+MESSAGE_BEGIN(DbGetKeyCountRequest)
+ uint64 db_handle;
+ uint64 txn_handle;
+ bool distinct;
+MESSAGE_END
+
+MESSAGE_BEGIN(DbGetKeyCountReply)
+ sint32 status;
+ uint64 keycount;
+MESSAGE_END
+
+MESSAGE_BEGIN(DbInsertRequest)
+ uint64 db_handle;
+ uint64 txn_handle;
+ uint32 flags;
+ optional Key key;
+ optional Record record;
+MESSAGE_END
+
+MESSAGE_BEGIN(DbInsertReply)
+ sint32 status;
+ optional Key key;
+MESSAGE_END
+
+MESSAGE_BEGIN(DbEraseRequest)
+ uint64 db_handle;
+ uint64 txn_handle;
+ Key key;
+ uint32 flags;
+MESSAGE_END
+
+MESSAGE_BEGIN(DbEraseReply)
+ sint32 status;
+MESSAGE_END
+
+MESSAGE_BEGIN(DbFindRequest)
+ uint64 db_handle;
+ uint64 txn_handle;
+ uint64 cursor_handle;
+ uint32 flags;
+ Key key;
+ optional Record record;
+MESSAGE_END
+
+MESSAGE_BEGIN(DbFindReply)
+ sint32 status;
+ optional Key key;
+ optional Record record;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorCreateRequest)
+ uint64 db_handle;
+ uint64 txn_handle;
+ uint32 flags;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorCreateReply)
+ sint32 status;
+ uint64 cursor_handle;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorCloneRequest)
+ uint64 cursor_handle;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorCloneReply)
+ sint32 status;
+ uint64 cursor_handle;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorCloseRequest)
+ uint64 cursor_handle;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorCloseReply)
+ sint32 status;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorInsertRequest)
+ uint64 cursor_handle;
+ uint32 flags;
+ optional Key key;
+ optional Record record;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorInsertReply)
+ sint32 status;
+ optional Key key;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorEraseRequest)
+ uint64 cursor_handle;
+ uint32 flags;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorEraseReply)
+ sint32 status;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorGetRecordCountRequest)
+ uint64 cursor_handle;
+ uint32 flags;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorGetRecordCountReply)
+ sint32 status;
+ uint32 count;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorGetRecordSizeRequest)
+ uint64 cursor_handle;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorGetRecordSizeReply)
+ sint32 status;
+ uint64 size;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorGetDuplicatePositionRequest)
+ uint64 cursor_handle;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorGetDuplicatePositionReply)
+ sint32 status;
+ uint32 position;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorOverwriteRequest)
+ uint64 cursor_handle;
+ Record record;
+ uint32 flags;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorOverwriteReply)
+ sint32 status;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorMoveRequest)
+ uint64 cursor_handle;
+ uint32 flags;
+ optional Key key;
+ optional Record record;
+MESSAGE_END
+
+MESSAGE_BEGIN(CursorMoveReply)
+ sint32 status;
+ Key key;
+ Record record;
+MESSAGE_END
+
+MESSAGE_BEGIN(Wrapper)
+ uint32 magic;
+ uint32 size;
+ uint32 id;
+ TxnBeginRequest txn_begin_request;
+ TxnBeginReply txn_begin_reply;
+ TxnCommitRequest txn_commit_request;
+ TxnCommitReply txn_commit_reply;
+ TxnAbortRequest txn_abort_request;
+ TxnAbortReply txn_abort_reply;
+ DbGetKeyCountRequest db_count_request;
+ DbGetKeyCountReply db_count_reply;
+ DbInsertRequest db_insert_request;
+ DbInsertReply db_insert_reply;
+ DbEraseRequest db_erase_request;
+ DbEraseReply db_erase_reply;
+ DbFindRequest db_find_request;
+ DbFindReply db_find_reply;
+ CursorCreateRequest cursor_create_request;
+ CursorCreateReply cursor_create_reply;
+ CursorCloneRequest cursor_clone_request;
+ CursorCloneReply cursor_clone_reply;
+ CursorCloseRequest cursor_close_request;
+ CursorCloseReply cursor_close_reply;
+ CursorInsertRequest cursor_insert_request;
+ CursorInsertReply cursor_insert_reply;
+ CursorEraseRequest cursor_erase_request;
+ CursorEraseReply cursor_erase_reply;
+ CursorGetRecordCountRequest cursor_get_record_count_request;
+ CursorGetRecordCountReply cursor_get_record_count_reply;
+ CursorGetRecordSizeRequest cursor_get_record_size_request;
+ CursorGetRecordSizeReply cursor_get_record_size_reply;
+ CursorGetDuplicatePositionRequest cursor_get_duplicate_position_request;
+ CursorGetDuplicatePositionReply cursor_get_duplicate_position_reply;
+ CursorOverwriteRequest cursor_overwrite_request;
+ CursorOverwriteReply cursor_overwrite_reply;
+ CursorMoveRequest cursor_move_request;
+ CursorMoveReply cursor_move_reply;
+
+ CUSTOM_IMPLEMENTATION_BEGIN
+ // The methods in here have a custom implementation; otherwise the generator
+ // would emit a bool for each "optional" field, which would unnecessarily
+ // increase the structure size.
+ void clear() {
+ magic = 0;
+ size = 0;
+ id = 0;
+ }
+
+ size_t get_size() const {
+ size_t s = magic.get_size() + size.get_size() + id.get_size();
+ switch (id.value) {
+ case kTxnBeginRequest:
+ return (s + txn_begin_request.get_size());
+ case kTxnBeginReply:
+ return (s + txn_begin_reply.get_size());
+ case kTxnCommitRequest:
+ return (s + txn_commit_request.get_size());
+ case kTxnCommitReply:
+ return (s + txn_commit_reply.get_size());
+ case kTxnAbortRequest:
+ return (s + txn_abort_request.get_size());
+ case kTxnAbortReply:
+ return (s + txn_abort_reply.get_size());
+ case kDbGetKeyCountRequest:
+ return (s + db_count_request.get_size());
+ case kDbGetKeyCountReply:
+ return (s + db_count_reply.get_size());
+ case kDbInsertRequest:
+ return (s + db_insert_request.get_size());
+ case kDbInsertReply:
+ return (s + db_insert_reply.get_size());
+ case kDbEraseRequest:
+ return (s + db_erase_request.get_size());
+ case kDbEraseReply:
+ return (s + db_erase_reply.get_size());
+ case kDbFindRequest:
+ return (s + db_find_request.get_size());
+ case kDbFindReply:
+ return (s + db_find_reply.get_size());
+ case kCursorCreateRequest:
+ return (s + cursor_create_request.get_size());
+ case kCursorCreateReply:
+ return (s + cursor_create_reply.get_size());
+ case kCursorCloneRequest:
+ return (s + cursor_clone_request.get_size());
+ case kCursorCloneReply:
+ return (s + cursor_clone_reply.get_size());
+ case kCursorCloseRequest:
+ return (s + cursor_close_request.get_size());
+ case kCursorCloseReply:
+ return (s + cursor_close_reply.get_size());
+ case kCursorInsertRequest:
+ return (s + cursor_insert_request.get_size());
+ case kCursorInsertReply:
+ return (s + cursor_insert_reply.get_size());
+ case kCursorEraseRequest:
+ return (s + cursor_erase_request.get_size());
+ case kCursorEraseReply:
+ return (s + cursor_erase_reply.get_size());
+ case kCursorGetRecordCountRequest:
+ return (s + cursor_get_record_count_request.get_size());
+ case kCursorGetRecordCountReply:
+ return (s + cursor_get_record_count_reply.get_size());
+ case kCursorGetRecordSizeRequest:
+ return (s + cursor_get_record_size_request.get_size());
+ case kCursorGetRecordSizeReply:
+ return (s + cursor_get_record_size_reply.get_size());
+ case kCursorGetDuplicatePositionRequest:
+ return (s + cursor_get_duplicate_position_request.get_size());
+ case kCursorGetDuplicatePositionReply:
+ return (s + cursor_get_duplicate_position_reply.get_size());
+ case kCursorOverwriteRequest:
+ return (s + cursor_overwrite_request.get_size());
+ case kCursorOverwriteReply:
+ return (s + cursor_overwrite_reply.get_size());
+ case kCursorMoveRequest:
+ return (s + cursor_move_request.get_size());
+ case kCursorMoveReply:
+ return (s + cursor_move_reply.get_size());
+ default:
+ assert(!"shouldn't be here");
+ return (0);
+ }
+ }
+
+ void serialize(unsigned char **pptr, int *psize) const {
+ magic.serialize(pptr, psize);
+ size.serialize(pptr, psize);
+ id.serialize(pptr, psize);
+
+ switch (id.value) {
+ case kTxnBeginRequest:
+ txn_begin_request.serialize(pptr, psize);
+ break;
+ case kTxnBeginReply:
+ txn_begin_reply.serialize(pptr, psize);
+ break;
+ case kTxnCommitRequest:
+ txn_commit_request.serialize(pptr, psize);
+ break;
+ case kTxnCommitReply:
+ txn_commit_reply.serialize(pptr, psize);
+ break;
+ case kTxnAbortRequest:
+ txn_abort_request.serialize(pptr, psize);
+ break;
+ case kTxnAbortReply:
+ txn_abort_reply.serialize(pptr, psize);
+ break;
+ case kDbGetKeyCountRequest:
+ db_count_request.serialize(pptr, psize);
+ break;
+ case kDbGetKeyCountReply:
+ db_count_reply.serialize(pptr, psize);
+ break;
+ case kDbInsertRequest:
+ db_insert_request.serialize(pptr, psize);
+ break;
+ case kDbInsertReply:
+ db_insert_reply.serialize(pptr, psize);
+ break;
+ case kDbEraseRequest:
+ db_erase_request.serialize(pptr, psize);
+ break;
+ case kDbEraseReply:
+ db_erase_reply.serialize(pptr, psize);
+ break;
+ case kDbFindRequest:
+ db_find_request.serialize(pptr, psize);
+ break;
+ case kDbFindReply:
+ db_find_reply.serialize(pptr, psize);
+ break;
+ case kCursorCreateRequest:
+ cursor_create_request.serialize(pptr, psize);
+ break;
+ case kCursorCreateReply:
+ cursor_create_reply.serialize(pptr, psize);
+ break;
+ case kCursorCloneRequest:
+ cursor_clone_request.serialize(pptr, psize);
+ break;
+ case kCursorCloneReply:
+ cursor_clone_reply.serialize(pptr, psize);
+ break;
+ case kCursorCloseRequest:
+ cursor_close_request.serialize(pptr, psize);
+ break;
+ case kCursorCloseReply:
+ cursor_close_reply.serialize(pptr, psize);
+ break;
+ case kCursorInsertRequest:
+ cursor_insert_request.serialize(pptr, psize);
+ break;
+ case kCursorInsertReply:
+ cursor_insert_reply.serialize(pptr, psize);
+ break;
+ case kCursorEraseRequest:
+ cursor_erase_request.serialize(pptr, psize);
+ break;
+ case kCursorEraseReply:
+ cursor_erase_reply.serialize(pptr, psize);
+ break;
+ case kCursorGetRecordCountRequest:
+ cursor_get_record_count_request.serialize(pptr, psize);
+ break;
+ case kCursorGetRecordCountReply:
+ cursor_get_record_count_reply.serialize(pptr, psize);
+ break;
+ case kCursorGetRecordSizeRequest:
+ cursor_get_record_size_request.serialize(pptr, psize);
+ break;
+ case kCursorGetRecordSizeReply:
+ cursor_get_record_size_reply.serialize(pptr, psize);
+ break;
+ case kCursorGetDuplicatePositionRequest:
+ cursor_get_duplicate_position_request.serialize(pptr, psize);
+ break;
+ case kCursorGetDuplicatePositionReply:
+ cursor_get_duplicate_position_reply.serialize(pptr, psize);
+ break;
+ case kCursorOverwriteRequest:
+ cursor_overwrite_request.serialize(pptr, psize);
+ break;
+ case kCursorOverwriteReply:
+ cursor_overwrite_reply.serialize(pptr, psize);
+ break;
+ case kCursorMoveRequest:
+ cursor_move_request.serialize(pptr, psize);
+ break;
+ case kCursorMoveReply:
+ cursor_move_reply.serialize(pptr, psize);
+ break;
+ default:
+ assert(!"shouldn't be here");
+ }
+ }
+
+ void deserialize(unsigned char **pptr, int *psize) {
+ magic.deserialize(pptr, psize);
+ size.deserialize(pptr, psize);
+ id.deserialize(pptr, psize);
+
+ switch (id.value) {
+ case kTxnBeginRequest:
+ txn_begin_request.deserialize(pptr, psize);
+ break;
+ case kTxnBeginReply:
+ txn_begin_reply.deserialize(pptr, psize);
+ break;
+ case kTxnCommitRequest:
+ txn_commit_request.deserialize(pptr, psize);
+ break;
+ case kTxnCommitReply:
+ txn_commit_reply.deserialize(pptr, psize);
+ break;
+ case kTxnAbortRequest:
+ txn_abort_request.deserialize(pptr, psize);
+ break;
+ case kTxnAbortReply:
+ txn_abort_reply.deserialize(pptr, psize);
+ break;
+ case kDbGetKeyCountRequest:
+ db_count_request.deserialize(pptr, psize);
+ break;
+ case kDbGetKeyCountReply:
+ db_count_reply.deserialize(pptr, psize);
+ break;
+ case kDbInsertRequest:
+ db_insert_request.deserialize(pptr, psize);
+ break;
+ case kDbInsertReply:
+ db_insert_reply.deserialize(pptr, psize);
+ break;
+ case kDbEraseRequest:
+ db_erase_request.deserialize(pptr, psize);
+ break;
+ case kDbEraseReply:
+ db_erase_reply.deserialize(pptr, psize);
+ break;
+ case kDbFindRequest:
+ db_find_request.deserialize(pptr, psize);
+ break;
+ case kDbFindReply:
+ db_find_reply.deserialize(pptr, psize);
+ break;
+ case kCursorCreateRequest:
+ cursor_create_request.deserialize(pptr, psize);
+ break;
+ case kCursorCreateReply:
+ cursor_create_reply.deserialize(pptr, psize);
+ break;
+ case kCursorCloneRequest:
+ cursor_clone_request.deserialize(pptr, psize);
+ break;
+ case kCursorCloneReply:
+ cursor_clone_reply.deserialize(pptr, psize);
+ break;
+ case kCursorCloseRequest:
+ cursor_close_request.deserialize(pptr, psize);
+ break;
+ case kCursorCloseReply:
+ cursor_close_reply.deserialize(pptr, psize);
+ break;
+ case kCursorInsertRequest:
+ cursor_insert_request.deserialize(pptr, psize);
+ break;
+ case kCursorInsertReply:
+ cursor_insert_reply.deserialize(pptr, psize);
+ break;
+ case kCursorEraseRequest:
+ cursor_erase_request.deserialize(pptr, psize);
+ break;
+ case kCursorEraseReply:
+ cursor_erase_reply.deserialize(pptr, psize);
+ break;
+ case kCursorGetRecordCountRequest:
+ cursor_get_record_count_request.deserialize(pptr, psize);
+ break;
+ case kCursorGetRecordCountReply:
+ cursor_get_record_count_reply.deserialize(pptr, psize);
+ break;
+ case kCursorGetRecordSizeRequest:
+ cursor_get_record_size_request.deserialize(pptr, psize);
+ break;
+ case kCursorGetRecordSizeReply:
+ cursor_get_record_size_reply.deserialize(pptr, psize);
+ break;
+ case kCursorGetDuplicatePositionRequest:
+ cursor_get_duplicate_position_request.deserialize(pptr, psize);
+ break;
+ case kCursorGetDuplicatePositionReply:
+ cursor_get_duplicate_position_reply.deserialize(pptr, psize);
+ break;
+ case kCursorOverwriteRequest:
+ cursor_overwrite_request.deserialize(pptr, psize);
+ break;
+ case kCursorOverwriteReply:
+ cursor_overwrite_reply.deserialize(pptr, psize);
+ break;
+ case kCursorMoveRequest:
+ cursor_move_request.deserialize(pptr, psize);
+ break;
+ case kCursorMoveReply:
+ cursor_move_reply.deserialize(pptr, psize);
+ break;
+ default:
+ assert(!"shouldn't be here");
+ }
+ }
+ CUSTOM_IMPLEMENTATION_END
+MESSAGE_END
+
+
+EPILOGUE_BEGIN
+
+} // namespace hamsterdb
+#endif // HAM_MESSAGES_H
+
+EPILOGUE_END
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2queue/queue.h b/plugins/Dbx_kv/src/hamsterdb/src/2queue/queue.h
new file mode 100644
index 0000000000..a45d45dfa2
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2queue/queue.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A thread-safe message queue. Producers insert at the front; consumers
+ * pick messages from the tail.
+ *
+ * The queue uses a Spinlock for synchronization, but holds the lock only
+ * very briefly.
+ */
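+
+/*
+ * Minimal usage sketch (editorial illustration, not part of the original
+ * sources; the payload type PurgeRequest and the constant kPurgeCache are
+ * hypothetical):
+ *
+ *   struct PurgeRequest { uint64_t page_id; };
+ *
+ *   Queue queue;
+ *   Queue::Message<PurgeRequest> *msg
+ *           = new Queue::Message<PurgeRequest>(kPurgeCache, 0);
+ *   msg->payload.page_id = 42;
+ *   queue.push(msg);
+ *
+ *   // ... later, in a consumer thread:
+ *   if (MessageBase *m = queue.pop()) {
+ *     // dispatch on m->type, process the payload, then delete the message
+ *     delete m;
+ *   }
+ */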
+
+#ifndef HAM_QUEUE_H
+#define HAM_QUEUE_H
+
+#include "0root/root.h"
+
+#include <ham/types.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/spinlock.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+// The Message. Other messages can derive from it and append their own
+// payload.
+struct MessageBase
+{
+ // Message flags
+ enum {
+ // Message is mandatory and must not be skipped
+ kIsMandatory = 0
+ };
+
+ MessageBase(int type_, int flags_)
+ : type(type_), flags(flags_), previous(0), next(0) {
+ }
+
+ virtual ~MessageBase() {
+ }
+
+ int type;
+ int flags;
+ MessageBase *previous;
+ MessageBase *next;
+};
+
+
+class Queue
+{
+ public:
+ template<typename T>
+ struct Message : public MessageBase
+ {
+ Message(int type, int flags)
+ : MessageBase(type, flags) {
+ }
+
+ T payload;
+ };
+
+ Queue()
+ : m_head(0), m_tail(0) {
+ }
+
+ // Pushes a |message| object to the queue
+ void push(MessageBase *message) {
+ ScopedSpinlock lock(m_mutex);
+ if (!m_tail) {
+ ham_assert(m_head == 0);
+ m_head = m_tail = message;
+ }
+ else if (m_tail == m_head) {
+ m_tail->previous = message;
+ message->next = m_tail;
+ m_head = message;
+ }
+ else {
+ message->next = m_head;
+ m_head->previous = message;
+ m_head = message;
+ }
+ }
+
+ // Pops a message from the tail of the queue. Returns null if the queue
+ // is empty.
+ MessageBase *pop() {
+ ScopedSpinlock lock(m_mutex);
+ if (!m_tail) {
+ ham_assert(m_head == 0);
+ return (0);
+ }
+
+ MessageBase *msg = m_tail;
+ if (m_tail == m_head)
+ m_head = m_tail = 0;
+ else
+ m_tail = m_tail->previous;
+ return (msg);
+ }
+
+ private:
+ // For synchronization
+ Spinlock m_mutex;
+
+ // The head of the linked list (and newest MessageBase)
+ MessageBase *m_head;
+
+ // The tail of the linked list (and oldest MessageBase)
+ MessageBase *m_tail;
+};
+
+} // namespace hamsterdb
+
+#endif // HAM_QUEUE_H
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/2worker/worker.h b/plugins/Dbx_kv/src/hamsterdb/src/2worker/worker.h
new file mode 100644
index 0000000000..2f6798b32c
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/2worker/worker.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The worker thread. Asynchronously purges the cache. The thread starts as
+ * soon as the object is constructed.
+ */
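+
+/*
+ * Minimal usage sketch (editorial illustration, not part of the original
+ * sources; the PurgeWorker type and the constant kPurgeCache are
+ * hypothetical):
+ *
+ *   class PurgeWorker : public Worker {
+ *     virtual void handle_message(MessageBase *message) {
+ *       if (message->type == kPurgeCache) {
+ *         // ... purge the cache ...
+ *       }
+ *     }
+ *   };
+ *
+ *   PurgeWorker worker;                    // the thread starts immediately
+ *   worker.add_to_queue(new MessageBase(kPurgeCache, 0));
+ *   // ...
+ *   worker.stop_and_join();                // on shutdown
+ */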
+
+#ifndef HAM_WORKER_H
+#define HAM_WORKER_H
+
+#include "0root/root.h"
+
+#include <boost/thread.hpp>
+
+// Always verify that a file of level N does not include headers > N!
+#include "2queue/queue.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Worker
+{
+ public:
+ Worker()
+ : m_stop_requested(false), m_thread(&Worker::run, this) {
+ }
+
+ void add_to_queue(MessageBase *message) {
+ m_queue.push(message);
+
+ ScopedLock lock(m_mutex);
+ m_cond.notify_one();
+ }
+
+ void stop_and_join() {
+ {
+ ScopedLock lock(m_mutex);
+ m_stop_requested = true;
+ m_cond.notify_one();
+ }
+ m_thread.join();
+ }
+
+ private:
+ // The thread function
+ void run() {
+ while (true) {
+ MessageBase *message = 0;
+ {
+ ScopedLock lock(m_mutex);
+ if (m_stop_requested)
+ return;
+ message = m_queue.pop();
+ if (!message) {
+ m_cond.wait(lock); // will unlock m_mutex while waiting
+ message = m_queue.pop();
+ }
+ }
+
+ if (message) {
+ handle_message(message);
+ delete message;
+ }
+ }
+ }
+
+ // The message handler - has to be overridden
+ virtual void handle_message(MessageBase *message) = 0;
+
+ // A queue for storing messages
+ Queue m_queue;
+
+ // true if a stop was requested (set when the Environment is closed)
+ bool m_stop_requested;
+
+ // A mutex for protecting |m_cond|
+ boost::mutex m_mutex;
+
+ // A condition to wait for
+ boost::condition_variable m_cond;
+
+ // The actual thread
+ boost::thread m_thread;
+};
+
+} // namespace hamsterdb
+
+#endif // HAM_WORKER_H
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager.cc b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager.cc
new file mode 100644
index 0000000000..d0c075cdec
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "blob_manager.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+uint64_t
+BlobManager::allocate(Context *context, ham_record_t *record,
+ uint32_t flags)
+{
+ // PARTIAL WRITE
+ //
+ // if the partial write starts at offset 0 and covers the full record
+ // size, then there are no gaps. In this case we just write the full
+ // record and ignore the partial parameters.
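+ //
+ // Illustration (hypothetical numbers): for record->size == 100,
+ // partial_offset == 0 and partial_size == 100 the flag is dropped below;
+ // with partial_size == 50 a gap would remain and HAM_PARTIAL is kept.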
+ if (flags & HAM_PARTIAL) {
+ if (record->partial_offset == 0 && record->partial_size == record->size)
+ flags &= ~HAM_PARTIAL;
+ }
+
+ m_metric_total_allocated++;
+
+ return (do_allocate(context, record, flags));
+}
+
+void
+BlobManager::read(Context *context, uint64_t blobid, ham_record_t *record,
+ uint32_t flags, ByteArray *arena)
+{
+ m_metric_total_read++;
+
+ return (do_read(context, blobid, record, flags, arena));
+}
+
+uint64_t
+BlobManager::overwrite(Context *context, uint64_t old_blobid,
+ ham_record_t *record, uint32_t flags)
+{
+ // PARTIAL WRITE
+ //
+ // if the partial write starts at offset 0 and covers the full record
+ // size, then there are no gaps. In this case we just write the full
+ // record and ignore the partial parameters.
+ if (flags & HAM_PARTIAL) {
+ if (record->partial_offset == 0 && record->partial_size == record->size)
+ flags &= ~HAM_PARTIAL;
+ }
+
+ return (do_overwrite(context, old_blobid, record, flags));
+}
+
+uint64_t
+BlobManager::get_blob_size(Context *context, uint64_t blob_id)
+{
+ return (do_get_blob_size(context, blob_id));
+}
+
+void
+BlobManager::erase(Context *context, uint64_t blob_id, Page *page,
+ uint32_t flags)
+{
+ return (do_erase(context, blob_id, page, flags));
+}
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager.h b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager.h
new file mode 100644
index 0000000000..208345e2ed
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager.h
@@ -0,0 +1,231 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @brief functions for reading/writing/allocating blobs (memory chunks of
+ * arbitrary size)
+ *
+ */
+
+#ifndef HAM_BLOB_MANAGER_H
+#define HAM_BLOB_MANAGER_H
+
+#include "0root/root.h"
+
+#include "ham/hamsterdb_int.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+class LocalEnvironment;
+
+#include "1base/packstart.h"
+
+// A blob header structure
+//
+// This header is prepended to the blob's payload. It holds the blob size and
+// the blob's address (which is not strictly required, but useful for error checking).
+HAM_PACK_0 class HAM_PACK_1 PBlobHeader
+{
+ public:
+ PBlobHeader() {
+ memset(this, 0, sizeof(PBlobHeader));
+ }
+
+ // Returns a PBlobHeader from a file address
+ static PBlobHeader *from_page(Page *page, uint64_t address) {
+ uint32_t readstart = (uint32_t)(address - page->get_address());
+ return (PBlobHeader *)&page->get_raw_payload()[readstart];
+ }
+
+ // Returns the blob flags
+ uint32_t get_flags() const {
+ return (m_flags);
+ }
+
+ // Sets the blob's flags
+ void set_flags(uint32_t flags) {
+ m_flags = flags;
+ }
+
+ // Returns the absolute address of the blob
+ uint64_t get_self() const {
+ return (m_blobid);
+ }
+
+ // Sets the absolute address of the blob
+ void set_self(uint64_t id) {
+ m_blobid = id;
+ }
+
+ // Returns the payload size of the blob
+ uint64_t get_size() const {
+ return (m_size);
+ }
+
+ // Sets the payload size of the blob
+ void set_size(uint64_t size) {
+ m_size = size;
+ }
+
+ // Returns the allocated size of the blob (includes padding)
+ uint64_t get_alloc_size() const {
+ return (m_allocated_size);
+ }
+
+ // Sets the allocated size of a blob (includes padding)
+ void set_alloc_size(uint64_t size) {
+ m_allocated_size = size;
+ }
+
+ private:
+ // Flags; currently only used in hamsterdb-pro to store compression
+ // information
+ uint32_t m_flags;
+
+ // The blob ID - which is the absolute address/offset of this
+  // structure in the file
+ uint64_t m_blobid;
+
+  // The allocated size of the blob; this is the size used by the blob,
+  // its header and possibly some additional padding
+ uint64_t m_allocated_size;
+
+ // The "real" size of the blob (excluding the header)
+ uint64_t m_size;
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
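+
+// Illustrative on-disk layout (not part of the original sources): a blob
+// is stored as the packed header immediately followed by the payload,
+//
+//   [ PBlobHeader | payload (m_size bytes) | optional padding ]
+//
+// and the blob-id handed out to callers is the absolute file address of
+// the PBlobHeader, so PBlobHeader::from_page() can locate it again by
+// subtracting the page address from that id.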
+
+// The BlobManager manages blobs (not a surprise)
+//
+// This is an abstract base class; concrete implementations exist for
+// In-Memory and Disk-based Environments.
+class BlobManager
+{
+ protected:
+ // Flags for the PBlobHeader structure
+ enum {
+ // Blob is compressed
+ kIsCompressed = 1
+ };
+
+ public:
+ // Flags for allocate(); make sure that they do not conflict with
+ // the flags for ham_db_insert()
+ enum {
+ // Do not compress the blob, even if compression is enabled
+ kDisableCompression = 0x10000000
+ };
+
+ BlobManager(LocalEnvironment *env)
+ : m_env(env), m_metric_before_compression(0),
+ m_metric_after_compression(0), m_metric_total_allocated(0),
+ m_metric_total_read(0) {
+ }
+
+ virtual ~BlobManager() { }
+
+  // Allocates/creates a new blob.
+ // This function returns the blob-id (the start address of the blob
+ // header)
+ //
+ // |flags| can be HAM_PARTIAL, kDisableCompression
+ uint64_t allocate(Context *context, ham_record_t *record, uint32_t flags);
+
+ // Reads a blob and stores the data in @a record.
+ // @ref flags: either 0 or HAM_DIRECT_ACCESS
+ void read(Context *context, uint64_t blob_id, ham_record_t *record,
+ uint32_t flags, ByteArray *arena);
+
+ // Retrieves the size of a blob
+ uint64_t get_blob_size(Context *context, uint64_t blob_id);
+
+ // Overwrites an existing blob
+ //
+ // Will return an error if the blob does not exist. Returns the blob-id
+ // (the start address of the blob header)
+ uint64_t overwrite(Context *context, uint64_t old_blob_id,
+ ham_record_t *record, uint32_t flags);
+
+ // Deletes an existing blob
+ void erase(Context *context, uint64_t blob_id, Page *page = 0,
+ uint32_t flags = 0);
+
+ // Fills in the current metrics
+ void fill_metrics(ham_env_metrics_t *metrics) const {
+ metrics->blob_total_allocated = m_metric_total_allocated;
+ metrics->blob_total_read = m_metric_total_read;
+ metrics->record_bytes_before_compression = m_metric_before_compression;
+ metrics->record_bytes_after_compression = m_metric_after_compression;
+ }
+
+ protected:
+  // Allocates/creates a new blob.
+ // This function returns the blob-id (the start address of the blob
+ // header)
+ virtual uint64_t do_allocate(Context *context, ham_record_t *record,
+ uint32_t flags) = 0;
+
+ // Reads a blob and stores the data in @a record.
+ // @ref flags: either 0 or HAM_DIRECT_ACCESS
+ virtual void do_read(Context *context, uint64_t blob_id,
+ ham_record_t *record, uint32_t flags,
+ ByteArray *arena) = 0;
+
+ // Retrieves the size of a blob
+ virtual uint64_t do_get_blob_size(Context *context,
+ uint64_t blob_id) = 0;
+
+ // Overwrites an existing blob
+ //
+ // Will return an error if the blob does not exist. Returns the blob-id
+ // (the start address of the blob header)
+ virtual uint64_t do_overwrite(Context *context, uint64_t old_blob_id,
+ ham_record_t *record, uint32_t flags) = 0;
+
+ // Deletes an existing blob
+ virtual void do_erase(Context *context, uint64_t blob_id,
+ Page *page = 0, uint32_t flags = 0) = 0;
+
+ // The Environment which created this BlobManager
+ LocalEnvironment *m_env;
+
+ // Usage tracking - number of bytes before compression
+ uint64_t m_metric_before_compression;
+
+ // Usage tracking - number of bytes after compression
+ uint64_t m_metric_after_compression;
+
+ private:
+ // Usage tracking - number of blobs allocated
+ uint64_t m_metric_total_allocated;
+
+ // Usage tracking - number of blobs read
+ uint64_t m_metric_total_read;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BLOB_MANAGER_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_disk.cc b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_disk.cc
new file mode 100644
index 0000000000..231789774b
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_disk.cc
@@ -0,0 +1,637 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <algorithm>
+#include <vector>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1base/dynamic_array.h"
+#include "2device/device.h"
+#include "3blob_manager/blob_manager_disk.h"
+#include "3page_manager/page_manager.h"
+#include "4db/db_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+uint64_t
+DiskBlobManager::do_allocate(Context *context, ham_record_t *record,
+ uint32_t flags)
+{
+ uint8_t *chunk_data[2];
+ uint32_t chunk_size[2];
+ uint32_t page_size = m_env->config().page_size_bytes;
+
+ PBlobHeader blob_header;
+ uint32_t alloc_size = sizeof(PBlobHeader) + record->size;
+
+ // first check if we can add another blob to the last used page
+ Page *page = m_env->page_manager()->get_last_blob_page(context);
+
+ PBlobPageHeader *header = 0;
+ uint64_t address = 0;
+ if (page) {
+ header = PBlobPageHeader::from_page(page);
+ // allocate space for the blob
+ if (!alloc_from_freelist(header, alloc_size, &address))
+ page = 0;
+ else
+ address += page->get_address();
+ }
+
+ if (!address) {
+ // Allocate a new page. If the blob exceeds a page then allocate multiple
+ // pages that are directly next to each other.
+ uint32_t required_size = alloc_size + kPageOverhead;
+ uint32_t num_pages = required_size / page_size;
+ if (num_pages * page_size < required_size)
+ num_pages++;
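+    // Illustrative arithmetic (comment added for clarity): with a page
+    // size of 16384 bytes and required_size == 20000, the division above
+    // yields num_pages == 1, and the check above then bumps it to 2 -
+    // i.e. this is a ceiling division.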
+
+ // |page| now points to the first page that was allocated, and
+ // the only one which has a header and a freelist
+ page = m_env->page_manager()->alloc_multiple_blob_pages(context, num_pages);
+ ham_assert(page->is_without_header() == false);
+
+ // initialize the PBlobPageHeader
+ header = PBlobPageHeader::from_page(page);
+ header->initialize();
+ header->set_num_pages(num_pages);
+ header->set_free_bytes((num_pages * page_size) - kPageOverhead);
+
+ // and move the remaining space to the freelist, unless we span multiple
+ // pages (then the rest will be discarded) - TODO can we reuse it somehow?
+ if (num_pages == 1
+ && kPageOverhead + alloc_size > 0
+ && header->get_free_bytes() - alloc_size > 0) {
+ header->set_freelist_offset(0, kPageOverhead + alloc_size);
+ header->set_freelist_size(0, header->get_free_bytes() - alloc_size);
+ }
+
+ address = page->get_address() + kPageOverhead;
+ ham_assert(check_integrity(header));
+ }
+
+  // adjust the "free bytes" counter
+ ham_assert(header->get_free_bytes() >= alloc_size);
+ header->set_free_bytes(header->get_free_bytes() - alloc_size);
+
+ // store the page id if it still has space left
+ if (header->get_free_bytes())
+ m_env->page_manager()->set_last_blob_page(page);
+ else
+ m_env->page_manager()->set_last_blob_page(0);
+
+ // initialize the blob header
+ blob_header.set_alloc_size(alloc_size);
+ blob_header.set_size(record->size);
+ blob_header.set_self(address);
+
+ // PARTIAL WRITE
+ //
+ // Are there gaps at the beginning? If yes, then we'll fill with zeros
+ ByteArray zeroes;
+ if ((flags & HAM_PARTIAL) && (record->partial_offset > 0)) {
+ uint32_t gapsize = record->partial_offset;
+
+ // first: write the header
+ chunk_data[0] = (uint8_t *)&blob_header;
+ chunk_size[0] = sizeof(blob_header);
+ write_chunks(context, page, address, chunk_data, chunk_size, 1);
+
+ address += sizeof(blob_header);
+
+    // now fill the gap; if the gap is bigger than a page size we'll
+ // split the gap into smaller chunks
+ while (gapsize) {
+ uint32_t size = gapsize >= page_size
+ ? page_size
+ : gapsize;
+ chunk_data[0] = (uint8_t *)zeroes.resize(size, 0);
+ chunk_size[0] = size;
+ write_chunks(context, page, address, chunk_data, chunk_size, 1);
+ gapsize -= size;
+ address += size;
+ }
+
+ // now write the "real" data
+ chunk_data[0] = (uint8_t *)record->data;
+ chunk_size[0] = record->partial_size;
+
+ write_chunks(context, page, address, chunk_data, chunk_size, 1);
+ address += record->partial_size;
+ }
+ else {
+ // not writing partially: write header and data, then we're done
+ chunk_data[0] = (uint8_t *)&blob_header;
+ chunk_size[0] = sizeof(blob_header);
+ chunk_data[1] = (uint8_t *)record->data;
+ chunk_size[1] = (flags & HAM_PARTIAL)
+ ? record->partial_size
+ : record->size;
+
+ write_chunks(context, page, address, chunk_data, chunk_size, 2);
+ address += chunk_size[0] + chunk_size[1];
+ }
+
+ // store the blobid; it will be returned to the caller
+ uint64_t blobid = blob_header.get_self();
+
+ // PARTIAL WRITES:
+ //
+ // if we have gaps at the end of the blob: just append more chunks to
+ // fill these gaps. Since they can be pretty large we split them into
+ // smaller chunks if necessary.
+ if (flags & HAM_PARTIAL) {
+ if (record->partial_offset + record->partial_size < record->size) {
+ uint32_t gapsize = record->size
+ - (record->partial_offset + record->partial_size);
+
+      // now fill the gap; if the gap is bigger than a page size we'll
+      // split the gap into smaller chunks, reusing the |zeroes| buffer
+      // to avoid repeated allocations
+ while (gapsize) {
+ uint32_t size = gapsize > page_size
+ ? page_size
+ : gapsize;
+ chunk_data[0] = (uint8_t *)zeroes.resize(size, 0);
+ chunk_size[0] = size;
+ write_chunks(context, page, address, chunk_data, chunk_size, 1);
+ gapsize -= size;
+ address += size;
+ }
+ }
+ }
+
+ ham_assert(check_integrity(header));
+
+ return (blobid);
+}
+
+void
+DiskBlobManager::do_read(Context *context, uint64_t blobid,
+ ham_record_t *record, uint32_t flags, ByteArray *arena)
+{
+ Page *page;
+
+ // first step: read the blob header
+ PBlobHeader *blob_header = (PBlobHeader *)read_chunk(context, 0, &page,
+ blobid, true);
+
+ // sanity check
+ if (blob_header->get_self() != blobid) {
+ ham_log(("blob %lld not found", blobid));
+ throw Exception(HAM_BLOB_NOT_FOUND);
+ }
+
+ uint32_t blobsize = (uint32_t)blob_header->get_size();
+ record->size = blobsize;
+
+ if (flags & HAM_PARTIAL) {
+ if (record->partial_offset > blobsize) {
+ ham_trace(("partial offset is greater than the total record size"));
+ throw Exception(HAM_INV_PARAMETER);
+ }
+ if (record->partial_offset + record->partial_size > blobsize)
+ record->partial_size = blobsize = blobsize - record->partial_offset;
+ else
+ blobsize = record->partial_size;
+ }
+
+ // empty blob?
+ if (!blobsize) {
+ record->data = 0;
+ record->size = 0;
+ return;
+ }
+
+ // if the blob is in memory-mapped storage (and the user does not require
+ // a copy of the data): simply return a pointer
+ if ((flags & HAM_FORCE_DEEP_COPY) == 0
+ && m_env->device()->is_mapped(blobid, blobsize)
+ && !(record->flags & HAM_RECORD_USER_ALLOC)) {
+ record->data = read_chunk(context, page, 0,
+ blobid + sizeof(PBlobHeader) + (flags & HAM_PARTIAL
+ ? record->partial_offset
+ : 0), true);
+ }
+ // otherwise resize the blob buffer and copy the blob data into the buffer
+ else {
+ if (!(record->flags & HAM_RECORD_USER_ALLOC)) {
+ arena->resize(blobsize);
+ record->data = arena->get_ptr();
+ }
+
+ copy_chunk(context, page, 0,
+ blobid + sizeof(PBlobHeader) + (flags & HAM_PARTIAL
+ ? record->partial_offset
+ : 0),
+ (uint8_t *)record->data, blobsize, true);
+ }
+}
+
+uint64_t
+DiskBlobManager::do_get_blob_size(Context *context, uint64_t blobid)
+{
+ // read the blob header
+ PBlobHeader *blob_header = (PBlobHeader *)read_chunk(context, 0, 0, blobid,
+ true);
+
+ if (blob_header->get_self() != blobid)
+ throw Exception(HAM_BLOB_NOT_FOUND);
+
+ return (blob_header->get_size());
+}
+
+uint64_t
+DiskBlobManager::do_overwrite(Context *context, uint64_t old_blobid,
+ ham_record_t *record, uint32_t flags)
+{
+ PBlobHeader *old_blob_header, new_blob_header;
+ Page *page;
+
+ uint32_t alloc_size = sizeof(PBlobHeader) + record->size;
+
+ // first, read the blob header; if the new blob fits into the
+ // old blob, we overwrite the old blob (and add the remaining
+ // space to the freelist, if there is any)
+ old_blob_header = (PBlobHeader *)read_chunk(context, 0, &page,
+ old_blobid, false);
+
+ // sanity check
+ ham_assert(old_blob_header->get_self() == old_blobid);
+ if (old_blob_header->get_self() != old_blobid)
+ throw Exception(HAM_BLOB_NOT_FOUND);
+
+ // now compare the sizes; does the new data fit in the old allocated
+ // space?
+ if (alloc_size <= old_blob_header->get_alloc_size()) {
+ uint8_t *chunk_data[2];
+ uint32_t chunk_size[2];
+
+ // setup the new blob header
+ new_blob_header.set_self(old_blob_header->get_self());
+ new_blob_header.set_size(record->size);
+ new_blob_header.set_alloc_size(alloc_size);
+ new_blob_header.set_flags(0); // disable compression, just in case...
+
+ // PARTIAL WRITE
+ //
+ // if we have a gap at the beginning, then we have to write the
+ // blob header and the blob data in two steps; otherwise we can
+ // write both immediately
+ if ((flags & HAM_PARTIAL) && (record->partial_offset)) {
+ chunk_data[0] = (uint8_t *)&new_blob_header;
+ chunk_size[0] = sizeof(new_blob_header);
+ write_chunks(context, page, new_blob_header.get_self(),
+ chunk_data, chunk_size, 1);
+
+ chunk_data[0] = (uint8_t *)record->data;
+ chunk_size[0] = record->partial_size;
+ write_chunks(context, page, new_blob_header.get_self()
+ + sizeof(new_blob_header) + record->partial_offset,
+ chunk_data, chunk_size, 1);
+ }
+ else {
+ chunk_data[0] = (uint8_t *)&new_blob_header;
+ chunk_size[0] = sizeof(new_blob_header);
+ chunk_data[1] = (uint8_t *)record->data;
+ chunk_size[1] = (flags & HAM_PARTIAL)
+ ? record->partial_size
+ : record->size;
+
+ write_chunks(context, page, new_blob_header.get_self(),
+ chunk_data, chunk_size, 2);
+ }
+
+ // move remaining data to the freelist
+ if (alloc_size < old_blob_header->get_alloc_size()) {
+ PBlobPageHeader *header = PBlobPageHeader::from_page(page);
+ header->set_free_bytes(header->get_free_bytes()
+ + (uint32_t)(old_blob_header->get_alloc_size() - alloc_size));
+ add_to_freelist(header,
+ (uint32_t)(old_blobid + alloc_size) - page->get_address(),
+ (uint32_t)old_blob_header->get_alloc_size() - alloc_size);
+ }
+
+ // the old rid is the new rid
+ return (new_blob_header.get_self());
+ }
+
+  // if the new data is larger: allocate fresh space for it
+ // and discard the old; 'overwrite' has become (delete + insert) now.
+ uint64_t new_blobid = allocate(context, record, flags);
+ erase(context, old_blobid, 0, 0);
+
+ return (new_blobid);
+}
+
+void
+DiskBlobManager::do_erase(Context *context, uint64_t blobid, Page *page,
+ uint32_t flags)
+{
+ // fetch the blob header
+ PBlobHeader *blob_header = (PBlobHeader *)read_chunk(context, 0, &page,
+ blobid, false);
+
+ // sanity check
+ ham_verify(blob_header->get_self() == blobid);
+ if (blob_header->get_self() != blobid)
+ throw Exception(HAM_BLOB_NOT_FOUND);
+
+ // update the "free bytes" counter in the blob page header
+ PBlobPageHeader *header = PBlobPageHeader::from_page(page);
+ header->set_free_bytes(header->get_free_bytes()
+ + blob_header->get_alloc_size());
+
+ // if the page is now completely empty (all blobs were erased) then move
+ // it to the freelist
+ if (header->get_free_bytes() == (header->get_num_pages()
+ * m_env->config().page_size_bytes) - kPageOverhead) {
+ m_env->page_manager()->set_last_blob_page(0);
+ m_env->page_manager()->del(context, page, header->get_num_pages());
+ header->initialize();
+ return;
+ }
+
+ // otherwise move the blob to the freelist
+ add_to_freelist(header, (uint32_t)(blobid - page->get_address()),
+ (uint32_t)blob_header->get_alloc_size());
+}
+
+bool
+DiskBlobManager::alloc_from_freelist(PBlobPageHeader *header, uint32_t size,
+ uint64_t *poffset)
+{
+ ham_assert(check_integrity(header));
+
+ // freelist is not used if this is a multi-page blob
+ if (header->get_num_pages() > 1)
+ return (false);
+
+ uint32_t count = header->get_freelist_entries();
+
+ for (uint32_t i = 0; i < count; i++) {
+ // exact match
+ if (header->get_freelist_size(i) == size) {
+ *poffset = header->get_freelist_offset(i);
+ header->set_freelist_offset(i, 0);
+ header->set_freelist_size(i, 0);
+ ham_assert(check_integrity(header));
+ return (true);
+ }
+ // space in freelist is larger than what we need? return this space,
+ // make sure the remaining gap stays in the freelist
+ if (header->get_freelist_size(i) > size) {
+ *poffset = header->get_freelist_offset(i);
+ header->set_freelist_offset(i, (uint32_t)(*poffset + size));
+ header->set_freelist_size(i, header->get_freelist_size(i) - size);
+ ham_assert(check_integrity(header));
+ return (true);
+ }
+ }
+
+ // there was no gap large enough for the blob
+ return (false);
+}
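+
+// Illustrative example (not part of the original sources): assume a
+// single-page blob page whose freelist holds one entry
+// { offset: 512, size: 128 }. A request for 128 bytes is an exact match
+// and empties the slot; a request for 64 bytes instead returns offset 512
+// and shrinks the entry to { offset: 576, size: 64 }. A request for 256
+// bytes finds no fitting gap and returns false, so the caller falls back
+// to allocating a new page.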
+
+void
+DiskBlobManager::add_to_freelist(PBlobPageHeader *header,
+ uint32_t offset, uint32_t size)
+{
+ ham_assert(check_integrity(header));
+
+ // freelist is not used if this is a multi-page blob
+ if (header->get_num_pages() > 1)
+ return;
+
+ uint32_t count = header->get_freelist_entries();
+
+ // first try to collapse the blobs
+ for (uint32_t i = 0; i < count; i++) {
+ if (offset + size == header->get_freelist_offset(i)) {
+ header->set_freelist_offset(i, offset);
+ header->set_freelist_size(i, header->get_freelist_size(i) + size);
+ ham_assert(check_integrity(header));
+ return;
+ }
+ if (header->get_freelist_offset(i) + header->get_freelist_size(i)
+ == offset) {
+ header->set_freelist_size(i, header->get_freelist_size(i) + size);
+ ham_assert(check_integrity(header));
+ return;
+ }
+ }
+
+ // otherwise store the blob in a new slot, if available
+ uint32_t smallest = 0;
+ for (uint32_t i = 0; i < count; i++) {
+ // slot is empty
+ if (header->get_freelist_size(i) == 0) {
+ header->set_freelist_offset(i, offset);
+ header->set_freelist_size(i, size);
+ ham_assert(check_integrity(header));
+ return;
+ }
+ // otherwise look for the smallest entry
+ if (header->get_freelist_size(i) < header->get_freelist_size(smallest)) {
+ smallest = i;
+ continue;
+ }
+ }
+
+ // overwrite the smallest entry?
+ if (size > header->get_freelist_size(smallest)) {
+ header->set_freelist_offset(smallest, offset);
+ header->set_freelist_size(smallest, size);
+ }
+
+ ham_assert(check_integrity(header));
+}
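+
+// Illustrative example (not part of the original sources): erasing a blob
+// at page-relative offset 576 with size 64 while the freelist already
+// holds { offset: 512, size: 64 } hits the second coalescing case above
+// (512 + 64 == 576) and simply grows that entry to { offset: 512,
+// size: 128 }. If no adjacent entry exists, the chunk goes into an empty
+// slot, or replaces the smallest entry when all 32 slots are occupied
+// and the new chunk is larger.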
+
+bool
+DiskBlobManager::check_integrity(PBlobPageHeader *header) const
+{
+ ham_assert(header->get_num_pages() > 0);
+
+ if (header->get_free_bytes() + kPageOverhead
+ > (m_env->config().page_size_bytes * header->get_num_pages())) {
+ ham_trace(("integrity violated: free bytes exceeds page boundary"));
+ return (false);
+ }
+
+ // freelist is not used if this is a multi-page blob
+ if (header->get_num_pages() > 1)
+ return (true);
+
+ uint32_t count = header->get_freelist_entries();
+ uint32_t total_sizes = 0;
+ typedef std::pair<uint32_t, uint32_t> Range;
+ typedef std::vector<Range> RangeVec;
+ RangeVec ranges;
+
+ for (uint32_t i = 0; i < count - 1; i++) {
+ if (header->get_freelist_size(i) == 0) {
+ ham_assert(header->get_freelist_offset(i) == 0);
+ continue;
+ }
+ total_sizes += header->get_freelist_size(i);
+ ranges.push_back(std::make_pair(header->get_freelist_offset(i),
+ header->get_freelist_size(i)));
+ }
+
+ // the sum of freelist chunks must not exceed total number of free bytes
+ if (total_sizes > header->get_free_bytes()) {
+ ham_trace(("integrity violated: total freelist slots exceed free bytes"));
+ return (false);
+ }
+
+ std::sort(ranges.begin(), ranges.end());
+
+ if (!ranges.empty()) {
+ for (uint32_t i = 0; i < ranges.size() - 1; i++) {
+ if (ranges[i].first + ranges[i].second
+ > m_env->config().page_size_bytes * header->get_num_pages()) {
+ ham_trace(("integrity violated: freelist slot %u/%u exceeds page",
+ ranges[i].first, ranges[i].second));
+ return (false);
+ }
+ if (ranges[i].first + ranges[i].second > ranges[i + 1].first) {
+ ham_trace(("integrity violated: freelist slot %u/%u overlaps with %lu",
+ ranges[i].first, ranges[i].second,
+ ranges[i + 1].first));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ }
+ }
+
+ return (true);
+}
+
+void
+DiskBlobManager::write_chunks(Context *context, Page *page,
+ uint64_t address, uint8_t **chunk_data, uint32_t *chunk_size,
+ uint32_t chunks)
+{
+ uint32_t page_size = m_env->config().page_size_bytes;
+
+ // for each chunk...
+ for (uint32_t i = 0; i < chunks; i++) {
+ uint32_t size = chunk_size[i];
+ uint8_t *data = chunk_data[i];
+
+ while (size) {
+ // get the page-id from this chunk
+ uint64_t pageid = address - (address % page_size);
+
+ // is this the current page? if yes then continue working with this page,
+ // otherwise fetch the page
+ if (page && page->get_address() != pageid)
+ page = 0;
+ if (!page)
+ page = m_env->page_manager()->fetch(context, pageid,
+ PageManager::kNoHeader);
+
+ uint32_t write_start = (uint32_t)(address - page->get_address());
+ uint32_t write_size = (uint32_t)(page_size - write_start);
+
+ // now write the data
+ if (write_size > size)
+ write_size = size;
+ memcpy(&page->get_raw_payload()[write_start], data, write_size);
+ page->set_dirty(true);
+ address += write_size;
+ data += write_size;
+ size -= write_size;
+ }
+ }
+}
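+
+// Illustrative example (not part of the original sources): writing a
+// 20000 byte chunk that starts 1000 bytes into a page, with a page size
+// of 16384, touches two pages: the first iteration writes 15384 bytes
+// into the current page, the second fetches the following page (without
+// header) and writes the remaining 4616 bytes.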
+
+void
+DiskBlobManager::copy_chunk(Context *context, Page *page, Page **ppage,
+ uint64_t address, uint8_t *data, uint32_t size,
+ bool fetch_read_only)
+{
+ uint32_t page_size = m_env->config().page_size_bytes;
+ bool first_page = true;
+
+ while (size) {
+ // get the page-id from this chunk
+ uint64_t pageid = address - (address % page_size);
+
+ // is this the current page? if yes then continue working with this page,
+ // otherwise fetch the page
+ if (page && page->get_address() != pageid)
+ page = 0;
+
+ if (!page) {
+ uint32_t flags = 0;
+ if (fetch_read_only)
+ flags |= PageManager::kReadOnly;
+ if (!first_page)
+ flags |= PageManager::kNoHeader;
+ page = m_env->page_manager()->fetch(context, pageid, flags);
+ }
+
+ // now read the data from the page
+ uint32_t read_start = (uint32_t)(address - page->get_address());
+ uint32_t read_size = (uint32_t)(page_size - read_start);
+ if (read_size > size)
+ read_size = size;
+ memcpy(data, &page->get_raw_payload()[read_start], read_size);
+ address += read_size;
+ data += read_size;
+ size -= read_size;
+
+ first_page = false;
+ }
+
+ if (ppage)
+ *ppage = page;
+}
+
+uint8_t *
+DiskBlobManager::read_chunk(Context *context, Page *page, Page **ppage,
+ uint64_t address, bool fetch_read_only)
+{
+ // get the page-id from this chunk
+ uint32_t page_size = m_env->config().page_size_bytes;
+ uint64_t pageid = address - (address % page_size);
+
+ // is this the current page? if yes then continue working with this page,
+ // otherwise fetch the page
+ if (page && page->get_address() != pageid)
+ page = 0;
+
+ if (!page) {
+ uint32_t flags = 0;
+ if (fetch_read_only)
+ flags |= PageManager::kReadOnly;
+ page = m_env->page_manager()->fetch(context, pageid, flags);
+ if (ppage)
+ *ppage = page;
+ }
+
+ uint32_t read_start = (uint32_t)(address - page->get_address());
+ return (&page->get_raw_payload()[read_start]);
+}
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_disk.h b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_disk.h
new file mode 100644
index 0000000000..7ec8b67d95
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_disk.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HAM_BLOB_MANAGER_DISK_H
+#define HAM_BLOB_MANAGER_DISK_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "3blob_manager/blob_manager.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+#include "1base/packstart.h"
+
+/*
+ * The header of a blob page
+ *
+ * Contains a fixed-length freelist and a counter for the number of free
+ * bytes
+ */
+HAM_PACK_0 class HAM_PACK_1 PBlobPageHeader
+{
+ public:
+ void initialize() {
+ memset(this, 0, sizeof(PBlobPageHeader));
+ }
+
+ // Returns a PBlobPageHeader from a page
+ static PBlobPageHeader *from_page(Page *page) {
+ return (PBlobPageHeader *)&page->get_payload()[0];
+ }
+
+ // Returns the number of pages which are all managed by this header
+ uint32_t get_num_pages() const {
+ return (m_num_pages);
+ }
+
+ // Sets the number of pages which are all managed by this header
+ void set_num_pages(uint32_t num_pages) {
+ m_num_pages = num_pages;
+ }
+
+ // Returns the "free bytes" counter
+ uint32_t get_free_bytes() const {
+ return (m_free_bytes);
+ }
+
+ // Sets the "free bytes" counter
+ void set_free_bytes(uint32_t free_bytes) {
+ m_free_bytes = free_bytes;
+ }
+
+ // Returns the total number of freelist entries
+ uint8_t get_freelist_entries() const {
+ return (32);
+ }
+
+ // Returns the offset of freelist entry |i|
+ uint32_t get_freelist_offset(uint32_t i) const {
+ return (m_freelist[i].offset);
+ }
+
+ // Sets the offset of freelist entry |i|
+ void set_freelist_offset(uint32_t i, uint32_t offset) {
+ m_freelist[i].offset = offset;
+ }
+
+ // Returns the size of freelist entry |i|
+ uint32_t get_freelist_size(uint32_t i) const {
+ return (m_freelist[i].size);
+ }
+
+ // Sets the size of freelist entry |i|
+ void set_freelist_size(uint32_t i, uint32_t size) {
+ m_freelist[i].size = size;
+ }
+
+ private:
+ // Number of "regular" pages for this blob; used for blobs exceeding
+ // a page size
+ uint32_t m_num_pages;
+
+ // Number of free bytes in this page
+ uint32_t m_free_bytes;
+
+ struct FreelistEntry {
+ uint32_t offset;
+ uint32_t size;
+ };
+
+ // The freelist - offset/size pairs in this page
+ FreelistEntry m_freelist[32];
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
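+
+// Illustrative page layout (not part of the original sources): the first
+// page that backs a blob allocation starts with
+//
+//   [ persistent Page header | PBlobPageHeader | blob data ... ]
+//
+// which is why kPageOverhead below is the sum of both header sizes.
+// Follow-up pages of a multi-page blob carry raw blob data only, and the
+// freelist is used only for single-page allocations.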
+
+
+/*
+ * A BlobManager for disk-based databases
+ */
+class DiskBlobManager : public BlobManager
+{
+ enum {
+ // Overhead per page
+ kPageOverhead = Page::kSizeofPersistentHeader + sizeof(PBlobPageHeader)
+ };
+
+ public:
+ DiskBlobManager(LocalEnvironment *env)
+ : BlobManager(env) {
+ }
+
+ protected:
+ // allocate/create a blob
+ // returns the blob-id (the start address of the blob header)
+ virtual uint64_t do_allocate(Context *context, ham_record_t *record,
+ uint32_t flags);
+
+ // reads a blob and stores the data in |record|. The pointer |record.data|
+ // is backed by the |arena|, unless |HAM_RECORD_USER_ALLOC| is set.
+ // flags: either 0 or HAM_DIRECT_ACCESS
+ virtual void do_read(Context *context, uint64_t blobid,
+ ham_record_t *record, uint32_t flags,
+ ByteArray *arena);
+
+ // retrieves the size of a blob
+ virtual uint64_t do_get_blob_size(Context *context, uint64_t blobid);
+
+ // overwrite an existing blob
+ //
+ // will return an error if the blob does not exist
+  // returns the blob-id (the start address of the blob header)
+ virtual uint64_t do_overwrite(Context *context, uint64_t old_blobid,
+ ham_record_t *record, uint32_t flags);
+
+ // delete an existing blob
+ virtual void do_erase(Context *context, uint64_t blobid,
+ Page *page = 0, uint32_t flags = 0);
+
+ private:
+ friend class DuplicateManager;
+ friend struct BlobManagerFixture;
+
+ // write a series of data chunks to storage at file offset 'addr'.
+ //
+ // The chunks are assumed to be stored in sequential order, adjacent
+ // to each other, i.e. as one long data strip.
+ void write_chunks(Context *context, Page *page, uint64_t addr,
+ uint8_t **chunk_data, uint32_t *chunk_size,
+ uint32_t chunks);
+
+ // Same as above, but for reading chunks from the file. The data
+ // is copied to |data|.
+ void copy_chunk(Context *context, Page *page, Page **fpage,
+ uint64_t addr, uint8_t *data, uint32_t size,
+ bool fetch_read_only);
+
+ // Same as |copy_chunk|, but does not copy the data
+ uint8_t *read_chunk(Context *context, Page *page, Page **fpage,
+ uint64_t addr, bool fetch_read_only);
+
+ // adds a free chunk to the freelist
+ void add_to_freelist(PBlobPageHeader *header, uint32_t offset,
+ uint32_t size);
+
+ // searches the freelist for a free chunk; if available, returns |true|
+ // and stores the offset in |poffset|.
+ bool alloc_from_freelist(PBlobPageHeader *header, uint32_t size,
+ uint64_t *poffset);
+
+ // verifies the integrity of the freelist
+ bool check_integrity(PBlobPageHeader *header) const;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BLOB_MANAGER_DISK_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_factory.h b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_factory.h
new file mode 100644
index 0000000000..129849c7ad
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_factory.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HAM_BLOB_MANAGER_FACTORY_H
+#define HAM_BLOB_MANAGER_FACTORY_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "3blob_manager/blob_manager_disk.h"
+#include "3blob_manager/blob_manager_inmem.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct BlobManagerFactory {
+ // creates a new BlobManager instance depending on the flags
+ static BlobManager *create(LocalEnvironment *env, uint32_t flags) {
+ if (flags & HAM_IN_MEMORY)
+ return (new InMemoryBlobManager(env));
+ else
+ return (new DiskBlobManager(env));
+ }
+};
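+
+// Illustrative usage sketch (not part of the original sources; the real
+// call site is elsewhere in the Environment code, and |env_flags| stands
+// for whatever flag word the caller has at hand):
+//
+//   BlobManager *bm = BlobManagerFactory::create(env, env_flags);
+//   // HAM_IN_MEMORY selects the InMemoryBlobManager,
+//   // everything else gets the DiskBlobManager.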
+
+} // namespace hamsterdb
+
+#endif /* HAM_BLOB_MANAGER_FACTORY_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_inmem.cc b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_inmem.cc
new file mode 100644
index 0000000000..1044d815c5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_inmem.cc
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/dynamic_array.h"
+#include "2device/device_inmem.h"
+#include "3blob_manager/blob_manager_inmem.h"
+#include "4db/db_local.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+uint64_t
+InMemoryBlobManager::do_allocate(Context *context, ham_record_t *record,
+ uint32_t flags)
+{
+  // in-memory database: the blobid is actually a pointer to the memory
+  // buffer in which the blob (including the blob header) is stored
+ uint8_t *p = (uint8_t *)m_env->device()->alloc(record->size
+ + sizeof(PBlobHeader));
+
+ // initialize the header
+ PBlobHeader *blob_header = (PBlobHeader *)p;
+ memset(blob_header, 0, sizeof(*blob_header));
+ blob_header->set_self((uint64_t)PTR_TO_U64(p));
+ blob_header->set_alloc_size(record->size + sizeof(PBlobHeader));
+ blob_header->set_size(record->size);
+
+ // do we have gaps? if yes, fill them with zeroes
+ if (flags & HAM_PARTIAL) {
+ uint8_t *s = p + sizeof(PBlobHeader);
+ if (record->partial_offset)
+ memset(s, 0, record->partial_offset);
+ memcpy(s + record->partial_offset, record->data, record->partial_size);
+ if (record->partial_offset + record->partial_size < record->size)
+ memset(s + record->partial_offset + record->partial_size, 0,
+ record->size - (record->partial_offset + record->partial_size));
+ }
+ else {
+ memcpy(p + sizeof(PBlobHeader), record->data, record->size);
+ }
+
+ return ((uint64_t)PTR_TO_U64(p));
+}
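+
+// Note (added for clarity, not part of the original sources): for the
+// in-memory back-end the returned "blob id" is simply the numeric value
+// of the allocation pointer, so do_read()/do_erase() can turn it back
+// into a PBlobHeader* with U64_TO_PTR without any page lookups.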
+
+void
+InMemoryBlobManager::do_read(Context *context, uint64_t blobid,
+ ham_record_t *record, uint32_t flags,
+ ByteArray *arena)
+{
+  // in-memory database: the blobid is actually a pointer to the memory
+  // buffer in which the blob is stored
+ PBlobHeader *blob_header = (PBlobHeader *)U64_TO_PTR(blobid);
+ uint8_t *data = (uint8_t *)(U64_TO_PTR(blobid)) + sizeof(PBlobHeader);
+
+ // when the database is closing, the header is already deleted
+ if (!blob_header) {
+ record->size = 0;
+ return;
+ }
+
+ uint32_t blobsize = (uint32_t)blob_header->get_size();
+ record->size = blobsize;
+
+ if (flags & HAM_PARTIAL) {
+ if (record->partial_offset > blobsize) {
+ ham_trace(("partial offset is greater than the total record size"));
+ throw Exception(HAM_INV_PARAMETER);
+ }
+ if (record->partial_offset + record->partial_size > blobsize)
+ record->partial_size = blobsize = blobsize - record->partial_offset;
+ else
+ blobsize = record->partial_size;
+ }
+
+ // empty blob?
+ if (!blobsize) {
+ record->data = 0;
+ record->size = 0;
+ }
+ else {
+ uint8_t *d = data;
+ if (flags & HAM_PARTIAL)
+ d += record->partial_offset;
+
+ if ((flags & HAM_DIRECT_ACCESS)
+ && !(record->flags & HAM_RECORD_USER_ALLOC)) {
+ record->data = d;
+ }
+ else {
+ // resize buffer if necessary
+ if (!(record->flags & HAM_RECORD_USER_ALLOC)) {
+ arena->resize(blobsize);
+ record->data = arena->get_ptr();
+ }
+ // and copy the data
+ memcpy(record->data, d, blobsize);
+ }
+ }
+}
+
+uint64_t
+InMemoryBlobManager::do_overwrite(Context *context, uint64_t old_blobid,
+ ham_record_t *record, uint32_t flags)
+{
+ // free the old blob, allocate a new blob (but if both sizes are equal,
+ // just overwrite the data)
+ PBlobHeader *phdr = (PBlobHeader *)U64_TO_PTR(old_blobid);
+
+ if (phdr->get_size() == record->size) {
+ uint8_t *p = (uint8_t *)phdr;
+ if (flags & HAM_PARTIAL) {
+ memmove(p + sizeof(PBlobHeader) + record->partial_offset,
+ record->data, record->partial_size);
+ }
+ else {
+ memmove(p + sizeof(PBlobHeader), record->data, record->size);
+ }
+ return ((uint64_t)PTR_TO_U64(phdr));
+ }
+ else {
+ uint64_t new_blobid = allocate(context, record, flags);
+
+ InMemoryDevice *dev = (InMemoryDevice *)m_env->device();
+ dev->release(phdr, (size_t)phdr->get_alloc_size());
+ return (new_blobid);
+ }
+}
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_inmem.h b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_inmem.h
new file mode 100644
index 0000000000..3c5b19a9fa
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3blob_manager/blob_manager_inmem.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef HAM_BLOB_MANAGER_INMEM_H
+#define HAM_BLOB_MANAGER_INMEM_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "3blob_manager/blob_manager.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/**
+ * A BlobManager for in-memory blobs
+ */
+class InMemoryBlobManager : public BlobManager {
+ public:
+ InMemoryBlobManager(LocalEnvironment *env)
+ : BlobManager(env) {
+ }
+
+ protected:
+  // Allocates/creates a new blob
+ // This function returns the blob-id (the start address of the blob
+ // header)
+ virtual uint64_t do_allocate(Context *context, ham_record_t *record,
+ uint32_t flags);
+
+ // Reads a blob and stores the data in |record|
+ // |flags|: either 0 or HAM_DIRECT_ACCESS
+ virtual void do_read(Context *context, uint64_t blobid,
+ ham_record_t *record, uint32_t flags,
+ ByteArray *arena);
+
+ // Retrieves the size of a blob
+ virtual uint64_t do_get_blob_size(Context *context, uint64_t blobid) {
+ PBlobHeader *blob_header = (PBlobHeader *)U64_TO_PTR(blobid);
+ return ((uint32_t)blob_header->get_size());
+ }
+
+ // Overwrites an existing blob
+ //
+ // Will return an error if the blob does not exist. Returns the blob-id
+ // (the start address of the blob header)
+ virtual uint64_t do_overwrite(Context *context, uint64_t old_blobid,
+ ham_record_t *record, uint32_t flags);
+
+ // Deletes an existing blob
+ virtual void do_erase(Context *context, uint64_t blobid,
+ Page *page = 0, uint32_t flags = 0) {
+ Memory::release((void *)U64_TO_PTR(blobid));
+ }
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BLOB_MANAGER_INMEM_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_check.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_check.cc
new file mode 100644
index 0000000000..73098ce3e1
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_check.cc
@@ -0,0 +1,325 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * btree verification
+ */
+
+#include "0root/root.h"
+
+#include <set>
+#include <string.h>
+#include <stdio.h>
+#if HAM_DEBUG
+# include <sstream>
+# include <fstream>
+#endif
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "2page/page.h"
+#include "3page_manager/page_manager.h"
+#include "3page_manager/page_manager_test.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_node_proxy.h"
+#include "4db/db.h"
+#include "4env/env.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class BtreeCheckAction
+{
+ public:
+ // Constructor
+ BtreeCheckAction(BtreeIndex *btree, Context *context, uint32_t flags)
+ : m_btree(btree), m_context(context), m_flags(flags) {
+ }
+
+ // This is the main method; it starts the verification.
+ void run() {
+ Page *page, *parent = 0;
+ uint32_t level = 0;
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ ham_assert(m_btree->get_root_address() != 0);
+
+ // get the root page of the tree
+ page = env->page_manager()->fetch(m_context, m_btree->get_root_address(),
+ PageManager::kReadOnly);
+
+#if HAM_DEBUG
+ if (m_flags & HAM_PRINT_GRAPH) {
+ m_graph << "digraph g {" << std::endl
+ << " graph [" << std::endl
+ << " rankdir = \"TD\"" << std::endl
+ << " ];" << std::endl
+ << " node [" << std::endl
+ << " fontsize = \"8\"" << std::endl
+ << " shape = \"ellipse\"" << std::endl
+ << " ];" << std::endl
+ << " edge [" << std::endl
+ << " ];" << std::endl;
+ }
+#endif
+
+ // for each level...
+ while (page) {
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+ uint64_t ptr_down = node->get_ptr_down();
+
+ // verify the page and all its siblings
+ verify_level(parent, page, level);
+ parent = page;
+
+ // follow the pointer to the smallest child
+ if (ptr_down)
+ page = env->page_manager()->fetch(m_context, ptr_down,
+ PageManager::kReadOnly);
+ else
+ page = 0;
+
+ ++level;
+ }
+
+#if HAM_DEBUG
+ if (m_flags & HAM_PRINT_GRAPH) {
+ m_graph << "}" << std::endl;
+
+ std::ofstream file;
+ file.open("graph.dot");
+ file << m_graph.str();
+ }
+#endif
+ }
+
+ private:
+ // Verifies a whole level in the tree - start with "page" and traverse
+ // the linked list of all the siblings
+ void verify_level(Page *parent, Page *page, uint32_t level) {
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+ Page *child, *leftsib = 0;
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+
+ // assert that the parent page's smallest item (item 0) is bigger
+ // than the largest item in this page
+ if (parent && node->get_left()) {
+ int cmp = compare_keys(db, page, 0, node->get_count() - 1);
+ if (cmp <= 0) {
+ ham_log(("integrity check failed in page 0x%llx: parent item "
+ "#0 <= item #%d\n", page->get_address(),
+ node->get_count() - 1));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ }
+
+ m_children.clear();
+
+ while (page) {
+ // verify the page
+ verify_page(parent, leftsib, page, level);
+
+ // follow the right sibling
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+ if (node->get_right())
+ child = env->page_manager()->fetch(m_context,
+ node->get_right(), PageManager::kReadOnly);
+ else
+ child = 0;
+
+ if (leftsib) {
+ BtreeNodeProxy *leftnode = m_btree->get_node_from_page(leftsib);
+ ham_assert(leftnode->is_leaf() == node->is_leaf());
+ }
+
+ leftsib = page;
+ page = child;
+ }
+ }
+
+ // Verifies a single page
+ void verify_page(Page *parent, Page *leftsib, Page *page, uint32_t level) {
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+
+#if HAM_DEBUG
+ if (m_flags & HAM_PRINT_GRAPH) {
+ std::stringstream ss;
+ ss << "node" << page->get_address();
+ m_graph << " \"" << ss.str() << "\" [" << std::endl
+ << " label = \"";
+ m_graph << "<fl>L|<fd>D|";
+ for (uint32_t i = 0; i < node->get_count(); i++) {
+ m_graph << "<f" << i << ">" << i << "|";
+ }
+ m_graph << "<fr>R\"" << std::endl
+ << " shape = \"record\"" << std::endl
+ << " ];" << std::endl;
+#if 0
+ // edge to the left sibling
+ if (node->get_left())
+ m_graph << "\"" << ss.str() << "\":fl -> \"node"
+ << node->get_left() << "\":fr [" << std::endl
+ << " ];" << std::endl;
+ // to the right sibling
+ if (node->get_right())
+ m_graph << " \"" << ss.str() << "\":fr -> \"node"
+ << node->get_right() << "\":fl [" << std::endl
+ << " ];" << std::endl;
+#endif
+ // to ptr_down
+ if (node->get_ptr_down())
+ m_graph << " \"" << ss.str() << "\":fd -> \"node"
+ << node->get_ptr_down() << "\":fd [" << std::endl
+ << " ];" << std::endl;
+ // to all children
+ if (!node->is_leaf()) {
+ for (uint32_t i = 0; i < node->get_count(); i++) {
+ m_graph << " \"" << ss.str() << "\":f" << i << " -> \"node"
+ << node->get_record_id(m_context, i) << "\":fd ["
+ << std::endl << " ];" << std::endl;
+ }
+ }
+ }
+#endif
+
+ if (node->get_count() == 0) {
+      // a root page can be empty! check if this page is the root page
+ if (page->get_address() == m_btree->get_root_address())
+ return;
+
+ // for internal nodes: ptr_down HAS to be set!
+ if (!node->is_leaf() && node->get_ptr_down() == 0) {
+ ham_log(("integrity check failed in page 0x%llx: empty page!\n",
+ page->get_address()));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ }
+
+ // check if the largest item of the left sibling is smaller than
+ // the smallest item of this page
+ if (leftsib) {
+ BtreeNodeProxy *sibnode = m_btree->get_node_from_page(leftsib);
+ ham_key_t key1 = {0};
+ ham_key_t key2 = {0};
+
+ node->check_integrity(m_context);
+
+ if (node->get_count() > 0 && sibnode->get_count() > 0) {
+ sibnode->get_key(m_context, sibnode->get_count() - 1,
+ &m_barray1, &key1);
+ node->get_key(m_context, 0, &m_barray2, &key2);
+
+ int cmp = node->compare(&key1, &key2);
+ if (cmp >= 0) {
+ ham_log(("integrity check failed in page 0x%llx: item #0 "
+ "< left sibling item #%d\n", page->get_address(),
+ sibnode->get_count() - 1));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ }
+ }
+
+ if (node->get_count() == 1)
+ return;
+
+ node->check_integrity(m_context);
+
+ if (node->get_count() > 0) {
+ for (uint32_t i = 0; i < node->get_count() - 1; i++) {
+ int cmp = compare_keys(db, page, (uint32_t)i, (uint32_t)(i + 1));
+ if (cmp >= 0) {
+ ham_log(("integrity check failed in page 0x%llx: item #%d "
+ "< item #%d", page->get_address(), i, i + 1));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ }
+ }
+
+ // internal nodes: make sure that all record IDs are unique
+ if (!node->is_leaf()) {
+ if (m_children.find(node->get_ptr_down()) != m_children.end()) {
+ ham_log(("integrity check failed in page 0x%llx: record of item "
+ "-1 is not unique", page->get_address()));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ m_children.insert(node->get_ptr_down());
+
+ for (uint32_t i = 0; i < node->get_count(); i++) {
+ uint64_t child_id = node->get_record_id(m_context, i);
+ if (m_children.find(child_id) != m_children.end()) {
+ ham_log(("integrity check failed in page 0x%llx: record of item "
+ "#%d is not unique", page->get_address(), i));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ PageManagerTest test = env->page_manager()->test();
+ if (test.is_page_free(child_id)) {
+ ham_log(("integrity check failed in page 0x%llx: record of item "
+ "#%d is in freelist", page->get_address(), i));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ m_children.insert(child_id);
+ }
+ }
+ }
+
+ int compare_keys(LocalDatabase *db, Page *page, int lhs, int rhs) {
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+ ham_key_t key1 = {0};
+ ham_key_t key2 = {0};
+
+ node->get_key(m_context, lhs, &m_barray1, &key1);
+ node->get_key(m_context, rhs, &m_barray2, &key2);
+
+ return (node->compare(&key1, &key2));
+ }
+
+ // The BtreeIndex on which we operate
+ BtreeIndex *m_btree;
+
+ // The current Context
+ Context *m_context;
+
+ // The flags as specified when calling ham_db_check_integrity
+ uint32_t m_flags;
+
+ // ByteArrays to avoid frequent memory allocations
+ ByteArray m_barray1;
+ ByteArray m_barray2;
+
+ // For checking uniqueness of record IDs on an internal level
+ std::set<uint64_t> m_children;
+
+#if HAM_DEBUG
+ // For printing the graph
+ std::ostringstream m_graph;
+#endif
+};
+
+void
+BtreeIndex::check_integrity(Context *context, uint32_t flags)
+{
+ BtreeCheckAction bta(this, context, flags);
+ bta.run();
+}
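+
+// Illustrative usage (not part of the original sources): this check is
+// reached through the public ham_db_check_integrity() call (see
+// include/ham/hamsterdb.h for the exact signature). In a debug build,
+// passing HAM_PRINT_GRAPH additionally writes the tree structure to
+// "graph.dot", e.g.
+//
+//   ham_db_check_integrity(db, HAM_PRINT_GRAPH);
+//   // render with: dot -Tpng graph.dot -o graph.png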
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_cursor.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_cursor.cc
new file mode 100644
index 0000000000..b66b58c645
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_cursor.cc
@@ -0,0 +1,561 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "2page/page.h"
+#include "3page_manager/page_manager.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_cursor.h"
+#include "3btree/btree_node_proxy.h"
+#include "4cursor/cursor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+BtreeCursor::BtreeCursor(Cursor *parent)
+ : m_parent(parent), m_state(0), m_duplicate_index(0),
+ m_coupled_page(0), m_coupled_index(0), m_next_in_page(0),
+ m_previous_in_page(0)
+{
+ memset(&m_uncoupled_key, 0, sizeof(m_uncoupled_key));
+ m_btree = parent->get_db()->btree_index();
+}
+
+void
+BtreeCursor::set_to_nil()
+{
+ // uncoupled cursor: free the cached pointer
+ if (m_state == kStateUncoupled)
+ memset(&m_uncoupled_key, 0, sizeof(m_uncoupled_key));
+ // coupled cursor: remove from page
+ else if (m_state == kStateCoupled)
+ remove_cursor_from_page(m_coupled_page);
+
+ m_state = BtreeCursor::kStateNil;
+ m_duplicate_index = 0;
+}
+
+void
+BtreeCursor::uncouple_from_page(Context *context)
+{
+ if (m_state == kStateUncoupled || m_state == kStateNil)
+ return;
+
+ ham_assert(m_coupled_page != 0);
+
+ // get the btree-entry of this key
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+ ham_assert(node->is_leaf());
+ node->get_key(context, m_coupled_index, &m_uncoupled_arena, &m_uncoupled_key);
+
+ // uncouple the page
+ remove_cursor_from_page(m_coupled_page);
+
+ // set the state and the uncoupled key
+ m_state = BtreeCursor::kStateUncoupled;
+}
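+
+// Note (added for clarity, not part of the original sources): a cursor is
+// either "nil" (pointing nowhere), "coupled" (pointing directly at a slot
+// in a btree page) or "uncoupled" (holding a private copy of its key in
+// |m_uncoupled_key| so the page can be modified or evicted in the
+// meantime). couple() below re-attaches an uncoupled cursor by looking
+// its cached key up again.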
+
+void
+BtreeCursor::clone(BtreeCursor *other)
+{
+ m_duplicate_index = other->m_duplicate_index;
+
+ // if the old cursor is coupled: couple the new cursor, too
+ if (other->m_state == kStateCoupled) {
+ couple_to_page(other->m_coupled_page, other->m_coupled_index);
+ }
+ // otherwise, if the src cursor is uncoupled: copy the key
+ else if (other->m_state == kStateUncoupled) {
+ memset(&m_uncoupled_key, 0, sizeof(m_uncoupled_key));
+
+ m_uncoupled_arena.copy(other->m_uncoupled_arena.get_ptr(),
+ other->m_uncoupled_arena.get_size());
+ m_uncoupled_key.data = m_uncoupled_arena.get_ptr();
+ m_uncoupled_key.size = m_uncoupled_arena.get_size();
+ m_state = kStateUncoupled;
+ }
+ else {
+ set_to_nil();
+ }
+}
+
+void
+BtreeCursor::overwrite(Context *context, ham_record_t *record, uint32_t flags)
+{
+ // uncoupled cursor: couple it
+ if (m_state == kStateUncoupled)
+ couple(context);
+ else if (m_state != kStateCoupled)
+ throw Exception(HAM_CURSOR_IS_NIL);
+
+ // copy the key flags, and remove all flags concerning the key size
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+ node->set_record(context, m_coupled_index, record, m_duplicate_index,
+ flags | HAM_OVERWRITE, 0);
+
+ m_coupled_page->set_dirty(true);
+}
+
+ham_status_t
+BtreeCursor::move(Context *context, ham_key_t *key, ByteArray *key_arena,
+ ham_record_t *record, ByteArray *record_arena, uint32_t flags)
+{
+ ham_status_t st = 0;
+
+ if (flags & HAM_CURSOR_FIRST)
+ st = move_first(context, flags);
+ else if (flags & HAM_CURSOR_LAST)
+ st = move_last(context, flags);
+ else if (flags & HAM_CURSOR_NEXT)
+ st = move_next(context, flags);
+ else if (flags & HAM_CURSOR_PREVIOUS)
+ st = move_previous(context, flags);
+ // no move, but cursor is nil? return error
+ else if (m_state == kStateNil) {
+ if (key || record)
+ return (HAM_CURSOR_IS_NIL);
+ else
+ return (0);
+ }
+ // no move, but cursor is not coupled? couple it
+ else if (m_state == kStateUncoupled)
+ couple(context);
+
+ if (st)
+ return (st);
+
+ ham_assert(m_state == kStateCoupled);
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+ ham_assert(node->is_leaf());
+
+ if (key)
+ node->get_key(context, m_coupled_index, key_arena, key);
+
+ if (record)
+ node->get_record(context, m_coupled_index, record_arena, record,
+ flags, m_duplicate_index);
+
+ return (0);
+}
+
+ham_status_t
+BtreeCursor::find(Context *context, ham_key_t *key, ByteArray *key_arena,
+ ham_record_t *record, ByteArray *record_arena, uint32_t flags)
+{
+ set_to_nil();
+
+ return (m_btree->find(context, m_parent, key, key_arena, record,
+ record_arena, flags));
+}
+
+bool
+BtreeCursor::points_to(Context *context, Page *page, int slot)
+{
+ if (m_state == kStateUncoupled)
+ couple(context);
+
+ if (m_state == kStateCoupled)
+ return (m_coupled_page == page && m_coupled_index == slot);
+
+ return (false);
+}
+
+bool
+BtreeCursor::points_to(Context *context, ham_key_t *key)
+{
+ if (m_state == kStateUncoupled) {
+ if (m_uncoupled_key.size != key->size)
+ return (false);
+ return (0 == m_btree->compare_keys(key, &m_uncoupled_key));
+ }
+
+ if (m_state == kStateCoupled) {
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+ return (node->equals(context, key, m_coupled_index));
+ }
+
+ ham_assert(!"shouldn't be here");
+ return (false);
+}
+
+ham_status_t
+BtreeCursor::move_to_next_page(Context *context)
+{
+ LocalEnvironment *env = m_parent->get_db()->lenv();
+
+ // uncoupled cursor: couple it
+ if (m_state == kStateUncoupled)
+ couple(context);
+ else if (m_state != kStateCoupled)
+ return (HAM_CURSOR_IS_NIL);
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+ // if there is no right sibling then couple the cursor to the right-most
+ // key in the last page and return KEY_NOT_FOUND
+ if (!node->get_right()) {
+ couple_to_page(m_coupled_page, node->get_count() - 1, 0);
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ Page *page = env->page_manager()->fetch(context, node->get_right(),
+ PageManager::kReadOnly);
+ couple_to_page(page, 0, 0);
+ return (0);
+}
+
+int
+BtreeCursor::get_record_count(Context *context, uint32_t flags)
+{
+ // uncoupled cursor: couple it
+ if (m_state == kStateUncoupled)
+ couple(context);
+ else if (m_state != kStateCoupled)
+ throw Exception(HAM_CURSOR_IS_NIL);
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+ return (node->get_record_count(context, m_coupled_index));
+}
+
+uint64_t
+BtreeCursor::get_record_size(Context *context)
+{
+ // uncoupled cursor: couple it
+ if (m_state == kStateUncoupled)
+ couple(context);
+ else if (m_state != kStateCoupled)
+ throw Exception(HAM_CURSOR_IS_NIL);
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+ return (node->get_record_size(context, m_coupled_index, m_duplicate_index));
+}
+
+void
+BtreeCursor::couple(Context *context)
+{
+ ham_assert(m_state == kStateUncoupled);
+
+ /*
+ * Make a 'find' on the cached key; if we succeed, the cursor
+ * is automatically coupled. Since |find()| overwrites and modifies
+ * the cursor's state, keep a backup and restore it afterwards.
+ */
+ int duplicate_index = m_duplicate_index;
+ ByteArray uncoupled_arena = m_uncoupled_arena;
+ ham_key_t uncoupled_key = m_uncoupled_key;
+ m_uncoupled_arena = ByteArray();
+
+ find(context, &uncoupled_key, 0, 0, 0, 0);
+
+ m_duplicate_index = duplicate_index;
+ m_uncoupled_key = uncoupled_key;
+ m_uncoupled_arena = uncoupled_arena;
+ uncoupled_arena.disown(); // do not free when going out of scope
+}
+
+ham_status_t
+BtreeCursor::move_first(Context *context, uint32_t flags)
+{
+ LocalDatabase *db = m_parent->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ // get a NIL cursor
+ set_to_nil();
+
+ // get the root page
+ Page *page = env->page_manager()->fetch(context,
+ m_btree->get_root_address(), PageManager::kReadOnly);
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+
+ // traverse down to the leaves
+ while (!node->is_leaf()) {
+ page = env->page_manager()->fetch(context, node->get_ptr_down(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+ }
+
+ // and to the next page that is NOT empty
+ while (node->get_count() == 0) {
+ if (node->get_right() == 0)
+ return (HAM_KEY_NOT_FOUND);
+ page = env->page_manager()->fetch(context, node->get_right(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+ }
+
+ // couple this cursor to the smallest key in this page
+ couple_to_page(page, 0, 0);
+
+ return (0);
+}
+
+ham_status_t
+BtreeCursor::move_next(Context *context, uint32_t flags)
+{
+ LocalDatabase *db = m_parent->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ // uncoupled cursor: couple it
+ if (m_state == kStateUncoupled)
+ couple(context);
+ else if (m_state != kStateCoupled)
+ return (HAM_CURSOR_IS_NIL);
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+
+ // if this key has duplicates: move to the next duplicate; otherwise
+ // fall through
+ if (!(flags & HAM_SKIP_DUPLICATES)) {
+ if (m_duplicate_index
+ < node->get_record_count(context, m_coupled_index) - 1) {
+ m_duplicate_index++;
+ return (0);
+ }
+ }
+
+ // don't continue if ONLY_DUPLICATES is set
+ if (flags & HAM_ONLY_DUPLICATES)
+ return (HAM_KEY_NOT_FOUND);
+
+ // if the index+1 is still in the coupled page, just increment the index
+ if (m_coupled_index + 1 < (int)node->get_count()) {
+ couple_to_page(m_coupled_page, m_coupled_index + 1, 0);
+ return (0);
+ }
+
+ // otherwise uncouple the cursor and load the right sibling page
+ if (!node->get_right())
+ return (HAM_KEY_NOT_FOUND);
+
+ Page *page = env->page_manager()->fetch(context, node->get_right(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+
+ // if the right node is empty then continue searching for the next
+ // non-empty page
+ while (node->get_count() == 0) {
+ if (!node->get_right())
+ return (HAM_KEY_NOT_FOUND);
+ page = env->page_manager()->fetch(context, node->get_right(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+ }
+
+ // couple this cursor to the smallest key in this page
+ couple_to_page(page, 0, 0);
+
+ return (0);
+}
+
+ham_status_t
+BtreeCursor::move_previous(Context *context, uint32_t flags)
+{
+ LocalDatabase *db = m_parent->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ // uncoupled cursor: couple it
+ if (m_state == kStateUncoupled)
+ couple(context);
+ else if (m_state != kStateCoupled)
+ return (HAM_CURSOR_IS_NIL);
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(m_coupled_page);
+
+ // if this key has duplicates: move to the previous duplicate; otherwise
+ // fall through
+ if (!(flags & HAM_SKIP_DUPLICATES) && m_duplicate_index > 0) {
+ m_duplicate_index--;
+ return (0);
+ }
+
+ // don't continue if ONLY_DUPLICATES is set
+ if (flags & HAM_ONLY_DUPLICATES)
+ return (HAM_KEY_NOT_FOUND);
+
+ // if the index-1 is still in the coupled page, just decrement the index
+ if (m_coupled_index != 0) {
+ couple_to_page(m_coupled_page, m_coupled_index - 1);
+ }
+ // otherwise load the left sibling page
+ else {
+ if (!node->get_left())
+ return (HAM_KEY_NOT_FOUND);
+
+ Page *page = env->page_manager()->fetch(context, node->get_left(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+
+ // if the left node is empty then continue searching for the next
+ // non-empty page
+ while (node->get_count() == 0) {
+ if (!node->get_left())
+ return (HAM_KEY_NOT_FOUND);
+ page = env->page_manager()->fetch(context, node->get_left(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+ }
+
+ // couple this cursor to the highest key in this page
+ couple_to_page(page, node->get_count() - 1);
+ }
+ m_duplicate_index = 0;
+
+ // if duplicates are enabled: move to the end of the duplicate-list
+ if (!(flags & HAM_SKIP_DUPLICATES))
+ m_duplicate_index = node->get_record_count(context, m_coupled_index) - 1;
+
+ return (0);
+}
+
+ham_status_t
+BtreeCursor::move_last(Context *context, uint32_t flags)
+{
+ LocalDatabase *db = m_parent->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ // get a NIL cursor
+ set_to_nil();
+
+ // get the root page
+ if (!m_btree->get_root_address())
+ return (HAM_KEY_NOT_FOUND);
+
+ Page *page = env->page_manager()->fetch(context,
+ m_btree->get_root_address(), PageManager::kReadOnly);
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+
+ // traverse down to the leaves
+ while (!node->is_leaf()) {
+ if (node->get_count() == 0)
+ page = env->page_manager()->fetch(context, node->get_ptr_down(),
+ PageManager::kReadOnly);
+ else
+ page = env->page_manager()->fetch(context,
+ node->get_record_id(context, node->get_count() - 1),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+ }
+
+ // and to the last page that is NOT empty
+ while (node->get_count() == 0) {
+ if (node->get_left() == 0)
+ return (HAM_KEY_NOT_FOUND);
+ page = env->page_manager()->fetch(context, node->get_left(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+ }
+
+ // couple this cursor to the largest key in this page
+ couple_to_page(page, node->get_count() - 1, 0);
+
+ // if duplicates are enabled: move to the end of the duplicate-list
+ if (!(flags & HAM_SKIP_DUPLICATES))
+ m_duplicate_index = node->get_record_count(context, m_coupled_index) - 1;
+
+ return (0);
+}
+
+void
+BtreeCursor::couple_to_page(Page *page, uint32_t index)
+{
+ ham_assert(page != 0);
+
+ if (m_state == kStateCoupled && m_coupled_page != page)
+ remove_cursor_from_page(m_coupled_page);
+
+ m_coupled_index = index;
+ m_state = kStateCoupled;
+ if (m_coupled_page == page)
+ return;
+
+ m_coupled_page = page;
+
+ // add the cursor to the page
+ if (page->cursor_list()) {
+ m_next_in_page = page->cursor_list();
+ m_previous_in_page = 0;
+ page->cursor_list()->m_previous_in_page = this;
+ }
+ page->set_cursor_list(this);
+}
+
+void
+BtreeCursor::remove_cursor_from_page(Page *page)
+{
+ BtreeCursor *n, *p;
+
+ if (this == page->cursor_list()) {
+ n = m_next_in_page;
+ if (n)
+ n->m_previous_in_page = 0;
+ page->set_cursor_list(n);
+ }
+ else {
+ n = m_next_in_page;
+ p = m_previous_in_page;
+ if (p)
+ p->m_next_in_page = n;
+ if (n)
+ n->m_previous_in_page = p;
+ }
+
+ m_coupled_page = 0;
+ m_next_in_page = 0;
+ m_previous_in_page = 0;
+}
+
+void
+BtreeCursor::uncouple_all_cursors(Context *context, Page *page, int start)
+{
+ bool skipped = false;
+ Cursor *cursors = page->cursor_list()
+ ? page->cursor_list()->get_parent()
+ : 0;
+
+ while (cursors) {
+ BtreeCursor *btc = cursors->get_btree_cursor();
+ BtreeCursor *next = btc->m_next_in_page;
+
+ // ignore all cursors which are already uncoupled or which are
+ // coupled to a key in the Transaction
+ if (btc->m_state == kStateCoupled) {
+ // skip this cursor if its position is < start
+ if (btc->m_coupled_index < start) {
+ cursors = next ? next->m_parent : 0;
+ skipped = true;
+ continue;
+ }
+
+ // otherwise: uncouple the cursor from the page
+ btc->uncouple_from_page(context);
+ }
+
+ cursors = next ? next->m_parent : 0;
+ }
+
+ if (!skipped)
+ page->set_cursor_list(0);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_cursor.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_cursor.h
new file mode 100644
index 0000000000..1754371875
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_cursor.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * btree cursors
+ *
+ * A Btree-Cursor is an object which is used to traverse a Btree.
+ * It is a random access iterator.
+ *
+ * Btree-Cursors are used in Cursor structures as defined in cursor.h. But
+ * some routines use them directly, mostly for performance reasons. Over
+ * time these layers will be cleaned up and the separation will be improved.
+ *
+ * The cursor implementation is very fast. Most of the operations (i.e.
+ * move previous/next) will not cause any disk access but are O(1) and
+ * in-memory only. That's because a cursor is directly "coupled" to a
+ * btree page (Page) that resides in memory. If the page is removed
+ * from memory (e.g. because the cache decides to purge it, or because
+ * of a page split) then the cursor is "uncoupled", and a
+ * copy of the current key is stored in the cursor. On first access, the
+ * cursor is "coupled" again and basically performs a normal lookup of the key.
+ *
+ * The three states of a BtreeCursor("nil", "coupled", "uncoupled") can be
+ * retrieved with the method get_state(), and can be modified with
+ * set_to_nil(), couple_to_page() and uncouple_from_page().
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
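+/*
+ * A minimal sketch of the state transitions described above, using the
+ * methods declared below. |parent| is assumed to be a valid Cursor,
+ * |page| a leaf page fetched by the caller, and |context| the caller's
+ * Context:
+ *
+ *     BtreeCursor cursor(parent);              // starts in the "nil" state
+ *     cursor.couple_to_page(page, 0, 0);       // -> kStateCoupled
+ *     cursor.uncouple_from_page(context);      // key is copied,
+ *                                              // -> kStateUncoupled
+ *     ham_assert(cursor.get_state() == BtreeCursor::kStateUncoupled);
+ *     cursor.set_to_nil();                     // back to kStateNil
+ */
+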
+#ifndef HAM_BTREE_CURSORS_H
+#define HAM_BTREE_CURSORS_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/dynamic_array.h"
+#include "1base/error.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+class Cursor;
+class BtreeIndex;
+class Page;
+
+//
+// The Cursor structure for a b+tree cursor
+//
+class BtreeCursor
+{
+ public:
+ enum {
+ // Cursor does not point to any key
+ kStateNil = 0,
+ // Cursor flag: the cursor is coupled
+ kStateCoupled = 1,
+ // Cursor flag: the cursor is uncoupled
+ kStateUncoupled = 2
+ };
+
+ // Constructor
+ BtreeCursor(Cursor *parent = 0);
+
+ // Destructor; asserts that the cursor is nil
+ ~BtreeCursor() {
+ ham_assert(m_state == kStateNil);
+ }
+
+ // Returns the parent cursor
+ // TODO this should be private
+ Cursor *get_parent() {
+ return (m_parent);
+ }
+
+ // Clones another BtreeCursor
+ void clone(BtreeCursor *other);
+
+ // Returns the cursor's state (kStateCoupled, kStateUncoupled, kStateNil)
+ uint32_t get_state() const {
+ return (m_state);
+ }
+
+ // Resets the cursor's state and uninitializes it. After this call
+ // the cursor no longer points to any key.
+ void set_to_nil();
+
+ // Returns the page, index in this page and the duplicate index that this
+ // cursor is coupled to. This is used by Btree functions to optimize
+ // certain algorithms, i.e. when erasing the current key.
+ // Asserts that the cursor is coupled.
+ void get_coupled_key(Page **page, int *index = 0,
+ int *duplicate_index = 0) const {
+ ham_assert(m_state == kStateCoupled);
+ if (page)
+ *page = m_coupled_page;
+ if (index)
+ *index = m_coupled_index;
+ if (duplicate_index)
+ *duplicate_index = m_duplicate_index;
+ }
+
+ // Returns the uncoupled key of this cursor.
+ // Asserts that the cursor is uncoupled.
+ ham_key_t *get_uncoupled_key() {
+ ham_assert(m_state == kStateUncoupled);
+ return (&m_uncoupled_key);
+ }
+
+ // Couples the cursor to a key directly in a page. Also sets the
+ // duplicate index.
+ void couple_to_page(Page *page, uint32_t index,
+ int duplicate_index) {
+ couple_to_page(page, index);
+ m_duplicate_index = duplicate_index;
+ }
+
+ // Returns the duplicate index that this cursor points to.
+ int get_duplicate_index() const {
+ return (m_duplicate_index);
+ }
+
+ // Sets the index of the duplicate we're pointing to
+ void set_duplicate_index(int duplicate_index) {
+ m_duplicate_index = duplicate_index;
+ }
+
+ // Uncouples the cursor
+ void uncouple_from_page(Context *context);
+
+ // Returns true if a cursor points to this btree key
+ bool points_to(Context *context, Page *page, int slot);
+
+ // Returns true if a cursor points to this external key
+ bool points_to(Context *context, ham_key_t *key);
+
+ // Moves the btree cursor to the next page
+ ham_status_t move_to_next_page(Context *context);
+
+ // Positions the cursor on a key and retrieves the record (if |record|
+ // is a valid pointer)
+ ham_status_t find(Context *context, ham_key_t *key, ByteArray *key_arena,
+ ham_record_t *record, ByteArray *record_arena,
+ uint32_t flags);
+
+ // Moves the cursor to the first, last, next or previous element
+ ham_status_t move(Context *context, ham_key_t *key, ByteArray *key_arena,
+ ham_record_t *record, ByteArray *record_arena,
+ uint32_t flags);
+
+ // Returns the number of records of the referenced key
+ int get_record_count(Context *context, uint32_t flags);
+
+ // Overwrite the record of this cursor
+ void overwrite(Context *context, ham_record_t *record, uint32_t flags);
+
+ // retrieves the record size of the current record
+ uint64_t get_record_size(Context *context);
+
+ // Closes the cursor
+ void close() {
+ set_to_nil();
+ }
+
+ // Uncouples all cursors from a page
+ // This method is called whenever the page is deleted or becomes invalid
+ static void uncouple_all_cursors(Context *context, Page *page,
+ int start = 0);
+
+ private:
+ // Couples the cursor to |page| and |index|, and links the cursor
+ // into the page's cursor list (and vice versa).
+ void couple_to_page(Page *page, uint32_t index);
+
+ // Removes this cursor from a page
+ void remove_cursor_from_page(Page *page);
+
+ // Couples the cursor to the current page/key
+ // Asserts that the cursor is uncoupled. After this call the cursor
+ // will be coupled.
+ void couple(Context *context);
+
+ // move cursor to the very first key
+ ham_status_t move_first(Context *context, uint32_t flags);
+
+ // move cursor to the very last key
+ ham_status_t move_last(Context *context, uint32_t flags);
+
+ // move cursor to the next key
+ ham_status_t move_next(Context *context, uint32_t flags);
+
+ // move cursor to the previous key
+ ham_status_t move_previous(Context *context, uint32_t flags);
+
+ // the parent cursor
+ Cursor *m_parent;
+
+ // The BtreeIndex instance
+ BtreeIndex *m_btree;
+
+ // "coupled" or "uncoupled" states; coupled means that the
+ // cursor points into a Page object, which is in
+ // memory. "uncoupled" means that the cursor holds a copy
+ // of the key it points to (e.g. because the coupled page was
+ // flushed to disk and removed from the cache)
+ int m_state;
+
+ // the id of the duplicate key to which this cursor is coupled
+ int m_duplicate_index;
+
+ // for coupled cursors: the page we're pointing to
+ Page *m_coupled_page;
+
+ // ... and the index of the key in that page
+ int m_coupled_index;
+
+ // for uncoupled cursors: a copy of the key at which we're pointing
+ ham_key_t m_uncoupled_key;
+
+ // a ByteArray which backs |m_uncoupled_key.data|
+ ByteArray m_uncoupled_arena;
+
+ // Linked list of cursors which point to the same page
+ BtreeCursor *m_next_in_page, *m_previous_in_page;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_CURSORS_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_erase.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_erase.cc
new file mode 100644
index 0000000000..1222cac8fe
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_erase.cc
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3page_manager/page_manager.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_stats.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_update.h"
+#include "3btree/btree_node_proxy.h"
+#include "4db/db.h"
+#include "4cursor/cursor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/*
+ * Erases key/value pairs from a btree
+ */
+class BtreeEraseAction : public BtreeUpdateAction
+{
+ public:
+ BtreeEraseAction(BtreeIndex *btree, Context *context, Cursor *cursor,
+ ham_key_t *key, int duplicate_index = 0, uint32_t flags = 0)
+ : BtreeUpdateAction(btree, context, cursor
+ ? cursor->get_btree_cursor()
+ : 0, duplicate_index),
+ m_key(key), m_flags(flags) {
+ if (m_cursor)
+ m_duplicate_index = m_cursor->get_duplicate_index() + 1;
+ }
+
+ // This is the entry point for the erase operation
+ ham_status_t run() {
+ // Coupled cursor: try to remove the key directly from the page
+ if (m_cursor) {
+ if (m_cursor->get_state() == BtreeCursor::kStateCoupled) {
+ Page *coupled_page;
+ int coupled_index;
+ m_cursor->get_coupled_key(&coupled_page, &coupled_index);
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(coupled_page);
+ ham_assert(node->is_leaf());
+
+ // Now try to delete the key. This can require a page split if the
+ // KeyList is not "delete-stable" (some compressed lists can
+ // grow when keys are deleted).
+ try {
+ remove_entry(coupled_page, 0, coupled_index);
+ }
+ catch (Exception &ex) {
+ if (ex.code != HAM_LIMITS_REACHED)
+ throw ex;
+ goto fall_through;
+ }
+ // TODO if the page is empty then ask the janitor to clean it up
+ return (0);
+
+fall_through:
+ m_cursor->uncouple_from_page(m_context);
+ }
+
+ if (m_cursor->get_state() == BtreeCursor::kStateUncoupled)
+ m_key = m_cursor->get_uncoupled_key();
+ }
+
+ return (erase());
+ }
+
+ private:
+ ham_status_t erase() {
+ // traverse the tree to the leaf, splitting/merging nodes as required
+ Page *parent;
+ BtreeStatistics::InsertHints hints;
+ Page *page = traverse_tree(m_key, hints, &parent);
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+
+ // we have reached the leaf; search the leaf for the key
+ int slot = node->find_exact(m_context, m_key);
+ if (slot < 0) {
+ m_btree->get_statistics()->erase_failed();
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ // remove the key from the leaf
+ return (remove_entry(page, parent, slot));
+ }
+
+ ham_status_t remove_entry(Page *page, Page *parent, int slot) {
+ LocalDatabase *db = m_btree->get_db();
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+
+ ham_assert(slot >= 0);
+ ham_assert(slot < (int)node->get_count());
+
+ // delete the record, but only on leaf nodes! internal nodes don't have
+ // records; they point to pages instead, and we do not want to delete
+ // those.
+ bool has_duplicates_left = false;
+ if (node->is_leaf()) {
+ // only delete a duplicate?
+ if (m_duplicate_index > 0)
+ node->erase_record(m_context, slot, m_duplicate_index - 1, false,
+ &has_duplicates_left);
+ else
+ node->erase_record(m_context, slot, 0, true, 0);
+ }
+
+ page->set_dirty(true);
+
+ // still got duplicates left? then adjust all cursors
+ if (node->is_leaf() && has_duplicates_left && db->cursor_list()) {
+ Cursor *cursors = db->cursor_list();
+ BtreeCursor *btcur = cursors->get_btree_cursor();
+
+ int duplicate_index =
+ m_cursor
+ ? m_cursor->get_duplicate_index()
+ : m_duplicate_index;
+
+ while (btcur) {
+ BtreeCursor *next = 0;
+ if (cursors->get_next()) {
+ cursors = cursors->get_next();
+ next = cursors->get_btree_cursor();
+ }
+
+ if (btcur != m_cursor && btcur->points_to(m_context, page, slot)) {
+ if (btcur->get_duplicate_index() == duplicate_index)
+ btcur->set_to_nil();
+ else if (btcur->get_duplicate_index() > duplicate_index)
+ btcur->set_duplicate_index(btcur->get_duplicate_index() - 1);
+ }
+ btcur = next;
+ }
+ // all cursors were adjusted, the duplicate was deleted. return
+ // to caller!
+ return (0);
+ }
+
+ // no duplicates left, the key was deleted; all cursors pointing to
+ // this key are set to nil, all cursors pointing to a key in the same
+ // page are adjusted, if necessary
+ if (node->is_leaf() && !has_duplicates_left && db->cursor_list()) {
+ Cursor *cursors = db->cursor_list();
+ BtreeCursor *btcur = cursors->get_btree_cursor();
+
+ /* 'nil' every cursor which points to the deleted key, and adjust
+ * other cursors attached to the same page */
+ while (btcur) {
+ BtreeCursor *cur = btcur;
+ BtreeCursor *next = 0;
+ if (cursors->get_next()) {
+ cursors = cursors->get_next();
+ next = cursors->get_btree_cursor();
+ }
+ if (btcur != m_cursor && cur->points_to(m_context, page, slot))
+ cur->set_to_nil();
+ else if (btcur != m_cursor
+ && (cur->get_state() & BtreeCursor::kStateCoupled)) {
+ Page *coupled_page;
+ int coupled_slot;
+ cur->get_coupled_key(&coupled_page, &coupled_slot);
+ if (coupled_page == page && coupled_slot > slot)
+ cur->uncouple_from_page(m_context);
+ }
+ btcur = next;
+ }
+ }
+
+ if (has_duplicates_left)
+ return (0);
+
+ // We've reached the leaf; it's still possible that we have to
+ // split the page, therefore this case has to be handled
+ try {
+ node->erase(m_context, slot);
+ }
+ catch (Exception &ex) {
+ if (ex.code != HAM_LIMITS_REACHED)
+ throw ex;
+
+ // Split the page in the middle. This will invalidate the |node| pointer
+ // and the |slot| of the key, therefore restart the whole operation
+ BtreeStatistics::InsertHints hints = {0};
+ split_page(page, parent, m_key, hints);
+ return (erase());
+ }
+
+ return (0);
+ }
+
+ // the key that is erased
+ ham_key_t *m_key;
+
+ // flags of ham_db_erase()
+ uint32_t m_flags;
+};
+
+ham_status_t
+BtreeIndex::erase(Context *context, Cursor *cursor, ham_key_t *key,
+ int duplicate, uint32_t flags)
+{
+ context->db = get_db();
+
+ BtreeEraseAction bea(this, context, cursor, key, duplicate, flags);
+ return (bea.run());
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_find.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_find.cc
new file mode 100644
index 0000000000..05c99b5818
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_find.cc
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * btree searching
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_cursor.h"
+#include "3btree/btree_stats.h"
+#include "3btree/btree_node_proxy.h"
+#include "3page_manager/page_manager.h"
+#include "4cursor/cursor.h"
+#include "4db/db.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class BtreeFindAction
+{
+ public:
+ BtreeFindAction(BtreeIndex *btree, Context *context, Cursor *cursor,
+ ham_key_t *key, ByteArray *key_arena,
+ ham_record_t *record, ByteArray *record_arena,
+ uint32_t flags)
+ : m_btree(btree), m_context(context), m_cursor(0), m_key(key),
+ m_record(record), m_flags(flags), m_key_arena(key_arena),
+ m_record_arena(record_arena) {
+ if (cursor && cursor->get_btree_cursor()->get_parent())
+ m_cursor = cursor->get_btree_cursor();
+ }
+
+ ham_status_t run() {
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+ Page *page = 0;
+ int slot = -1;
+ BtreeNodeProxy *node = 0;
+
+ BtreeStatistics *stats = m_btree->get_statistics();
+ BtreeStatistics::FindHints hints = stats->get_find_hints(m_flags);
+
+ if (hints.try_fast_track) {
+ /*
+ * see if we get a sure hit within this btree leaf; if not, revert to
+ * regular scan
+ *
+ * As this is a speed-improvement hint that re-uses recent material,
+ * the page should still be in the cache; if it is not, the hint is
+ * stale and is discarded.
+ */
+ page = env->page_manager()->fetch(m_context, hints.leaf_page_addr,
+ PageManager::kOnlyFromCache
+ | PageManager::kReadOnly);
+ if (page) {
+ node = m_btree->get_node_from_page(page);
+ ham_assert(node->is_leaf());
+
+ uint32_t approx_match;
+ slot = m_btree->find_leaf(m_context, page, m_key, m_flags,
+ &approx_match);
+
+ /*
+ * if we didn't get an exact hit, or if the hit is at either edge of
+ * the node, give up the fast track. A match at one of the edges is
+ * risky because the real match could lie in a neighboring node, and
+ * then the full tree traversal is needed.
+ */
+ if (approx_match || slot <= 0 || slot >= (int)node->get_count() - 1)
+ slot = -1;
+
+ /* fall through */
+ }
+ }
+
+ uint32_t approx_match = 0;
+
+ if (slot == -1) {
+ /* load the root page */
+ page = env->page_manager()->fetch(m_context,
+ m_btree->get_root_address(), PageManager::kReadOnly);
+
+ /* now traverse from the root down until we reach a leaf */
+ node = m_btree->get_node_from_page(page);
+ while (!node->is_leaf()) {
+ page = m_btree->find_child(m_context, page, m_key,
+ PageManager::kReadOnly, 0);
+ if (!page) {
+ stats->find_failed();
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ node = m_btree->get_node_from_page(page);
+ }
+
+ /* check the leaf page for the key (shortcut w/o approx. matching) */
+ if (m_flags == 0) {
+ slot = node->find_exact(m_context, m_key);
+ if (slot == -1) {
+ stats->find_failed();
+ return (HAM_KEY_NOT_FOUND);
+ }
+ }
+
+ /* check the leaf page for the key (long path w/ approx. matching),
+ * then fall through */
+ slot = m_btree->find_leaf(m_context, page, m_key, m_flags,
+ &approx_match);
+ }
+
+ if (slot == -1) {
+ // find the left sibling
+ if (node->get_left() > 0) {
+ page = env->page_manager()->fetch(m_context, node->get_left(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+ slot = node->get_count() - 1;
+ approx_match = BtreeKey::kLower;
+ }
+ }
+
+ else if (slot >= (int)node->get_count()) {
+ // find the right sibling
+ if (node->get_right() > 0) {
+ page = env->page_manager()->fetch(m_context, node->get_right(),
+ PageManager::kReadOnly);
+ node = m_btree->get_node_from_page(page);
+ slot = 0;
+ approx_match = BtreeKey::kGreater;
+ }
+ else
+ slot = -1;
+ }
+
+ if (slot < 0) {
+ stats->find_failed();
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ ham_assert(node->is_leaf());
+
+ /* set the cursor-position to this key */
+ if (m_cursor) {
+ m_cursor->couple_to_page(page, slot, 0);
+ }
+
+ /* approx. match: patch the key flags */
+ if (approx_match) {
+ ham_key_set_intflags(m_key, approx_match);
+ }
+
+ /* no need to load the key if we have an exact match, or if KEY_DONT_LOAD
+ * is set: */
+ if (m_key && approx_match && !(m_flags & Cursor::kSyncDontLoadKey)) {
+ node->get_key(m_context, slot, m_key_arena, m_key);
+ }
+
+ if (m_record) {
+ node->get_record(m_context, slot, m_record_arena, m_record, m_flags);
+ }
+
+ return (0);
+ }
+
+ private:
+ // the current btree
+ BtreeIndex *m_btree;
+
+ // The caller's Context
+ Context *m_context;
+
+ // the current cursor
+ BtreeCursor *m_cursor;
+
+ // the key that is retrieved
+ ham_key_t *m_key;
+
+ // the record that is retrieved
+ ham_record_t *m_record;
+
+ // flags of ham_db_find()
+ uint32_t m_flags;
+
+ // allocator for the key data
+ ByteArray *m_key_arena;
+
+ // allocator for the record data
+ ByteArray *m_record_arena;
+};
+
+ham_status_t
+BtreeIndex::find(Context *context, Cursor *cursor, ham_key_t *key,
+ ByteArray *key_arena, ham_record_t *record,
+ ByteArray *record_arena, uint32_t flags)
+{
+ BtreeFindAction bfa(this, context, cursor, key, key_arena, record,
+ record_arena, flags);
+ return (bfa.run());
+}
+
+} // namespace hamsterdb
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_flags.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_flags.h
new file mode 100644
index 0000000000..e0d77d8ae0
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_flags.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_FLAGS_H
+#define HAM_BTREE_FLAGS_H
+
+#include "0root/root.h"
+
+#include "ham/types.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// A helper class wrapping key-related constants into a common namespace.
+// This class does not contain any logic.
+//
+struct BtreeKey
+{
+ // persisted btree key flags; also used in combination with ham_key_t._flags
+ enum {
+ // key is extended with overflow area
+ kExtendedKey = 0x01,
+
+ // PRO: key is compressed; the original size is stored in the payload
+ kCompressed = 0x08
+ };
+
+ // flags used with the ham_key_t::_flags (note the underscore - this
+ // field is for INTERNAL USE!)
+ //
+ // Note: these flags should NOT overlap with the persisted flags above!
+ //
+ // As these flags NEVER will be persisted, they should be located outside
+ // the range of a uint16_t, i.e. outside the mask 0x0000ffff.
+ enum {
+ // Actual key is lower than the requested key
+ kLower = 0x00010000,
+
+ // Actual key is greater than the requested key
+ kGreater = 0x00020000,
+
+ // Actual key is an "approximate match"
+ kApproximate = (kLower | kGreater)
+ };
+};
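+
+/*
+ * A minimal sketch of how the internal flags above are meant to be
+ * interpreted after an approximate-matching lookup (illustrative only);
+ * |key| is assumed to be the ham_key_t that was passed to the find call:
+ *
+ *     if (key->_flags & BtreeKey::kApproximate) {
+ *       if (key->_flags & BtreeKey::kLower)
+ *         ; // the returned key is lower than the requested key
+ *       else
+ *         ; // the returned key is greater than the requested key
+ *     }
+ */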
+
+//
+// A helper class wrapping record-related constants into a common namespace.
+// This class does not contain any logic.
+//
+struct BtreeRecord
+{
+ enum {
+ // record size < 8; length is encoded at byte[7] of key->ptr
+ kBlobSizeTiny = 0x01,
+
+ // record size == 8; record is stored in key->ptr
+ kBlobSizeSmall = 0x02,
+
+ // record size == 0; key->ptr == 0
+ kBlobSizeEmpty = 0x04,
+
+ // key has duplicates in an overflow area; this is the msb of 1 byte;
+ // the lower bits are the counter for the inline duplicate list
+ kExtendedDuplicates = 0x80
+ };
+};
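+
+/*
+ * A small worked example for the inline-record flags above (illustrative
+ * only): a 5-byte record fits into the 8-byte record-ID field (key->ptr),
+ * so it is stored there directly, kBlobSizeTiny is set, and byte[7] of
+ * that field holds the length (5). A record of exactly 8 bytes is stored
+ * the same way with kBlobSizeSmall (no length byte is needed), and an
+ * empty record just sets kBlobSizeEmpty.
+ */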
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_FLAGS_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_base.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_base.h
new file mode 100644
index 0000000000..d75d2a7be2
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_base.h
@@ -0,0 +1,475 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Base class for btree node implementations
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_IMPL_BASE_H
+#define HAM_BTREE_IMPL_BASE_H
+
+#include "0root/root.h"
+
+#include <sstream>
+#include <iostream>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3btree/btree_node.h"
+#include "3btree/btree_keys_base.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+
+template<typename KeyList, typename RecordList>
+class BaseNodeImpl
+{
+ public:
+ // Constructor
+ BaseNodeImpl(Page *page)
+ : m_page(page), m_node(PBtreeNode::from_page(page)),
+ m_estimated_capacity(0), m_keys(page->get_db()),
+ m_records(page->get_db(), m_node) {
+ }
+
+ // Returns the estimated page's capacity
+ size_t estimate_capacity() const {
+ return (m_estimated_capacity);
+ }
+
+ // Checks this node's integrity
+ virtual void check_integrity(Context *context) const {
+ }
+
+ // Returns a copy of a key and stores it in |dest|
+ void get_key(Context *context, int slot, ByteArray *arena,
+ ham_key_t *dest) {
+ // copy (or assign) the key data
+ m_keys.get_key(context, slot, arena, dest, true);
+ }
+
+ // Returns the record size of a key or one of its duplicates
+ uint64_t get_record_size(Context *context, int slot, int duplicate_index) {
+ return (m_records.get_record_size(context, slot, duplicate_index));
+ }
+
+ // Returns the record counter of a key
+ int get_record_count(Context *context, int slot) {
+ return (m_records.get_record_count(context, slot));
+ }
+
+ // Returns the full record and stores it in |dest|
+ void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags, int duplicate_index) {
+ // copy the record data
+ m_records.get_record(context, slot, arena, record,
+ flags, duplicate_index);
+ }
+
+ // Updates the record of a key
+ void set_record(Context *context, int slot, ham_record_t *record,
+ int duplicate_index, uint32_t flags,
+ uint32_t *new_duplicate_index) {
+ // automatically overwrite an existing key unless this is a
+ // duplicate operation
+ if ((flags & (HAM_DUPLICATE
+ | HAM_DUPLICATE
+ | HAM_DUPLICATE_INSERT_BEFORE
+ | HAM_DUPLICATE_INSERT_AFTER
+ | HAM_DUPLICATE_INSERT_FIRST
+ | HAM_DUPLICATE_INSERT_LAST)) == 0)
+ flags |= HAM_OVERWRITE;
+
+ m_records.set_record(context, slot, duplicate_index, record, flags,
+ new_duplicate_index);
+ }
+
+ // Erases the extended part of a key
+ void erase_extended_key(Context *context, int slot) {
+ m_keys.erase_extended_key(context, slot);
+ }
+
+ // Erases the record
+ void erase_record(Context *context, int slot, int duplicate_index,
+ bool all_duplicates) {
+ m_records.erase_record(context, slot, duplicate_index, all_duplicates);
+ }
+
+ // Erases a key
+ void erase(Context *context, int slot) {
+ size_t node_count = m_node->get_count();
+
+ m_keys.erase(context, node_count, slot);
+ m_records.erase(context, node_count, slot);
+ }
+
+ // Inserts a new key
+ //
+ // Most KeyLists first calculate the slot of the new key, then insert
+ // the key at this slot. Both operations are separate from each other.
+ // However, compressed KeyLists can override this behaviour and
+ // combine both calls into one for better performance.
+ template<typename Cmp>
+ PBtreeNode::InsertResult insert(Context *context, ham_key_t *key,
+ uint32_t flags, Cmp &comparator) {
+ PBtreeNode::InsertResult result(0, 0);
+ size_t node_count = m_node->get_count();
+
+ if (node_count == 0)
+ result.slot = 0;
+ else if (flags & PBtreeNode::kInsertPrepend)
+ result.slot = 0;
+ else if (flags & PBtreeNode::kInsertAppend)
+ result.slot = node_count;
+ else {
+ int cmp;
+ result.slot = find_lowerbound_impl(context, key, comparator, &cmp);
+
+ /* insert the new key at the beginning? */
+ if (result.slot == -1) {
+ result.slot = 0;
+ ham_assert(cmp != 0);
+ }
+ /* key exists already */
+ else if (cmp == 0) {
+ result.status = HAM_DUPLICATE_KEY;
+ return (result);
+ }
+ /* if the new key is > than the slot key: move to the next slot */
+ else if (cmp > 0)
+ result.slot++;
+ }
+
+ // Uncouple the cursors.
+ //
+ // for custom inserts we have to uncouple all cursors, because the
+ // KeyList doesn't have access to the cursors in the page. In this
+ // case result.slot is 0.
+ if ((int)node_count > result.slot)
+ BtreeCursor::uncouple_all_cursors(context, m_page, result.slot);
+
+ // make space for 1 additional element.
+ // only store the key data; flags and record IDs are set by the caller
+ result = m_keys.insert(context, node_count, key, flags, comparator,
+ result.slot);
+ m_records.insert(context, node_count, result.slot);
+ return (result);
+ }
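+
+ // A worked example of the slot arithmetic above (purely illustrative):
+ // assume the node holds the keys [10, 20, 30] and |key| is 25. The
+ // lower-bound search returns slot 1 (key 20) with cmp > 0, so the slot
+ // is incremented and the new key is inserted at slot 2, between 20 and
+ // 30. For |key| = 5 the search returns slot -1, which is mapped to
+ // slot 0; for |key| = 20 it returns cmp == 0 and the insert fails with
+ // HAM_DUPLICATE_KEY.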
+
+ // Compares two keys using the supplied comparator
+ template<typename Cmp>
+ int compare(Context *context, const ham_key_t *lhs,
+ uint32_t rhs, Cmp &cmp) {
+ if (KeyList::kHasSequentialData) {
+ return (cmp(lhs->data, lhs->size, m_keys.get_key_data(rhs),
+ m_keys.get_key_size(rhs)));
+ }
+ else {
+ ham_key_t tmp = {0};
+ m_keys.get_key(context, rhs, &m_arena, &tmp, false);
+ return (cmp(lhs->data, lhs->size, tmp.data, tmp.size));
+ }
+ }
+
+ // Searches the node for the key and returns the slot of this key
+ template<typename Cmp>
+ int find_child(Context *context, ham_key_t *key, Cmp &comparator,
+ uint64_t *precord_id, int *pcmp) {
+ int slot = find_lowerbound_impl(context, key, comparator, pcmp);
+ if (precord_id) {
+ if (slot == -1)
+ *precord_id = m_node->get_ptr_down();
+ else
+ *precord_id = m_records.get_record_id(slot);
+ }
+ return (slot);
+ }
+
+ // Searches the node for the key and returns the slot of this key
+ // - only for exact matches!
+ template<typename Cmp>
+ int find_exact(Context *context, ham_key_t *key, Cmp &comparator) {
+ int cmp = 0;
+ int r = find_exact_impl(context, key, comparator, &cmp);
+ return (cmp ? -1 : r);
+ }
+
+ // Splits a node and moves parts of the current node into |other|, starting
+ // at the |pivot| slot
+ void split(Context *context, BaseNodeImpl<KeyList, RecordList> *other,
+ int pivot) {
+ size_t node_count = m_node->get_count();
+ size_t other_node_count = other->m_node->get_count();
+
+ //
+ // if a leaf page is split then the pivot element must be inserted in
+ // the leaf page AND in the internal node. the internal node update
+ // is handled by the caller.
+ //
+ // in internal nodes the pivot element is only propagated to the
+ // parent node. the pivot element is skipped.
+ //
+ if (m_node->is_leaf()) {
+ m_keys.copy_to(pivot, node_count, other->m_keys,
+ other_node_count, 0);
+ m_records.copy_to(pivot, node_count, other->m_records,
+ other_node_count, 0);
+ }
+ else {
+ m_keys.copy_to(pivot + 1, node_count, other->m_keys,
+ other_node_count, 0);
+ m_records.copy_to(pivot + 1, node_count, other->m_records,
+ other_node_count, 0);
+ }
+ }
+
+ // Returns true if the node requires a merge or a shift
+ bool requires_merge() const {
+ return (m_node->get_count() <= 3);
+ }
+
+ // Merges this node with the |other| node
+ void merge_from(Context *context,
+ BaseNodeImpl<KeyList, RecordList> *other) {
+ size_t node_count = m_node->get_count();
+ size_t other_node_count = other->m_node->get_count();
+
+ // shift items from the sibling to this page
+ if (other_node_count > 0) {
+ other->m_keys.copy_to(0, other_node_count, m_keys,
+ node_count, node_count);
+ other->m_records.copy_to(0, other_node_count, m_records,
+ node_count, node_count);
+ }
+ }
+
+ // Reorganize this node; re-arranges capacities of KeyList and RecordList
+ // in order to free space and avoid splits
+ bool reorganize(Context *context, const ham_key_t *key) const {
+ return (false);
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ metrics->number_of_pages++;
+ metrics->number_of_keys += node_count;
+
+ BtreeStatistics::update_min_max_avg(&metrics->keys_per_page, node_count);
+
+ m_keys.fill_metrics(metrics, node_count);
+ m_records.fill_metrics(metrics, node_count);
+ }
+
+ // Prints a slot to stdout (for debugging)
+ void print(Context *context, int slot) {
+ std::stringstream ss;
+ ss << " ";
+ m_keys.print(context, slot, ss);
+ ss << " -> ";
+ m_records.print(context, slot, ss);
+ std::cout << ss.str() << std::endl;
+ }
+
+ // Returns the record id
+ uint64_t get_record_id(Context *context, int slot) const {
+ return (m_records.get_record_id(slot));
+ }
+
+ // Sets the record id
+ void set_record_id(Context *context, int slot, uint64_t ptr) {
+ m_records.set_record_id(slot, ptr);
+ }
+
+ // The page we're operating on
+ Page *m_page;
+
+ // The node we're operating on
+ PBtreeNode *m_node;
+
+ // Capacity of this node (maximum number of key/record pairs that
+ // can be stored)
+ size_t m_estimated_capacity;
+
+ // for accessing the keys
+ KeyList m_keys;
+
+ // for accessing the records
+ RecordList m_records;
+
+ private:
+ // Implementation of the find method for lower-bound matches. If there
+ // is no exact match then the lower bound is returned, and the compare value
+ // is returned in |*pcmp|.
+ template<typename Cmp>
+ int find_lowerbound_impl(Context *context, const ham_key_t *key,
+ Cmp &comparator, int *pcmp) {
+ switch ((int)KeyList::kSearchImplementation) {
+ case BaseKeyList::kBinaryLinear:
+ return (find_impl_binlin(context, key, comparator, pcmp));
+ case BaseKeyList::kCustomSearch:
+ return (m_keys.find(context, m_node->get_count(), key,
+ comparator, pcmp));
+ default: // BaseKeyList::kBinarySearch
+ return (find_impl_binary(context, key, comparator, pcmp));
+ }
+ }
+
+ // Implementation of the find method for exact matches. Supports a custom
+ // search implementation in the KeyList (i.e. for SIMD).
+ template<typename Cmp>
+ int find_exact_impl(Context *context, const ham_key_t *key,
+ Cmp &comparator, int *pcmp) {
+ switch ((int)KeyList::kSearchImplementation) {
+ case BaseKeyList::kBinaryLinear:
+ return (find_impl_binlin(context, key, comparator, pcmp));
+ case BaseKeyList::kCustomSearch:
+ case BaseKeyList::kCustomExactImplementation:
+ return (m_keys.find(context, m_node->get_count(), key,
+ comparator, pcmp));
+ default: // BaseKeyList::kBinarySearch
+ return (find_impl_binary(context, key, comparator, pcmp));
+ }
+ }
+
+ // Binary search
+ template<typename Cmp>
+ int find_impl_binary(Context *context, const ham_key_t *key,
+ Cmp &comparator, int *pcmp) {
+ size_t node_count = m_node->get_count();
+ ham_assert(node_count > 0);
+
+ int i, l = 0, r = (int)node_count;
+ int last = node_count + 1;
+ int cmp = -1;
+
+ /* repeat until we find the key or the remaining range is empty */
+ while (r - l > 0) {
+ /* get the median item; if it's identical with the "last" item,
+ * we've found the slot */
+ i = (l + r) / 2;
+
+ if (i == last) {
+ ham_assert(i >= 0);
+ ham_assert(i < (int)node_count);
+ *pcmp = 1;
+ return (i);
+ }
+
+ /* compare it against the key */
+ cmp = compare(context, key, i, comparator);
+
+ /* found it? */
+ if (cmp == 0) {
+ *pcmp = cmp;
+ return (i);
+ }
+ /* if the key is smaller than the item: search "to the left" */
+ else if (cmp < 0) {
+ if (r == 0) {
+ ham_assert(i == 0);
+ *pcmp = cmp;
+ return (-1);
+ }
+ r = i;
+ }
+ /* otherwise search "to the right" */
+ else {
+ last = i;
+ l = i;
+ }
+ }
+
+ *pcmp = cmp;
+ return (-1);
+ }
+
+ // Binary search combined with linear search
+ template<typename Cmp>
+ int find_impl_binlin(Context *context, const ham_key_t *key,
+ Cmp &comparator, int *pcmp) {
+ size_t node_count = m_node->get_count();
+ ham_assert(node_count > 0);
+
+ int i, l = 0, r = (int)node_count;
+ int last = node_count + 1;
+ int cmp = -1;
+
+ // Run a binary search, but fall back to linear search as soon as
+ // the remaining range is too small. The threshold is 0 if linear
+ // search is disabled for this KeyList.
+ int threshold = m_keys.get_linear_search_threshold();
+
+ /* repeat until we find the key, or until the remaining range is so small
+ * that a linear search is faster */
+ while (r - l > threshold) {
+ /* get the median item; if it's identical with the "last" item,
+ * we've found the slot */
+ i = (l + r) / 2;
+
+ if (i == last) {
+ ham_assert(i >= 0);
+ ham_assert(i < (int)node_count);
+ *pcmp = 1;
+ return (i);
+ }
+
+ /* compare it against the key */
+ cmp = compare(context, key, i, comparator);
+
+ /* found it? */
+ if (cmp == 0) {
+ *pcmp = cmp;
+ return (i);
+ }
+ /* if the key is smaller than the item: search "to the left" */
+ else if (cmp < 0) {
+ if (r == 0) {
+ ham_assert(i == 0);
+ *pcmp = cmp;
+ return (-1);
+ }
+ r = i;
+ }
+ /* otherwise search "to the right" */
+ else {
+ last = i;
+ l = i;
+ }
+ }
+
+ // still here? then perform a linear search for the remaining range
+ ham_assert(r - l <= threshold);
+ return (m_keys.linear_search(l, r - l, key, comparator, pcmp));
+ }
+
+ // A memory arena for various tasks
+ ByteArray m_arena;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_IMPL_BASE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_default.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_default.h
new file mode 100644
index 0000000000..0e7e5618cc
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_default.h
@@ -0,0 +1,532 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Btree node layout for variable length keys/records and/or duplicates
+ * ====================================================================
+ *
+ * This is the default hamsterdb layout. It is chosen for
+ * 1. variable length keys (with or without duplicates)
+ * 2. fixed length keys with duplicates
+ *
+ * Like the PAX layout implemented in btree_impl_pax.h, the layout implemented
+ * here stores key data and records separated from each other. This layout is
+ * more complex, because it is capable of resizing the KeyList and RecordList
+ * if the node becomes full.
+ *
+ * The flat memory layout looks like this:
+ *
+ * |Idx1|Idx2|...|Idxn|F1|F2|...|Fn|...(space)...|Key1|Key2|...|Keyn|
+ *
+ * ... where Idx<n> are the indices (of slot <n>)
+ * where F<n> are freelist entries
+ * where Key<n> is the key data of slot <n>.
+ *
+ * In addition, the first few bytes in the node store the following
+ * information:
+ * 0 (4 bytes): total capacity of index keys (used keys + freelist)
+ * 4 (4 bytes): number of used freelist entries
+ * 8 (4 bytes): offset for the next key at the end of the page
+ *
+ * In total, |capacity| contains the number of maximum keys (and index
+ * entries) that can be stored in the node. The number of used index keys
+ * is in |m_node->get_count()|. The number of used freelist entries is
+ * returned by |get_freelist_count()|. The freelist indices start directly
+ * after the key indices. The key space (with key data and records) starts at
+ * N * capacity, where |N| is the size of an index entry (the size depends
+ * on the actual btree configuration, i.e. whether key size is fixed,
+ * duplicates are used etc).
+ *
+ * If records have fixed length then all records of a key (with duplicates)
+ * are stored next to each other. If they have variable length then each of
+ * these records is stored with 1 byte for flags:
+ * Rec1|F1|Rec2|F2|...
+ * where Recn is an 8 bytes record-ID (offset in the file) OR inline record,
+ * and F1 is 1 byte for flags (kBlobSizeSmall etc).
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
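+/*
+ * A small worked example of the arithmetic described above (the numbers
+ * are purely illustrative): assume an index entry size |N| of 8 bytes and
+ * a capacity of 64. The index area (used key indices followed by the
+ * freelist entries) then occupies 64 * 8 = 512 bytes, the key space with
+ * the key data starts at offset 512, and the third header field tracks
+ * the offset at which the next key will be written.
+ */
+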
+#ifndef HAM_BTREE_IMPL_DEFAULT_H
+#define HAM_BTREE_IMPL_DEFAULT_H
+
+#include "0root/root.h"
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include <map>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_impl_base.h"
+#include "3btree/btree_node.h"
+#include "3btree/btree_visitor.h"
+#include "4env/env_local.h"
+#include "4db/db_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// A BtreeNodeProxy layout which can handle...
+//
+// 1. fixed length keys w/ duplicates
+// 2. variable length keys w/ duplicates
+// 3. variable length keys w/o duplicates
+//
+// Fixed length keys are stored sequentially and reuse the layout from pax.
+// Same for the distinct RecordList (if duplicates are disabled).
+//
+template<typename KeyList, typename RecordList>
+class DefaultNodeImpl : public BaseNodeImpl<KeyList, RecordList>
+{
+ // C++ does not allow access to members of base classes unless they're
+ // explicitly named; this typedef helps to make the code "less" ugly,
+ // but it still sucks that i have to use it
+ //
+ // http://stackoverflow.com/questions/1120833/derived-template-class-access-to-base-class-member-data
+ typedef BaseNodeImpl<KeyList, RecordList> P;
+
+ // the type of |this| object
+ typedef DefaultNodeImpl<KeyList, RecordList> NodeType;
+
+ enum {
+ // for capacity
+ kPayloadOffset = 4
+ };
+
+ public:
+ // Constructor
+ DefaultNodeImpl(Page *page)
+ : BaseNodeImpl<KeyList, RecordList>(page) {
+ initialize();
+ }
+
+ // Checks the integrity of this node. Throws an exception if there is a
+ // violation.
+ virtual void check_integrity(Context *context) const {
+ size_t node_count = P::m_node->get_count();
+ if (node_count == 0)
+ return;
+
+ check_index_integrity(context, node_count);
+ }
+
+ // Iterates all keys, calls the |visitor| on each
+ void scan(Context *context, ScanVisitor *visitor, uint32_t start,
+ bool distinct) {
+#ifdef HAM_DEBUG
+ check_index_integrity(context, P::m_node->get_count());
+#endif
+
+ // a distinct scan over fixed-length keys can be moved to the KeyList
+ if (KeyList::kSupportsBlockScans && distinct) {
+ P::m_keys.scan(context, visitor, start, P::m_node->get_count() - start);
+ return;
+ }
+
+ // otherwise iterate over the keys, call visitor for each key
+ ham_key_t key = {0};
+ ByteArray arena;
+ size_t node_count = P::m_node->get_count() - start;
+
+ for (size_t i = start; i < node_count; i++) {
+ P::m_keys.get_key(context, i, &arena, &key, false);
+ (*visitor)(key.data, key.size, distinct
+ ? 1
+ : P::get_record_count(context, i));
+ }
+ }
+
+ // Returns the full record and stores it in |dest|
+ void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags, int duplicate_index) {
+#ifdef HAM_DEBUG
+ check_index_integrity(context, P::m_node->get_count());
+#endif
+ P::get_record(context, slot, arena, record, flags, duplicate_index);
+ }
+
+ // Updates the record of a key
+ void set_record(Context *context, int slot, ham_record_t *record,
+ int duplicate_index, uint32_t flags,
+ uint32_t *new_duplicate_index) {
+ P::set_record(context, slot, record, duplicate_index,
+ flags, new_duplicate_index);
+#ifdef HAM_DEBUG
+ check_index_integrity(context, P::m_node->get_count());
+#endif
+ }
+
+ // Erases the record
+ void erase_record(Context *context, int slot, int duplicate_index,
+ bool all_duplicates) {
+ P::erase_record(context, slot, duplicate_index, all_duplicates);
+#ifdef HAM_DEBUG
+ check_index_integrity(context, P::m_node->get_count());
+#endif
+ }
+
+ // Erases a key
+ void erase(Context *context, int slot) {
+ P::erase(context, slot);
+#ifdef HAM_DEBUG
+ check_index_integrity(context, P::m_node->get_count() - 1);
+#endif
+ }
+
+ // Returns true if |key| cannot be inserted because a split is required.
+ // This function will try to re-arrange the node in order for the new
+ // key to fit in.
+ bool requires_split(Context *context, const ham_key_t *key) {
+ size_t node_count = P::m_node->get_count();
+
+ // the node is empty? that's either because nothing was inserted yet,
+ // or because all keys were erased. For the latter case make sure
+ // that no garbage remains behind, otherwise it's possible that
+ // following inserts can fail
+ if (node_count == 0) {
+ P::m_records.vacuumize(node_count, true);
+ P::m_keys.vacuumize(node_count, true);
+ return (false);
+ }
+
+ bool keys_require_split = P::m_keys.requires_split(node_count, key);
+ bool records_require_split = P::m_records.requires_split(node_count);
+ if (!keys_require_split && !records_require_split)
+ return (false);
+
+ // first try to vacuumize the lists without rearranging them
+ if (keys_require_split) {
+ P::m_keys.vacuumize(node_count, false);
+ keys_require_split = P::m_keys.requires_split(node_count, key);
+ }
+
+ if (records_require_split) {
+ P::m_records.vacuumize(node_count, false);
+ records_require_split = P::m_records.requires_split(node_count);
+ }
+
+ if (!keys_require_split && !records_require_split)
+ return (false);
+
+ // now adjust the ranges and the capacity
+ if (reorganize(context, key)) {
+#ifdef HAM_DEBUG
+ check_index_integrity(context, node_count);
+#endif
+ return (false);
+ }
+
+#ifdef HAM_DEBUG
+ check_index_integrity(context, node_count);
+#endif
+
+ // still here? then there's no way to avoid the split
+ BtreeIndex *bi = P::m_page->get_db()->btree_index();
+ bi->get_statistics()->set_keylist_range_size(P::m_node->is_leaf(),
+ load_range_size());
+ bi->get_statistics()->set_keylist_capacities(P::m_node->is_leaf(),
+ node_count);
+ return (true);
+ }
+
+ // Splits this node and moves some/half of the keys to |other|
+ void split(Context *context, DefaultNodeImpl *other, int pivot) {
+ size_t node_count = P::m_node->get_count();
+
+#ifdef HAM_DEBUG
+ check_index_integrity(context, node_count);
+ ham_assert(other->m_node->get_count() == 0);
+#endif
+
+ // make sure that the other node has enough free space
+ other->initialize(this);
+
+ P::split(context, other, pivot);
+
+ P::m_keys.vacuumize(pivot, true);
+ P::m_records.vacuumize(pivot, true);
+
+#ifdef HAM_DEBUG
+ check_index_integrity(context, pivot);
+ if (P::m_node->is_leaf())
+ other->check_index_integrity(context, node_count - pivot);
+ else
+ other->check_index_integrity(context, node_count - pivot - 1);
+#endif
+ }
+
+ // Merges keys from |other| to this node
+ void merge_from(Context *context, DefaultNodeImpl *other) {
+ size_t node_count = P::m_node->get_count();
+
+ P::m_keys.vacuumize(node_count, true);
+ P::m_records.vacuumize(node_count, true);
+
+ P::merge_from(context, other);
+
+#ifdef HAM_DEBUG
+ check_index_integrity(context, node_count + other->m_node->get_count());
+#endif
+ }
+
+ // Adjusts the size of both lists; either increases it or decreases
+ // it (in order to free up space for variable length data).
+ // Returns true if |key| and an additional record can be inserted, or
+ // false if not; in this case the caller must perform a split.
+ bool reorganize(Context *context, const ham_key_t *key) {
+ size_t node_count = P::m_node->get_count();
+
+ // One of the lists must be resizable (otherwise they would be managed
+ // by the PaxLayout)
+ ham_assert(!KeyList::kHasSequentialData
+ || !RecordList::kHasSequentialData);
+
+ // Retrieve the minimum sizes that both lists require to store their
+ // data
+ size_t capacity_hint;
+ size_t old_key_range_size = load_range_size();
+ size_t key_range_size, record_range_size;
+ size_t required_key_range, required_record_range;
+ size_t usable_size = usable_range_size();
+ required_key_range = P::m_keys.get_required_range_size(node_count)
+ + P::m_keys.get_full_key_size(key);
+ required_record_range = P::m_records.get_required_range_size(node_count)
+ + P::m_records.get_full_record_size();
+
+ uint8_t *p = P::m_node->get_data();
+ p += sizeof(uint32_t);
+
+ // no records? then there's no way to change the ranges. but maybe we
+ // can increase the capacity
+ if (required_record_range == 0) {
+ if (required_key_range > usable_size)
+ return (false);
+ P::m_keys.change_range_size(node_count, p, usable_size,
+ node_count + 5);
+ return (!P::m_keys.requires_split(node_count, key));
+ }
+
+ int remainder = usable_size
+ - (required_key_range + required_record_range);
+ if (remainder < 0)
+ return (false);
+
+ // Now split the remainder between both lists
+ size_t additional_capacity = remainder
+ / (P::m_keys.get_full_key_size(0) +
+ P::m_records.get_full_record_size());
+ if (additional_capacity == 0)
+ return (false);
+
+ key_range_size = required_key_range + additional_capacity
+ * P::m_keys.get_full_key_size(0);
+ record_range_size = usable_size - key_range_size;
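+
+ // (Illustration with made-up numbers: usable_size = 1000,
+ // required_key_range = 300, required_record_range = 400 and 8-byte full
+ // key/record sizes give remainder = 300, additional_capacity
+ // = 300 / 16 = 18, key_range_size = 300 + 18 * 8 = 444 and
+ // record_range_size = 556, which still covers both required ranges.)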
+
+ ham_assert(key_range_size + record_range_size <= usable_size);
+
+ // Check if the required record space is large enough, and make sure
+ // there is enough room for a new item
+ if (key_range_size > usable_size
+ || record_range_size > usable_size
+ || key_range_size == old_key_range_size
+ || key_range_size < required_key_range
+ || record_range_size < required_record_range
+ || key_range_size + record_range_size > usable_size)
+ return (false);
+
+ capacity_hint = get_capacity_hint(key_range_size, record_range_size);
+
+ // sanity check: make sure that the new capacity would be big
+ // enough for all the keys
+ if (capacity_hint > 0 && capacity_hint < node_count)
+ return (false);
+
+ if (capacity_hint == 0) {
+ BtreeStatistics *bstats = P::m_page->get_db()->btree_index()->get_statistics();
+ capacity_hint = bstats->get_keylist_capacities(P::m_node->is_leaf());
+ }
+
+ if (capacity_hint < node_count)
+ capacity_hint = node_count + 1;
+
+ // Get a pointer to the data area and persist the new range size
+ // of the KeyList
+ store_range_size(key_range_size);
+
+ // Now update the lists. If the KeyList grows then start with resizing
+ // the RecordList, otherwise the moved KeyList will overwrite the
+ // beginning of the RecordList.
+ if (key_range_size > old_key_range_size) {
+ P::m_records.change_range_size(node_count, p + key_range_size,
+ usable_size - key_range_size,
+ capacity_hint);
+ P::m_keys.change_range_size(node_count, p, key_range_size,
+ capacity_hint);
+ }
+ // And vice versa if the RecordList grows
+ else {
+ P::m_keys.change_range_size(node_count, p, key_range_size,
+ capacity_hint);
+ P::m_records.change_range_size(node_count, p + key_range_size,
+ usable_size - key_range_size,
+ capacity_hint);
+ }
+
+ // make sure that the page is flushed to disk
+ P::m_page->set_dirty(true);
+
+#ifdef HAM_DEBUG
+ check_index_integrity(context, node_count);
+#endif
+
+ // finally check if the new space is sufficient for the new key
+ // TODO this shouldn't be required if the check above is implemented
+ // -> change to an assert, then return true
+ return (!P::m_records.requires_split(node_count)
+ && !P::m_keys.requires_split(node_count, key));
+ }
+
+ private:
+ // Initializes the node
+ void initialize(NodeType *other = 0) {
+ LocalDatabase *db = P::m_page->get_db();
+ size_t usable_size = usable_range_size();
+
+ // initialize this page in the same way as |other| was initialized
+ if (other) {
+ size_t key_range_size = other->load_range_size();
+
+ // persist the range size
+ store_range_size(key_range_size);
+ uint8_t *p = P::m_node->get_data();
+ p += sizeof(uint32_t);
+
+ // create the KeyList and RecordList
+ P::m_keys.create(p, key_range_size);
+ P::m_records.create(p + key_range_size,
+ usable_size - key_range_size);
+ }
+ // initialize a new page from scratch
+ else if ((P::m_node->get_count() == 0
+ && !(db->get_flags() & HAM_READ_ONLY))) {
+ size_t key_range_size;
+ size_t record_range_size;
+
+ // ask the btree for the default range size (it keeps
+ // track of the average range size of older pages).
+ BtreeStatistics *bstats = db->btree_index()->get_statistics();
+ key_range_size = bstats->get_keylist_range_size(P::m_node->is_leaf());
+
+ // no data so far? then come up with a good default
+ if (key_range_size == 0) {
+ // no records? then assign the full range to the KeyList
+ if (P::m_records.get_full_record_size() == 0) {
+ key_range_size = usable_size;
+ }
+ // Otherwise split the range between both lists
+ else {
+ size_t capacity = usable_size
+ / (P::m_keys.get_full_key_size(0) +
+ P::m_records.get_full_record_size());
+ key_range_size = capacity * P::m_keys.get_full_key_size(0);
+ }
+ }
+
+ record_range_size = usable_size - key_range_size;
+
+ ham_assert(key_range_size + record_range_size <= usable_size);
+
+ // persist the key range size
+ store_range_size(key_range_size);
+ uint8_t *p = P::m_node->get_data();
+ p += sizeof(uint32_t);
+
+ // and create the lists
+ P::m_keys.create(p, key_range_size);
+ P::m_records.create(p + key_range_size, record_range_size);
+
+ P::m_estimated_capacity = key_range_size
+ / (size_t)P::m_keys.get_full_key_size();
+ }
+ // open a page; read initialization parameters from persisted storage
+ else {
+ size_t key_range_size = load_range_size();
+ size_t record_range_size = usable_size - key_range_size;
+ uint8_t *p = P::m_node->get_data();
+ p += sizeof(uint32_t);
+
+ P::m_keys.open(p, key_range_size, P::m_node->get_count());
+ P::m_records.open(p + key_range_size, record_range_size,
+ P::m_node->get_count());
+
+ P::m_estimated_capacity = key_range_size
+ / (size_t)P::m_keys.get_full_key_size();
+ }
+ }
+
+ // Try to get a clue about the capacity of the lists; this will help
+ // those lists with an UpfrontIndex to better arrange their layout
+ size_t get_capacity_hint(size_t key_range_size, size_t record_range_size) {
+ if (KeyList::kHasSequentialData)
+ return (key_range_size / P::m_keys.get_full_key_size());
+ if (RecordList::kHasSequentialData && P::m_records.get_full_record_size())
+ return (record_range_size / P::m_records.get_full_record_size());
+ return (0);
+ }
+
+ // Checks the integrity of the key- and record-ranges. Throws an exception
+ // if there's a problem.
+ void check_index_integrity(Context *context, size_t node_count) const {
+ P::m_keys.check_integrity(context, node_count);
+ P::m_records.check_integrity(context, node_count);
+ }
+
+ // Returns the usable page size that can be used for actually
+ // storing the data
+ size_t usable_range_size() const {
+ return (Page::usable_page_size(P::m_page->get_db()->lenv()->config().page_size_bytes)
+ - kPayloadOffset
+ - PBtreeNode::get_entry_offset()
+ - sizeof(uint32_t));
+ }
+
+ // Persists the KeyList's range size
+ void store_range_size(size_t key_range_size) {
+ uint8_t *p = P::m_node->get_data();
+ *(uint32_t *)p = (uint32_t)key_range_size;
+ }
+
+ // Load the stored KeyList's range size
+ size_t load_range_size() const {
+ uint8_t *p = P::m_node->get_data();
+ return (*(uint32_t *)p);
+ }
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_IMPL_DEFAULT_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_pax.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_pax.h
new file mode 100644
index 0000000000..3a87f1c914
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_impl_pax.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Btree node layout for fixed length keys WITHOUT duplicates
+ * ==========================================================
+ *
+ * This layout supports fixed length keys and fixed length records. It does
+ * not support duplicates and extended keys. Keys and records are always
+ * inlined, but records can refer to blobs (in this case the "fixed length"
+ * record is the 8 byte record ID).
+ *
+ * Unlike the academic PAX paper, which stored multiple columns in one page,
+ * hamsterdb stores only one column (= database) in a page, but keys and
+ * records are separated from each other. The keys (flags + key data) are
+ * stored in the beginning of the page, the records start somewhere in the
+ * middle (the exact start position depends on key size, page size and other
+ * parameters).
+ *
+ * This layout's implementation is relatively simple because the offset
+ * of the key data and record data is easy to calculate since all keys
+ * and records have the same size.
+ *
+ * This separation of keys and records allows a more compact layout and a
+ * high density of the key data, which better exploits CPU caches and allows
+ * very tight loops when searching through the keys.
+ *
+ * This layout has two incarnations:
+ * 1. Fixed length keys, fixed length inline records
+ * -> does not require additional flags
+ * 2. Fixed length keys, variable length records (8 byte record id)
+ * -> requires a 1 byte flag per key
+ *
+ * The flat memory layout looks like this:
+ *
+ * |Flag1|Flag2|...|Flagn|...|Key1|Key2|...|Keyn|...|Rec1|Rec2|...|Recn|
+ *
+ * Flags are optional, as described above.
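+ *
+ * As a rough illustration only (the real numbers depend on the page size
+ * and on the configured key and record sizes): with a usable node size of
+ * 16000 bytes, 8-byte keys, 8-byte inline records and no per-key flags,
+ * the node holds capacity = 16000 / (8 + 8) = 1000 slots; key #N then
+ * starts at offset N * 8 and record #N at offset 1000 * 8 + N * 8.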
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_IMPL_PAX_H
+#define HAM_BTREE_IMPL_PAX_H
+
+#include "0root/root.h"
+
+#include <sstream>
+#include <iostream>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_node.h"
+#include "3btree/btree_impl_base.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// A BtreeNodeProxy layout which stores key data, key flags and
+// the record pointers in a PAX style layout.
+//
+template<typename KeyList, typename RecordList>
+class PaxNodeImpl : public BaseNodeImpl<KeyList, RecordList>
+{
+ // C++ does not allow access to members of base classes unless they're
+ // explicitly named; this typedef helps to make the code "less" ugly,
+ // but it still sucks that I have to use it
+ //
+ // http://stackoverflow.com/questions/1120833/derived-template-class-access-to-base-class-member-data
+ typedef BaseNodeImpl<KeyList, RecordList> P;
+
+ public:
+ // Constructor
+ PaxNodeImpl(Page *page)
+ : BaseNodeImpl<KeyList, RecordList>(page) {
+ initialize();
+ }
+
+ // Iterates all keys, calls the |visitor| on each
+ void scan(Context *context, ScanVisitor *visitor, uint32_t start,
+ bool distinct) {
+ P::m_keys.scan(context, visitor, start, P::m_node->get_count() - start);
+ }
+
+ // Returns true if |key| cannot be inserted because a split is required
+ bool requires_split(Context *context, const ham_key_t *key) const {
+ return (P::m_node->get_count() >= P::m_estimated_capacity);
+ }
+
+ private:
+ void initialize() {
+ uint32_t usable_nodesize
+ = Page::usable_page_size(P::m_page->get_db()->lenv()->config().page_size_bytes)
+ - PBtreeNode::get_entry_offset();
+ size_t ks = P::m_keys.get_full_key_size();
+ size_t rs = P::m_records.get_full_record_size();
+ size_t capacity = usable_nodesize / (ks + rs);
+
+ uint8_t *p = P::m_node->get_data();
+ if (P::m_node->get_count() == 0) {
+ P::m_keys.create(&p[0], capacity * ks);
+ P::m_records.create(&p[capacity * ks], capacity * rs);
+ }
+ else {
+ size_t key_range_size = capacity * ks;
+ size_t record_range_size = capacity * rs;
+
+ P::m_keys.open(p, key_range_size, P::m_node->get_count());
+ P::m_records.open(p + key_range_size, record_range_size,
+ P::m_node->get_count());
+ }
+
+ P::m_estimated_capacity = capacity;
+ }
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_IMPL_PAX_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index.cc
new file mode 100644
index 0000000000..a934ba441b
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index.cc
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "2page/page.h"
+#include "3page_manager/page_manager.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_index_factory.h"
+#include "3btree/btree_node_proxy.h"
+#include "4db/db.h"
+#include "4env/env.h"
+#include "4cursor/cursor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+uint64_t BtreeIndex::ms_btree_smo_split = 0;
+uint64_t BtreeIndex::ms_btree_smo_merge = 0;
+uint64_t BtreeIndex::ms_btree_smo_shift = 0;
+
+BtreeIndex::BtreeIndex(LocalDatabase *db, PBtreeHeader *btree_header,
+ uint32_t flags, uint32_t key_type, uint32_t key_size)
+ : m_db(db), m_key_size(0), m_key_type(key_type), m_rec_size(0),
+ m_btree_header(btree_header), m_flags(flags), m_root_address(0)
+{
+ m_leaf_traits = BtreeIndexFactory::create(db, flags, key_type,
+ key_size, true);
+ m_internal_traits = BtreeIndexFactory::create(db, flags, key_type,
+ key_size, false);
+}
+
+void
+BtreeIndex::create(Context *context, uint16_t key_type, uint32_t key_size,
+ uint32_t rec_size)
+{
+ ham_assert(key_size != 0);
+
+ /* allocate a new root page */
+ Page *root = m_db->lenv()->page_manager()->alloc(context,
+ Page::kTypeBroot, PageManager::kClearWithZero);
+
+ // initialize the new page
+ PBtreeNode *node = PBtreeNode::from_page(root);
+ node->set_flags(PBtreeNode::kLeafNode);
+
+ m_key_size = key_size;
+ m_key_type = key_type;
+ m_rec_size = rec_size;
+ m_root_address = root->get_address();
+
+ flush_descriptor(context);
+}
+
+void
+BtreeIndex::open()
+{
+ uint64_t rootadd;
+ uint16_t key_size;
+ uint16_t key_type;
+ uint32_t flags;
+ uint32_t rec_size;
+
+ key_size = m_btree_header->get_key_size();
+ key_type = m_btree_header->get_key_type();
+ rec_size = m_btree_header->get_record_size();
+ rootadd = m_btree_header->get_root_address();
+ flags = m_btree_header->get_flags();
+
+ ham_assert(key_size > 0);
+ ham_assert(rootadd > 0);
+
+ m_root_address = rootadd;
+ m_key_size = key_size;
+ m_key_type = key_type;
+ m_flags = flags;
+ m_rec_size = rec_size;
+}
+
+void
+BtreeIndex::set_record_compression(Context *context, int algo)
+{
+ m_btree_header->set_record_compression(algo);
+ flush_descriptor(context);
+}
+
+int
+BtreeIndex::get_record_compression()
+{
+ return (m_btree_header->get_record_compression());
+}
+
+void
+BtreeIndex::set_key_compression(Context *context, int algo)
+{
+ m_btree_header->set_key_compression(algo);
+ flush_descriptor(context);
+}
+
+int
+BtreeIndex::get_key_compression()
+{
+ return (m_btree_header->get_key_compression());
+}
+
+void
+BtreeIndex::flush_descriptor(Context *context)
+{
+ if (m_db->get_flags() & HAM_READ_ONLY)
+ return;
+
+ m_btree_header->set_dbname(m_db->name());
+ m_btree_header->set_key_size(get_key_size());
+ m_btree_header->set_rec_size(get_record_size());
+ m_btree_header->set_key_type(get_key_type());
+ m_btree_header->set_root_address(get_root_address());
+ m_btree_header->set_flags(get_flags());
+}
+
+Page *
+BtreeIndex::find_child(Context *context, Page *page, const ham_key_t *key,
+ uint32_t page_manager_flags, int *idxptr)
+{
+ BtreeNodeProxy *node = get_node_from_page(page);
+
+ // make sure that we're not in a leaf page, and that the
+ // page is not empty
+ ham_assert(node->get_ptr_down() != 0);
+
+ uint64_t record_id;
+ int slot = node->find_child(context, (ham_key_t *)key, &record_id);
+
+ if (idxptr)
+ *idxptr = slot;
+
+ return (m_db->lenv()->page_manager()->fetch(context,
+ record_id, page_manager_flags));
+}
+
+int
+BtreeIndex::find_leaf(Context *context, Page *page, ham_key_t *key,
+ uint32_t flags, uint32_t *approx_match)
+{
+ *approx_match = 0;
+
+ /* ensure the approx flag is NOT set by anyone yet */
+ BtreeNodeProxy *node = get_node_from_page(page);
+ if (node->get_count() == 0)
+ return (-1);
+
+ int cmp;
+ int slot = node->find_child(context, key, 0, &cmp);
+
+ /* successful match */
+ if (cmp == 0 && (flags == 0 || flags & HAM_FIND_EXACT_MATCH))
+ return (slot);
+
+ /* approx. matching: smaller key is required */
+ if (flags & HAM_FIND_LT_MATCH) {
+ if (cmp == 0 && (flags & HAM_FIND_GT_MATCH)) {
+ *approx_match = BtreeKey::kLower;
+ return (slot + 1);
+ }
+
+ if (slot < 0 && (flags & HAM_FIND_GT_MATCH)) {
+ *approx_match = BtreeKey::kGreater;
+ return (0);
+ }
+ *approx_match = BtreeKey::kLower;
+ if (cmp <= 0)
+ return (slot - 1);
+ return (slot);
+ }
+
+ /* approx. matching: greater key is required */
+ if (flags & HAM_FIND_GT_MATCH) {
+ *approx_match = BtreeKey::kGreater;
+ return (slot + 1);
+ }
+
+ return (cmp ? -1 : slot);
+}
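+
+// For illustration (assuming |find_child| returns the slot of the last key
+// that is not greater than |key|): with leaf keys {10, 20, 30} and a lookup
+// key of 25, find_leaf returns -1 for HAM_FIND_EXACT_MATCH (no exact match),
+// the slot of 20 for HAM_FIND_LT_MATCH (*approx_match = kLower) and the
+// slot of 30 for HAM_FIND_GT_MATCH (*approx_match = kGreater).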
+
+//
+// visitor object for estimating / counting the number of keys
+//
+class CalcKeysVisitor : public BtreeVisitor {
+ public:
+ CalcKeysVisitor(LocalDatabase *db, bool distinct)
+ : m_db(db), m_distinct(distinct), m_count(0) {
+ }
+
+ virtual bool is_read_only() const {
+ return (true);
+ }
+
+ virtual void operator()(Context *context, BtreeNodeProxy *node) {
+ size_t node_count = node->get_count();
+
+ if (m_distinct
+ || (m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS) == 0) {
+ m_count += node_count;
+ return;
+ }
+
+ for (size_t i = 0; i < node_count; i++)
+ m_count += node->get_record_count(context, i);
+ }
+
+ uint64_t get_result() const {
+ return (m_count);
+ }
+
+ private:
+ LocalDatabase *m_db;
+ bool m_distinct;
+ uint64_t m_count;
+};
+
+uint64_t
+BtreeIndex::count(Context *context, bool distinct)
+{
+ CalcKeysVisitor visitor(m_db, distinct);
+ visit_nodes(context, visitor, false);
+ return (visitor.get_result());
+}
+
+//
+// visitor object to free all allocated blobs
+//
+class FreeBlobsVisitor : public BtreeVisitor {
+ public:
+ virtual void operator()(Context *context, BtreeNodeProxy *node) {
+ node->remove_all_entries(context);
+ }
+
+ virtual bool is_read_only() const {
+ return (false);
+ }
+};
+
+void
+BtreeIndex::release(Context *context)
+{
+ FreeBlobsVisitor visitor;
+ visit_nodes(context, visitor, true);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index.h
new file mode 100644
index 0000000000..f325f7915f
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index.h
@@ -0,0 +1,455 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_INDEX_H
+#define HAM_BTREE_INDEX_H
+
+#include "0root/root.h"
+
+#include <algorithm>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/abi.h"
+#include "1base/dynamic_array.h"
+#include "3btree/btree_cursor.h"
+#include "3btree/btree_stats.h"
+#include "3btree/btree_node.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+
+#include "1base/packstart.h"
+
+//
+// The persistent btree index descriptor. This structure manages the
+// persistent btree metadata.
+//
+HAM_PACK_0 class HAM_PACK_1 PBtreeHeader
+{
+ public:
+ PBtreeHeader() {
+ memset(this, 0, sizeof(*this));
+ }
+
+ // Returns the database name
+ uint16_t get_dbname() const {
+ return (m_dbname);
+ }
+
+ // Sets the database name
+ void set_dbname(uint16_t name) {
+ m_dbname = name;
+ }
+
+ // Returns the btree's max. key_size
+ size_t get_key_size() const {
+ return (m_key_size);
+ }
+
+ // Sets the btree's max. key_size
+ void set_key_size(uint16_t key_size) {
+ m_key_size = key_size;
+ }
+
+ // Returns the record size (or 0 if none was specified)
+ uint32_t get_record_size() const {
+ return (m_rec_size);
+ }
+
+ // Sets the record size
+ void set_rec_size(uint32_t rec_size) {
+ m_rec_size = rec_size;
+ }
+
+ // Returns the btree's key type
+ uint16_t get_key_type() const {
+ return (m_key_type);
+ }
+
+ // Sets the btree's key type
+ void set_key_type(uint16_t key_type) {
+ m_key_type = key_type;
+ }
+
+ // Returns the address of the btree's root page.
+ uint64_t get_root_address() const {
+ return (m_root_address);
+ }
+
+ // Sets the address of the btree's root page.
+ void set_root_address(uint64_t root_address) {
+ m_root_address = root_address;
+ }
+
+ // Returns the btree's flags
+ uint32_t get_flags() const {
+ return (m_flags);
+ }
+
+ // Sets the btree's flags
+ void set_flags(uint32_t flags) {
+ m_flags = flags;
+ }
+
+ // PRO: Returns the record compression
+ uint8_t get_record_compression() const {
+ return (m_compression >> 4);
+ }
+
+ // PRO: Sets the record compression
+ void set_record_compression(int algorithm) {
+ m_compression |= algorithm << 4;
+ }
+
+ // PRO: Returns the key compression
+ uint8_t get_key_compression() const {
+ return (m_compression & 0xf);
+ }
+
+ // PRO: Sets the key compression
+ void set_key_compression(int algorithm) {
+ m_compression |= algorithm & 0xf;
+ }
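+
+ // (The two algorithms share one byte: the record compression occupies
+ // the high nibble and the key compression the low nibble. Starting from
+ // an empty header, set_record_compression(2) followed by
+ // set_key_compression(3) therefore stores 0x23. Note that the setters
+ // OR the new value into the byte, i.e. they assume the nibble was 0.)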
+
+ private:
+ // address of the root-page
+ uint64_t m_root_address;
+
+ // flags for this database
+ uint32_t m_flags;
+
+ // The name of the database
+ uint16_t m_dbname;
+
+ // key size used in the pages
+ uint16_t m_key_size;
+
+ // key type
+ uint16_t m_key_type;
+
+ // PRO: for storing the key and record compression algorithms
+ uint8_t m_compression;
+
+ // reserved
+ uint8_t m_reserved1;
+
+ // the record size
+ uint32_t m_rec_size;
+
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
+
+struct Context;
+class LocalDatabase;
+class BtreeNodeProxy;
+struct PDupeEntry;
+struct BtreeVisitor;
+
+//
+// Abstract base class, overridden by a templated implementation
+//
+class BtreeIndexTraits
+{
+ public:
+ // virtual destructor
+ virtual ~BtreeIndexTraits() { }
+
+ // Compares two keys
+ // Returns the result of the key comparison: 0 if both keys match,
+ // a negative value when LHS < RHS, a positive value when LHS > RHS.
+ virtual int compare_keys(LocalDatabase *db, ham_key_t *lhs,
+ ham_key_t *rhs) const = 0;
+
+ // Returns the class name (for testing)
+ virtual std::string test_get_classname() const = 0;
+
+ // Implementation of get_node_from_page()
+ virtual BtreeNodeProxy *get_node_from_page_impl(Page *page) const = 0;
+};
+
+//
+// The Btree. Derived by BtreeIndexImpl, which uses template policies to
+// define the btree node layout.
+//
+class BtreeIndex
+{
+ public:
+ enum {
+ // for get_node_from_page(): Page is a leaf
+ kLeafPage = 1,
+
+ // for get_node_from_page(): Page is an internal node
+ kInternalPage = 2
+ };
+
+ // Constructor; creates and initializes a new btree
+ BtreeIndex(LocalDatabase *db, PBtreeHeader *btree_header,
+ uint32_t flags, uint32_t key_type, uint32_t key_size);
+
+ ~BtreeIndex() {
+ delete m_leaf_traits;
+ m_leaf_traits = 0;
+ delete m_internal_traits;
+ m_internal_traits = 0;
+ }
+
+ // Returns the database pointer
+ LocalDatabase *get_db() {
+ return (m_db);
+ }
+
+ // Returns the database pointer
+ LocalDatabase *get_db() const {
+ return (m_db);
+ }
+
+ // Returns the internal key size
+ size_t get_key_size() const {
+ return (m_key_size);
+ }
+
+ // Returns the record size
+ size_t get_record_size() const {
+ return (m_rec_size);
+ }
+
+ // Returns the internal key type
+ uint16_t get_key_type() const {
+ return (m_key_type);
+ }
+
+ // Returns the address of the root page
+ uint64_t get_root_address() const {
+ return (m_root_address);
+ }
+
+ // Returns the btree flags
+ uint32_t get_flags() const {
+ return (m_flags);
+ }
+
+ // Creates and initializes the btree
+ //
+ // This function is called after the ham_db_t structure was allocated
+ // and the file was opened
+ void create(Context *context, uint16_t key_type, uint32_t key_size,
+ uint32_t rec_size);
+
+ // Opens and initializes the btree
+ //
+ // This function is called after the ham_db_t structure was allocated
+ // and the file was opened
+ void open();
+
+ // Sets the record compression algorithm
+ void set_record_compression(Context *context, int algo);
+
+ // Returns the record compression algorithm
+ int get_record_compression();
+
+ // Sets the key compression algorithm
+ void set_key_compression(Context *context, int algo);
+
+ // Returns the key compression algorithm
+ int get_key_compression();
+
+ // Lookup a key in the index (ham_db_find)
+ ham_status_t find(Context *context, Cursor *cursor, ham_key_t *key,
+ ByteArray *key_arena, ham_record_t *record,
+ ByteArray *record_arena, uint32_t flags);
+
+ // Inserts (or updates) a key/record in the index (ham_db_insert)
+ ham_status_t insert(Context *context, Cursor *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
+ // Erases a key/record from the index (ham_db_erase).
+ // If |duplicate_index| is 0 then all duplicates are erased, otherwise only
+ // the specified duplicate is erased.
+ ham_status_t erase(Context *context, Cursor *cursor, ham_key_t *key,
+ int duplicate_index, uint32_t flags);
+
+ // Iterates over the whole index and calls |visitor| on every node
+ void visit_nodes(Context *context, BtreeVisitor &visitor,
+ bool visit_internal_nodes);
+
+ // Checks the integrity of the btree (ham_db_check_integrity)
+ void check_integrity(Context *context, uint32_t flags);
+
+ // Counts the keys in the btree
+ uint64_t count(Context *context, bool distinct);
+
+ // Erases all records, overflow areas, extended keys etc from the index;
+ // used to avoid memory leaks when closing in-memory Databases and to
+ // clean up when deleting on-disk Databases.
+ void release(Context *context);
+
+ // Compares two keys
+ // Returns the result of the key comparison: 0 if both keys match,
+ // a negative value when LHS < RHS, a positive value when LHS > RHS.
+ int compare_keys(ham_key_t *lhs, ham_key_t *rhs) const {
+ return (m_leaf_traits->compare_keys(m_db, lhs, rhs));
+ }
+
+ // Returns a BtreeNodeProxy for a Page
+ BtreeNodeProxy *get_node_from_page(Page *page) {
+ if (page->get_node_proxy())
+ return (page->get_node_proxy());
+
+ BtreeNodeProxy *proxy;
+ PBtreeNode *node = PBtreeNode::from_page(page);
+ if (node->is_leaf())
+ proxy = get_leaf_node_from_page_impl(page);
+ else
+ proxy = get_internal_node_from_page_impl(page);
+
+ page->set_node_proxy(proxy);
+ return (proxy);
+ }
+
+ // Returns the usage metrics
+ static void fill_metrics(ham_env_metrics_t *metrics) {
+ metrics->btree_smo_split = ms_btree_smo_split;
+ metrics->btree_smo_merge = ms_btree_smo_merge;
+ metrics->extended_keys = Globals::ms_extended_keys;
+ metrics->extended_duptables = Globals::ms_extended_duptables;
+ metrics->key_bytes_before_compression
+ = Globals::ms_bytes_before_compression;
+ metrics->key_bytes_after_compression
+ = Globals::ms_bytes_after_compression;
+ }
+
+ // Returns the btree usage statistics
+ BtreeStatistics *get_statistics() {
+ return (&m_statistics);
+ }
+
+ // Returns the class name (for testing)
+ std::string test_get_classname() const {
+ return (m_leaf_traits->test_get_classname());
+ }
+
+ private:
+ friend class BtreeUpdateAction;
+ friend class BtreeCheckAction;
+ friend class BtreeEnumAction;
+ friend class BtreeEraseAction;
+ friend class BtreeFindAction;
+ friend class BtreeInsertAction;
+ friend class BtreeCursor;
+ friend struct MiscFixture;
+ friend struct BtreeKeyFixture;
+ friend struct BtreeCursorFixture;
+ friend struct DbFixture;
+ friend struct DuplicateFixture;
+
+ // Implementation of get_node_from_page() (for leaf nodes)
+ BtreeNodeProxy *get_leaf_node_from_page_impl(Page *page) const {
+ return (m_leaf_traits->get_node_from_page_impl(page));
+ }
+
+ // Implementation of get_node_from_page() (for internal nodes)
+ BtreeNodeProxy *get_internal_node_from_page_impl(Page *page) const {
+ return (m_internal_traits->get_node_from_page_impl(page));
+ }
+
+ // Sets the address of the root page
+ void set_root_address(Context *context, uint64_t address) {
+ m_root_address = address;
+ flush_descriptor(context);
+ }
+
+ // Flushes the PBtreeHeader to the Environment's header page
+ void flush_descriptor(Context *context);
+
+ // Searches |parent| page for key |key| and returns the child
+ // page in |child|.
+ //
+ // |page_manager_flags| are forwarded to PageManager::fetch.
+ //
+ // if |idxptr| is a valid pointer then it will return the anchor index
+ // of the loaded page.
+ Page *find_child(Context *context, Page *parent, const ham_key_t *key,
+ uint32_t page_manager_flags, int *idxptr);
+
+ // Searches a leaf node for a key.
+ //
+ // Note: this function only works with leaf nodes!
+ //
+ // Returns the index of the key, or -1 if the key was not found, or
+ // another negative status code value when an unexpected error occurred.
+ int find_leaf(Context *context, Page *page, ham_key_t *key, uint32_t flags,
+ uint32_t *approx_match);
+
+ // pointer to the database object
+ LocalDatabase *m_db;
+
+ // the Traits class wrapping the template parameters (factory for
+ // leaf nodes)
+ BtreeIndexTraits *m_leaf_traits;
+
+ // the Traits class wrapping the template parameters (factory for
+ // internal nodes)
+ BtreeIndexTraits *m_internal_traits;
+
+ // the key_size of this btree index
+ uint16_t m_key_size;
+
+ // the key_type of this btree index
+ uint16_t m_key_type;
+
+ // the record size (or 0 if none was specified)
+ uint32_t m_rec_size;
+
+ // the index of the PBtreeHeader in the Environment's header page
+ PBtreeHeader *m_btree_header;
+
+ // the persistent flags of this btree index
+ uint32_t m_flags;
+
+ // address of the root-page
+ uint64_t m_root_address;
+
+ // the btree statistics
+ BtreeStatistics m_statistics;
+
+ // usage metrics - number of page splits
+ static uint64_t ms_btree_smo_split;
+
+ // usage metrics - number of page merges
+ static uint64_t ms_btree_smo_merge;
+
+ // usage metrics - number of page shifts
+ static uint64_t ms_btree_smo_shift;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_INDEX_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index_factory.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index_factory.h
new file mode 100644
index 0000000000..49d1ea8189
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_index_factory.h
@@ -0,0 +1,445 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_INDEX_FACTORY_H
+#define HAM_BTREE_INDEX_FACTORY_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "3btree/btree_index.h"
+#include "3btree/btree_impl_default.h"
+#include "3btree/btree_impl_pax.h"
+#include "3btree/btree_keys_pod.h"
+#include "3btree/btree_keys_binary.h"
+#include "3btree/btree_keys_varlen.h"
+#include "3btree/btree_records_default.h"
+#include "3btree/btree_records_inline.h"
+#include "3btree/btree_records_internal.h"
+#include "3btree/btree_records_duplicate.h"
+#include "3btree/btree_node_proxy.h"
+#include "4db/db_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// A specialized Traits class using template parameters
+//
+template<class NodeLayout, class Comparator>
+class BtreeIndexTraitsImpl : public BtreeIndexTraits
+{
+ public:
+ // Compares two keys
+ // Returns the result of the key comparison: 0 if both keys match,
+ // a negative value when LHS < RHS, a positive value when LHS > RHS.
+ virtual int compare_keys(LocalDatabase *db, ham_key_t *lhs,
+ ham_key_t *rhs) const {
+ Comparator cmp(db);
+ return (cmp(lhs->data, lhs->size, rhs->data, rhs->size));
+ }
+
+ // Returns the class name (for testing)
+ virtual std::string test_get_classname() const {
+ return (get_classname(*this));
+ }
+
+ // Implementation of get_node_from_page()
+ virtual BtreeNodeProxy *get_node_from_page_impl(Page *page) const {
+ return (new BtreeNodeProxyImpl<NodeLayout, Comparator>(page));
+ }
+};
+
+//
+// A BtreeIndexFactory creates BtreeIndexTraits objects depending on the
+// Database configuration.
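+//
+// For example (following the switch in create() below), a leaf node for
+// HAM_TYPE_UINT32 keys without HAM_ENABLE_DUPLICATES and without
+// HAM_FORCE_RECORDS_INLINE uses
+// PaxNodeImpl<PaxLayout::PodKeyList<uint32_t>, PaxLayout::DefaultRecordList>
+// with NumericCompare<uint32_t>; enabling duplicates switches the same
+// database to a DefaultNodeImpl with DefLayout::DuplicateDefaultRecordList.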
+//
+struct BtreeIndexFactory
+{
+ static BtreeIndexTraits *create(LocalDatabase *db, uint32_t flags,
+ uint16_t key_type, uint16_t key_size, bool is_leaf) {
+ bool inline_records = (is_leaf && (flags & HAM_FORCE_RECORDS_INLINE));
+ bool fixed_keys = (key_size != HAM_KEY_SIZE_UNLIMITED);
+ bool use_duplicates = (flags & HAM_ENABLE_DUPLICATES) != 0;
+
+ switch (key_type) {
+ // 8bit unsigned integer
+ case HAM_TYPE_UINT8:
+ if (use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::PodKeyList<uint8_t>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<uint8_t> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<uint8_t>,
+ DefLayout::DuplicateInlineRecordList>,
+ NumericCompare<uint8_t> >());
+ else
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<uint8_t>,
+ DefLayout::DuplicateDefaultRecordList>,
+ NumericCompare<uint8_t> >());
+ }
+ else {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::PodKeyList<uint8_t>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<uint8_t> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint8_t>,
+ PaxLayout::InlineRecordList>,
+ NumericCompare<uint8_t> >());
+ else
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint8_t>,
+ PaxLayout::DefaultRecordList>,
+ NumericCompare<uint8_t> >());
+ }
+ // 16bit unsigned integer
+ case HAM_TYPE_UINT16:
+ if (use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::PodKeyList<uint16_t>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<uint16_t> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<uint16_t>,
+ DefLayout::DuplicateInlineRecordList>,
+ NumericCompare<uint16_t> >());
+ else
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<uint16_t>,
+ DefLayout::DuplicateDefaultRecordList>,
+ NumericCompare<uint16_t> >());
+ }
+ else {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint16_t>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<uint16_t> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint16_t>,
+ PaxLayout::InlineRecordList>,
+ NumericCompare<uint16_t> >());
+ else
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint16_t>,
+ PaxLayout::DefaultRecordList>,
+ NumericCompare<uint16_t> >());
+ }
+ // 32bit unsigned integer
+ case HAM_TYPE_UINT32:
+ if (use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::PodKeyList<uint32_t>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<uint32_t> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<uint32_t>,
+ DefLayout::DuplicateInlineRecordList>,
+ NumericCompare<uint32_t> >());
+ else
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<uint32_t>,
+ DefLayout::DuplicateDefaultRecordList>,
+ NumericCompare<uint32_t> >());
+ }
+ else {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint32_t>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<uint32_t> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint32_t>,
+ PaxLayout::InlineRecordList>,
+ NumericCompare<uint32_t> >());
+ else
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint32_t>,
+ PaxLayout::DefaultRecordList>,
+ NumericCompare<uint32_t> >());
+ }
+ // 64bit unsigned integer
+ case HAM_TYPE_UINT64:
+ if (use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::PodKeyList<uint64_t>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<uint64_t> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<uint64_t>,
+ DefLayout::DuplicateInlineRecordList>,
+ NumericCompare<uint64_t> >());
+ else
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<uint64_t>,
+ DefLayout::DuplicateDefaultRecordList>,
+ NumericCompare<uint64_t> >());
+ }
+ else {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::PodKeyList<uint64_t>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<uint64_t> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint64_t>,
+ PaxLayout::InlineRecordList>,
+ NumericCompare<uint64_t> >());
+ else
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<uint64_t>,
+ PaxLayout::DefaultRecordList>,
+ NumericCompare<uint64_t> >());
+ }
+ // 32bit float
+ case HAM_TYPE_REAL32:
+ if (use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::PodKeyList<float>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<float> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<float>,
+ DefLayout::DuplicateInlineRecordList>,
+ NumericCompare<float> >());
+ else
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<float>,
+ DefLayout::DuplicateDefaultRecordList>,
+ NumericCompare<float> >());
+ }
+ else {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<float>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<float> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<float>,
+ PaxLayout::InlineRecordList>,
+ NumericCompare<float> >());
+ else
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<float>,
+ PaxLayout::DefaultRecordList>,
+ NumericCompare<float> >());
+ }
+ // 64bit double
+ case HAM_TYPE_REAL64:
+ if (use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::PodKeyList<double>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<double> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<double>,
+ DefLayout::DuplicateInlineRecordList>,
+ NumericCompare<double> >());
+ else
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::PodKeyList<double>,
+ DefLayout::DuplicateDefaultRecordList>,
+ NumericCompare<double> >());
+ }
+ else {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<double>,
+ PaxLayout::InternalRecordList>,
+ NumericCompare<double> >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<double>,
+ PaxLayout::InlineRecordList>,
+ NumericCompare<double> >());
+ else
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::PodKeyList<double>,
+ PaxLayout::DefaultRecordList>,
+ NumericCompare<double> >());
+ }
+ // Callback function provided by user?
+ case HAM_TYPE_CUSTOM:
+ // Fixed keys, no duplicates
+ if (fixed_keys && !use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::BinaryKeyList,
+ PaxLayout::InternalRecordList>,
+ CallbackCompare>());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::BinaryKeyList,
+ PaxLayout::InlineRecordList>,
+ CallbackCompare>());
+ else
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::BinaryKeyList,
+ PaxLayout::DefaultRecordList>,
+ CallbackCompare>());
+ }
+ // Fixed keys WITH duplicates
+ if (fixed_keys && use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::BinaryKeyList,
+ PaxLayout::InternalRecordList>,
+ CallbackCompare >());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::BinaryKeyList,
+ DefLayout::DuplicateInlineRecordList>,
+ CallbackCompare >());
+ else
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::BinaryKeyList,
+ DefLayout::DuplicateDefaultRecordList>,
+ CallbackCompare >());
+ }
+ // Variable keys with or without duplicates
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ PaxLayout::InternalRecordList>,
+ CallbackCompare >());
+ if (inline_records && !use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ PaxLayout::InlineRecordList>,
+ CallbackCompare >());
+ if (inline_records && use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ DefLayout::DuplicateInlineRecordList>,
+ CallbackCompare >());
+ if (!inline_records && !use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ PaxLayout::DefaultRecordList>,
+ CallbackCompare >());
+ if (!inline_records && use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ DefLayout::DuplicateDefaultRecordList>,
+ CallbackCompare >());
+ ham_assert(!"shouldn't be here");
+ // BINARY is the default:
+ case HAM_TYPE_BINARY:
+ // Fixed keys, no duplicates
+ if (fixed_keys && !use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::BinaryKeyList,
+ PaxLayout::InternalRecordList>,
+ FixedSizeCompare>());
+ if (inline_records)
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::BinaryKeyList,
+ PaxLayout::InlineRecordList>,
+ FixedSizeCompare>());
+ else
+ return (new BtreeIndexTraitsImpl
+ <PaxNodeImpl<PaxLayout::BinaryKeyList,
+ PaxLayout::DefaultRecordList>,
+ FixedSizeCompare>());
+ }
+ // fixed keys with duplicates
+ if (fixed_keys && use_duplicates) {
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ PaxNodeImpl<PaxLayout::BinaryKeyList,
+ PaxLayout::InternalRecordList>,
+ FixedSizeCompare >());
+ if (inline_records && use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::BinaryKeyList,
+ DefLayout::DuplicateInlineRecordList>,
+ FixedSizeCompare >());
+ if (!inline_records && use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<PaxLayout::BinaryKeyList,
+ DefLayout::DuplicateDefaultRecordList>,
+ FixedSizeCompare >());
+ }
+ // variable length keys, with and without duplicates
+ if (!is_leaf)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ PaxLayout::InternalRecordList>,
+ VariableSizeCompare >());
+ if (inline_records && !use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ PaxLayout::InlineRecordList>,
+ VariableSizeCompare >());
+ if (inline_records && use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ DefLayout::DuplicateInlineRecordList>,
+ VariableSizeCompare >());
+ if (!inline_records && !use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ PaxLayout::DefaultRecordList>,
+ VariableSizeCompare >());
+ if (!inline_records && use_duplicates)
+ return (new BtreeIndexTraitsImpl<
+ DefaultNodeImpl<DefLayout::VariableLengthKeyList,
+ DefLayout::DuplicateDefaultRecordList>,
+ VariableSizeCompare >());
+ ham_assert(!"shouldn't be here");
+ default:
+ break;
+ }
+
+ ham_assert(!"shouldn't be here");
+ return (0);
+ }
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_INDEX_FACTORY_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_insert.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_insert.cc
new file mode 100644
index 0000000000..7dac8365d7
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_insert.cc
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * btree inserting
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+#include <algorithm>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3blob_manager/blob_manager.h"
+#include "3page_manager/page_manager.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_stats.h"
+#include "3btree/btree_node_proxy.h"
+#include "3btree/btree_cursor.h"
+#include "3btree/btree_update.h"
+#include "4cursor/cursor.h"
+#include "4db/db.h"
+#include "4env/env.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace std;
+
+namespace hamsterdb {
+
+class BtreeInsertAction : public BtreeUpdateAction
+{
+ public:
+ BtreeInsertAction(BtreeIndex *btree, Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags)
+ : BtreeUpdateAction(btree, context, cursor
+ ? cursor->get_btree_cursor()
+ : 0, 0),
+ m_key(key), m_record(record), m_flags(flags) {
+ if (m_cursor)
+ m_duplicate_index = m_cursor->get_duplicate_index();
+ }
+
+ // This is the entry point for the actual insert operation
+ ham_status_t run() {
+ BtreeStatistics *stats = m_btree->get_statistics();
+
+ m_hints = stats->get_insert_hints(m_flags);
+
+ ham_assert((m_hints.flags & (HAM_DUPLICATE_INSERT_BEFORE
+ | HAM_DUPLICATE_INSERT_AFTER
+ | HAM_DUPLICATE_INSERT_FIRST
+ | HAM_DUPLICATE_INSERT_LAST))
+ ? (m_hints.flags & HAM_DUPLICATE)
+ : 1);
+
+ /*
+ * append the key? append_or_prepend_key() will try to append or
+ * prepend the key; if this fails because the key is NOT the largest
+ * (or smallest) key in the database or because the current page is
+ * already full, it will remove the HINT_APPEND (or HINT_PREPEND)
+ * flag and call insert()
+ */
+ ham_status_t st;
+ if (m_hints.leaf_page_addr
+ && (m_hints.flags & HAM_HINT_APPEND
+ || m_hints.flags & HAM_HINT_PREPEND))
+ st = append_or_prepend_key();
+ else
+ st = insert();
+
+ if (st == HAM_LIMITS_REACHED)
+ st = insert();
+
+ if (st)
+ stats->insert_failed();
+ else {
+ if (m_hints.processed_leaf_page)
+ stats->insert_succeeded(m_hints.processed_leaf_page,
+ m_hints.processed_slot);
+ }
+
+ return (st);
+ }
+
+ private:
+ // Appends a key at the "end" of the btree, or prepends it at the
+ // "beginning"
+ ham_status_t append_or_prepend_key() {
+ Page *page;
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+ bool force_append = false;
+ bool force_prepend = false;
+
+ /*
+ * see if we can fetch this btree leaf; if not, revert to a regular insert
+ *
+ * As this is a speed-improvement hint that re-uses recent information,
+ * the page should still sit in the cache; if it does not, the hint is
+ * stale and is discarded.
+ */
+ page = env->page_manager()->fetch(m_context, m_hints.leaf_page_addr,
+ PageManager::kOnlyFromCache);
+ /* if the page is not in cache: do a regular insert */
+ if (!page)
+ return (insert());
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+ ham_assert(node->is_leaf());
+
+ /*
+ * if the page is already full OR this page is not the right-most page
+ * when we APPEND or the left-most node when we PREPEND
+ * OR the new key is not the highest key: perform a normal insert
+ */
+ if ((m_hints.flags & HAM_HINT_APPEND && node->get_right() != 0)
+ || (m_hints.flags & HAM_HINT_PREPEND && node->get_left() != 0)
+ || node->requires_split(m_context, m_key))
+ return (insert());
+
+ /*
+ * if the page is not empty: check if we append the key at the end/start
+ * (depending on the flags), or if it's actually inserted in the middle.
+ */
+ if (node->get_count() != 0) {
+ if (m_hints.flags & HAM_HINT_APPEND) {
+ int cmp_hi = node->compare(m_context, m_key, node->get_count() - 1);
+ /* key is at the end */
+ if (cmp_hi > 0) {
+ ham_assert(node->get_right() == 0);
+ force_append = true;
+ }
+ }
+
+ if (m_hints.flags & HAM_HINT_PREPEND) {
+ int cmp_lo = node->compare(m_context, m_key, 0);
+ /* key is at the start of page */
+ if (cmp_lo < 0) {
+ ham_assert(node->get_left() == 0);
+ force_prepend = true;
+ }
+ }
+ }
+
+ /* OK - we're really appending/prepending the new key. */
+ if (force_append || force_prepend)
+ return (insert_in_page(page, m_key, m_record, m_hints,
+ force_prepend, force_append));
+
+ /* otherwise reset the hints because they are no longer valid */
+ m_hints.flags &= ~HAM_HINT_APPEND;
+ m_hints.flags &= ~HAM_HINT_PREPEND;
+ return (insert());
+ }
+
+ ham_status_t insert() {
+ // traverse the tree till a leaf is reached
+ Page *parent;
+ Page *page = traverse_tree(m_key, m_hints, &parent);
+
+ // We've reached the leaf; it's still possible that we have to
+ // split the page, therefore this case has to be handled
+ ham_status_t st = insert_in_page(page, m_key, m_record, m_hints);
+ if (st == HAM_LIMITS_REACHED) {
+ page = split_page(page, parent, m_key, m_hints);
+ return (insert_in_page(page, m_key, m_record, m_hints));
+ }
+ return (st);
+ }
+
+ // the key that is inserted
+ ham_key_t *m_key;
+
+ // the record that is inserted
+ ham_record_t *m_record;
+
+ // flags of ham_db_insert()
+ uint32_t m_flags;
+
+ // statistical hints for this operation
+ BtreeStatistics::InsertHints m_hints;
+};
+
+ham_status_t
+BtreeIndex::insert(Context *context, Cursor *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ context->db = get_db();
+
+ BtreeInsertAction bia(this, context, cursor, key, record, flags);
+ return (bia.run());
+}
+
+} // namespace hamsterdb
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_base.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_base.h
new file mode 100644
index 0000000000..da5804ad04
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_base.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Base class for KeyLists
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_KEYS_BASE_H
+#define HAM_BTREE_KEYS_BASE_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct BaseKeyList
+{
+ enum {
+ // This KeyList cannot reduce its capacity in order to release storage
+ kCanReduceCapacity = 0,
+
+ // This KeyList uses binary search combined with linear search
+ kBinaryLinear,
+
+ // This KeyList has a custom search implementation
+ kCustomSearch,
+
+ // This KeyList has a custom search implementation for exact matches
+ // *only*
+ kCustomExactImplementation,
+
+ // This KeyList uses binary search (this is the default)
+ kBinarySearch,
+
+ // Specifies the search implementation:
+ kSearchImplementation = kBinarySearch,
+
+ // This KeyList does NOT have a custom insert implementation
+ kCustomInsert = 0,
+ };
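+
+ // (Concrete KeyLists override these defaults; PaxLayout::BinaryKeyList,
+ // for instance, selects kBinaryLinear as its search implementation.)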
+
+ BaseKeyList()
+ : m_range_size(0) {
+ }
+
+ // Erases the extended part of a key; nothing to do here
+ void erase_extended_key(Context *context, int slot) const {
+ }
+
+ // Checks the integrity of this node. Throws an exception if there is a
+ // violation.
+ void check_integrity(Context *context, size_t node_count) const {
+ }
+
+ // Rearranges the list
+ void vacuumize(size_t node_count, bool force) const {
+ }
+
+ // Finds a key
+ template<typename Cmp>
+ int find(Context *, size_t node_count, const ham_key_t *key, Cmp &comparator,
+ int *pcmp) {
+ ham_assert(!"shouldn't be here");
+ return (0);
+ }
+
+ // Returns the threshold when switching from binary search to
+ // linear search. Disabled by default
+ size_t get_linear_search_threshold() const {
+ return ((size_t)-1);
+ }
+
+ // Performs a linear search in a given range between |start| and
+ // |start + length|. Disabled by default.
+ template<typename Cmp>
+ int linear_search(size_t start, size_t length, const ham_key_t *hkey,
+ Cmp &comparator, int *pcmp) {
+ ham_assert(!"shouldn't be here");
+ throw Exception(HAM_INTERNAL_ERROR);
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BtreeStatistics::update_min_max_avg(&metrics->keylist_ranges, m_range_size);
+ }
+
+ // The size of the range (in bytes)
+ size_t m_range_size;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_KEYS_BASE_H */
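The kSearchImplementation values above act as compile-time traits: each concrete KeyList overrides the constant, and the node implementation picks its search strategy from it. A minimal standalone sketch of that trait-based dispatch (the Toy* classes and search_strategy() helper are illustrative, not hamsterdb code):

#include <cstdio>

// Stand-ins for the trait values defined by BaseKeyList (illustrative only)
enum { kBinarySearch = 0, kBinaryLinear = 1, kCustomSearch = 2 };

struct ToyPodKeyList {
  enum { kSearchImplementation = kBinaryLinear };
};

struct ToyVarlenKeyList {
  enum { kSearchImplementation = kBinarySearch };
};

// A node implementation can branch on the trait at compile time
template<class KeyList>
const char *search_strategy() {
  switch ((int)KeyList::kSearchImplementation) {
    case kBinaryLinear: return "binary search + linear tail";
    case kCustomSearch: return "custom search";
    default:            return "plain binary search";
  }
}

int main() {
  std::printf("%s\n", search_strategy<ToyPodKeyList>());    // binary search + linear tail
  std::printf("%s\n", search_strategy<ToyVarlenKeyList>()); // plain binary search
}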
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_binary.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_binary.h
new file mode 100644
index 0000000000..faea959ec5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_binary.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Fixed length KeyList for binary data
+ *
+ * This KeyList stores binary keys of fixed length size. It is implemented
+ * as a plain C array of type uint8_t[]. It has fast random access, i.e.
+ * key #N starts at data[N * keysize].
+ *
+ * This KeyList cannot be resized.
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_KEYS_BINARY_H
+#define HAM_BTREE_KEYS_BINARY_H
+
+#include "0root/root.h"
+
+#include <sstream>
+#include <iostream>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3btree/btree_node.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_keys_base.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// The template classes in this file are wrapped in a separate namespace
+// to avoid naming clashes with btree_impl_default.h
+//
+namespace PaxLayout {
+
+//
+// Same as the PodKeyList, but for binary arrays of fixed length
+//
+class BinaryKeyList : public BaseKeyList
+{
+ public:
+ enum {
+ // A flag whether this KeyList has sequential data
+ kHasSequentialData = 1,
+
+ // A flag whether this KeyList supports the scan() call
+ kSupportsBlockScans = 1,
+
+ // This KeyList uses binary search in combination with linear search
+ kSearchImplementation = kBinaryLinear,
+ };
+
+ // Constructor
+ BinaryKeyList(LocalDatabase *db)
+ : m_data(0) {
+ m_key_size = db->config().key_size;
+ ham_assert(m_key_size != 0);
+ }
+
+ // Creates a new KeyList starting at |data|, total size is
+ // |range_size| (in bytes)
+ void create(uint8_t *data, size_t range_size) {
+ m_data = data;
+ m_range_size = range_size;
+ }
+
+ // Opens an existing KeyList starting at |data|
+ void open(uint8_t *data, size_t range_size, size_t node_count) {
+ m_data = data;
+ m_range_size = range_size;
+ }
+
+ // Calculates the required size for this range
+ size_t get_required_range_size(size_t node_count) const {
+ return (node_count * m_key_size);
+ }
+
+ // Returns the actual key size including overhead
+ size_t get_full_key_size(const ham_key_t *key = 0) const {
+ return (m_key_size);
+ }
+
+ // Copies a key into |dest|
+ void get_key(Context *context, int slot, ByteArray *arena, ham_key_t *dest,
+ bool deep_copy = true) const {
+ dest->size = (uint16_t)m_key_size;
+ if (likely(deep_copy == false)) {
+ dest->data = &m_data[slot * m_key_size];
+ return;
+ }
+
+ // allocate memory (if required)
+ if (!(dest->flags & HAM_KEY_USER_ALLOC)) {
+ arena->resize(dest->size);
+ dest->data = arena->get_ptr();
+ }
+
+ memcpy(dest->data, &m_data[slot * m_key_size], m_key_size);
+ }
+
+ // Returns the threshold when switching from binary search to
+ // linear search
+ size_t get_linear_search_threshold() const {
+ if (m_key_size > 32)
+ return (-1); // disable linear search for large keys
+ return (128 / m_key_size);
+ }
+
+ // Performs a linear search in a given range between |start| and
+ // |start + length|
+ template<typename Cmp>
+ int linear_search(size_t start, size_t length, const ham_key_t *key,
+ Cmp &comparator, int *pcmp) {
+ uint8_t *begin = &m_data[start * m_key_size];
+ uint8_t *end = &m_data[(start + length) * m_key_size];
+ uint8_t *current = begin;
+
+ int c = start;
+
+ while (current < end) {
+ /* compare it against the key */
+ int cmp = comparator(key->data, key->size, current, m_key_size);
+
+ /* found it, or moved past the key? */
+ if (cmp <= 0) {
+ if (cmp < 0) {
+ if (c == 0)
+ *pcmp = -1; // key is < #m_data[0]
+ else
+ *pcmp = +1; // key is > #m_data[c - 1]!
+ return (c - 1);
+ }
+ *pcmp = 0;
+ return (c);
+ }
+
+ current += m_key_size;
+ c++;
+ }
+
+ /* the new key is > the last key in the page */
+ *pcmp = 1;
+ return (start + length - 1);
+ }
+
+ // Iterates all keys, calls the |visitor| on each
+ void scan(Context *context, ScanVisitor *visitor, uint32_t start,
+ size_t length) {
+ (*visitor)(&m_data[start * m_key_size], length);
+ }
+
+ // Erases a whole slot by shifting all larger keys to the "left"
+ void erase(Context *context, size_t node_count, int slot) {
+ if (slot < (int)node_count - 1)
+ memmove(&m_data[slot * m_key_size], &m_data[(slot + 1) * m_key_size],
+ m_key_size * (node_count - slot - 1));
+ }
+
+ // Inserts a key
+ template<typename Cmp>
+ PBtreeNode::InsertResult insert(Context *context, size_t node_count,
+ const ham_key_t *key, uint32_t flags, Cmp &comparator,
+ int slot) {
+ if (node_count > (size_t)slot)
+ memmove(&m_data[(slot + 1) * m_key_size], &m_data[slot * m_key_size],
+ m_key_size * (node_count - slot));
+ set_key_data(slot, key->data, key->size);
+ return (PBtreeNode::InsertResult(0, slot));
+ }
+
+ // Returns true if the |key| no longer fits into the node
+ bool requires_split(size_t node_count, const ham_key_t *key) const {
+ return ((node_count + 1) * m_key_size >= m_range_size);
+ }
+
+    // Copies all keys from this[sstart] onward to dest[dstart]
+ void copy_to(int sstart, size_t node_count, BinaryKeyList &dest,
+ size_t other_count, int dstart) {
+ memcpy(&dest.m_data[dstart * m_key_size], &m_data[sstart * m_key_size],
+ m_key_size * (node_count - sstart));
+ }
+
+ // Change the capacity; for PAX layouts this just means copying the
+ // data from one place to the other
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t capacity_hint) {
+ memmove(new_data_ptr, m_data, node_count * m_key_size);
+ m_data = new_data_ptr;
+ m_range_size = new_range_size;
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BaseKeyList::fill_metrics(metrics, node_count);
+ BtreeStatistics::update_min_max_avg(&metrics->keylist_unused,
+ m_range_size - (node_count * m_key_size));
+ }
+
+ // Prints a slot to |out| (for debugging)
+ void print(Context *context, int slot, std::stringstream &out) const {
+ for (size_t i = 0; i < m_key_size; i++)
+ out << (char)m_data[slot * m_key_size + i];
+ }
+
+ // Returns the key size
+ size_t get_key_size(int slot) const {
+ return (m_key_size);
+ }
+
+ // Returns the pointer to a key's data
+ uint8_t *get_key_data(int slot) {
+ return (&m_data[slot * m_key_size]);
+ }
+
+ // Has support for SIMD style search?
+ bool has_simd_support() const {
+ return (false);
+ }
+
+ // Returns the pointer to the key's inline data - for SIMD calculations
+ // Not implemented by this KeyList
+ uint8_t *get_simd_data() {
+ return (0);
+ }
+
+ private:
+ // Returns the pointer to a key's data (const flavour)
+ uint8_t *get_key_data(int slot) const {
+ return (&m_data[slot * m_key_size]);
+ }
+
+ // Overwrites a key's data. The |size| of the new data HAS
+ // to be identical to the "official" key size
+ void set_key_data(int slot, const void *ptr, size_t size) {
+ ham_assert(size == get_key_size(slot));
+ memcpy(&m_data[slot * m_key_size], ptr, size);
+ }
+
+ // The size of a single key
+ size_t m_key_size;
+
+ // Pointer to the actual key data
+ uint8_t *m_data;
+};
+
+} // namespace PaxLayout
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_KEYS_BINARY_H */
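The combination of get_linear_search_threshold() and linear_search() above is driven by the node's binary search: the binary search narrows the range, and once the remaining range is small (roughly 128 / key_size slots) a linear scan finishes it. That hand-off lives in the node implementation, which is not part of this file, so the following is only a standalone sketch of the idea; find_key() and its threshold handling are illustrative assumptions, not the hamsterdb code path:

#include <cstdint>
#include <cstring>
#include <cstdio>
#include <vector>

// Fixed-size keys stored back to back: key #n starts at data[n * key_size].
// Binary search until the range drops below |threshold|, then scan linearly.
static int find_key(const uint8_t *data, size_t count, size_t key_size,
                const uint8_t *needle) {
  size_t threshold = key_size > 32 ? 0 : 128 / key_size;
  size_t left = 0, right = count;

  while (right > left && right - left > threshold) {
    size_t mid = left + (right - left) / 2;
    int cmp = std::memcmp(needle, data + mid * key_size, key_size);
    if (cmp == 0)
      return (int)mid;
    if (cmp < 0)
      right = mid;
    else
      left = mid + 1;
  }

  for (size_t i = left; i < right; i++)        // linear tail
    if (std::memcmp(needle, data + i * key_size, key_size) == 0)
      return (int)i;
  return -1;                                   // not found
}

int main() {
  const size_t key_size = 4;
  std::vector<uint8_t> keys;
  for (uint8_t k = 1; k <= 20; k++) {          // 20 sorted 4-byte keys
    uint8_t key[4] = { 0, 0, 0, k };
    keys.insert(keys.end(), key, key + 4);
  }
  uint8_t needle[4] = { 0, 0, 0, 7 };
  std::printf("slot = %d\n", find_key(keys.data(), 20, key_size, needle));  // 6
}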
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_pod.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_pod.h
new file mode 100644
index 0000000000..1a0582da69
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_pod.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Fixed length KeyList for built-in data types ("POD types")
+ *
+ * This is the fastest KeyList available. It stores POD data sequentially
+ * in an array, i.e. PodKeyList<uint32_t> is simply a plain
+ * C array of type uint32_t[]. Each key has zero overhead.
+ *
+ * This KeyList cannot be resized.
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_KEYS_POD_H
+#define HAM_BTREE_KEYS_POD_H
+
+#include "0root/root.h"
+
+#include <sstream>
+#include <iostream>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3btree/btree_node.h"
+#include "3btree/btree_keys_base.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// The template classes in this file are wrapped in a separate namespace
+// to avoid naming clashes with btree_impl_default.h
+//
+namespace PaxLayout {
+
+//
+// The PodKeyList provides simplified access to a list of keys where each
+// key is of type T (i.e. uint32_t).
+//
+template<typename T>
+class PodKeyList : public BaseKeyList
+{
+ public:
+ enum {
+ // A flag whether this KeyList has sequential data
+ kHasSequentialData = 1,
+
+ // A flag whether this KeyList supports the scan() call
+ kSupportsBlockScans = 1,
+
+ // This KeyList uses a custom SIMD implementation if possible,
+ // otherwise binary search in combination with linear search
+ kSearchImplementation = kBinaryLinear,
+ };
+
+ // Constructor
+ PodKeyList(LocalDatabase *db)
+ : m_data(0) {
+ }
+
+    // Creates a new PodKeyList starting at |data|, total size is
+ // |range_size| (in bytes)
+ void create(uint8_t *data, size_t range_size) {
+ m_data = (T *)data;
+ m_range_size = range_size;
+ }
+
+    // Opens an existing PodKeyList starting at |data|
+ void open(uint8_t *data, size_t range_size, size_t node_count) {
+ m_data = (T *)data;
+ m_range_size = range_size;
+ }
+
+ // Returns the required size for the current set of keys
+ size_t get_required_range_size(size_t node_count) const {
+ return (node_count * sizeof(T));
+ }
+
+ // Returns the actual key size including overhead
+ size_t get_full_key_size(const ham_key_t *key = 0) const {
+ return (sizeof(T));
+ }
+
+ // Copies a key into |dest|
+ void get_key(Context *context, int slot, ByteArray *arena, ham_key_t *dest,
+ bool deep_copy = true) const {
+ dest->size = sizeof(T);
+ if (deep_copy == false) {
+ dest->data = &m_data[slot];
+ return;
+ }
+
+ // allocate memory (if required)
+ if (!(dest->flags & HAM_KEY_USER_ALLOC)) {
+ arena->resize(dest->size);
+ dest->data = arena->get_ptr();
+ }
+
+ memcpy(dest->data, &m_data[slot], sizeof(T));
+ }
+
+ // Returns the threshold when switching from binary search to
+ // linear search
+ size_t get_linear_search_threshold() const {
+ return (128 / sizeof(T));
+ }
+
+ // Performs a linear search in a given range between |start| and
+ // |start + length|
+ template<typename Cmp>
+ int linear_search(size_t start, size_t length, const ham_key_t *hkey,
+ Cmp &comparator, int *pcmp) {
+ T key = *(T *)hkey->data;
+ size_t c = start;
+ size_t end = start + length;
+
+ #undef COMPARE
+ #define COMPARE(c) if (key <= m_data[c]) { \
+ if (key < m_data[c]) { \
+ if (c == 0) \
+ *pcmp = -1; /* key < m_data[0] */ \
+ else \
+ *pcmp = +1; /* key > m_data[c - 1] */ \
+ return ((c) - 1); \
+ } \
+ *pcmp = 0; \
+ return (c); \
+ }
+
+ while (c + 8 < end) {
+ COMPARE(c)
+ COMPARE(c + 1)
+ COMPARE(c + 2)
+ COMPARE(c + 3)
+ COMPARE(c + 4)
+ COMPARE(c + 5)
+ COMPARE(c + 6)
+ COMPARE(c + 7)
+ c += 8;
+ }
+
+ while (c < end) {
+ COMPARE(c)
+ c++;
+ }
+
+ /* the new key is > the last key in the page */
+ *pcmp = 1;
+ return (start + length - 1);
+ }
+
+ // Iterates all keys, calls the |visitor| on each
+ void scan(Context *context, ScanVisitor *visitor, uint32_t start,
+ size_t length) {
+ (*visitor)(&m_data[start], length);
+ }
+
+ // Erases a whole slot by shifting all larger keys to the "left"
+ void erase(Context *context, size_t node_count, int slot) {
+ if (slot < (int)node_count - 1)
+ memmove(&m_data[slot], &m_data[slot + 1],
+ sizeof(T) * (node_count - slot - 1));
+ }
+
+ // Inserts a key
+ template<typename Cmp>
+ PBtreeNode::InsertResult insert(Context *context, size_t node_count,
+ const ham_key_t *key, uint32_t flags, Cmp &comparator,
+ int slot) {
+ if (node_count > (size_t)slot)
+ memmove(&m_data[slot + 1], &m_data[slot],
+ sizeof(T) * (node_count - slot));
+ set_key_data(slot, key->data, key->size);
+ return (PBtreeNode::InsertResult(0, slot));
+ }
+
+    // Copies all keys from this[sstart] onward to dest[dstart]
+ void copy_to(int sstart, size_t node_count, PodKeyList<T> &dest,
+ size_t other_count, int dstart) {
+ memcpy(&dest.m_data[dstart], &m_data[sstart],
+ sizeof(T) * (node_count - sstart));
+ }
+
+ // Returns true if the |key| no longer fits into the node
+ bool requires_split(size_t node_count, const ham_key_t *key) const {
+ return ((node_count + 1) * sizeof(T) >= m_range_size);
+ }
+
+ // Change the range size; just copy the data from one place to the other
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t capacity_hint) {
+ memmove(new_data_ptr, m_data, node_count * sizeof(T));
+ m_data = (T *)new_data_ptr;
+ m_range_size = new_range_size;
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BaseKeyList::fill_metrics(metrics, node_count);
+ BtreeStatistics::update_min_max_avg(&metrics->keylist_unused,
+ m_range_size - (node_count * sizeof(T)));
+ }
+
+ // Prints a slot to |out| (for debugging)
+ void print(Context *context, int slot, std::stringstream &out) const {
+ out << m_data[slot];
+ }
+
+ // Returns the size of a key
+ size_t get_key_size(int slot) const {
+ return (sizeof(T));
+ }
+
+ // Returns a pointer to the key's data
+ uint8_t *get_key_data(int slot) {
+ return ((uint8_t *)&m_data[slot]);
+ }
+
+ private:
+ // Returns a pointer to the key's data (const flavour)
+ uint8_t *get_key_data(int slot) const {
+ return ((uint8_t *)&m_data[slot]);
+ }
+
+ // Overwrites an existing key; the |size| of the new data HAS to be
+ // identical with the key size specified when the database was created!
+ void set_key_data(int slot, const void *ptr, size_t size) {
+ ham_assert(size == sizeof(T));
+ m_data[slot] = *(T *)ptr;
+ }
+
+ // The actual array of T's
+ T *m_data;
+};
+
+} // namespace PaxLayout
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_KEYS_POD_H */
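The return convention of PodKeyList::linear_search() is easy to miss in the unrolled loop above: it returns the slot holding the greatest key that is still <= the search key (or slot -1 if the key is smaller than everything), and *pcmp reports whether the match was exact. A small standalone check of that convention, using plain integers instead of the hamsterdb types:

#include <cstdint>
#include <cstdio>
#include <vector>

// Same convention as PodKeyList::linear_search(): returns the slot of the
// greatest key <= |key| within [start, start + length); *pcmp is 0 for an
// exact match, otherwise +1 ("key is greater than the returned slot") or
// -1 ("key is smaller than slot 0").
static int linear_search(const std::vector<uint32_t> &data, size_t start,
                size_t length, uint32_t key, int *pcmp) {
  for (size_t c = start; c < start + length; c++) {
    if (key <= data[c]) {
      if (key < data[c]) {
        *pcmp = (c == 0) ? -1 : +1;
        return (int)c - 1;
      }
      *pcmp = 0;
      return (int)c;
    }
  }
  *pcmp = +1;
  return (int)(start + length) - 1;
}

int main() {
  std::vector<uint32_t> keys = { 10, 20, 30, 40 };
  int cmp;
  std::printf("%d %d\n", linear_search(keys, 0, keys.size(), 20, &cmp), cmp); // 1 0
  std::printf("%d %d\n", linear_search(keys, 0, keys.size(), 25, &cmp), cmp); // 1 1
  std::printf("%d %d\n", linear_search(keys, 0, keys.size(),  5, &cmp), cmp); // -1 -1
  std::printf("%d %d\n", linear_search(keys, 0, keys.size(), 99, &cmp), cmp); // 3 1
}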
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_varlen.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_varlen.h
new file mode 100644
index 0000000000..5f85676c56
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_keys_varlen.h
@@ -0,0 +1,533 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Variable length KeyList
+ *
+ * Each key is stored in a "chunk", and the chunks are managed by an upfront
+ * index which contains offset and size of each chunk. The index also keeps
+ * track of deleted chunks.
+ *
+ * The actual chunk data contains the key's data (which can be a 64bit blob
+ * ID if the key is too big).
+ *
+ * If the key is too big (exceeds |m_extkey_threshold|) then it's offloaded
+ * to an external blob, and only the 64bit record id of this blob is stored
+ * in the node. These "extended keys" are cached; the cache's lifetime is
+ * coupled to the lifetime of the node.
+ *
+ * To avoid expensive memcpy-operations, erasing a key only affects this
+ * upfront index: the relevant slot is moved to a "freelist". This freelist
+ * contains the same meta information as the index table.
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_KEYS_VARLEN_H
+#define HAM_BTREE_KEYS_VARLEN_H
+
+#include "0root/root.h"
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include <map>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "1base/scoped_ptr.h"
+#include "2page/page.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_node.h"
+#include "3btree/btree_index.h"
+#include "3btree/upfront_index.h"
+#include "3btree/btree_keys_base.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+namespace DefLayout {
+
+//
+// Variable length keys
+//
+// This KeyList uses an UpfrontIndex to manage the variable length data
+// chunks. The UpfrontIndex knows the sizes of the chunks, and therefore
+// the VariableLengthKeyList does *not* store additional size information.
+//
+// The format of a single key is:
+// |Flags|Data...|
+// where Flags are 8 bit.
+//
+// The key size (as specified by the user when inserting the key) therefore
+// is UpfrontIndex::get_chunk_size() - 1.
+//
+class VariableLengthKeyList : public BaseKeyList
+{
+ // for caching external keys
+ typedef std::map<uint64_t, ByteArray> ExtKeyCache;
+
+ public:
+ enum {
+ // A flag whether this KeyList has sequential data
+ kHasSequentialData = 0,
+
+ // A flag whether this KeyList supports the scan() call
+ kSupportsBlockScans = 0,
+
+ // This KeyList can reduce its capacity in order to release storage
+ kCanReduceCapacity = 1,
+
+ // This KeyList uses binary search
+ kSearchImplementation = kBinarySearch,
+ };
+
+ // Constructor
+ VariableLengthKeyList(LocalDatabase *db)
+ : m_db(db), m_index(db), m_data(0) {
+ size_t page_size = db->lenv()->config().page_size_bytes;
+ if (Globals::ms_extended_threshold)
+ m_extkey_threshold = Globals::ms_extended_threshold;
+ else {
+ if (page_size == 1024)
+ m_extkey_threshold = 64;
+ else if (page_size <= 1024 * 8)
+ m_extkey_threshold = 128;
+ else {
+        // The UpfrontIndex limits the chunk size to 8 bits (max 255),
+        // so reserve a few bytes for metadata (flags)
+ m_extkey_threshold = 250;
+ }
+ }
+ }
+
+    // Creates a new KeyList starting at |data|, total size is
+ // |range_size| (in bytes)
+ void create(uint8_t *data, size_t range_size) {
+ m_data = data;
+ m_range_size = range_size;
+ m_index.create(m_data, range_size, range_size / get_full_key_size());
+ }
+
+ // Opens an existing KeyList
+ void open(uint8_t *data, size_t range_size, size_t node_count) {
+ m_data = data;
+ m_range_size = range_size;
+ m_index.open(m_data, range_size);
+ }
+
+ // Calculates the required size for a range
+ size_t get_required_range_size(size_t node_count) const {
+ return (m_index.get_required_range_size(node_count));
+ }
+
+ // Returns the actual key size including overhead. This is an estimate
+ // since we don't know how large the keys will be
+ size_t get_full_key_size(const ham_key_t *key = 0) const {
+ if (!key)
+ return (24 + m_index.get_full_index_size() + 1);
+ // always make sure to have enough space for an extkey id
+ if (key->size < 8 || key->size > m_extkey_threshold)
+ return (sizeof(uint64_t) + m_index.get_full_index_size() + 1);
+ return (key->size + m_index.get_full_index_size() + 1);
+ }
+
+ // Copies a key into |dest|
+ void get_key(Context *context, int slot, ByteArray *arena, ham_key_t *dest,
+ bool deep_copy = true) {
+ ham_key_t tmp;
+ uint32_t offset = m_index.get_chunk_offset(slot);
+ uint8_t *p = m_index.get_chunk_data_by_offset(offset);
+
+ if (unlikely(*p & BtreeKey::kExtendedKey)) {
+ memset(&tmp, 0, sizeof(tmp));
+ get_extended_key(context, get_extended_blob_id(slot), &tmp);
+ }
+ else {
+ tmp.size = get_key_size(slot);
+ tmp.data = p + 1;
+ }
+
+ dest->size = tmp.size;
+
+ if (likely(deep_copy == false)) {
+ dest->data = tmp.data;
+ return;
+ }
+
+ // allocate memory (if required)
+ if (!(dest->flags & HAM_KEY_USER_ALLOC)) {
+ arena->resize(tmp.size);
+ dest->data = arena->get_ptr();
+ }
+ memcpy(dest->data, tmp.data, tmp.size);
+ }
+
+ // Iterates all keys, calls the |visitor| on each. Not supported by
+ // this KeyList implementation. For variable length keys, the caller
+ // must iterate over all keys. The |scan()| interface is only implemented
+ // for PAX style layouts.
+ void scan(Context *context, ScanVisitor *visitor, size_t node_count,
+ uint32_t start) {
+ ham_assert(!"shouldn't be here");
+ throw Exception(HAM_INTERNAL_ERROR);
+ }
+
+ // Erases a key's payload. Does NOT remove the chunk from the UpfrontIndex
+ // (see |erase()|).
+ void erase_extended_key(Context *context, int slot) {
+ uint8_t flags = get_key_flags(slot);
+ if (flags & BtreeKey::kExtendedKey) {
+ // delete the extended key from the cache
+ erase_extended_key(context, get_extended_blob_id(slot));
+ // and transform into a key which is non-extended and occupies
+ // the same space as before, when it was extended
+ set_key_flags(slot, flags & (~BtreeKey::kExtendedKey));
+ set_key_size(slot, sizeof(uint64_t));
+ }
+ }
+
+ // Erases a key, including extended blobs
+ void erase(Context *context, size_t node_count, int slot) {
+ erase_extended_key(context, slot);
+ m_index.erase(node_count, slot);
+ }
+
+ // Inserts the |key| at the position identified by |slot|.
+ // This method cannot fail; there MUST be sufficient free space in the
+ // node (otherwise the caller would have split the node).
+ template<typename Cmp>
+ PBtreeNode::InsertResult insert(Context *context, size_t node_count,
+ const ham_key_t *key, uint32_t flags,
+ Cmp &comparator, int slot) {
+ m_index.insert(node_count, slot);
+
+ // now there's one additional slot
+ node_count++;
+
+ uint32_t key_flags = 0;
+
+ // When inserting the data: always add 1 byte for key flags
+ if (key->size <= m_extkey_threshold
+ && m_index.can_allocate_space(node_count, key->size + 1)) {
+ uint32_t offset = m_index.allocate_space(node_count, slot,
+ key->size + 1);
+ uint8_t *p = m_index.get_chunk_data_by_offset(offset);
+ *p = key_flags;
+ memcpy(p + 1, key->data, key->size); // and data
+ }
+ else {
+ uint64_t blob_id = add_extended_key(context, key);
+ m_index.allocate_space(node_count, slot, 8 + 1);
+ set_extended_blob_id(slot, blob_id);
+ set_key_flags(slot, key_flags | BtreeKey::kExtendedKey);
+ }
+
+ return (PBtreeNode::InsertResult(0, slot));
+ }
+
+ // Returns true if the |key| no longer fits into the node and a split
+ // is required. Makes sure that there is ALWAYS enough headroom
+ // for an extended key!
+ //
+ // If there's no key specified then always assume the worst case and
+ // pretend that the key has the maximum length
+ bool requires_split(size_t node_count, const ham_key_t *key) {
+ size_t required;
+ if (key) {
+ required = key->size + 1;
+ // add 1 byte for flags
+ if (key->size > m_extkey_threshold || key->size < 8 + 1)
+ required = 8 + 1;
+ }
+ else
+ required = m_extkey_threshold + 1;
+ return (m_index.requires_split(node_count, required));
+ }
+
+    // Copies all keys from this[sstart] onward to dest[dstart]
+ void copy_to(int sstart, size_t node_count,
+ VariableLengthKeyList &dest, size_t other_node_count,
+ int dstart) {
+ size_t to_copy = node_count - sstart;
+ ham_assert(to_copy > 0);
+
+ // make sure that the other node has sufficient capacity in its
+ // UpfrontIndex
+ dest.m_index.change_range_size(other_node_count, 0, 0,
+ m_index.get_capacity());
+
+ for (size_t i = 0; i < to_copy; i++) {
+ size_t size = get_key_size(sstart + i);
+
+ uint8_t *p = m_index.get_chunk_data_by_offset(
+ m_index.get_chunk_offset(sstart + i));
+ uint8_t flags = *p;
+ uint8_t *data = p + 1;
+
+ dest.m_index.insert(other_node_count + i, dstart + i);
+ // Add 1 byte for key flags
+ uint32_t offset = dest.m_index.allocate_space(other_node_count + i + 1,
+ dstart + i, size + 1);
+ p = dest.m_index.get_chunk_data_by_offset(offset);
+ *p = flags; // sets flags
+ memcpy(p + 1, data, size); // and data
+ }
+
+ // A lot of keys will be invalidated after copying, therefore make
+ // sure that the next_offset is recalculated when it's required
+ m_index.invalidate_next_offset();
+ }
+
+ // Checks the integrity of this node. Throws an exception if there is a
+ // violation.
+ void check_integrity(Context *context, size_t node_count) const {
+ ByteArray arena;
+
+ // verify that the offsets and sizes are not overlapping
+ m_index.check_integrity(node_count);
+
+ // make sure that extkeys are handled correctly
+ for (size_t i = 0; i < node_count; i++) {
+ if (get_key_size(i) > m_extkey_threshold
+ && !(get_key_flags(i) & BtreeKey::kExtendedKey)) {
+ ham_log(("key size %d, but key is not extended", get_key_size(i)));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+
+ if (get_key_flags(i) & BtreeKey::kExtendedKey) {
+ uint64_t blobid = get_extended_blob_id(i);
+ if (!blobid) {
+ ham_log(("integrity check failed: item %u "
+ "is extended, but has no blob", i));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+
+ // make sure that the extended blob can be loaded
+ ham_record_t record = {0};
+ m_db->lenv()->blob_manager()->read(context, blobid,
+ &record, 0, &arena);
+
+ // compare it to the cached key (if there is one)
+ if (m_extkey_cache) {
+ ExtKeyCache::iterator it = m_extkey_cache->find(blobid);
+ if (it != m_extkey_cache->end()) {
+ if (record.size != it->second.get_size()) {
+ ham_log(("Cached extended key differs from real key"));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ if (memcmp(record.data, it->second.get_ptr(), record.size)) {
+ ham_log(("Cached extended key differs from real key"));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Rearranges the list
+ void vacuumize(size_t node_count, bool force) {
+ if (force)
+ m_index.increase_vacuumize_counter(100);
+ m_index.maybe_vacuumize(node_count);
+ }
+
+ // Change the range size; the capacity will be adjusted, the data is
+ // copied as necessary
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t capacity_hint) {
+ // no capacity given? then try to find a good default one
+ if (capacity_hint == 0) {
+ capacity_hint = (new_range_size - m_index.get_next_offset(node_count)
+ - get_full_key_size()) / m_index.get_full_index_size();
+ if (capacity_hint <= node_count)
+ capacity_hint = node_count + 1;
+ }
+
+ // if there's not enough space for the new capacity then try to reduce
+ // the capacity
+ if (m_index.get_next_offset(node_count) + get_full_key_size(0)
+ + capacity_hint * m_index.get_full_index_size()
+ + UpfrontIndex::kPayloadOffset
+ > new_range_size)
+ capacity_hint = node_count + 1;
+
+ m_index.change_range_size(node_count, new_data_ptr, new_range_size,
+ capacity_hint);
+ m_data = new_data_ptr;
+ m_range_size = new_range_size;
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BaseKeyList::fill_metrics(metrics, node_count);
+ BtreeStatistics::update_min_max_avg(&metrics->keylist_index,
+ (uint32_t)(m_index.get_capacity()
+ * m_index.get_full_index_size()));
+ BtreeStatistics::update_min_max_avg(&metrics->keylist_unused,
+ m_range_size
+ - (uint32_t)m_index.get_required_range_size(node_count));
+ }
+
+ // Prints a slot to |out| (for debugging)
+ void print(Context *context, int slot, std::stringstream &out) {
+ ham_key_t tmp = {0};
+ if (get_key_flags(slot) & BtreeKey::kExtendedKey) {
+ get_extended_key(context, get_extended_blob_id(slot), &tmp);
+ }
+ else {
+ tmp.size = get_key_size(slot);
+ tmp.data = get_key_data(slot);
+ }
+ out << (const char *)tmp.data;
+ }
+
+ // Returns the pointer to a key's inline data (const flavour)
+ uint8_t *get_key_data(int slot) const {
+ uint32_t offset = m_index.get_chunk_offset(slot);
+ return (m_index.get_chunk_data_by_offset(offset) + 1);
+ }
+
+ // Returns the size of a key
+ size_t get_key_size(int slot) const {
+ return (m_index.get_chunk_size(slot) - 1);
+ }
+
+ private:
+ // Returns the flags of a key. Flags are defined in btree_flags.h
+ uint8_t get_key_flags(int slot) const {
+ uint32_t offset = m_index.get_chunk_offset(slot);
+ return (*m_index.get_chunk_data_by_offset(offset));
+ }
+
+ // Sets the flags of a key. Flags are defined in btree_flags.h
+ void set_key_flags(int slot, uint8_t flags) {
+ uint32_t offset = m_index.get_chunk_offset(slot);
+ *m_index.get_chunk_data_by_offset(offset) = flags;
+ }
+
+ // Overwrites the (inline) data of the key
+ void set_key_data(int slot, const void *ptr, size_t size) {
+ ham_assert(m_index.get_chunk_size(slot) >= size);
+ set_key_size(slot, (uint16_t)size);
+ memcpy(get_key_data(slot), ptr, size);
+ }
+
+ // Sets the size of a key
+ void set_key_size(int slot, size_t size) {
+ ham_assert(size + 1 <= m_index.get_chunk_size(slot));
+ m_index.set_chunk_size(slot, size + 1);
+ }
+
+ // Returns the record address of an extended key overflow area
+ uint64_t get_extended_blob_id(int slot) const {
+ return (*(uint64_t *)get_key_data(slot));
+ }
+
+ // Sets the record address of an extended key overflow area
+ void set_extended_blob_id(int slot, uint64_t blobid) {
+ *(uint64_t *)get_key_data(slot) = blobid;
+ }
+
+ // Erases an extended key from disk and from the cache
+ void erase_extended_key(Context *context, uint64_t blobid) {
+ m_db->lenv()->blob_manager()->erase(context, blobid);
+ if (m_extkey_cache) {
+ ExtKeyCache::iterator it = m_extkey_cache->find(blobid);
+ if (it != m_extkey_cache->end())
+ m_extkey_cache->erase(it);
+ }
+ }
+
+ // Retrieves the extended key at |blobid| and stores it in |key|; will
+ // use the cache.
+ void get_extended_key(Context *context, uint64_t blob_id, ham_key_t *key) {
+ if (!m_extkey_cache)
+ m_extkey_cache.reset(new ExtKeyCache());
+ else {
+ ExtKeyCache::iterator it = m_extkey_cache->find(blob_id);
+ if (it != m_extkey_cache->end()) {
+ key->size = it->second.get_size();
+ key->data = it->second.get_ptr();
+ return;
+ }
+ }
+
+ ByteArray arena;
+ ham_record_t record = {0};
+ m_db->lenv()->blob_manager()->read(context, blob_id, &record,
+ HAM_FORCE_DEEP_COPY, &arena);
+ (*m_extkey_cache)[blob_id] = arena;
+ arena.disown();
+ key->data = record.data;
+ key->size = record.size;
+ }
+
+ // Allocates an extended key and stores it in the cache
+ uint64_t add_extended_key(Context *context, const ham_key_t *key) {
+ if (!m_extkey_cache)
+ m_extkey_cache.reset(new ExtKeyCache());
+
+ ham_record_t rec = {0};
+ rec.data = key->data;
+ rec.size = key->size;
+
+ uint64_t blob_id = m_db->lenv()->blob_manager()->allocate(
+ context, &rec, 0);
+ ham_assert(blob_id != 0);
+ ham_assert(m_extkey_cache->find(blob_id) == m_extkey_cache->end());
+
+ ByteArray arena;
+ arena.resize(key->size);
+ memcpy(arena.get_ptr(), key->data, key->size);
+ (*m_extkey_cache)[blob_id] = arena;
+ arena.disown();
+
+ // increment counter (for statistics)
+ Globals::ms_extended_keys++;
+
+ return (blob_id);
+ }
+
+ // The database
+ LocalDatabase *m_db;
+
+ // The index for managing the variable-length chunks
+ UpfrontIndex m_index;
+
+ // Pointer to the data of the node
+ uint8_t *m_data;
+
+ // Cache for extended keys
+ ScopedPtr<ExtKeyCache> m_extkey_cache;
+
+ // Threshold for extended keys; if key size is > threshold then the
+ // key is moved to a blob
+ size_t m_extkey_threshold;
+};
+
+} // namespace DefLayout
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_KEYS_VARLEN_H */
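The chunk format described at the top of this file (|Flags|Data...|, one leading flags byte per key) and the extended-key decision can be sketched in isolation. The names below (kExtendedKeyFlag, kExtkeyThreshold, encode_chunk) are illustrative stand-ins, not the hamsterdb identifiers; in the real code the flag value comes from btree_flags.h and the blob id from the BlobManager:

#include <cstdint>
#include <cstring>
#include <cstdio>
#include <vector>

const uint8_t kExtendedKeyFlag = 0x01;  // illustrative flag value
const size_t  kExtkeyThreshold = 64;    // e.g. the threshold chosen for 1 KB pages

// Encodes one key chunk: a flags byte followed by either the key data
// itself or, for keys above the threshold, an 8-byte blob id.
static std::vector<uint8_t> encode_chunk(const void *key, size_t key_size,
                uint64_t blob_id_if_large) {
  std::vector<uint8_t> chunk;
  if (key_size > kExtkeyThreshold) {
    chunk.resize(1 + sizeof(uint64_t));
    chunk[0] = kExtendedKeyFlag;
    std::memcpy(&chunk[1], &blob_id_if_large, sizeof(uint64_t));
  }
  else {
    chunk.resize(1 + key_size);
    chunk[0] = 0;
    std::memcpy(&chunk[1], key, key_size);
  }
  return chunk;
}

int main() {
  const char *small = "apple";
  std::vector<uint8_t> c1 = encode_chunk(small, std::strlen(small), 0);
  std::printf("small key -> %zu byte chunk, flags=0x%02x\n", c1.size(), (unsigned)c1[0]);

  std::vector<uint8_t> big(200, 'x');   // larger than the threshold
  std::vector<uint8_t> c2 = encode_chunk(big.data(), big.size(), 0x1234);
  std::printf("large key -> %zu byte chunk, flags=0x%02x\n", c2.size(), (unsigned)c2[0]);
}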
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_node.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_node.h
new file mode 100644
index 0000000000..854e68e1a5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_node.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_NODE_H
+#define HAM_BTREE_NODE_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "2page/page.h"
+#include "3btree/btree_flags.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class PBtreeKeyDefault;
+
+#include "1base/packstart.h"
+
+/*
+ * A BtreeNode structure spans the persistent part of a Page
+ *
+ * This structure is directly written to/read from the file.
+ */
+HAM_PACK_0 struct HAM_PACK_1 PBtreeNode
+{
+ public:
+ // Result of the insert() operation
+ struct InsertResult {
+ InsertResult(ham_status_t _status = 0, int _slot = 0)
+ : status(_status), slot(_slot) {
+ }
+
+ // hamsterdb status code
+ ham_status_t status;
+
+ // the slot of the new (or existing) key
+ int slot;
+ };
+
+ enum {
+ // insert key at the beginning of the page
+ kInsertPrepend = 1,
+
+ // append key to the end of the page
+ kInsertAppend = 2,
+ };
+
+ enum {
+ // node is a leaf
+ kLeafNode = 1
+ };
+
+ // Returns a PBtreeNode from a Page
+ static PBtreeNode *from_page(Page *page) {
+ return ((PBtreeNode *)page->get_payload());
+ }
+
+ // Returns the offset (in bytes) of the member |m_data|
+ static uint32_t get_entry_offset() {
+ return (sizeof(PBtreeNode) - 1);
+ }
+
+ // Returns the flags of the btree node (|kLeafNode|)
+ uint32_t get_flags() const {
+ return (m_flags);
+ }
+
+ // Sets the flags of the btree node (|kLeafNode|)
+ void set_flags(uint32_t flags) {
+ m_flags = flags;
+ }
+
+ // Returns the number of entries in a BtreeNode
+ uint32_t get_count() const {
+ return (m_count);
+ }
+
+ // Sets the number of entries in a BtreeNode
+ void set_count(uint32_t count) {
+ m_count = count;
+ }
+
+ // Returns the address of the left sibling of this node
+ uint64_t get_left() const {
+ return (m_left);
+ }
+
+ // Sets the address of the left sibling of this node
+ void set_left(uint64_t left) {
+ m_left = left;
+ }
+
+ // Returns the address of the right sibling of this node
+ uint64_t get_right() const {
+ return (m_right);
+ }
+
+ // Sets the address of the right sibling of this node
+ void set_right(uint64_t right) {
+ m_right = right;
+ }
+
+ // Returns the ptr_down of this node
+ uint64_t get_ptr_down() const {
+ return (m_ptr_down);
+ }
+
+ // Returns true if this btree node is a leaf node
+ bool is_leaf() const {
+ return (m_flags & kLeafNode);
+ }
+
+ // Sets the ptr_down of this node
+ void set_ptr_down(uint64_t ptr_down) {
+ m_ptr_down = ptr_down;
+ }
+
+ // Returns a pointer to the key data
+ uint8_t *get_data() {
+ return (&m_data[0]);
+ }
+
+ const uint8_t *get_data() const {
+ return (&m_data[0]);
+ }
+
+ private:
+ // flags of this node
+ uint32_t m_flags;
+
+ // number of used entries in the node
+ uint32_t m_count;
+
+ // address of left sibling
+ uint64_t m_left;
+
+ // address of right sibling
+ uint64_t m_right;
+
+ // address of child node whose items are smaller than all items
+ // in this node
+ uint64_t m_ptr_down;
+
+ // the entries of this node
+ uint8_t m_data[1];
+
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_NODE_H */
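Because PBtreeNode is written to disk verbatim, its header layout is fixed: two uint32_t fields plus three uint64_t fields give 28 bytes, and get_entry_offset() = sizeof(PBtreeNode) - 1 subtracts the one-byte m_data[1] placeholder again. A standalone sketch of the same layout, using a #pragma pack pair as a stand-in for packstart.h/packstop.h:

#include <cstdint>
#include <cstdio>

#pragma pack(push, 1)              // stand-in for 1base/packstart.h
struct ToyBtreeNode {
  uint32_t flags;
  uint32_t count;
  uint64_t left;
  uint64_t right;
  uint64_t ptr_down;
  uint8_t  data[1];                // key/record payload starts here
};
#pragma pack(pop)                  // stand-in for 1base/packstop.h

int main() {
  // 4 + 4 + 8 + 8 + 8 = 28 header bytes, plus the 1-byte payload placeholder
  static_assert(sizeof(ToyBtreeNode) == 29, "unexpected packing");
  std::printf("entry offset = %zu\n", sizeof(ToyBtreeNode) - 1);   // 28
}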
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_node_proxy.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_node_proxy.h
new file mode 100644
index 0000000000..110bd05f08
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_node_proxy.h
@@ -0,0 +1,609 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_NODE_PROXY_H
+#define HAM_BTREE_NODE_PROXY_H
+
+#include "0root/root.h"
+
+#include <set>
+#include <string.h>
+#include <iostream>
+#include <sstream>
+#include <fstream>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/abi.h"
+#include "1base/dynamic_array.h"
+#include "1base/error.h"
+#include "2page/page.h"
+#include "3btree/btree_node.h"
+#include "3blob_manager/blob_manager.h"
+#include "4env/env_local.h"
+#include "4db/db_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+struct ScanVisitor;
+
+//
+// A BtreeNodeProxy wraps a PBtreeNode structure and defines the actual
+// format of the btree payload.
+//
+// The BtreeNodeProxy class provides access to the actual Btree nodes. The
+// layout of those nodes depends heavily on the database configuration,
+// and is implemented by template classes (btree_impl_default.h,
+// btree_impl_pax.h.).
+//
+class BtreeNodeProxy
+{
+ public:
+ // Constructor
+ BtreeNodeProxy(Page *page)
+ : m_page(page) {
+ }
+
+ // Destructor
+ virtual ~BtreeNodeProxy() {
+ }
+
+ // Returns the flags of the btree node (|kLeafNode|)
+ uint32_t get_flags() const {
+ return (PBtreeNode::from_page(m_page)->get_flags());
+ }
+
+ // Sets the flags of the btree node (|kLeafNode|)
+ void set_flags(uint32_t flags) {
+ PBtreeNode::from_page(m_page)->set_flags(flags);
+ }
+
+ // Returns the number of entries in the BtreeNode
+ size_t get_count() const {
+ return (PBtreeNode::from_page(m_page)->get_count());
+ }
+
+ // Sets the number of entries in the BtreeNode
+ void set_count(size_t count) {
+ PBtreeNode::from_page(m_page)->set_count((uint32_t)count);
+ }
+
+ // Returns true if this btree node is a leaf node
+ bool is_leaf() const {
+ return (PBtreeNode::from_page(m_page)->is_leaf());
+ }
+
+ // Returns the address of the left sibling of this node
+ uint64_t get_left() const {
+ return (PBtreeNode::from_page(m_page)->get_left());
+ }
+
+ // Sets the address of the left sibling of this node
+ void set_left(uint64_t address) {
+ PBtreeNode::from_page(m_page)->set_left(address);
+ }
+
+ // Returns the address of the right sibling of this node
+ uint64_t get_right() const {
+ return (PBtreeNode::from_page(m_page)->get_right());
+ }
+
+ // Sets the address of the right sibling of this node
+ void set_right(uint64_t address) {
+ PBtreeNode::from_page(m_page)->set_right(address);
+ }
+
+ // Returns the ptr_down of this node
+ uint64_t get_ptr_down() const {
+ return (PBtreeNode::from_page(m_page)->get_ptr_down());
+ }
+
+ // Sets the ptr_down of this node
+ void set_ptr_down(uint64_t address) {
+ PBtreeNode::from_page(m_page)->set_ptr_down(address);
+ }
+
+ // Returns the page pointer - const version
+ const Page *get_page() const {
+ return (m_page);
+ }
+
+ // Returns the page pointer
+ Page *get_page() {
+ return (m_page);
+ }
+
+ // Returns the estimated capacity of this node
+ virtual size_t estimate_capacity() const = 0;
+
+    // Checks the integrity of the node. Throws an exception if the node
+    // is corrupt. Called by ham_db_check_integrity().
+ virtual void check_integrity(Context *context) const = 0;
+
+ // Iterates all keys, calls the |visitor| on each
+ virtual void scan(Context *context, ScanVisitor *visitor,
+ size_t start, bool distinct) = 0;
+
+    // Compares the two keys. Returns 0 if both are equal, otherwise -1 (if
+    // |lhs| is smaller) or +1 (if |lhs| is greater).
+ virtual int compare(const ham_key_t *lhs, const ham_key_t *rhs) const = 0;
+
+ // Compares a public key and an internal key
+ virtual int compare(Context *context, const ham_key_t *lhs, int rhs) = 0;
+
+ // Returns true if the public key (|lhs|) and an internal key (slot
+ // |rhs|) are equal
+ virtual bool equals(Context *context, const ham_key_t *lhs, int rhs) = 0;
+
+    // Searches the node for the |key|, and returns the slot of this key.
+    // If |record_id| is not null then it will store the record id of the
+    // matching child node.
+    // If |pcmp| is not null then it will store the result of the last
+    // compare operation.
+ virtual int find_child(Context *context, ham_key_t *key,
+ uint64_t *record_id = 0, int *pcmp = 0) = 0;
+
+ // Searches the node for the |key|, but will always return -1 if
+ // an exact match was not found
+ virtual int find_exact(Context *context, ham_key_t *key) = 0;
+
+ // Returns the full key at the |slot|. Also resolves extended keys
+ // and respects HAM_KEY_USER_ALLOC in dest->flags.
+ virtual void get_key(Context *context, int slot, ByteArray *arena,
+ ham_key_t *dest) = 0;
+
+ // Returns the number of records of a key at the given |slot|. This is
+ // either 1 or higher, but only if duplicate keys exist.
+ virtual int get_record_count(Context *context, int slot) = 0;
+
+ // Returns the record size of a key or one of its duplicates.
+ virtual uint64_t get_record_size(Context *context, int slot,
+ int duplicate_index) = 0;
+
+ // Returns the record id of the key at the given |slot|
+ // Only for internal nodes!
+ virtual uint64_t get_record_id(Context *context, int slot) const = 0;
+
+ // Sets the record id of the key at the given |slot|
+ // Only for internal nodes!
+ virtual void set_record_id(Context *context, int slot, uint64_t id) = 0;
+
+ // Returns the full record and stores it in |dest|. The record is identified
+ // by |slot| and |duplicate_index|. TINY and SMALL records are handled
+ // correctly, as well as HAM_DIRECT_ACCESS.
+ virtual void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags,
+ int duplicate_index = 0) = 0;
+
+ // High-level function to set a new record
+ //
+ // flags can be
+ // - HAM_OVERWRITE
+ // - HAM_DUPLICATE*
+ //
+ // a previously existing blob will be deleted if necessary
+ virtual void set_record(Context *context, int slot, ham_record_t *record,
+ int duplicate_index, uint32_t flags,
+ uint32_t *new_duplicate_index) = 0;
+
+ // Removes the record (or the duplicate of it, if |duplicate_index| is > 0).
+ // If |all_duplicates| is set then all duplicates of this key are deleted.
+ // |has_duplicates_left| will return true if there are more duplicates left
+ // after the current one was deleted.
+ virtual void erase_record(Context *context, int slot, int duplicate_index,
+ bool all_duplicates, bool *has_duplicates_left) = 0;
+
+ // High level function to remove an existing entry
+ virtual void erase(Context *context, int slot) = 0;
+
+ // Erases all extended keys, overflow areas and records that are
+ // linked from this page; usually called when the Database is deleted
+ // or an In-Memory Database is freed
+ virtual void remove_all_entries(Context *context) = 0;
+
+ // High level function to insert a new key. Only inserts the key. The
+ // actual record is then updated with |set_record|.
+ virtual PBtreeNode::InsertResult insert(Context *context, ham_key_t *key,
+ uint32_t flags) = 0;
+
+ // Returns true if a node requires a split to insert a new |key|
+ virtual bool requires_split(Context *context, const ham_key_t *key = 0) = 0;
+
+ // Returns true if a node requires a merge or a shift
+ virtual bool requires_merge() const = 0;
+
+ // Splits a page and moves all elements at a position >= |pivot|
+ // to the |other| page. If the node is a leaf node then the pivot element
+ // is also copied, otherwise it is not because it will be propagated
+ // to the parent node instead (by the caller).
+ virtual void split(Context *context, BtreeNodeProxy *other, int pivot) = 0;
+
+ // Merges all keys from the |other| node to this node
+ virtual void merge_from(Context *context, BtreeNodeProxy *other) = 0;
+
+ // Fills the btree_metrics structure
+ virtual void fill_metrics(btree_metrics_t *metrics) = 0;
+
+ // Prints the node to stdout. Only for testing and debugging!
+ virtual void print(Context *context, size_t node_count = 0) = 0;
+
+ // Returns the class name. Only for testing! Uses the functions exported
+ // by abi.h, which are only available on assorted platforms. Other
+ // platforms will return empty strings.
+ virtual std::string test_get_classname() const = 0;
+
+ protected:
+ Page *m_page;
+};
+
+//
+// A comparator which uses a user-supplied callback function (installed
+// with |ham_db_set_compare_func|) to compare two keys
+//
+struct CallbackCompare
+{
+ CallbackCompare(LocalDatabase *db)
+ : m_db(db) {
+ }
+
+ int operator()(const void *lhs_data, uint32_t lhs_size,
+ const void *rhs_data, uint32_t rhs_size) const {
+ return (m_db->compare_func()((::ham_db_t *)m_db, (uint8_t *)lhs_data,
+ lhs_size, (uint8_t *)rhs_data, rhs_size));
+ }
+
+ LocalDatabase *m_db;
+};
+
+//
+// A comparator for numeric keys.
+// The actual type for the key is supplied with a template parameter.
+// This has to be a POD type with support for operators < and >.
+//
+template<typename T>
+struct NumericCompare
+{
+ NumericCompare(LocalDatabase *) {
+ }
+
+ int operator()(const void *lhs_data, uint32_t lhs_size,
+ const void *rhs_data, uint32_t rhs_size) const {
+ ham_assert(lhs_size == rhs_size);
+ ham_assert(lhs_size == sizeof(T));
+ T l = *(T *)lhs_data;
+ T r = *(T *)rhs_data;
+ return (l < r ? -1 : (l > r ? +1 : 0));
+ }
+};
+
+//
+// The default comparator for two keys, implemented with memcmp(3).
+// Both keys have the same size!
+//
+struct FixedSizeCompare
+{
+ FixedSizeCompare(LocalDatabase *) {
+ }
+
+ int operator()(const void *lhs_data, uint32_t lhs_size,
+ const void *rhs_data, uint32_t rhs_size) const {
+ ham_assert(lhs_size == rhs_size);
+ return (::memcmp(lhs_data, rhs_data, lhs_size));
+ }
+};
+
+//
+// The default comparator for two keys, implemented with memcmp(3).
+// Both keys can have different sizes! If one key is a prefix of the other
+// then the shorter key is treated as "smaller"
+//
+struct VariableSizeCompare
+{
+ VariableSizeCompare(LocalDatabase *) {
+ }
+
+ int operator()(const void *lhs_data, uint32_t lhs_size,
+ const void *rhs_data, uint32_t rhs_size) const {
+ if (lhs_size < rhs_size) {
+ int m = ::memcmp(lhs_data, rhs_data, lhs_size);
+ return (m == 0 ? -1 : m);
+ }
+ if (rhs_size < lhs_size) {
+ int m = ::memcmp(lhs_data, rhs_data, rhs_size);
+ return (m == 0 ? +1 : m);
+ }
+ return (::memcmp(lhs_data, rhs_data, lhs_size));
+ }
+};
+
+//
+// An implementation of the BtreeNodeProxy interface declared above.
+// Its actual memory implementation of the btree keys/records is delegated
+// to a template parameter |NodeImpl|, and the key comparisons are
+// delegated to |Comparator|.
+//
+template<class NodeImpl, class Comparator>
+class BtreeNodeProxyImpl : public BtreeNodeProxy
+{
+ typedef BtreeNodeProxyImpl<NodeImpl, Comparator> ClassType;
+
+ public:
+ // Constructor
+ BtreeNodeProxyImpl(Page *page)
+ : BtreeNodeProxy(page), m_impl(page) {
+ }
+
+ // Returns the estimated capacity of this node
+ virtual size_t estimate_capacity() const {
+ return (m_impl.estimate_capacity());
+ }
+
+ // Checks the integrity of the node
+ virtual void check_integrity(Context *context) const {
+ m_impl.check_integrity(context);
+ }
+
+ // Iterates all keys, calls the |visitor| on each
+ virtual void scan(Context *context, ScanVisitor *visitor,
+ size_t start, bool distinct) {
+ m_impl.scan(context, visitor, start, distinct);
+ }
+
+ // Compares two internal keys using the supplied comparator
+ virtual int compare(const ham_key_t *lhs, const ham_key_t *rhs) const {
+ Comparator cmp(m_page->get_db());
+ return (cmp(lhs->data, lhs->size, rhs->data, rhs->size));
+ }
+
+ // Compares a public key and an internal key
+ virtual int compare(Context *context, const ham_key_t *lhs, int rhs) {
+ Comparator cmp(m_page->get_db());
+ return (m_impl.compare(context, lhs, rhs, cmp));
+ }
+
+ // Returns true if the public key and an internal key are equal
+ virtual bool equals(Context *context, const ham_key_t *lhs, int rhs) {
+ return (0 == compare(context, lhs, rhs));
+ }
+
+ // Searches the node for the key and returns the slot of this key.
+ // If |pcmp| is not null then it will store the result of the last
+ // compare operation.
+ virtual int find_child(Context *context, ham_key_t *key,
+ uint64_t *precord_id = 0, int *pcmp = 0) {
+ int dummy;
+ if (get_count() == 0) {
+ if (pcmp)
+ *pcmp = 1;
+ if (precord_id)
+ *precord_id = get_ptr_down();
+ return (-1);
+ }
+ Comparator cmp(m_page->get_db());
+ return (m_impl.find_child(context, key, cmp,
+ precord_id ? precord_id : 0,
+ pcmp ? pcmp : &dummy));
+ }
+
+ // Searches the node for the |key|, but will always return -1 if
+ // an exact match was not found
+ virtual int find_exact(Context *context, ham_key_t *key) {
+ if (get_count() == 0)
+ return (-1);
+ Comparator cmp(m_page->get_db());
+ return (m_impl.find_exact(context, key, cmp));
+ }
+
+ // Returns the full key at the |slot|. Also resolves extended keys
+ // and respects HAM_KEY_USER_ALLOC in dest->flags.
+ virtual void get_key(Context *context, int slot, ByteArray *arena,
+ ham_key_t *dest) {
+ m_impl.get_key(context, slot, arena, dest);
+ }
+
+ // Returns the number of records of a key at the given |slot|
+ virtual int get_record_count(Context *context, int slot) {
+ ham_assert(slot < (int)get_count());
+ return (m_impl.get_record_count(context, slot));
+ }
+
+ // Returns the full record and stores it in |dest|. The record is identified
+ // by |slot| and |duplicate_index|. TINY and SMALL records are handled
+ // correctly, as well as HAM_DIRECT_ACCESS.
+ virtual void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags,
+ int duplicate_index = 0) {
+ ham_assert(slot < (int)get_count());
+ m_impl.get_record(context, slot, arena, record, flags, duplicate_index);
+ }
+
+ virtual void set_record(Context *context, int slot, ham_record_t *record,
+ int duplicate_index, uint32_t flags,
+ uint32_t *new_duplicate_index) {
+ m_impl.set_record(context, slot, record, duplicate_index, flags,
+ new_duplicate_index);
+ }
+
+ // Returns the record size of a key or one of its duplicates
+ virtual uint64_t get_record_size(Context *context, int slot,
+ int duplicate_index) {
+ ham_assert(slot < (int)get_count());
+ return (m_impl.get_record_size(context, slot, duplicate_index));
+ }
+
+ // Returns the record id of the key at the given |slot|
+ // Only for internal nodes!
+ virtual uint64_t get_record_id(Context *context, int slot) const {
+ ham_assert(slot < (int)get_count());
+ return (m_impl.get_record_id(context, slot));
+ }
+
+ // Sets the record id of the key at the given |slot|
+ // Only for internal nodes!
+ virtual void set_record_id(Context *context, int slot, uint64_t id) {
+ return (m_impl.set_record_id(context, slot, id));
+ }
+
+ // High level function to remove an existing entry. Will call
+ // |erase_extended_key| to clean up (a potential) extended key,
+ // and |erase_record| on each record that is associated with the key.
+ virtual void erase(Context *context, int slot) {
+ ham_assert(slot < (int)get_count());
+ m_impl.erase(context, slot);
+ set_count(get_count() - 1);
+ }
+
+ // Removes the record (or the duplicate of it, if |duplicate_index| is > 0).
+ // If |all_duplicates| is set then all duplicates of this key are deleted.
+ // |has_duplicates_left| will return true if there are more duplicates left
+ // after the current one was deleted.
+ virtual void erase_record(Context *context, int slot, int duplicate_index,
+ bool all_duplicates, bool *has_duplicates_left) {
+ ham_assert(slot < (int)get_count());
+ m_impl.erase_record(context, slot, duplicate_index, all_duplicates);
+ if (has_duplicates_left)
+ *has_duplicates_left = get_record_count(context, slot) > 0;
+ }
+
+ // Erases all extended keys, overflow areas and records that are
+ // linked from this page; usually called when the Database is deleted
+ // or an In-Memory Database is closed
+ virtual void remove_all_entries(Context *context) {
+ size_t node_count = get_count();
+ for (size_t i = 0; i < node_count; i++) {
+ m_impl.erase_extended_key(context, i);
+
+ // If we're in the leaf page, delete the associated record. (Only
+ // leaf nodes have records; internal nodes have record IDs that
+ // reference other pages, and these pages must not be deleted.)
+ if (is_leaf())
+ erase_record(context, i, 0, true, 0);
+ }
+ }
+
+ // High level function to insert a new key. Only inserts the key. The
+ // actual record is then updated with |set_record|.
+ virtual PBtreeNode::InsertResult insert(Context *context,
+ ham_key_t *key, uint32_t flags) {
+ PBtreeNode::InsertResult result(0, 0);
+ if (m_impl.requires_split(context, key)) {
+ result.status = HAM_LIMITS_REACHED;
+ return (result);
+ }
+
+ Comparator cmp(m_page->get_db());
+ try {
+ result = m_impl.insert(context, key, flags, cmp);
+ }
+ catch (Exception &ex) {
+ result.status = ex.code;
+ }
+
+ // split required? then reorganize the node, try again
+ if (result.status == HAM_LIMITS_REACHED) {
+ try {
+ if (m_impl.reorganize(context, key))
+ result = m_impl.insert(context, key, flags, cmp);
+ }
+ catch (Exception &ex) {
+ result.status = ex.code;
+ }
+ }
+
+ if (result.status == HAM_SUCCESS)
+ set_count(get_count() + 1);
+
+ return (result);
+ }
+
+ // Returns true if a node requires a split to insert |key|
+ virtual bool requires_split(Context *context, const ham_key_t *key = 0) {
+ return (m_impl.requires_split(context, key));
+ }
+
+ // Returns true if a node requires a merge or a shift
+ virtual bool requires_merge() const {
+ return (m_impl.requires_merge());
+ }
+
+ // Splits the node
+ virtual void split(Context *context, BtreeNodeProxy *other_node,
+ int pivot) {
+ ClassType *other = dynamic_cast<ClassType *>(other_node);
+ ham_assert(other != 0);
+
+ m_impl.split(context, &other->m_impl, pivot);
+
+ size_t node_count = get_count();
+ set_count(pivot);
+
+ if (is_leaf())
+ other->set_count(node_count - pivot);
+ else
+ other->set_count(node_count - pivot - 1);
+ }
+
+ // Merges all keys from the |other| node into this node
+ virtual void merge_from(Context *context, BtreeNodeProxy *other_node) {
+ ClassType *other = dynamic_cast<ClassType *>(other_node);
+ ham_assert(other != 0);
+
+ m_impl.merge_from(context, &other->m_impl);
+
+ set_count(get_count() + other->get_count());
+ other->set_count(0);
+ }
+
+ // Fills the btree_metrics structure
+ virtual void fill_metrics(btree_metrics_t *metrics) {
+ m_impl.fill_metrics(metrics, get_count());
+ }
+
+ // Prints the node to stdout (for debugging)
+ virtual void print(Context *context, size_t node_count = 0) {
+ std::cout << "page " << m_page->get_address() << ": " << get_count()
+ << " elements (leaf: " << (is_leaf() ? 1 : 0) << ", left: "
+ << get_left() << ", right: " << get_right() << ", ptr_down: "
+ << get_ptr_down() << ")" << std::endl;
+ if (!node_count)
+ node_count = get_count();
+ for (size_t i = 0; i < node_count; i++)
+ m_impl.print(context, i);
+ }
+
+ // Returns the class name. Only for testing! Uses the functions exported
+  // by abi.h, which are not available on all platforms; on unsupported
+  // platforms an empty string is returned.
+ virtual std::string test_get_classname() const {
+ return (get_classname(*this));
+ }
+
+ private:
+ NodeImpl m_impl;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_NODE_PROXY_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_base.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_base.h
new file mode 100644
index 0000000000..6128c8834d
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_base.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Base class for RecordLists
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_RECORDS_BASE_H
+#define HAM_BTREE_RECORDS_BASE_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct BaseRecordList
+{
+ BaseRecordList()
+ : m_range_size(0) {
+ }
+
+ // Checks the integrity of this node. Throws an exception if there is a
+ // violation.
+ void check_integrity(Context *context, size_t node_count) const {
+ }
+
+ // Rearranges the list
+ void vacuumize(size_t node_count, bool force) const {
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BtreeStatistics::update_min_max_avg(&metrics->recordlist_ranges,
+ m_range_size);
+ }
+
+ // The size of the range (in bytes)
+ size_t m_range_size;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_RECORDS_BASE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_default.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_default.h
new file mode 100644
index 0000000000..6fcb6f1cb7
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_default.h
@@ -0,0 +1,424 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The DefaultRecordList provides simplified access to a list of records,
+ * where each record is either an 8-byte record identifier (specifying the
+ * address of a blob) or is stored inline, if the record's size is <= 8 bytes.
+ *
+ * Stores 1 byte of flags per record (see btree_flags.h).
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
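+
+/*
+ * A rough layout sketch (an illustration based on create() below, assuming
+ * HAM_RECORD_SIZE_UNLIMITED): the range is split into a flag area followed
+ * by the 8-byte data area, e.g. for a capacity of three slots:
+ *
+ *   [flag 0][flag 1][flag 2] [data 0: 8 bytes][data 1: 8 bytes][data 2: 8 bytes]
+ *
+ * With a fixed record size the flag area is omitted and only the 8-byte
+ * slots (inline data or blob ids) are stored.
+ */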
+
+#ifndef HAM_BTREE_RECORDS_DEFAULT_H
+#define HAM_BTREE_RECORDS_DEFAULT_H
+
+#include "0root/root.h"
+
+#include <sstream>
+#include <iostream>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_node.h"
+#include "3btree/btree_records_base.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// The template classes in this file are wrapped in a separate namespace
+// to avoid naming clashes with btree_impl_default.h
+//
+namespace PaxLayout {
+
+class DefaultRecordList : public BaseRecordList
+{
+ public:
+ enum {
+ // A flag whether this RecordList has sequential data
+ kHasSequentialData = 1
+ };
+
+ // Constructor
+ DefaultRecordList(LocalDatabase *db, PBtreeNode *node)
+ : m_db(db), m_flags(0), m_data(0) {
+ }
+
+ // Sets the data pointer; required for initialization
+ void create(uint8_t *data, size_t range_size) {
+ size_t capacity = range_size / get_full_record_size();
+ m_range_size = range_size;
+
+ if (m_db->config().record_size == HAM_RECORD_SIZE_UNLIMITED) {
+ m_flags = data;
+ m_data = (uint64_t *)&data[capacity];
+ }
+ else {
+ m_flags = 0;
+ m_data = (uint64_t *)data;
+ }
+ }
+
+ // Opens an existing RecordList
+ void open(uint8_t *data, size_t range_size, size_t node_count) {
+ size_t capacity = range_size / get_full_record_size();
+ m_range_size = range_size;
+
+ if (m_db->config().record_size == HAM_RECORD_SIZE_UNLIMITED) {
+ m_flags = data;
+ m_data = (uint64_t *)&data[capacity];
+ }
+ else {
+ m_flags = 0;
+ m_data = (uint64_t *)data;
+ }
+ }
+
+ // Calculates the required size for a range
+ size_t get_required_range_size(size_t node_count) {
+ return (node_count * get_full_record_size());
+ }
+
+ // Returns the actual record size including overhead
+ size_t get_full_record_size() const {
+ return (sizeof(uint64_t) +
+ (m_db->config().record_size == HAM_RECORD_SIZE_UNLIMITED
+ ? 1
+ : 0));
+ }
+
+ // Returns the record counter of a key
+ int get_record_count(Context *context, int slot) const {
+ if (unlikely(!is_record_inline(slot) && get_record_id(slot) == 0))
+ return (0);
+ return (1);
+ }
+
+ // Returns the record size
+ uint64_t get_record_size(Context *context, int slot,
+ int duplicate_index = 0) const {
+ if (is_record_inline(slot))
+ return (get_inline_record_size(slot));
+
+ LocalEnvironment *env = m_db->lenv();
+ return (env->blob_manager()->get_blob_size(context, get_record_id(slot)));
+ }
+
+ // Returns the full record and stores it in |dest|; memory must be
+ // allocated by the caller
+ void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags,
+ int duplicate_index) const {
+ bool direct_access = (flags & HAM_DIRECT_ACCESS) != 0;
+
+ // the record is stored inline
+ if (is_record_inline(slot)) {
+ record->size = get_inline_record_size(slot);
+ if (record->size == 0) {
+ record->data = 0;
+ return;
+ }
+ if (flags & HAM_PARTIAL) {
+ ham_trace(("flag HAM_PARTIAL is not allowed if record is "
+ "stored inline"));
+ throw Exception(HAM_INV_PARAMETER);
+ }
+ if (direct_access)
+ record->data = (void *)&m_data[slot];
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, &m_data[slot], record->size);
+ }
+ return;
+ }
+
+ // the record is stored as a blob
+ LocalEnvironment *env = m_db->lenv();
+ env->blob_manager()->read(context, get_record_id(slot), record,
+ flags, arena);
+ }
+
+ // Updates the record of a key
+ void set_record(Context *context, int slot, int duplicate_index,
+ ham_record_t *record, uint32_t flags,
+ uint32_t *new_duplicate_index = 0) {
+ uint64_t ptr = get_record_id(slot);
+ LocalEnvironment *env = m_db->lenv();
+
+      // the record does not yet exist
+ if (!ptr && !is_record_inline(slot)) {
+      // a new inline record is inserted
+ if (record->size <= sizeof(uint64_t)) {
+ set_record_data(slot, record->data, record->size);
+ }
+      // a new (non-inline) record is inserted
+ else {
+ ptr = env->blob_manager()->allocate(context, record, flags);
+ set_record_id(slot, ptr);
+ }
+ return;
+ }
+
+      // an inline record exists
+ if (is_record_inline(slot)) {
+ // disable small/tiny/empty flags
+ set_record_flags(slot, get_record_flags(slot)
+ & ~(BtreeRecord::kBlobSizeSmall
+ | BtreeRecord::kBlobSizeTiny
+ | BtreeRecord::kBlobSizeEmpty));
+        // ... and is overwritten with another inline record
+ if (record->size <= sizeof(uint64_t)) {
+ set_record_data(slot, record->data, record->size);
+ }
+        // ... or with a (non-inline) record
+ else {
+ ptr = env->blob_manager()->allocate(context, record, flags);
+ set_record_id(slot, ptr);
+ }
+ return;
+ }
+
+      // a (non-inline) record exists
+ if (ptr) {
+        // ... and is overwritten by an inline record
+ if (record->size <= sizeof(uint64_t)) {
+ env->blob_manager()->erase(context, ptr);
+ set_record_data(slot, record->data, record->size);
+ }
+        // ... and is overwritten by a (non-inline) record
+ else {
+ ptr = env->blob_manager()->overwrite(context, ptr, record, flags);
+ set_record_id(slot, ptr);
+ }
+ return;
+ }
+
+ ham_assert(!"shouldn't be here");
+ throw Exception(HAM_INTERNAL_ERROR);
+ }
+
+ // Erases the record
+ void erase_record(Context *context, int slot, int duplicate_index = 0,
+ bool all_duplicates = true) {
+ if (is_record_inline(slot)) {
+ remove_inline_record(slot);
+ return;
+ }
+
+ // now erase the blob
+ m_db->lenv()->blob_manager()->erase(context, get_record_id(slot), 0);
+ set_record_id(slot, 0);
+ }
+
+ // Erases a whole slot by shifting all larger records to the "left"
+ void erase(Context *context, size_t node_count, int slot) {
+ if (slot < (int)node_count - 1) {
+ if (m_flags)
+ memmove(&m_flags[slot], &m_flags[slot + 1], node_count - slot - 1);
+ memmove(&m_data[slot], &m_data[slot + 1],
+ sizeof(uint64_t) * (node_count - slot - 1));
+ }
+ }
+
+ // Creates space for one additional record
+ void insert(Context *context, size_t node_count, int slot) {
+ if (slot < (int)node_count) {
+ if (m_flags)
+ memmove(&m_flags[slot + 1], &m_flags[slot], node_count - slot);
+ memmove(&m_data[slot + 1], &m_data[slot],
+ sizeof(uint64_t) * (node_count - slot));
+ }
+ if (m_flags)
+ m_flags[slot] = 0;
+ m_data[slot] = 0;
+ }
+
+    // Copies all records starting at this[sstart] to dest[dstart]
+ void copy_to(int sstart, size_t node_count, DefaultRecordList &dest,
+ size_t other_count, int dstart) {
+ if (m_flags)
+ memcpy(&dest.m_flags[dstart], &m_flags[sstart], (node_count - sstart));
+ memcpy(&dest.m_data[dstart], &m_data[sstart],
+ sizeof(uint64_t) * (node_count - sstart));
+ }
+
+ // Sets the record id
+ void set_record_id(int slot, uint64_t ptr) {
+ m_data[slot] = ptr;
+ }
+
+ // Returns the record id
+ uint64_t get_record_id(int slot, int duplicate_index = 0) const {
+ return (m_data[slot]);
+ }
+
+ // Returns true if there's not enough space for another record
+ bool requires_split(size_t node_count) const {
+ return ((node_count + 1) * get_full_record_size() >= m_range_size);
+ }
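+
+    // A small worked example for the check above (an illustration; numbers
+    // chosen arbitrarily): with unlimited record size the full record size
+    // is 9 bytes (8 data bytes + 1 flag byte), so in a 1000-byte range a
+    // split becomes necessary once node_count reaches 111, because
+    // (111 + 1) * 9 >= 1000.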
+
+ // Change the capacity; for PAX layouts this just means copying the
+ // data from one place to the other
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t capacity_hint) {
+ size_t new_capacity = capacity_hint
+ ? capacity_hint
+ : new_range_size / get_full_record_size();
+      // shift "to the right"? then first shift the record data, otherwise
+ // the flags might overwrite the data
+ if (m_flags == 0) {
+ memmove(new_data_ptr, m_data, node_count * sizeof(uint64_t));
+ }
+ else {
+ if (new_data_ptr > m_flags) {
+ memmove(&new_data_ptr[new_capacity], m_data,
+ node_count * sizeof(uint64_t));
+ memmove(new_data_ptr, m_flags, node_count);
+ }
+ else {
+ memmove(new_data_ptr, m_flags, node_count);
+ memmove(&new_data_ptr[new_capacity], m_data,
+ node_count * sizeof(uint64_t));
+ }
+ }
+
+ if (m_db->config().record_size == HAM_RECORD_SIZE_UNLIMITED) {
+ m_flags = new_data_ptr;
+ m_data = (uint64_t *)&new_data_ptr[new_capacity];
+ }
+ else {
+ m_flags = 0;
+ m_data = (uint64_t *)new_data_ptr;
+ }
+ m_range_size = new_range_size;
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BaseRecordList::fill_metrics(metrics, node_count);
+ BtreeStatistics::update_min_max_avg(&metrics->recordlist_unused,
+ m_range_size - get_required_range_size(node_count));
+ }
+
+ // Prints a slot to |out| (for debugging)
+ void print(Context *context, int slot, std::stringstream &out) const {
+ out << "(" << get_record_size(context, slot) << " bytes)";
+ }
+
+ private:
+ // Sets record data
+ void set_record_data(int slot, const void *ptr, size_t size) {
+ uint8_t flags = get_record_flags(slot);
+ flags &= ~(BtreeRecord::kBlobSizeSmall
+ | BtreeRecord::kBlobSizeTiny
+ | BtreeRecord::kBlobSizeEmpty);
+
+ if (size == 0) {
+ m_data[slot] = 0;
+ set_record_flags(slot, flags | BtreeRecord::kBlobSizeEmpty);
+ }
+ else if (size < 8) {
+ /* the highest byte of the record id is the size of the blob */
+ char *p = (char *)&m_data[slot];
+ p[sizeof(uint64_t) - 1] = size;
+ memcpy(&m_data[slot], ptr, size);
+ set_record_flags(slot, flags | BtreeRecord::kBlobSizeTiny);
+ }
+ else if (size == 8) {
+ memcpy(&m_data[slot], ptr, size);
+ set_record_flags(slot, flags | BtreeRecord::kBlobSizeSmall);
+ }
+ else {
+ ham_assert(!"shouldn't be here");
+ set_record_flags(slot, flags);
+ }
+ }
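+
+    // A rough illustration of the inline encoding above: a 5-byte record
+    // "hello" occupies the 8-byte slot as
+    //
+    //   byte index:   0    1    2    3    4    5    6    7
+    //   content:     'h'  'e'  'l'  'l'  'o'   ?    ?   0x05  (size)
+    //
+    // together with the kBlobSizeTiny flag. A record of exactly 8 bytes
+    // fills the slot and sets kBlobSizeSmall; an empty record clears the
+    // slot and sets kBlobSizeEmpty.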
+
+ // Returns the record flags of a given |slot|
+ uint8_t get_record_flags(int slot, int duplicate_index = 0)
+ const {
+ return (m_flags ? m_flags[slot] : 0);
+ }
+
+ // Sets the record flags of a given |slot|
+ void set_record_flags(int slot, uint8_t flags) {
+ ham_assert(m_flags != 0);
+ m_flags[slot] = flags;
+ }
+
+ // Returns the size of an inline record
+ uint32_t get_inline_record_size(int slot) const {
+ uint8_t flags = get_record_flags(slot);
+ ham_assert(is_record_inline(slot));
+ if (flags & BtreeRecord::kBlobSizeTiny) {
+ /* the highest byte of the record id is the size of the blob */
+ char *p = (char *)&m_data[slot];
+ return (p[sizeof(uint64_t) - 1]);
+ }
+ if (flags & BtreeRecord::kBlobSizeSmall)
+ return (sizeof(uint64_t));
+ if (flags & BtreeRecord::kBlobSizeEmpty)
+ return (0);
+ ham_assert(!"shouldn't be here");
+ return (0);
+ }
+
+ // Returns true if the record is inline, false if the record is a blob
+ bool is_record_inline(int slot) const {
+ uint8_t flags = get_record_flags(slot);
+ return ((flags & BtreeRecord::kBlobSizeTiny)
+ || (flags & BtreeRecord::kBlobSizeSmall)
+ || (flags & BtreeRecord::kBlobSizeEmpty) != 0);
+ }
+
+    // Removes an inline record and clears its size flags
+ void remove_inline_record(int slot) {
+ uint8_t flags = get_record_flags(slot);
+ m_data[slot] = 0;
+ set_record_flags(slot,
+ flags & ~(BtreeRecord::kBlobSizeSmall
+ | BtreeRecord::kBlobSizeTiny
+ | BtreeRecord::kBlobSizeEmpty));
+ }
+
+ // The parent database of this btree
+ LocalDatabase *m_db;
+
+ // The record flags
+ uint8_t *m_flags;
+
+ // The actual record data - an array of 64bit record IDs
+ uint64_t *m_data;
+};
+
+} // namespace PaxLayout
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_RECORDS_DEFAULT_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_duplicate.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_duplicate.h
new file mode 100644
index 0000000000..861f7a7640
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_duplicate.h
@@ -0,0 +1,1557 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * RecordList implementations for duplicate records
+ *
+ * Duplicate records are stored inline until a certain threshold
+ * (m_duptable_threshold) is reached. Beyond that, the duplicates are moved
+ * into a separate blob (the DuplicateTable), and the storage they previously
+ * occupied in the node is reused for other records.
+ *
+ * Since records therefore have variable length, an UpfrontIndex is used
+ * (see btree_keys_varlen.h).
+ *
+ * This file has two RecordList implementations:
+ *
+ * - DuplicateRecordList: stores regular records as duplicates; records
+ * are stored as blobs if their size exceeds 8 bytes. Otherwise
+ * they are stored inline.
+ *
+ * - DuplicateInlineRecordList: stores small fixed length records as
+ * duplicates
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_RECORDS_DUPLICATE_H
+#define HAM_BTREE_RECORDS_DUPLICATE_H
+
+#include "0root/root.h"
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include <map>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/scoped_ptr.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_node.h"
+#include "3btree/btree_index.h"
+#include "3btree/upfront_index.h"
+#include "3btree/btree_records_base.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+namespace DefLayout {
+
+// helper function which returns true if a record is inline
+static bool is_record_inline(uint8_t flags) {
+ return (flags != 0);
+}
+
+//
+// A helper class for dealing with extended duplicate tables
+//
+// Byte [0..3] - count
+// [4..7] - capacity
+// [8.. [ - the record list
+// if m_inline_records:
+// each record has n bytes record-data
+// else
+// each record has 1 byte flags, n bytes record-data
+//
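+// A rough example (assuming m_inline_records == false, i.e. 9 bytes per
+// record): a table holding 2 duplicates with a capacity of 4 looks like
+//
+//   offset  0.. 3   count    = 2
+//   offset  4.. 7   capacity = 4
+//   offset  8..16   1 flag byte + 8 data bytes of duplicate #0
+//   offset 17..25   1 flag byte + 8 data bytes of duplicate #1
+//   offset 26..     reserved space for two more duplicates
+//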
+class DuplicateTable
+{
+ public:
+ // Constructor; the flag |inline_records| indicates whether record
+ // flags should be stored for each record. |record_size| is the
+ // fixed length size of each record, or HAM_RECORD_SIZE_UNLIMITED
+ DuplicateTable(LocalDatabase *db, bool inline_records, size_t record_size)
+ : m_db(db), m_store_flags(!inline_records), m_record_size(record_size),
+ m_inline_records(inline_records), m_table_id(0) {
+ }
+
+ // Allocates and fills the table; returns the new table id.
+ // Can allocate empty tables (required for testing purposes).
+ // The initial capacity of the table is twice the current
+ // |record_count|.
+ uint64_t create(Context *context, const uint8_t *data,
+ size_t record_count) {
+ ham_assert(m_table_id == 0);
+
+ // This sets the initial capacity as described above
+ size_t capacity = record_count * 2;
+ m_table.resize(8 + capacity * get_record_width());
+ if (likely(record_count > 0))
+ m_table.overwrite(8, data, (m_inline_records
+ ? m_record_size * record_count
+ : 9 * record_count));
+
+ set_record_count(record_count);
+ set_record_capacity(record_count * 2);
+
+ // Flush the table to disk, returns the blob-id of the table
+ return (flush_duplicate_table(context));
+ }
+
+ // Reads the table from disk
+ void open(Context *context, uint64_t table_id) {
+ ham_record_t record = {0};
+ m_db->lenv()->blob_manager()->read(context, table_id,
+ &record, HAM_FORCE_DEEP_COPY, &m_table);
+ m_table_id = table_id;
+ }
+
+ // Returns the number of duplicates in that table
+ int get_record_count() const {
+ ham_assert(m_table.get_size() > 4);
+ return ((int) *(uint32_t *)m_table.get_ptr());
+ }
+
+ // Returns the record size of a duplicate
+ uint64_t get_record_size(Context *context, int duplicate_index) {
+ ham_assert(duplicate_index < get_record_count());
+ if (m_inline_records)
+ return (m_record_size);
+ ham_assert(m_store_flags == true);
+
+ uint8_t *precord_flags;
+ uint8_t *p = get_record_data(duplicate_index, &precord_flags);
+ uint8_t flags = *precord_flags;
+
+ if (flags & BtreeRecord::kBlobSizeTiny)
+ return (p[sizeof(uint64_t) - 1]);
+ if (flags & BtreeRecord::kBlobSizeSmall)
+ return (sizeof(uint64_t));
+ if (flags & BtreeRecord::kBlobSizeEmpty)
+ return (0);
+
+ uint64_t blob_id = *(uint64_t *)p;
+ return (m_db->lenv()->blob_manager()->get_blob_size(context, blob_id));
+ }
+
+ // Returns the full record and stores it in |record|. |flags| can
+    // be 0, |HAM_DIRECT_ACCESS| or |HAM_PARTIAL| - the same flags that
+    // ham_db_find et al. accept.
+ void get_record(Context *context, ByteArray *arena, ham_record_t *record,
+ uint32_t flags, int duplicate_index) {
+ ham_assert(duplicate_index < get_record_count());
+ bool direct_access = (flags & HAM_DIRECT_ACCESS) != 0;
+
+ uint8_t *precord_flags;
+ uint8_t *p = get_record_data(duplicate_index, &precord_flags);
+ uint8_t record_flags = precord_flags ? *precord_flags : 0;
+
+ if (m_inline_records) {
+ if (flags & HAM_PARTIAL) {
+ ham_trace(("flag HAM_PARTIAL is not allowed if record is "
+ "stored inline"));
+ throw Exception(HAM_INV_PARAMETER);
+ }
+
+ record->size = m_record_size;
+ if (direct_access)
+ record->data = p;
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, p, m_record_size);
+ }
+ return;
+ }
+
+ ham_assert(m_store_flags == true);
+
+ if (record_flags & BtreeRecord::kBlobSizeEmpty) {
+ record->data = 0;
+ record->size = 0;
+ return;
+ }
+
+ if (record_flags & BtreeRecord::kBlobSizeTiny) {
+ record->size = p[sizeof(uint64_t) - 1];
+ if (direct_access)
+ record->data = &p[0];
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, &p[0], record->size);
+ }
+ return;
+ }
+
+ if (record_flags & BtreeRecord::kBlobSizeSmall) {
+ record->size = sizeof(uint64_t);
+ if (direct_access)
+ record->data = &p[0];
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, &p[0], record->size);
+ }
+ return;
+ }
+
+ uint64_t blob_id = *(uint64_t *)p;
+
+ // the record is stored as a blob
+ LocalEnvironment *env = m_db->lenv();
+ env->blob_manager()->read(context, blob_id, record, flags, arena);
+ }
+
+    // Updates the record of a key. Analogous to the set_record() method
+    // of the NodeLayout class. Returns the new table id and stores the new
+    // duplicate index in |new_duplicate_index|, if that pointer is not null.
+ uint64_t set_record(Context *context, int duplicate_index,
+ ham_record_t *record, uint32_t flags,
+ uint32_t *new_duplicate_index) {
+ BlobManager *blob_manager = m_db->lenv()->blob_manager();
+
+ // the duplicate is overwritten
+ if (flags & HAM_OVERWRITE) {
+ uint8_t *record_flags = 0;
+ uint8_t *p = get_record_data(duplicate_index, &record_flags);
+
+ // the record is stored inline w/ fixed length?
+ if (m_inline_records) {
+ ham_assert(record->size == m_record_size);
+ memcpy(p, record->data, record->size);
+ return (flush_duplicate_table(context));
+ }
+ // the existing record is a blob
+ if (!is_record_inline(*record_flags)) {
+ uint64_t ptr = *(uint64_t *)p;
+ // overwrite the blob record
+ if (record->size > sizeof(uint64_t)) {
+ *(uint64_t *)p = blob_manager->overwrite(context, ptr,
+ record, flags);
+ return (flush_duplicate_table(context));
+ }
+ // otherwise delete it and continue
+ blob_manager->erase(context, ptr, 0);
+ }
+ }
+
+ // If the key is not overwritten but inserted or appended: create a
+ // "gap" in the table
+ else {
+ int record_count = get_record_count();
+
+ // check for overflow
+ if (unlikely(record_count == std::numeric_limits<int>::max())) {
+ ham_log(("Duplicate table overflow"));
+ throw Exception(HAM_LIMITS_REACHED);
+ }
+
+ // adjust flags
+ if (flags & HAM_DUPLICATE_INSERT_BEFORE && duplicate_index == 0)
+ flags |= HAM_DUPLICATE_INSERT_FIRST;
+ else if (flags & HAM_DUPLICATE_INSERT_AFTER) {
+ if (duplicate_index == record_count)
+ flags |= HAM_DUPLICATE_INSERT_LAST;
+ else {
+ flags |= HAM_DUPLICATE_INSERT_BEFORE;
+ duplicate_index++;
+ }
+ }
+
+ // resize the table, if necessary
+ if (unlikely(record_count == get_record_capacity()))
+ grow_duplicate_table();
+
+ // handle overwrites or inserts/appends
+ if (flags & HAM_DUPLICATE_INSERT_FIRST) {
+ if (record_count) {
+ uint8_t *ptr = get_raw_record_data(0);
+ memmove(ptr + get_record_width(), ptr,
+ record_count * get_record_width());
+ }
+ duplicate_index = 0;
+ }
+ else if (flags & HAM_DUPLICATE_INSERT_BEFORE) {
+ uint8_t *ptr = get_raw_record_data(duplicate_index);
+ memmove(ptr + get_record_width(), ptr,
+ (record_count - duplicate_index) * get_record_width());
+ }
+ else // HAM_DUPLICATE_INSERT_LAST
+ duplicate_index = record_count;
+
+ set_record_count(record_count + 1);
+ }
+
+ uint8_t *record_flags = 0;
+ uint8_t *p = get_record_data(duplicate_index, &record_flags);
+
+ // store record inline?
+ if (m_inline_records) {
+ ham_assert(m_record_size == record->size);
+ if (m_record_size > 0)
+ memcpy(p, record->data, record->size);
+ }
+ else if (record->size == 0) {
+ memcpy(p, "\0\0\0\0\0\0\0\0", 8);
+ *record_flags = BtreeRecord::kBlobSizeEmpty;
+ }
+ else if (record->size < sizeof(uint64_t)) {
+ p[sizeof(uint64_t) - 1] = (uint8_t)record->size;
+ memcpy(&p[0], record->data, record->size);
+ *record_flags = BtreeRecord::kBlobSizeTiny;
+ }
+ else if (record->size == sizeof(uint64_t)) {
+ memcpy(&p[0], record->data, record->size);
+ *record_flags = BtreeRecord::kBlobSizeSmall;
+ }
+ else {
+ *record_flags = 0;
+ uint64_t blob_id = blob_manager->allocate(context, record, flags);
+ memcpy(p, &blob_id, sizeof(blob_id));
+ }
+
+ if (new_duplicate_index)
+ *new_duplicate_index = duplicate_index;
+
+ // write the duplicate table to disk and return the table-id
+ return (flush_duplicate_table(context));
+ }
+
+ // Deletes a record from the table; also adjusts the count. If
+ // |all_duplicates| is true or if the last element of the table is
+ // deleted then the table itself will also be deleted. Returns 0
+ // if this is the case, otherwise returns the table id.
+ uint64_t erase_record(Context *context, int duplicate_index,
+ bool all_duplicates) {
+ int record_count = get_record_count();
+
+ if (record_count == 1 && duplicate_index == 0)
+ all_duplicates = true;
+
+ if (all_duplicates) {
+ if (m_store_flags && !m_inline_records) {
+ for (int i = 0; i < record_count; i++) {
+ uint8_t *record_flags;
+ uint8_t *p = get_record_data(i, &record_flags);
+ if (is_record_inline(*record_flags))
+ continue;
+ if (*(uint64_t *)p != 0) {
+ m_db->lenv()->blob_manager()->erase(context, *(uint64_t *)p);
+ *(uint64_t *)p = 0;
+ }
+ }
+ }
+ if (m_table_id != 0)
+ m_db->lenv()->blob_manager()->erase(context, m_table_id);
+ set_record_count(0);
+ m_table_id = 0;
+ return (0);
+ }
+
+ ham_assert(record_count > 0 && duplicate_index < record_count);
+
+ uint8_t *record_flags;
+ uint8_t *lhs = get_record_data(duplicate_index, &record_flags);
+ if (record_flags != 0 && *record_flags == 0 && !m_inline_records) {
+ m_db->lenv()->blob_manager()->erase(context, *(uint64_t *)lhs);
+ *(uint64_t *)lhs = 0;
+ }
+
+ if (duplicate_index < record_count - 1) {
+ lhs = get_raw_record_data(duplicate_index);
+ uint8_t *rhs = lhs + get_record_width();
+ memmove(lhs, rhs, get_record_width()
+ * (record_count - duplicate_index - 1));
+ }
+
+ // adjust the counter
+ set_record_count(record_count - 1);
+
+ // write the duplicate table to disk and return the table-id
+ return (flush_duplicate_table(context));
+ }
+
+ // Returns the maximum capacity of elements in a duplicate table
+ // This method could be private, but it's required by the unittests
+ int get_record_capacity() const {
+ ham_assert(m_table.get_size() >= 8);
+ return ((int) *(uint32_t *)((uint8_t *)m_table.get_ptr() + 4));
+ }
+
+ private:
+ // Doubles the capacity of the ByteArray which backs the table
+ void grow_duplicate_table() {
+ int capacity = get_record_capacity();
+ if (capacity == 0)
+ capacity = 8;
+ m_table.resize(8 + (capacity * 2) * get_record_width());
+ set_record_capacity(capacity * 2);
+ }
+
+ // Writes the modified duplicate table to disk; returns the new
+ // table-id
+ uint64_t flush_duplicate_table(Context *context) {
+ ham_record_t record = {0};
+ record.data = m_table.get_ptr();
+ record.size = m_table.get_size();
+ if (!m_table_id)
+ m_table_id = m_db->lenv()->blob_manager()->allocate(
+ context, &record, 0);
+ else
+ m_table_id = m_db->lenv()->blob_manager()->overwrite(
+ context, m_table_id, &record, 0);
+ return (m_table_id);
+ }
+
+ // Returns the size of a record structure in the ByteArray
+ size_t get_record_width() const {
+ if (m_inline_records)
+ return (m_record_size);
+ ham_assert(m_store_flags == true);
+ return (sizeof(uint64_t) + 1);
+ }
+
+ // Returns a pointer to the record data (including flags)
+ uint8_t *get_raw_record_data(int duplicate_index) {
+ if (m_inline_records)
+ return ((uint8_t *)m_table.get_ptr()
+ + 8
+ + m_record_size * duplicate_index);
+ else
+ return ((uint8_t *)m_table.get_ptr()
+ + 8
+ + 9 * duplicate_index);
+ }
+
+ // Returns a pointer to the record data, and the flags
+ uint8_t *get_record_data(int duplicate_index,
+ uint8_t **pflags = 0) {
+ uint8_t *p = get_raw_record_data(duplicate_index);
+ if (m_store_flags) {
+ if (pflags)
+ *pflags = p++;
+ else
+ p++;
+ }
+ else if (pflags)
+ *pflags = 0;
+ return (p);
+ }
+
+ // Sets the number of used elements in a duplicate table
+ void set_record_count(int record_count) {
+ *(uint32_t *)m_table.get_ptr() = (uint32_t)record_count;
+ }
+
+ // Sets the maximum capacity of elements in a duplicate table
+ void set_record_capacity(int capacity) {
+ ham_assert(m_table.get_size() >= 8);
+ *(uint32_t *)((uint8_t *)m_table.get_ptr() + 4) = (uint32_t)capacity;
+ }
+
+ // The database
+ LocalDatabase *m_db;
+
+ // Whether to store flags per record or not (true unless records
+ // have constant length)
+ bool m_store_flags;
+
+ // The constant length record size, or HAM_RECORD_SIZE_UNLIMITED
+ size_t m_record_size;
+
+ // Stores the actual data of the table
+ ByteArray m_table;
+
+ // True if records are inline
+ bool m_inline_records;
+
+ // The blob id for persisting the table
+ uint64_t m_table_id;
+};
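+
+// A minimal usage sketch for DuplicateTable (an illustration only; |context|
+// and |db| are assumed to be a valid Context and LocalDatabase):
+//
+//   DuplicateTable dt(db, /* inline_records */ false,
+//                     HAM_RECORD_SIZE_UNLIMITED);
+//   uint64_t table_id = dt.create(context, 0, 0);  // start with an empty table
+//   ham_record_t rec = {0};                        // fill rec.data / rec.size
+//   table_id = dt.set_record(context, 0, &rec,
+//                     HAM_DUPLICATE_INSERT_LAST, 0);  // may return a new blob id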
+
+//
+// Common functions for duplicate record lists
+//
+class DuplicateRecordList : public BaseRecordList
+{
+ protected:
+ // for caching external duplicate tables
+ typedef std::map<uint64_t, DuplicateTable *> DuplicateTableCache;
+
+ public:
+ enum {
+ // A flag whether this RecordList has sequential data
+ kHasSequentialData = 0
+ };
+
+ // Constructor
+ DuplicateRecordList(LocalDatabase *db, PBtreeNode *node,
+ bool store_flags, size_t record_size)
+ : m_db(db), m_node(node), m_index(db), m_data(0),
+ m_store_flags(store_flags), m_record_size(record_size) {
+ size_t page_size = db->lenv()->config().page_size_bytes;
+ if (Globals::ms_duplicate_threshold)
+ m_duptable_threshold = Globals::ms_duplicate_threshold;
+ else {
+ if (page_size == 1024)
+ m_duptable_threshold = 8;
+ else if (page_size <= 1024 * 8)
+ m_duptable_threshold = 12;
+ else if (page_size <= 1024 * 16)
+ m_duptable_threshold = 20;
+ else if (page_size <= 1024 * 32)
+ m_duptable_threshold = 32;
+ else {
+ // 0x7f/127 is the maximum that we can store in the record
+ // counter (7 bits), but we won't exploit this fully
+ m_duptable_threshold = 64;
+ }
+ }
+
+ // UpfrontIndex's chunk_size is just 1 byte (max 255); make sure that
+ // the duplicate list fits into a single chunk!
+ size_t rec_size = m_record_size;
+ if (rec_size == HAM_RECORD_SIZE_UNLIMITED)
+ rec_size = 9;
+ if (m_duptable_threshold * rec_size > 250)
+ m_duptable_threshold = 250 / rec_size;
+ }
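+
+    // A small worked example for the threshold logic above (assuming no
+    // Globals::ms_duplicate_threshold override): with a 16 KB page and a
+    // fixed record size of 32 bytes the threshold starts at 20, but because
+    // 20 * 32 > 250 it is capped to 250 / 32 = 7 inline duplicates per key.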
+
+ // Destructor - clears the cache
+ ~DuplicateRecordList() {
+ if (m_duptable_cache) {
+ for (DuplicateTableCache::iterator it = m_duptable_cache->begin();
+ it != m_duptable_cache->end(); it++)
+ delete it->second;
+ }
+ }
+
+ // Opens an existing RecordList
+ void open(uint8_t *ptr, size_t range_size, size_t node_count) {
+ m_data = ptr;
+ m_index.open(m_data, range_size);
+ m_range_size = range_size;
+ }
+
+ // Returns a duplicate table; uses a cache to speed up access
+ DuplicateTable *get_duplicate_table(Context *context, uint64_t table_id) {
+ if (!m_duptable_cache)
+ m_duptable_cache.reset(new DuplicateTableCache());
+ else {
+ DuplicateTableCache::iterator it = m_duptable_cache->find(table_id);
+ if (it != m_duptable_cache->end())
+ return (it->second);
+ }
+
+ DuplicateTable *dt = new DuplicateTable(m_db, !m_store_flags,
+ m_record_size);
+ dt->open(context, table_id);
+ (*m_duptable_cache)[table_id] = dt;
+ return (dt);
+ }
+
+ // Updates the DupTableCache and changes the table id of a DuplicateTable.
+ // Called whenever a DuplicateTable's size increases, and the new blob-id
+ // differs from the old one.
+ void update_duplicate_table_id(DuplicateTable *dt,
+ uint64_t old_table_id, uint64_t new_table_id) {
+ m_duptable_cache->erase(old_table_id);
+ (*m_duptable_cache)[new_table_id] = dt;
+ }
+
+ // Erases a slot. Only updates the UpfrontIndex; does NOT delete the
+ // record blobs!
+ void erase(Context *context, size_t node_count, int slot) {
+ m_index.erase(node_count, slot);
+ }
+
+ // Inserts a slot for one additional record
+ void insert(Context *context, size_t node_count, int slot) {
+ m_index.insert(node_count, slot);
+ }
+
+    // Copies all items starting at this[sstart] to dest[dstart]
+ void copy_to(int sstart, size_t node_count,
+ DuplicateRecordList &dest, size_t other_node_count,
+ int dstart) {
+ // make sure that the other node has sufficient capacity in its
+ // UpfrontIndex
+ dest.m_index.change_range_size(other_node_count, 0, 0,
+ m_index.get_capacity());
+
+ uint32_t doffset;
+ for (size_t i = 0; i < node_count - sstart; i++) {
+ size_t size = m_index.get_chunk_size(sstart + i);
+
+ dest.m_index.insert(other_node_count + i, dstart + i);
+ // destination offset
+ doffset = dest.m_index.allocate_space(other_node_count + i + 1,
+ dstart + i, size);
+ doffset = dest.m_index.get_absolute_offset(doffset);
+ // source offset
+ uint32_t soffset = m_index.get_chunk_offset(sstart + i);
+ soffset = m_index.get_absolute_offset(soffset);
+ // copy the data
+ memcpy(&dest.m_data[doffset], &m_data[soffset], size);
+ }
+
+ // After copying, the caller will reduce the node count drastically.
+ // Therefore invalidate the cached next_offset.
+ m_index.invalidate_next_offset();
+ }
+
+ // Rearranges the list
+ void vacuumize(size_t node_count, bool force) {
+ if (force)
+ m_index.increase_vacuumize_counter(100);
+ m_index.maybe_vacuumize(node_count);
+ }
+
+ protected:
+ // The database
+ LocalDatabase *m_db;
+
+ // The current node
+ PBtreeNode *m_node;
+
+ // The index which manages variable length chunks
+ UpfrontIndex m_index;
+
+ // The actual data of the node
+ uint8_t *m_data;
+
+ // Whether record flags are required
+ bool m_store_flags;
+
+ // The constant record size, or HAM_RECORD_SIZE_UNLIMITED
+ size_t m_record_size;
+
+ // The duplicate threshold
+ size_t m_duptable_threshold;
+
+ // A cache for duplicate tables
+ ScopedPtr<DuplicateTableCache> m_duptable_cache;
+};
+
+//
+// RecordList for records with fixed length, with duplicates. It uses
+// an UpfrontIndex to manage the variable length chunks.
+//
+// If a key has duplicates, then all duplicates are stored sequentially.
+// If that duplicate list exceeds a certain threshold then they are moved
+// to a DuplicateTable, which is stored as a blob.
+//
+// Format for each slot:
+//
+// 1 byte meta data
+// bit 1 - 7: duplicate counter, if kExtendedDuplicates == 0
+// bit 8: kExtendedDuplicates
+// if kExtendedDuplicates == 0:
+// <counter> * <length> bytes
+// <length> byte data (always inline)
+// if kExtendedDuplicates == 1:
+// 8 byte: record id of the extended duplicate table
+//
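+// A rough example (assuming a fixed record_size of 4 and three inline
+// duplicates): the slot's chunk in |m_data| looks like
+//
+//   | 0x03 | dup #0 (4 bytes) | dup #1 (4 bytes) | dup #2 (4 bytes) |
+//
+// Once the duplicates are moved to an external DuplicateTable the chunk
+// shrinks to
+//
+//   | 0x80 | blob id of the DuplicateTable (8 bytes) |
+//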
+class DuplicateInlineRecordList : public DuplicateRecordList
+{
+ public:
+ // Constructor
+ DuplicateInlineRecordList(LocalDatabase *db, PBtreeNode *node)
+ : DuplicateRecordList(db, node, false, db->config().record_size),
+ m_record_size(db->config().record_size) {
+ }
+
+ // Creates a new RecordList starting at |data|
+ void create(uint8_t *data, size_t range_size) {
+ m_data = data;
+ m_index.create(m_data, range_size, range_size / get_full_record_size());
+ m_range_size = range_size;
+ }
+
+    // Calculates the required size for a range with |node_count| records
+ size_t get_required_range_size(size_t node_count) const {
+ return (m_index.get_required_range_size(node_count));
+ }
+
+ // Returns the actual record size including overhead
+ size_t get_full_record_size() const {
+ return (1 + m_record_size + m_index.get_full_index_size());
+ }
+
+ // Returns the number of duplicates for a slot
+ int get_record_count(Context *context, int slot) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ if (m_data[offset] & BtreeRecord::kExtendedDuplicates) {
+ DuplicateTable *dt = get_duplicate_table(context, get_record_id(slot));
+ return ((int)dt->get_record_count());
+ }
+
+ return (m_data[offset] & 0x7f);
+ }
+
+ // Returns the size of a record; the size is always constant
+ uint64_t get_record_size(Context *context, int slot,
+ int duplicate_index = 0) const {
+ return (m_record_size);
+ }
+
+ // Returns the full record and stores it in |dest|
+ void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags,
+ int duplicate_index) {
+ // forward to duplicate table?
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ if (unlikely(m_data[offset] & BtreeRecord::kExtendedDuplicates)) {
+ DuplicateTable *dt = get_duplicate_table(context, get_record_id(slot));
+ dt->get_record(context, arena, record, flags, duplicate_index);
+ return;
+ }
+
+ if (flags & HAM_PARTIAL) {
+ ham_trace(("flag HAM_PARTIAL is not allowed if record is "
+ "stored inline"));
+ throw Exception(HAM_INV_PARAMETER);
+ }
+
+ ham_assert(duplicate_index < (int)get_inline_record_count(slot));
+ bool direct_access = (flags & HAM_DIRECT_ACCESS) != 0;
+
+ // the record is always stored inline
+ const uint8_t *ptr = get_record_data(slot, duplicate_index);
+ record->size = m_record_size;
+ if (direct_access)
+ record->data = (void *)ptr;
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, ptr, m_record_size);
+ }
+ }
+
+ // Adds or overwrites a record
+ void set_record(Context *context, int slot, int duplicate_index,
+ ham_record_t *record, uint32_t flags,
+ uint32_t *new_duplicate_index = 0) {
+ uint32_t chunk_offset = m_index.get_absolute_chunk_offset(slot);
+ uint32_t current_size = m_index.get_chunk_size(slot);
+
+ ham_assert(m_record_size == record->size);
+
+ // if the slot was not yet allocated: allocate new space, initialize
+ // it and then overwrite the record
+ if (current_size == 0) {
+ duplicate_index = 0;
+ flags |= HAM_OVERWRITE;
+ chunk_offset = m_index.allocate_space(m_node->get_count(), slot,
+ 1 + m_record_size);
+ chunk_offset = m_index.get_absolute_offset(chunk_offset);
+ // clear the flags
+ m_data[chunk_offset] = 0;
+
+ set_inline_record_count(slot, 1);
+ }
+
+ // if there's no duplicate table, but we're not able to add another
+ // duplicate because of size constraints, then offload all
+ // existing duplicates to an external DuplicateTable
+ uint32_t record_count = get_inline_record_count(slot);
+ size_t required_size = 1 + (record_count + 1) * m_record_size;
+
+ if (!(m_data[chunk_offset] & BtreeRecord::kExtendedDuplicates)
+ && !(flags & HAM_OVERWRITE)) {
+ bool force_duptable = record_count >= m_duptable_threshold;
+ if (!force_duptable
+ && !m_index.can_allocate_space(m_node->get_count(),
+ required_size))
+ force_duptable = true;
+
+ // update chunk_offset - it might have been modified if
+ // m_index.can_allocate_space triggered a vacuumize() operation
+ chunk_offset = m_index.get_absolute_chunk_offset(slot);
+
+ // already too many duplicates, or the record does not fit? then
+ // allocate an overflow duplicate list and move all duplicates to
+ // this list
+ if (force_duptable) {
+ DuplicateTable *dt = new DuplicateTable(m_db, !m_store_flags,
+ m_record_size);
+ uint64_t table_id = dt->create(context, get_record_data(slot, 0),
+ record_count);
+ if (!m_duptable_cache)
+ m_duptable_cache.reset(new DuplicateTableCache());
+ (*m_duptable_cache)[table_id] = dt;
+
+ // write the id of the duplicate table
+ if (m_index.get_chunk_size(slot) < 8 + 1) {
+ // do not erase the slot because it occupies so little space
+ size_t node_count = m_node->get_count();
+ // force a split in the caller if the duplicate table cannot
+ // be inserted
+ if (!m_index.can_allocate_space(node_count, 8 + 1))
+ throw Exception(HAM_LIMITS_REACHED);
+ m_index.allocate_space(node_count, slot, 8 + 1);
+ chunk_offset = m_index.get_absolute_chunk_offset(slot);
+ }
+
+ m_data[chunk_offset] |= BtreeRecord::kExtendedDuplicates;
+ set_record_id(slot, table_id);
+ set_inline_record_count(slot, 0);
+
+ m_index.set_chunk_size(slot, 8 + 1);
+ m_index.increase_vacuumize_counter(m_index.get_chunk_size(slot) - 9);
+ m_index.invalidate_next_offset();
+
+ // fall through
+ }
+ }
+
+ // forward to duplicate table?
+ if (unlikely(m_data[chunk_offset] & BtreeRecord::kExtendedDuplicates)) {
+ uint64_t table_id = get_record_id(slot);
+ DuplicateTable *dt = get_duplicate_table(context, table_id);
+ uint64_t new_table_id = dt->set_record(context, duplicate_index, record,
+ flags, new_duplicate_index);
+ if (new_table_id != table_id) {
+ update_duplicate_table_id(dt, table_id, new_table_id);
+ set_record_id(slot, new_table_id);
+ }
+ return;
+ }
+
+ // the duplicate is overwritten
+ if (flags & HAM_OVERWRITE) {
+ // the record is always stored inline w/ fixed length
+ uint8_t *p = (uint8_t *)get_record_data(slot, duplicate_index);
+ memcpy(p, record->data, record->size);
+ return;
+ }
+
+ // Allocate new space for the duplicate table, if required
+ if (current_size < required_size) {
+ uint8_t *oldp = &m_data[chunk_offset];
+ uint32_t old_chunk_size = m_index.get_chunk_size(slot);
+ uint32_t old_chunk_offset = m_index.get_chunk_offset(slot);
+ uint32_t new_chunk_offset = m_index.allocate_space(m_node->get_count(),
+ slot, required_size);
+ chunk_offset = m_index.get_absolute_offset(new_chunk_offset);
+ if (current_size > 0 && old_chunk_offset != new_chunk_offset) {
+ memmove(&m_data[chunk_offset], oldp, current_size);
+ m_index.add_to_freelist(m_node->get_count(), old_chunk_offset,
+ old_chunk_size);
+ }
+ }
+
+ // adjust flags
+ if (flags & HAM_DUPLICATE_INSERT_BEFORE && duplicate_index == 0)
+ flags |= HAM_DUPLICATE_INSERT_FIRST;
+ else if (flags & HAM_DUPLICATE_INSERT_AFTER) {
+ if (duplicate_index == (int)record_count)
+ flags |= HAM_DUPLICATE_INSERT_LAST;
+ else {
+ flags |= HAM_DUPLICATE_INSERT_BEFORE;
+ duplicate_index++;
+ }
+ }
+
+ // handle overwrites or inserts/appends
+ if (flags & HAM_DUPLICATE_INSERT_FIRST) {
+ if (record_count > 0) {
+ uint8_t *ptr = get_record_data(slot, 0);
+ memmove(get_record_data(slot, 1), ptr, record_count * m_record_size);
+ }
+ duplicate_index = 0;
+ }
+ else if (flags & HAM_DUPLICATE_INSERT_BEFORE) {
+ memmove(get_record_data(slot, duplicate_index),
+ get_record_data(slot, duplicate_index + 1),
+ (record_count - duplicate_index) * m_record_size);
+ }
+ else // HAM_DUPLICATE_INSERT_LAST
+ duplicate_index = record_count;
+
+ set_inline_record_count(slot, record_count + 1);
+
+ // store the new record inline
+ if (m_record_size > 0)
+ memcpy(get_record_data(slot, duplicate_index),
+ record->data, record->size);
+
+ if (new_duplicate_index)
+ *new_duplicate_index = duplicate_index;
+ }
+
+ // Erases a record's blob (does not remove the slot!)
+ void erase_record(Context *context, int slot, int duplicate_index = 0,
+ bool all_duplicates = false) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+
+ // forward to external duplicate table?
+ if (unlikely(m_data[offset] & BtreeRecord::kExtendedDuplicates)) {
+ uint64_t table_id = get_record_id(slot);
+ DuplicateTable *dt = get_duplicate_table(context, table_id);
+ uint64_t new_table_id = dt->erase_record(context, duplicate_index,
+ all_duplicates);
+ if (new_table_id == 0) {
+ m_duptable_cache->erase(table_id);
+ set_record_id(slot, 0);
+ m_data[offset] &= ~BtreeRecord::kExtendedDuplicates;
+ delete dt;
+ }
+ else if (new_table_id != table_id) {
+ update_duplicate_table_id(dt, table_id, new_table_id);
+ set_record_id(slot, new_table_id);
+ }
+ return;
+ }
+
+      // is the only remaining record erased? then erase all duplicates
+ size_t node_count = get_inline_record_count(slot);
+ if (node_count == 1 && duplicate_index == 0)
+ all_duplicates = true;
+
+ // erase all duplicates?
+ if (all_duplicates) {
+ set_inline_record_count(slot, 0);
+ }
+ else {
+ if (duplicate_index < (int)node_count - 1)
+          memmove(get_record_data(slot, duplicate_index),
+                        get_record_data(slot, duplicate_index + 1),
+ m_record_size * (node_count - duplicate_index - 1));
+ set_inline_record_count(slot, node_count - 1);
+ }
+ }
+
+ // Returns a 64bit record id from a record
+ uint64_t get_record_id(int slot,
+ int duplicate_index = 0) const {
+ return (*(uint64_t *)get_record_data(slot, duplicate_index));
+ }
+
+ // Sets a 64bit record id; used for internal nodes to store Page IDs
+ // or for leaf nodes to store DuplicateTable IDs
+ void set_record_id(int slot, uint64_t id) {
+ ham_assert(m_index.get_chunk_size(slot) >= sizeof(id));
+ *(uint64_t *)get_record_data(slot, 0) = id;
+ }
+
+ // Checks the integrity of this node. Throws an exception if there is a
+ // violation.
+ void check_integrity(Context *context, size_t node_count,
+ bool quick = false) const {
+ for (size_t i = 0; i < node_count; i++) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(i);
+ if (m_data[offset] & BtreeRecord::kExtendedDuplicates) {
+ ham_assert((m_data[offset] & 0x7f) == 0);
+ }
+ }
+
+ m_index.check_integrity(node_count);
+ }
+
+    // Changes the capacity; the capacity can only be reduced, growing is not
+    // implemented. This means that the data area must be copied; the offsets
+    // do not have to be changed.
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t capacity_hint) {
+ // no capacity given? then try to find a good default one
+ if (capacity_hint == 0) {
+ capacity_hint = (new_range_size - m_index.get_next_offset(node_count)
+ - get_full_record_size()) / m_index.get_full_index_size();
+ if (capacity_hint <= node_count)
+ capacity_hint = node_count + 1;
+ }
+
+ // if there's not enough space for the new capacity then try to reduce
+ // the capacity
+ if (m_index.get_next_offset(node_count) + get_full_record_size()
+ + capacity_hint * m_index.get_full_index_size()
+ + UpfrontIndex::kPayloadOffset
+ > new_range_size)
+ capacity_hint = node_count + 1;
+
+ m_index.change_range_size(node_count, new_data_ptr, new_range_size,
+ capacity_hint);
+ m_data = new_data_ptr;
+ m_range_size = new_range_size;
+ }
+
+ // Returns true if there's not enough space for another record
+ bool requires_split(size_t node_count) {
+ // if the record is extremely small then make sure there's some headroom;
+ // this is required for DuplicateTable ids which are 64bit numbers
+ size_t required = get_full_record_size();
+ if (required < 10)
+ required = 10;
+ return (m_index.requires_split(node_count, required));
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BaseRecordList::fill_metrics(metrics, node_count);
+ BtreeStatistics::update_min_max_avg(&metrics->recordlist_index,
+ m_index.get_capacity() * m_index.get_full_index_size());
+ BtreeStatistics::update_min_max_avg(&metrics->recordlist_unused,
+ m_range_size - get_required_range_size(node_count));
+ }
+
+ // Prints a slot to |out| (for debugging)
+ void print(Context *context, int slot, std::stringstream &out) {
+ out << "(" << get_record_count(context, slot) << " records)";
+ }
+
+ private:
+ // Returns the number of records that are stored inline
+ uint32_t get_inline_record_count(int slot) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ return (m_data[offset] & 0x7f);
+ }
+
+ // Sets the number of records that are stored inline
+ void set_inline_record_count(int slot, size_t count) {
+ ham_assert(count <= 0x7f);
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ m_data[offset] &= BtreeRecord::kExtendedDuplicates;
+ m_data[offset] |= count;
+ }
+
+ // Returns a pointer to the record data
+ uint8_t *get_record_data(int slot, int duplicate_index = 0) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ return (&m_data[offset + 1 + m_record_size * duplicate_index]);
+ }
+
+ // Returns a pointer to the record data (const flavour)
+ const uint8_t *get_record_data(int slot,
+ int duplicate_index = 0) const {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ return (&m_data[offset + 1 + m_record_size * duplicate_index]);
+ }
+
+ // The constant length record size
+ size_t m_record_size;
+};
+
+//
+// RecordList for default records (8 bytes; either inline or a record id),
+// with duplicates
+//
+// Format for each slot:
+//
+// 1 byte meta data
+// bit 1 - 7: duplicate counter, if kExtendedDuplicates == 0
+// bit 8: kExtendedDuplicates
+// if kExtendedDuplicates == 0:
+// <counter> * 9 bytes
+//      1 byte flags (BtreeRecord::kBlobSize*, or 0 for a blob id)
+// 8 byte data (either inline or record-id)
+// if kExtendedDuplicates == 1:
+// 8 byte: record id of the extended duplicate table
+//
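+// A rough example (two inline duplicates, no extended table): the chunk is
+//
+//   | 0x02 | flags #0 | data #0 (8 bytes) | flags #1 | data #1 (8 bytes) |
+//
+// where each flags byte is one of the BtreeRecord::kBlobSize* values for
+// inline data, or 0 if the 8 data bytes store the blob id of a record that
+// was written via the BlobManager.
+//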
+class DuplicateDefaultRecordList : public DuplicateRecordList
+{
+ public:
+ // Constructor
+ DuplicateDefaultRecordList(LocalDatabase *db, PBtreeNode *node)
+ : DuplicateRecordList(db, node, true, HAM_RECORD_SIZE_UNLIMITED) {
+ }
+
+ // Creates a new RecordList starting at |data|
+ void create(uint8_t *data, size_t range_size) {
+ m_data = data;
+      m_index.create(m_data, range_size, range_size / get_full_record_size());
+      m_range_size = range_size;
+ }
+
+    // Calculates the required size for a range with |node_count| records
+ size_t get_required_range_size(size_t node_count) const {
+ return (m_index.get_required_range_size(node_count));
+ }
+
+    // Returns the actual record size including overhead
+ size_t get_full_record_size() const {
+ return (1 + 1 + 8 + m_index.get_full_index_size());
+ }
+
+ // Returns the number of duplicates
+ int get_record_count(Context *context, int slot) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ if (unlikely(m_data[offset] & BtreeRecord::kExtendedDuplicates)) {
+ DuplicateTable *dt = get_duplicate_table(context, get_record_id(slot));
+ return ((int) dt->get_record_count());
+ }
+
+ return (m_data[offset] & 0x7f);
+ }
+
+ // Returns the size of a record
+ uint64_t get_record_size(Context *context, int slot,
+ int duplicate_index = 0) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ if (unlikely(m_data[offset] & BtreeRecord::kExtendedDuplicates)) {
+ DuplicateTable *dt = get_duplicate_table(context, get_record_id(slot));
+ return (dt->get_record_size(context, duplicate_index));
+ }
+
+ uint8_t *p = &m_data[offset + 1 + 9 * duplicate_index];
+ uint8_t flags = *(p++);
+ if (flags & BtreeRecord::kBlobSizeTiny)
+ return (p[sizeof(uint64_t) - 1]);
+ if (flags & BtreeRecord::kBlobSizeSmall)
+ return (sizeof(uint64_t));
+ if (flags & BtreeRecord::kBlobSizeEmpty)
+ return (0);
+
+ LocalEnvironment *env = m_db->lenv();
+ return (env->blob_manager()->get_blob_size(context, *(uint64_t *)p));
+ }
+
+ // Returns the full record and stores it in |dest|; memory must be
+ // allocated by the caller
+ void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags, int duplicate_index) {
+ // forward to duplicate table?
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ if (unlikely(m_data[offset] & BtreeRecord::kExtendedDuplicates)) {
+ DuplicateTable *dt = get_duplicate_table(context, get_record_id(slot));
+ dt->get_record(context, arena, record, flags, duplicate_index);
+ return;
+ }
+
+ ham_assert(duplicate_index < (int)get_inline_record_count(slot));
+ bool direct_access = (flags & HAM_DIRECT_ACCESS) != 0;
+
+ uint8_t *p = &m_data[offset + 1 + 9 * duplicate_index];
+ uint8_t record_flags = *(p++);
+
+ if (record_flags && (flags & HAM_PARTIAL)) {
+ ham_trace(("flag HAM_PARTIAL is not allowed if record is "
+ "stored inline"));
+ throw Exception(HAM_INV_PARAMETER);
+ }
+
+ if (record_flags & BtreeRecord::kBlobSizeEmpty) {
+ record->data = 0;
+ record->size = 0;
+ return;
+ }
+
+ if (record_flags & BtreeRecord::kBlobSizeTiny) {
+ record->size = p[sizeof(uint64_t) - 1];
+ if (direct_access)
+ record->data = &p[0];
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, &p[0], record->size);
+ }
+ return;
+ }
+
+ if (record_flags & BtreeRecord::kBlobSizeSmall) {
+ record->size = sizeof(uint64_t);
+ if (direct_access)
+ record->data = &p[0];
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, &p[0], record->size);
+ }
+ return;
+ }
+
+ uint64_t blob_id = *(uint64_t *)p;
+
+ // the record is stored as a blob
+ LocalEnvironment *env = m_db->lenv();
+ env->blob_manager()->read(context, blob_id, record, flags, arena);
+ }
+
+ // Updates the record of a key
+ void set_record(Context *context, int slot, int duplicate_index,
+ ham_record_t *record, uint32_t flags,
+ uint32_t *new_duplicate_index = 0) {
+ uint32_t chunk_offset = m_index.get_absolute_chunk_offset(slot);
+ uint32_t current_size = m_index.get_chunk_size(slot);
+
+ // if the slot was not yet allocated: allocate new space, initialize
+ // it and then overwrite the record
+ if (current_size == 0) {
+ duplicate_index = 0;
+ flags |= HAM_OVERWRITE;
+ chunk_offset = m_index.allocate_space(m_node->get_count(), slot, 1 + 9);
+ chunk_offset = m_index.get_absolute_offset(chunk_offset);
+ // clear the record flags
+ m_data[chunk_offset] = 0;
+ m_data[chunk_offset + 1] = BtreeRecord::kBlobSizeEmpty;
+
+ set_inline_record_count(slot, 1);
+ }
+
+ // if there's no duplicate table, but we're not able to add another
+ // duplicate then offload all existing duplicates to a table
+ uint32_t record_count = get_inline_record_count(slot);
+ size_t required_size = 1 + (record_count + 1) * 9;
+
+ if (!(m_data[chunk_offset] & BtreeRecord::kExtendedDuplicates)
+ && !(flags & HAM_OVERWRITE)) {
+ bool force_duptable = record_count >= m_duptable_threshold;
+ if (!force_duptable
+ && !m_index.can_allocate_space(m_node->get_count(),
+ required_size))
+ force_duptable = true;
+
+ // update chunk_offset - it might have been modified if
+ // m_index.can_allocate_space triggered a vacuumize() operation
+ chunk_offset = m_index.get_absolute_chunk_offset(slot);
+
+ // already too many duplicates, or the record does not fit? then
+ // allocate an overflow duplicate list and move all duplicates to
+ // this list
+ if (force_duptable) {
+ DuplicateTable *dt = new DuplicateTable(m_db, !m_store_flags,
+ HAM_RECORD_SIZE_UNLIMITED);
+ uint64_t table_id = dt->create(context, get_record_data(slot, 0),
+ record_count);
+ if (!m_duptable_cache)
+ m_duptable_cache.reset(new DuplicateTableCache());
+ (*m_duptable_cache)[table_id] = dt;
+
+ // write the id of the duplicate table
+ if (m_index.get_chunk_size(slot) < 8 + 1) {
+          // do not erase the slot because it occupies so little space
+ m_index.allocate_space(m_node->get_count(), slot, 8 + 1);
+ chunk_offset = m_index.get_absolute_chunk_offset(slot);
+ }
+
+ m_data[chunk_offset] |= BtreeRecord::kExtendedDuplicates;
+ set_record_id(slot, table_id);
+ set_inline_record_count(slot, 0);
+
+ m_index.set_chunk_size(slot, 10);
+ m_index.increase_vacuumize_counter(m_index.get_chunk_size(slot) - 10);
+ m_index.invalidate_next_offset();
+
+ // fall through
+ }
+ }
+
+ // forward to duplicate table?
+ if (unlikely(m_data[chunk_offset] & BtreeRecord::kExtendedDuplicates)) {
+ uint64_t table_id = get_record_id(slot);
+ DuplicateTable *dt = get_duplicate_table(context, table_id);
+ uint64_t new_table_id = dt->set_record(context, duplicate_index, record,
+ flags, new_duplicate_index);
+ if (new_table_id != table_id) {
+ update_duplicate_table_id(dt, table_id, new_table_id);
+ set_record_id(slot, new_table_id);
+ }
+ return;
+ }
+
+ uint64_t overwrite_blob_id = 0;
+ uint8_t *record_flags = 0;
+ uint8_t *p = 0;
+
+ // the (inline) duplicate is overwritten
+ if (flags & HAM_OVERWRITE) {
+ record_flags = &m_data[chunk_offset + 1 + 9 * duplicate_index];
+ p = record_flags + 1;
+
+ // If a blob is overwritten with an inline record then the old blob
+ // has to be deleted
+ if (*record_flags == 0) {
+ if (record->size <= 8) {
+ uint64_t blob_id = *(uint64_t *)p;
+ if (blob_id)
+ m_db->lenv()->blob_manager()->erase(context, blob_id);
+ }
+ else
+ overwrite_blob_id = *(uint64_t *)p;
+ // fall through
+ }
+ // then jump to the code which performs the actual insertion
+ goto write_record;
+ }
+
+ // Allocate new space for the duplicate table, if required
+ if (current_size < required_size) {
+ uint8_t *oldp = &m_data[chunk_offset];
+ uint32_t old_chunk_size = m_index.get_chunk_size(slot);
+ uint32_t old_chunk_offset = m_index.get_chunk_offset(slot);
+ uint32_t new_chunk_offset = m_index.allocate_space(m_node->get_count(),
+ slot, required_size);
+ chunk_offset = m_index.get_absolute_offset(new_chunk_offset);
+ if (current_size > 0)
+ memmove(&m_data[chunk_offset], oldp, current_size);
+ if (old_chunk_offset != new_chunk_offset)
+ m_index.add_to_freelist(m_node->get_count(), old_chunk_offset,
+ old_chunk_size);
+ }
+
+ // adjust flags
+ if (flags & HAM_DUPLICATE_INSERT_BEFORE && duplicate_index == 0)
+ flags |= HAM_DUPLICATE_INSERT_FIRST;
+ else if (flags & HAM_DUPLICATE_INSERT_AFTER) {
+ if (duplicate_index == (int)record_count)
+ flags |= HAM_DUPLICATE_INSERT_LAST;
+ else {
+ flags |= HAM_DUPLICATE_INSERT_BEFORE;
+ duplicate_index++;
+ }
+ }
+
+ // handle overwrites or inserts/appends
+ if (flags & HAM_DUPLICATE_INSERT_FIRST) {
+ if (record_count > 0) {
+ uint8_t *ptr = &m_data[chunk_offset + 1];
+ memmove(&m_data[chunk_offset + 1 + 9], ptr, record_count * 9);
+ }
+ duplicate_index = 0;
+ }
+ else if (flags & HAM_DUPLICATE_INSERT_BEFORE) {
+ memmove(&m_data[chunk_offset + 1 + 9 * (duplicate_index + 1)],
+ &m_data[chunk_offset + 1 + 9 * duplicate_index],
+ (record_count - duplicate_index) * 9);
+ }
+ else // HAM_DUPLICATE_INSERT_LAST
+ duplicate_index = record_count;
+
+ set_inline_record_count(slot, record_count + 1);
+
+ record_flags = &m_data[chunk_offset + 1 + 9 * duplicate_index];
+ p = record_flags + 1;
+
+write_record:
+ if (record->size == 0) {
+ memcpy(p, "\0\0\0\0\0\0\0\0", 8);
+ *record_flags = BtreeRecord::kBlobSizeEmpty;
+ }
+ else if (record->size < sizeof(uint64_t)) {
+ p[sizeof(uint64_t) - 1] = (uint8_t)record->size;
+ memcpy(&p[0], record->data, record->size);
+ *record_flags = BtreeRecord::kBlobSizeTiny;
+ }
+ else if (record->size == sizeof(uint64_t)) {
+ memcpy(&p[0], record->data, record->size);
+ *record_flags = BtreeRecord::kBlobSizeSmall;
+ }
+ else {
+ LocalEnvironment *env = m_db->lenv();
+ *record_flags = 0;
+ uint64_t blob_id;
+ if (overwrite_blob_id)
+ blob_id = env->blob_manager()->overwrite(context,
+ overwrite_blob_id, record, flags);
+ else
+ blob_id = env->blob_manager()->allocate(context, record, flags);
+ memcpy(p, &blob_id, sizeof(blob_id));
+ }
+
+ if (new_duplicate_index)
+ *new_duplicate_index = duplicate_index;
+ }
+
+ // Erases a record
+ void erase_record(Context *context, int slot, int duplicate_index = 0,
+ bool all_duplicates = false) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+
+ // forward to external duplicate table?
+ if (unlikely(m_data[offset] & BtreeRecord::kExtendedDuplicates)) {
+ uint64_t table_id = get_record_id(slot);
+ DuplicateTable *dt = get_duplicate_table(context, table_id);
+ uint64_t new_table_id = dt->erase_record(context, duplicate_index,
+ all_duplicates);
+ if (new_table_id == 0) {
+ m_duptable_cache->erase(table_id);
+ set_record_id(slot, 0);
+ m_data[offset] &= ~BtreeRecord::kExtendedDuplicates;
+ delete dt;
+ }
+ else if (new_table_id != table_id) {
+ update_duplicate_table_id(dt, table_id, new_table_id);
+ set_record_id(slot, new_table_id);
+ }
+ return;
+ }
+
+ // erase the last duplicate?
+ uint32_t count = get_inline_record_count(slot);
+ if (count == 1 && duplicate_index == 0)
+ all_duplicates = true;
+
+ // adjust next_offset, if necessary. Note that get_next_offset() is
+ // called with a node_count of zero, which is valid (it avoids a
+ // recalculation in case there is no next_offset)
+ m_index.maybe_invalidate_next_offset(m_index.get_chunk_offset(slot)
+ + m_index.get_chunk_size(slot));
+
+ // erase all duplicates?
+ if (all_duplicates) {
+ for (uint32_t i = 0; i < count; i++) {
+ uint8_t *p = &m_data[offset + 1 + 9 * i];
+ if (!is_record_inline(*p)) {
+ m_db->lenv()->blob_manager()->erase(context, *(uint64_t *)(p + 1));
+ *(uint64_t *)(p + 1) = 0;
+ }
+ }
+ set_inline_record_count(slot, 0);
+ m_index.set_chunk_size(slot, 0);
+ }
+ else {
+ uint8_t *p = &m_data[offset + 1 + 9 * duplicate_index];
+ if (!is_record_inline(*p)) {
+ m_db->lenv()->blob_manager()->erase(context, *(uint64_t *)(p + 1));
+ *(uint64_t *)(p + 1) = 0;
+ }
+ if (duplicate_index < (int)count - 1)
+ memmove(&m_data[offset + 1 + 9 * duplicate_index],
+ &m_data[offset + 1 + 9 * (duplicate_index + 1)],
+ 9 * (count - duplicate_index - 1));
+ set_inline_record_count(slot, count - 1);
+ }
+ }
+
+ // Returns a record id
+ uint64_t get_record_id(int slot,
+ int duplicate_index = 0) const {
+ return (*(uint64_t *)get_record_data(slot, duplicate_index));
+ }
+
+ // Sets a record id
+ void set_record_id(int slot, uint64_t id) {
+ *(uint64_t *)get_record_data(slot, 0) = id;
+ }
+
+ // Checks the integrity of this node. Throws an exception if there is a
+ // violation.
+ void check_integrity(Context *context, size_t node_count) const {
+ for (size_t i = 0; i < node_count; i++) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(i);
+ if (m_data[offset] & BtreeRecord::kExtendedDuplicates) {
+ ham_assert((m_data[offset] & 0x7f) == 0);
+ }
+ }
+
+ m_index.check_integrity(node_count);
+ }
+
+    // Changes the capacity; the capacity can only be reduced, growing is
+    // not implemented. This means that the data area must be copied; the
+    // offsets do not have to be changed.
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t capacity_hint) {
+ // no capacity given? then try to find a good default one
+ if (capacity_hint == 0) {
+ capacity_hint = (new_range_size - m_index.get_next_offset(node_count)
+ - get_full_record_size()) / m_index.get_full_index_size();
+ if (capacity_hint <= node_count)
+ capacity_hint = node_count + 1;
+ }
+
+ // if there's not enough space for the new capacity then try to reduce
+ // the capacity
+ if (m_index.get_next_offset(node_count) + get_full_record_size()
+ + capacity_hint * m_index.get_full_index_size()
+ + UpfrontIndex::kPayloadOffset
+ > new_range_size)
+ capacity_hint = node_count + 1;
+
+ m_index.change_range_size(node_count, new_data_ptr, new_range_size,
+ capacity_hint);
+ m_data = new_data_ptr;
+ m_range_size = new_range_size;
+ }
+
+ // Returns true if there's not enough space for another record
+ bool requires_split(size_t node_count) {
+ // if the record is extremely small then make sure there's some headroom;
+ // this is required for DuplicateTable ids which are 64bit numbers
+ size_t required = get_full_record_size();
+ if (required < 10)
+ required = 10;
+ return (m_index.requires_split(node_count, required));
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BaseRecordList::fill_metrics(metrics, node_count);
+ BtreeStatistics::update_min_max_avg(&metrics->recordlist_index,
+ m_index.get_capacity() * m_index.get_full_index_size());
+ BtreeStatistics::update_min_max_avg(&metrics->recordlist_unused,
+ m_range_size - get_required_range_size(node_count));
+ }
+
+ // Prints a slot to |out| (for debugging)
+ void print(Context *context, int slot, std::stringstream &out) {
+ out << "(" << get_record_count(context, slot) << " records)";
+ }
+
+ private:
+ // Returns the number of records that are stored inline
+ uint32_t get_inline_record_count(int slot) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ return (m_data[offset] & 0x7f);
+ }
+
+ // Sets the number of records that are stored inline
+ void set_inline_record_count(int slot, size_t count) {
+ ham_assert(count <= 0x7f);
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ m_data[offset] &= BtreeRecord::kExtendedDuplicates;
+ m_data[offset] |= count;
+ }
+
+    // Returns a pointer to the record data (non-const flavour)
+ uint8_t *get_record_data(int slot, int duplicate_index = 0) {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ return (&m_data[offset + 1 + 9 * duplicate_index]);
+ }
+
+ // Returns a pointer to the record data (const flavour)
+ const uint8_t *get_record_data(int slot,
+ int duplicate_index = 0) const {
+ uint32_t offset = m_index.get_absolute_chunk_offset(slot);
+ return (&m_data[offset + 1 + 9 * duplicate_index]);
+ }
+};
+
+} // namespace DefLayout
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_RECORDS_DUPLICATE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_inline.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_inline.h
new file mode 100644
index 0000000000..6a7ac4ff35
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_inline.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * RecordList for Inline Records
+ *
+ * Inline Records are records that are stored directly in the leaf node, and
+ * not in an external blob. Only for fixed length records.
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
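+
+/*
+ * Usage sketch (illustrative; the exact layout-selection rules live in the
+ * btree setup code, and the parameter list below is only an example):
+ * fixed-length records are configured when the database is created, e.g.
+ *
+ *   ham_parameter_t params[] = {
+ *     { HAM_PARAM_RECORD_SIZE, 8 },   // every record is exactly 8 bytes
+ *     { 0, 0 }
+ *   };
+ *   ham_env_create_db(env, &db, 1, 0, &params[0]);
+ */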
+
+#ifndef HAM_BTREE_RECORDS_INLINE_H
+#define HAM_BTREE_RECORDS_INLINE_H
+
+#include "0root/root.h"
+
+#include <sstream>
+#include <iostream>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_node.h"
+#include "3btree/btree_records_base.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// The template classes in this file are wrapped in a separate namespace
+// to avoid naming clashes with btree_impl_default.h
+//
+namespace PaxLayout {
+
+class InlineRecordList : public BaseRecordList
+{
+ public:
+ enum {
+ // A flag whether this RecordList has sequential data
+ kHasSequentialData = 1
+ };
+
+ // Constructor
+ InlineRecordList(LocalDatabase *db, PBtreeNode *node)
+ : m_db(db), m_record_size(db->config().record_size), m_data(0) {
+ ham_assert(m_record_size != HAM_RECORD_SIZE_UNLIMITED);
+ }
+
+ // Sets the data pointer
+ void create(uint8_t *data, size_t range_size) {
+ m_data = (uint8_t *)data;
+ m_range_size = range_size;
+ }
+
+ // Opens an existing RecordList
+ void open(uint8_t *ptr, size_t range_size, size_t node_count) {
+ m_data = ptr;
+ m_range_size = range_size;
+ }
+
+ // Returns the actual record size including overhead
+ size_t get_full_record_size() const {
+ return (m_record_size);
+ }
+
+    // Calculates the required size for a range with |node_count| records
+ size_t get_required_range_size(size_t node_count) const {
+ return (node_count * m_record_size);
+ }
+
+ // Returns the record counter of a key
+ int get_record_count(Context *context, int slot) const {
+ return (1);
+ }
+
+ // Returns the record size
+ uint64_t get_record_size(Context *context, int slot,
+ int duplicate_index = 0) const {
+ return (m_record_size);
+ }
+
+    // Returns the full record and stores it in |record|; the memory is
+    // allocated from |arena| unless HAM_RECORD_USER_ALLOC is set
+ void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags,
+ int duplicate_index) const {
+ bool direct_access = (flags & HAM_DIRECT_ACCESS) != 0;
+
+ if (flags & HAM_PARTIAL) {
+ ham_trace(("flag HAM_PARTIAL is not allowed if record is "
+ "stored inline"));
+ throw Exception(HAM_INV_PARAMETER);
+ }
+
+ // the record is stored inline
+ record->size = m_record_size;
+
+ if (m_record_size == 0)
+ record->data = 0;
+ else if (direct_access)
+ record->data = &m_data[slot * m_record_size];
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, &m_data[slot * m_record_size], record->size);
+ }
+ }
+
+ // Updates the record of a key
+ void set_record(Context *context, int slot, int duplicate_index,
+ ham_record_t *record, uint32_t flags,
+ uint32_t *new_duplicate_index = 0) {
+ ham_assert(record->size == m_record_size);
+ // it's possible that the records have size 0 - then don't copy anything
+ if (m_record_size)
+ memcpy(&m_data[m_record_size * slot], record->data, m_record_size);
+ }
+
+ // Erases the record
+ void erase_record(Context *context, int slot, int duplicate_index = 0,
+ bool all_duplicates = true) {
+ if (m_record_size)
+ memset(&m_data[m_record_size * slot], 0, m_record_size);
+ }
+
+ // Erases a whole slot by shifting all larger records to the "left"
+ void erase(Context *context, size_t node_count, int slot) {
+ if (slot < (int)node_count - 1)
+ memmove(&m_data[m_record_size * slot],
+ &m_data[m_record_size * (slot + 1)],
+ m_record_size * (node_count - slot - 1));
+ }
+
+ // Creates space for one additional record
+ void insert(Context *context, size_t node_count, int slot) {
+ if (slot < (int)node_count) {
+ memmove(&m_data[m_record_size * (slot + 1)],
+ &m_data[m_record_size * slot],
+ m_record_size * (node_count - slot));
+ }
+ memset(&m_data[m_record_size * slot], 0, m_record_size);
+ }
+
+    // Copies |node_count - sstart| records from this[sstart] to dest[dstart]
+ void copy_to(int sstart, size_t node_count, InlineRecordList &dest,
+ size_t other_count, int dstart) {
+ memcpy(&dest.m_data[m_record_size * dstart],
+ &m_data[m_record_size * sstart],
+ m_record_size * (node_count - sstart));
+ }
+
+ // Returns the record id. Not required for fixed length leaf nodes
+ uint64_t get_record_id(int slot, int duplicate_index = 0)
+ const {
+ ham_assert(!"shouldn't be here");
+ return (0);
+ }
+
+ // Sets the record id. Not required for fixed length leaf nodes
+ void set_record_id(int slot, uint64_t ptr) {
+ ham_assert(!"shouldn't be here");
+ }
+
+ // Returns true if there's not enough space for another record
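+    // (Example sketch: with 8-byte records and a 56-byte range,
+    // requires_split(6) returns true because (6 + 1) * 8 >= 56, i.e. a
+    // seventh record would no longer fit.)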
+ bool requires_split(size_t node_count) const {
+ if (m_range_size == 0)
+ return (false);
+ return ((node_count + 1) * m_record_size >= m_range_size);
+ }
+
+ // Change the capacity; for PAX layouts this just means copying the
+ // data from one place to the other
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t capacity_hint) {
+ memmove(new_data_ptr, m_data, node_count * m_record_size);
+ m_data = new_data_ptr;
+ m_range_size = new_range_size;
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BaseRecordList::fill_metrics(metrics, node_count);
+ BtreeStatistics::update_min_max_avg(&metrics->recordlist_unused,
+ m_range_size - get_required_range_size(node_count));
+ }
+
+ // Prints a slot to |out| (for debugging)
+ void print(Context *context, int slot, std::stringstream &out) const {
+ out << "(" << get_record_size(context, slot) << " bytes)";
+ }
+
+ private:
+ // The parent database of this btree
+ LocalDatabase *m_db;
+
+ // The record size, as specified when the database was created
+ size_t m_record_size;
+
+ // The actual record data
+ uint8_t *m_data;
+};
+
+} // namespace PaxLayout
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_RECORDS_INLINE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_internal.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_internal.h
new file mode 100644
index 0000000000..9773119991
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_records_internal.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Internal RecordList
+ *
+ * Only for records of internal nodes. Internal nodes only store page IDs,
+ * therefore this |InternalRecordList| is optimized for 64bit IDs
+ * (and is implemented as a uint64_t[] array).
+ *
+ * For file-based databases the page IDs are divided by the page size (they
+ * are always a multiple of it), which results in smaller IDs. Small IDs can
+ * be compressed more efficiently (-> hamsterdb pro).
+ *
+ * In-memory based databases just store the raw pointers.
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: unknown
+ */
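+
+/*
+ * Example (sketch, derived from set_record_id()/get_record_id() below): with
+ * a page size of 16384 bytes, the page address 32768 is stored as 2 and
+ * reconstructed as 2 * 16384 when it is read back.
+ */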
+
+#ifndef HAM_BTREE_RECORDS_INTERNAL_H
+#define HAM_BTREE_RECORDS_INTERNAL_H
+
+#include "0root/root.h"
+
+#include <sstream>
+#include <iostream>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+#include "1base/dynamic_array.h"
+#include "2page/page.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_records_base.h"
+#include "3btree/btree_node.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// The template classes in this file are wrapped in a separate namespace
+// to avoid naming clashes with btree_impl_default.h
+//
+namespace PaxLayout {
+
+class InternalRecordList : public BaseRecordList
+{
+ public:
+ enum {
+ // A flag whether this RecordList has sequential data
+ kHasSequentialData = 1
+ };
+
+ // Constructor
+ InternalRecordList(LocalDatabase *db, PBtreeNode *node)
+ : m_db(db), m_data(0) {
+ m_page_size = m_db->lenv()->config().page_size_bytes;
+ m_store_raw_id = (m_db->lenv()->config().flags
+ & HAM_IN_MEMORY) == HAM_IN_MEMORY;
+ }
+
+ // Sets the data pointer
+ void create(uint8_t *data, size_t range_size) {
+ m_data = (uint64_t *)data;
+ m_range_size = range_size;
+ }
+
+ // Opens an existing RecordList
+ void open(uint8_t *ptr, size_t range_size, size_t node_count) {
+ m_data = (uint64_t *)ptr;
+ m_range_size = range_size;
+ }
+
+ // Returns the actual size including overhead
+ size_t get_full_record_size() const {
+ return (sizeof(uint64_t));
+ }
+
+    // Calculates the required size for a range with |node_count| records
+ size_t get_required_range_size(size_t node_count) const {
+ return (node_count * sizeof(uint64_t));
+ }
+
+ // Returns the record counter of a key; this implementation does not
+ // support duplicates, therefore the record count is always 1
+ int get_record_count(Context *context, int slot) const {
+ return (1);
+ }
+
+ // Returns the record size
+ uint64_t get_record_size(Context *context, int slot,
+ int duplicate_index = 0) const {
+ return (sizeof(uint64_t));
+ }
+
+    // Returns the full record and stores it in |record|; the memory is
+    // allocated from |arena| unless HAM_RECORD_USER_ALLOC is set
+ void get_record(Context *context, int slot, ByteArray *arena,
+ ham_record_t *record, uint32_t flags,
+ int duplicate_index) const {
+ bool direct_access = (flags & HAM_DIRECT_ACCESS) != 0;
+
+ // the record is stored inline
+ record->size = sizeof(uint64_t);
+
+ if (direct_access)
+ record->data = (void *)&m_data[slot];
+ else {
+ if ((record->flags & HAM_RECORD_USER_ALLOC) == 0) {
+ arena->resize(record->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, &m_data[slot], record->size);
+ }
+ }
+
+ // Updates the record of a key
+ void set_record(Context *context, int slot, int duplicate_index,
+ ham_record_t *record, uint32_t flags,
+ uint32_t *new_duplicate_index = 0) {
+ ham_assert(record->size == sizeof(uint64_t));
+ m_data[slot] = *(uint64_t *)record->data;
+ }
+
+ // Erases the record
+ void erase_record(Context *context, int slot, int duplicate_index = 0,
+ bool all_duplicates = true) {
+ m_data[slot] = 0;
+ }
+
+ // Erases a whole slot by shifting all larger records to the "left"
+ void erase(Context *context, size_t node_count, int slot) {
+ if (slot < (int)node_count - 1)
+ memmove(&m_data[slot], &m_data[slot + 1],
+ sizeof(uint64_t) * (node_count - slot - 1));
+ }
+
+ // Creates space for one additional record
+ void insert(Context *context, size_t node_count, int slot) {
+ if (slot < (int)node_count) {
+ memmove(&m_data[slot + 1], &m_data[slot],
+ sizeof(uint64_t) * (node_count - slot));
+ }
+ m_data[slot] = 0;
+ }
+
+    // Copies |node_count - sstart| records from this[sstart] to dest[dstart]
+ void copy_to(int sstart, size_t node_count, InternalRecordList &dest,
+ size_t other_count, int dstart) {
+ memcpy(&dest.m_data[dstart], &m_data[sstart],
+ sizeof(uint64_t) * (node_count - sstart));
+ }
+
+ // Sets the record id
+ void set_record_id(int slot, uint64_t value) {
+      ham_assert(m_store_raw_id || value % m_page_size == 0);
+ m_data[slot] = m_store_raw_id ? value : value / m_page_size;
+ }
+
+ // Returns the record id
+ uint64_t get_record_id(int slot,
+ int duplicate_index = 0) const {
+ ham_assert(duplicate_index == 0);
+ return (m_store_raw_id ? m_data[slot] : m_page_size * m_data[slot]);
+ }
+
+ // Returns true if there's not enough space for another record
+ bool requires_split(size_t node_count) const {
+ return ((node_count + 1) * sizeof(uint64_t) >= m_range_size);
+ }
+
+ // Change the capacity; for PAX layouts this just means copying the
+ // data from one place to the other
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t capacity_hint) {
+ if ((uint64_t *)new_data_ptr != m_data) {
+ memmove(new_data_ptr, m_data, node_count * sizeof(uint64_t));
+ m_data = (uint64_t *)new_data_ptr;
+ }
+ m_range_size = new_range_size;
+ }
+
+ // Fills the btree_metrics structure
+ void fill_metrics(btree_metrics_t *metrics, size_t node_count) {
+ BaseRecordList::fill_metrics(metrics, node_count);
+ BtreeStatistics::update_min_max_avg(&metrics->recordlist_unused,
+ m_range_size - get_required_range_size(node_count));
+ }
+
+ // Prints a slot to |out| (for debugging)
+ void print(Context *context, int slot, std::stringstream &out) const {
+      out << "(" << get_record_id(slot) << ")";
+ }
+
+ private:
+ // The parent database of this btree
+ LocalDatabase *m_db;
+
+ // The record data is an array of page IDs
+ uint64_t *m_data;
+
+ // The page size
+ size_t m_page_size;
+
+ // Store page ID % page size or the raw page ID?
+ bool m_store_raw_id;
+};
+
+} // namespace PaxLayout
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_RECORDS_INTERNAL_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_stats.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_stats.cc
new file mode 100644
index 0000000000..edd8c7b7a1
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_stats.cc
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+#include <stdio.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "2page/page.h"
+#include "3btree/btree_stats.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_node_proxy.h"
+#include "4db/db_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+BtreeStatistics::BtreeStatistics()
+ : m_append_count(0), m_prepend_count(0)
+{
+ memset(&m_last_leaf_pages[0], 0, sizeof(m_last_leaf_pages));
+ memset(&m_last_leaf_count[0], 0, sizeof(m_last_leaf_count));
+ memset(&m_keylist_range_size[0], 0, sizeof(m_keylist_range_size));
+ memset(&m_keylist_capacities[0], 0, sizeof(m_keylist_capacities));
+}
+
+void
+BtreeStatistics::find_succeeded(Page *page)
+{
+ uint64_t old = m_last_leaf_pages[kOperationFind];
+ if (old != page->get_address()) {
+    m_last_leaf_pages[kOperationFind] = page->get_address();
+ m_last_leaf_count[kOperationFind] = 0;
+ }
+ else
+ m_last_leaf_count[kOperationFind]++;
+}
+
+void
+BtreeStatistics::find_failed()
+{
+ m_last_leaf_pages[kOperationFind] = 0;
+ m_last_leaf_count[kOperationFind] = 0;
+}
+
+void
+BtreeStatistics::insert_succeeded(Page *page, uint16_t slot)
+{
+ uint64_t old = m_last_leaf_pages[kOperationInsert];
+ if (old != page->get_address()) {
+ m_last_leaf_pages[kOperationInsert] = page->get_address();
+ m_last_leaf_count[kOperationInsert] = 0;
+ }
+ else
+ m_last_leaf_count[kOperationInsert]++;
+
+ BtreeNodeProxy *node;
+ node = page->get_db()->btree_index()->get_node_from_page(page);
+ ham_assert(node->is_leaf());
+
+ if (!node->get_right() && slot == node->get_count() - 1)
+ m_append_count++;
+ else
+ m_append_count = 0;
+
+ if (!node->get_left() && slot == 0)
+ m_prepend_count++;
+ else
+ m_prepend_count = 0;
+}
+
+void
+BtreeStatistics::insert_failed()
+{
+ m_last_leaf_pages[kOperationInsert] = 0;
+ m_last_leaf_count[kOperationInsert] = 0;
+ m_append_count = 0;
+ m_prepend_count = 0;
+}
+
+void
+BtreeStatistics::erase_succeeded(Page *page)
+{
+ uint64_t old = m_last_leaf_pages[kOperationErase];
+ if (old != page->get_address()) {
+ m_last_leaf_pages[kOperationErase] = page->get_address();
+ m_last_leaf_count[kOperationErase] = 0;
+ }
+ else
+ m_last_leaf_count[kOperationErase]++;
+}
+
+void
+BtreeStatistics::erase_failed()
+{
+ m_last_leaf_pages[kOperationErase] = 0;
+ m_last_leaf_count[kOperationErase] = 0;
+}
+
+void
+BtreeStatistics::reset_page(Page *page)
+{
+ for (int i = 0; i < kOperationMax; i++) {
+ m_last_leaf_pages[i] = 0;
+ m_last_leaf_count[i] = 0;
+ }
+}
+
+BtreeStatistics::FindHints
+BtreeStatistics::get_find_hints(uint32_t flags)
+{
+ BtreeStatistics::FindHints hints = {flags, flags, 0, false};
+
+ /* if the last 5 lookups hit the same page: reuse that page */
+ if (m_last_leaf_count[kOperationFind] >= 5) {
+ hints.try_fast_track = true;
+ hints.leaf_page_addr = m_last_leaf_pages[kOperationFind];
+ }
+
+ return (hints);
+}
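+
+/*
+ * Example (sketch): once the last lookups repeatedly ended in the same leaf
+ * page (see the counter check above), |try_fast_track| is set and
+ * |leaf_page_addr| points to that page, so the caller can probe this leaf
+ * directly before falling back to a full root-to-leaf traversal.
+ */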
+
+BtreeStatistics::InsertHints
+BtreeStatistics::get_insert_hints(uint32_t flags)
+{
+ InsertHints hints = {flags, flags, 0, 0, 0, 0, 0};
+
+ /* if the previous insert-operation replaced the upper bound (or
+ * lower bound) key then it was actually an append (or prepend) operation.
+ * in this case there's some probability that the next operation is also
+ * appending/prepending.
+ */
+ if (m_append_count > 0)
+ hints.flags |= HAM_HINT_APPEND;
+ else if (m_prepend_count > 0)
+ hints.flags |= HAM_HINT_PREPEND;
+
+ hints.append_count = m_append_count;
+ hints.prepend_count = m_prepend_count;
+
+ /* if the last 5 inserts hit the same page: reuse that page */
+ if (m_last_leaf_count[kOperationInsert] >= 5)
+ hints.leaf_page_addr = m_last_leaf_pages[kOperationInsert];
+
+ return (hints);
+}
+
+#define AVG(m) ((m)._instances ? ((m)._total / (m)._instances) : 0)
+
+void
+BtreeStatistics::finalize_metrics(btree_metrics_t *metrics)
+{
+ metrics->keys_per_page.avg = AVG(metrics->keys_per_page);
+ metrics->keylist_ranges.avg = AVG(metrics->keylist_ranges);
+ metrics->recordlist_ranges.avg = AVG(metrics->recordlist_ranges);
+ metrics->keylist_index.avg = AVG(metrics->keylist_index);
+ metrics->recordlist_index.avg = AVG(metrics->recordlist_index);
+ metrics->keylist_unused.avg = AVG(metrics->keylist_unused);
+ metrics->recordlist_unused.avg = AVG(metrics->recordlist_unused);
+ metrics->keylist_blocks_per_page.avg = AVG(metrics->keylist_blocks_per_page);
+ metrics->keylist_block_sizes.avg = AVG(metrics->keylist_block_sizes);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_stats.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_stats.h
new file mode 100644
index 0000000000..66c3f21ab9
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_stats.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * btree find/insert/erase statistical structures, functions and macros
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_STATS_H
+#define HAM_BTREE_STATS_H
+
+#include "0root/root.h"
+
+#include <limits>
+
+#include "ham/hamsterdb_int.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Page;
+
+class BtreeStatistics {
+ public:
+ // Indices into find/insert/erase specific statistics
+ enum {
+ kOperationFind = 0,
+ kOperationInsert = 1,
+ kOperationErase = 2,
+ kOperationMax = 3
+ };
+
+ struct FindHints {
+ // the original flags of ham_find
+ uint32_t original_flags;
+
+ // the modified flags
+ uint32_t flags;
+
+ // page/btree leaf to check first
+ uint64_t leaf_page_addr;
+
+ // check specified btree leaf node page first
+ bool try_fast_track;
+ };
+
+ struct InsertHints {
+ // the original flags of ham_insert
+ uint32_t original_flags;
+
+ // the modified flags
+ uint32_t flags;
+
+ // page/btree leaf to check first
+ uint64_t leaf_page_addr;
+
+ // the processed leaf page
+ Page *processed_leaf_page;
+
+ // the slot in that page
+ uint16_t processed_slot;
+
+ // count the number of appends
+ size_t append_count;
+
+ // count the number of prepends
+ size_t prepend_count;
+ };
+
+ // Constructor
+ BtreeStatistics();
+
+ // Returns the btree hints for ham_find
+ FindHints get_find_hints(uint32_t flags);
+
+ // Returns the btree hints for insert
+ InsertHints get_insert_hints(uint32_t flags);
+
+    // Reports that a ham_find/ham_cursor_find succeeded
+ void find_succeeded(Page *page);
+
+ // Reports that a ham_find/ham_cursor_find failed
+ void find_failed();
+
+ // Reports that a ham_insert/ham_cursor_insert succeeded
+ void insert_succeeded(Page *page, uint16_t slot);
+
+ // Reports that a ham_insert/ham_cursor_insert failed
+ void insert_failed();
+
+    // Reports that a ham_erase/ham_cursor_erase succeeded
+ void erase_succeeded(Page *page);
+
+ // Reports that a ham_erase/ham_cursor_erase failed
+ void erase_failed();
+
+ // Resets the statistics for a single page
+ void reset_page(Page *page);
+
+ // Keep track of the KeyList range size
+ void set_keylist_range_size(bool leaf, size_t size) {
+ m_keylist_range_size[(int)leaf] = size;
+ }
+
+ // Retrieves the KeyList range size
+ size_t get_keylist_range_size(bool leaf) const {
+ return (m_keylist_range_size[(int)leaf]);
+ }
+
+ // Keep track of the KeyList capacities
+ void set_keylist_capacities(bool leaf, size_t capacity) {
+ m_keylist_capacities[(int)leaf] = capacity;
+ }
+
+    // Retrieves the KeyList capacities
+ size_t get_keylist_capacities(bool leaf) const {
+ return (m_keylist_capacities[(int)leaf]);
+ }
+
+ // Calculate the "average" values
+ static void finalize_metrics(btree_metrics_t *metrics);
+
+ // Update a min_max_avg structure
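+    // (Example sketch, assuming |data| starts zero-initialized: feeding the
+    // values 3, 10 and 5 into this function yields min == 3, max == 10,
+    // _total == 18 and _instances == 3; finalize_metrics() later derives
+    // avg == 18 / 3 == 6.)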
+ static void update_min_max_avg(min_max_avg_u32_t *data, uint32_t value) {
+ // first update? then perform initialization
+ if (data->_instances == 0)
+ data->min = std::numeric_limits<uint32_t>::max();
+
+ if (data->min > value)
+ data->min = value;
+ if (data->max < value)
+ data->max = value;
+ data->_total += value;
+ data->_instances++;
+ }
+
+ private:
+ // last leaf page for find/insert/erase
+ uint64_t m_last_leaf_pages[kOperationMax];
+
+ // count of how often this leaf page was used
+ size_t m_last_leaf_count[kOperationMax];
+
+ // count the number of appends
+ size_t m_append_count;
+
+ // count the number of prepends
+ size_t m_prepend_count;
+
+ // the range size of the KeyList
+ size_t m_keylist_range_size[2];
+
+ // the capacities of the KeyList
+ size_t m_keylist_capacities[2];
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_STATS_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_update.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_update.cc
new file mode 100644
index 0000000000..07d6cf61d4
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_update.cc
@@ -0,0 +1,436 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "3page_manager/page_manager.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_stats.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_update.h"
+#include "3btree/btree_node_proxy.h"
+#include "4cursor/cursor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/* a unittest hook triggered when a page is split */
+void (*g_BTREE_INSERT_SPLIT_HOOK)(void);
+
+// Traverses the tree, looking for the leaf with the specified |key|. Will
+// split or merge nodes while descending.
+// Returns the leaf page and the |parent| of the leaf (can be null if
+// there is no parent).
+Page *
+BtreeUpdateAction::traverse_tree(const ham_key_t *key,
+ BtreeStatistics::InsertHints &hints,
+ Page **parent)
+{
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ Page *page = env->page_manager()->fetch(m_context,
+ m_btree->get_root_address());
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+
+ *parent = 0;
+
+ // if the root page is empty with children then collapse it
+  // if the root page is empty but still has a child then collapse it
+ page = collapse_root(page);
+ node = m_btree->get_node_from_page(page);
+ }
+
+ int slot;
+
+ // now walk down the tree
+ while (!node->is_leaf()) {
+ // is a split required?
+ if (node->requires_split(m_context)) {
+ page = split_page(page, *parent, key, hints);
+ node = m_btree->get_node_from_page(page);
+ }
+
+ // get the child page
+ Page *sib_page = 0;
+ Page *child_page = m_btree->find_child(m_context, page, key, 0, &slot);
+ BtreeNodeProxy *child_node = m_btree->get_node_from_page(child_page);
+
+ // We can merge this child with the RIGHT sibling iff...
+ // 1. it's not the right-most slot (and therefore the right sibling has
+ // the same parent as the child)
+ // 2. the child is a leaf!
+ // 3. it's empty or has too few elements
+    // 4. its right sibling is also empty or has too few elements
+ if (slot < (int)node->get_count() - 1
+ && child_node->is_leaf()
+ && child_node->requires_merge()
+ && child_node->get_right() != 0) {
+ sib_page = env->page_manager()->fetch(m_context,
+ child_node->get_right(),
+ PageManager::kOnlyFromCache);
+ if (sib_page != 0) {
+ BtreeNodeProxy *sib_node = m_btree->get_node_from_page(sib_page);
+ if (sib_node->requires_merge()) {
+ merge_page(child_page, sib_page);
+ // also remove the link to the sibling from the parent
+ node->erase(m_context, slot + 1);
+ page->set_dirty(true);
+ }
+ }
+ }
+
+ // We can also merge this child with the LEFT sibling iff...
+ // 1. it's not the left-most slot
+ // 2. the child is a leaf!
+ // 3. it's empty or has too few elements
+    // 4. its left sibling is also empty or has too few elements
+ else if (slot > 0
+ && child_node->is_leaf()
+ && child_node->requires_merge()
+ && child_node->get_left() != 0) {
+ sib_page = env->page_manager()->fetch(m_context,
+ child_node->get_left(),
+ PageManager::kOnlyFromCache);
+ if (sib_page != 0) {
+ BtreeNodeProxy *sib_node = m_btree->get_node_from_page(sib_page);
+ if (sib_node->requires_merge()) {
+ merge_page(sib_page, child_page);
+ // also remove the link to the sibling from the parent
+ node->erase(m_context, slot);
+ page->set_dirty(true);
+ // continue traversal with the sibling
+ child_page = sib_page;
+ child_node = sib_node;
+ }
+ }
+ }
+
+ *parent = page;
+
+ // go down one level in the tree
+ page = child_page;
+ node = child_node;
+ }
+
+ return (page);
+}
+
+Page *
+BtreeUpdateAction::merge_page(Page *page, Page *sibling)
+{
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+ BtreeNodeProxy *sib_node = m_btree->get_node_from_page(sibling);
+
+ if (sib_node->is_leaf())
+ BtreeCursor::uncouple_all_cursors(m_context, sibling, 0);
+
+ node->merge_from(m_context, sib_node);
+ page->set_dirty(true);
+
+ // fix the linked list
+ node->set_right(sib_node->get_right());
+ if (node->get_right()) {
+ Page *new_right = env->page_manager()->fetch(m_context, node->get_right());
+ BtreeNodeProxy *new_right_node = m_btree->get_node_from_page(new_right);
+ new_right_node->set_left(page->get_address());
+ new_right->set_dirty(true);
+ }
+
+ m_btree->get_statistics()->reset_page(sibling);
+ m_btree->get_statistics()->reset_page(page);
+ env->page_manager()->del(m_context, sibling);
+
+ BtreeIndex::ms_btree_smo_merge++;
+ return (page);
+}
+
+Page *
+BtreeUpdateAction::collapse_root(Page *root_page)
+{
+ LocalEnvironment *env = root_page->get_db()->lenv();
+ BtreeNodeProxy *node = m_btree->get_node_from_page(root_page);
+ ham_assert(node->get_count() == 0);
+
+ m_btree->get_statistics()->reset_page(root_page);
+ m_btree->set_root_address(m_context, node->get_ptr_down());
+ Page *header = env->page_manager()->fetch(m_context, 0);
+ header->set_dirty(true);
+
+ Page *new_root = env->page_manager()->fetch(m_context,
+ m_btree->get_root_address());
+ new_root->set_type(Page::kTypeBroot);
+ env->page_manager()->del(m_context, root_page);
+ return (new_root);
+}
+
+Page *
+BtreeUpdateAction::split_page(Page *old_page, Page *parent,
+ const ham_key_t *key,
+ BtreeStatistics::InsertHints &hints)
+{
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ m_btree->get_statistics()->reset_page(old_page);
+ BtreeNodeProxy *old_node = m_btree->get_node_from_page(old_page);
+
+ /* allocate a new page and initialize it */
+ Page *new_page = env->page_manager()->alloc(m_context, Page::kTypeBindex);
+ {
+ PBtreeNode *node = PBtreeNode::from_page(new_page);
+ node->set_flags(old_node->is_leaf() ? PBtreeNode::kLeafNode : 0);
+ }
+ BtreeNodeProxy *new_node = m_btree->get_node_from_page(new_page);
+
+ /* no parent page? then we're splitting the root page. allocate
+ * a new root page */
+ if (!parent)
+ parent = allocate_new_root(old_page);
+
+ Page *to_return = 0;
+ ByteArray pivot_key_arena;
+ ham_key_t pivot_key = {0};
+
+ /* if the key is appended then don't split the page; simply allocate
+ * a new page and insert the new key. */
+ int pivot = 0;
+ if (hints.flags & HAM_HINT_APPEND && old_node->is_leaf()) {
+ int cmp = old_node->compare(m_context, key, old_node->get_count() - 1);
+ if (cmp == +1) {
+ to_return = new_page;
+ pivot_key = *key;
+ pivot = old_node->get_count();
+ }
+ }
+
+ /* no append? then calculate the pivot key and perform the split */
+ if (pivot != (int)old_node->get_count()) {
+ pivot = get_pivot(old_node, key, hints);
+
+ /* and store the pivot key for later */
+ old_node->get_key(m_context, pivot, &pivot_key_arena, &pivot_key);
+
+ /* leaf page: uncouple all cursors */
+ if (old_node->is_leaf())
+ BtreeCursor::uncouple_all_cursors(m_context, old_page, pivot);
+ /* internal page: fix the ptr_down of the new page
+ * (it must point to the ptr of the pivot key) */
+ else
+ new_node->set_ptr_down(old_node->get_record_id(m_context, pivot));
+
+ /* now move some of the key/rid-tuples to the new page */
+ old_node->split(m_context, new_node, pivot);
+
+ // if the new key is >= the pivot key then continue with the right page,
+ // otherwise continue with the left page
+ to_return = m_btree->compare_keys((ham_key_t *)key, &pivot_key) >= 0
+ ? new_page
+ : old_page;
+ }
+
+ /* update the parent page */
+ BtreeNodeProxy *parent_node = m_btree->get_node_from_page(parent);
+ uint64_t rid = new_page->get_address();
+ ham_record_t record = ham_make_record(&rid, sizeof(rid));
+ ham_status_t st = insert_in_page(parent, &pivot_key, &record, hints);
+ if (st)
+ throw Exception(st);
+ /* new root page? then also set ptr_down! */
+ if (parent_node->get_count() == 0)
+ parent_node->set_ptr_down(old_page->get_address());
+
+ /* fix the double-linked list of pages, and mark the pages as dirty */
+ if (old_node->get_right()) {
+ Page *sib_page = env->page_manager()->fetch(m_context,
+ old_node->get_right());
+ BtreeNodeProxy *sib_node = m_btree->get_node_from_page(sib_page);
+ sib_node->set_left(new_page->get_address());
+ sib_page->set_dirty(true);
+ }
+ new_node->set_left(old_page->get_address());
+ new_node->set_right(old_node->get_right());
+ old_node->set_right(new_page->get_address());
+ new_page->set_dirty(true);
+ old_page->set_dirty(true);
+
+ BtreeIndex::ms_btree_smo_split++;
+
+ if (g_BTREE_INSERT_SPLIT_HOOK)
+ g_BTREE_INSERT_SPLIT_HOOK();
+
+ return (to_return);
+}
+
+Page *
+BtreeUpdateAction::allocate_new_root(Page *old_root)
+{
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ Page *new_root = env->page_manager()->alloc(m_context, Page::kTypeBroot);
+
+ /* insert the pivot element and set ptr_down */
+ BtreeNodeProxy *new_node = m_btree->get_node_from_page(new_root);
+ new_node->set_ptr_down(old_root->get_address());
+
+ m_btree->set_root_address(m_context, new_root->get_address());
+ Page *header = env->page_manager()->fetch(m_context, 0);
+ header->set_dirty(true);
+
+ old_root->set_type(Page::kTypeBindex);
+
+ return (new_root);
+}
+
+int
+BtreeUpdateAction::get_pivot(BtreeNodeProxy *old_node, const ham_key_t *key,
+ BtreeStatistics::InsertHints &hints) const
+{
+ uint32_t old_count = old_node->get_count();
+ ham_assert(old_count > 2);
+
+ bool pivot_at_end = false;
+ if (hints.flags & HAM_HINT_APPEND && hints.append_count > 5)
+ pivot_at_end = true;
+ else if (old_node->get_right() == 0) {
+ int cmp = old_node->compare(m_context, key, old_node->get_count() - 1);
+ if (cmp > 0)
+ pivot_at_end = true;
+ }
+
+  /* The position of the pivot key depends on the previous inserts; if most
+   * of them were appends then pick a pivot key near the end of the node,
+   * if most were prepends then pick one near the front */
+ int pivot;
+ if (pivot_at_end || hints.append_count > 30)
+ pivot = old_count - 2;
+ else if (hints.append_count > 10)
+ pivot = (int)(old_count / 100.f * 66);
+  else if (hints.prepend_count > 30)
+    pivot = 2;
+  else if (hints.prepend_count > 10)
+    pivot = (int)(old_count / 100.f * 33);
+ else
+ pivot = old_count / 2;
+
+ ham_assert(pivot > 0 && pivot <= (int)old_count - 2);
+
+ return (pivot);
+}
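+
+/*
+ * Example (sketch): for a node with old_count == 100 keys, a long run of
+ * appends picks the pivot near the end (98), moderate appends pick 66,
+ * moderate prepends pick 33, a long run of prepends picks 2, and a mixed
+ * workload simply splits in the middle at 50.
+ */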
+
+ham_status_t
+BtreeUpdateAction::insert_in_page(Page *page, ham_key_t *key,
+ ham_record_t *record,
+ BtreeStatistics::InsertHints &hints,
+ bool force_prepend, bool force_append)
+{
+ bool exists = false;
+
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+
+ int flags = 0;
+ if (force_prepend)
+ flags |= PBtreeNode::kInsertPrepend;
+ if (force_append)
+ flags |= PBtreeNode::kInsertAppend;
+
+ PBtreeNode::InsertResult result = node->insert(m_context, key, flags);
+ switch (result.status) {
+ case HAM_DUPLICATE_KEY:
+ if (hints.flags & HAM_OVERWRITE) {
+ /* key already exists; only overwrite the data */
+ if (!node->is_leaf())
+ return (HAM_SUCCESS);
+ }
+ else if (!(hints.flags & HAM_DUPLICATE))
+ return (HAM_DUPLICATE_KEY);
+ /* do NOT shift keys up to make room; just overwrite the
+ * current [slot] */
+ exists = true;
+ break;
+ case HAM_SUCCESS:
+ break;
+ default:
+ return (result.status);
+ }
+
+ uint32_t new_duplicate_id = 0;
+ if (exists) {
+ if (node->is_leaf()) {
+ // overwrite record blob
+ node->set_record(m_context, result.slot, record, m_duplicate_index,
+ hints.flags, &new_duplicate_id);
+
+ hints.processed_leaf_page = page;
+ hints.processed_slot = result.slot;
+ }
+ else {
+ // overwrite record id
+ ham_assert(record->size == sizeof(uint64_t));
+ node->set_record_id(m_context, result.slot, *(uint64_t *)record->data);
+ }
+ }
+ // key does not exist and has to be inserted or appended
+ else {
+ try {
+ if (node->is_leaf()) {
+ // allocate record id
+ node->set_record(m_context, result.slot, record, m_duplicate_index,
+ hints.flags, &new_duplicate_id);
+
+ hints.processed_leaf_page = page;
+ hints.processed_slot = result.slot;
+ }
+ else {
+ // set the internal record id
+ ham_assert(record->size == sizeof(uint64_t));
+ node->set_record_id(m_context, result.slot, *(uint64_t *)record->data);
+ }
+ }
+ // In case of an error: undo the insert. This happens very rarely but
+ // it's possible, i.e. if the BlobManager fails to allocate storage.
+ catch (Exception &ex) {
+ if (result.slot < (int)node->get_count())
+ node->erase(m_context, result.slot);
+ throw ex;
+ }
+ }
+
+ page->set_dirty(true);
+
+ // if this update was triggered with a cursor (and this is a leaf node):
+ // couple it to the inserted key
+ // TODO only when performing an insert(), not an erase()!
+ if (m_cursor && node->is_leaf()) {
+ m_cursor->get_parent()->set_to_nil(Cursor::kBtree);
+ ham_assert(m_cursor->get_state() == BtreeCursor::kStateNil);
+ m_cursor->couple_to_page(page, result.slot, new_duplicate_id);
+ }
+
+ return (HAM_SUCCESS);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_update.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_update.h
new file mode 100644
index 0000000000..51176980fe
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_update.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_BTREE_UPDATE_H
+#define HAM_BTREE_UPDATE_H
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+class BtreeIndex;
+class BtreeCursor;
+
+/*
+ * Base class for updates; derived for erasing and inserting keys.
+ */
+class BtreeUpdateAction
+{
+ public:
+ // Constructor
+ BtreeUpdateAction(BtreeIndex *btree, Context *context, BtreeCursor *cursor,
+ uint32_t duplicate_index)
+ : m_btree(btree), m_context(context), m_cursor(cursor),
+ m_duplicate_index(duplicate_index) {
+ }
+
+ // Traverses the tree, looking for the leaf with the specified |key|. Will
+ // split or merge nodes while descending.
+ // Returns the leaf page and the |parent| of the leaf (can be null if
+ // there is no parent).
+ Page *traverse_tree(const ham_key_t *key,
+ BtreeStatistics::InsertHints &hints, Page **parent);
+
+ // Calculates the pivot index of a split.
+ //
+ // For databases with sequential access (this includes recno databases):
+ // do not split in the middle, but at the very end of the page.
+ //
+ // If this page is the right-most page in the index, and the new key is
+ // inserted at the very end, then we select the same pivot as for
+ // sequential access.
+ int get_pivot(BtreeNodeProxy *old_node, const ham_key_t *key,
+ BtreeStatistics::InsertHints &hints) const;
+
+ // Splits |page| and updates the |parent|. If |parent| is null then
+ // it's assumed that |page| is the root node.
+ // Returns the new page in the path for |key|; caller can immediately
+ // continue the traversal.
+ Page *split_page(Page *old_page, Page *parent, const ham_key_t *key,
+ BtreeStatistics::InsertHints &hints);
+
+ // Allocates a new root page and sets it up in the btree
+ Page *allocate_new_root(Page *old_root);
+
+ // Inserts a key in a page
+ ham_status_t insert_in_page(Page *page, ham_key_t *key,
+ ham_record_t *record,
+ BtreeStatistics::InsertHints &hints,
+ bool force_prepend = false, bool force_append = false);
+
+ protected:
+ // the current btree
+ BtreeIndex *m_btree;
+
+ // The caller's Context
+ Context *m_context;
+
+ // the current cursor
+ BtreeCursor *m_cursor;
+
+ // the duplicate index (in case the update is for a duplicate key)
+ // 1-based (if 0 then this update is not for a duplicate)
+ uint32_t m_duplicate_index;
+
+ private:
+ /* Merges the |sibling| into |page|, returns the merged page and moves
+ * the sibling to the freelist */
+ Page *merge_page(Page *page, Page *sibling);
+
+ /* collapse the root node; returns the new root */
+ Page *collapse_root(Page *root_page);
+};
+
+} // namespace hamsterdb
+
+#endif // HAM_BTREE_UPDATE_H
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_visit.cc b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_visit.cc
new file mode 100644
index 0000000000..05cd2603e5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_visit.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * btree enumeration; visits each node
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "3page_manager/page_manager.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_node_proxy.h"
+#include "3btree/btree_visitor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class BtreeVisitAction
+{
+ public:
+ BtreeVisitAction(BtreeIndex *btree, Context *context, BtreeVisitor &visitor,
+ bool visit_internal_nodes)
+ : m_btree(btree), m_context(context), m_visitor(visitor),
+ m_visit_internal_nodes(visit_internal_nodes) {
+ ham_assert(m_btree->get_root_address() != 0);
+ }
+
+ void run() {
+ LocalDatabase *db = m_btree->get_db();
+ LocalEnvironment *env = db->lenv();
+
+ uint32_t pm_flags = 0;
+ if (m_visitor.is_read_only())
+ pm_flags = PageManager::kReadOnly;
+
+ // get the root page of the tree
+ Page *page = env->page_manager()->fetch(m_context,
+ m_btree->get_root_address(), pm_flags);
+
+ // go down to the leaf
+ while (page) {
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+ uint64_t ptr_down = node->get_ptr_down();
+
+ // visit internal nodes as well?
+ if (ptr_down != 0 && m_visit_internal_nodes) {
+ while (page) {
+ node = m_btree->get_node_from_page(page);
+ m_visitor(m_context, node);
+
+ // load the right sibling
+ uint64_t right = node->get_right();
+ if (right)
+ page = env->page_manager()->fetch(m_context, right, pm_flags);
+ else
+ page = 0;
+ }
+ }
+
+ // follow the pointer to the smallest child
+ if (ptr_down)
+ page = env->page_manager()->fetch(m_context, ptr_down, pm_flags);
+ else
+ break;
+ }
+
+ ham_assert(page != 0);
+
+ // now visit all leaf nodes
+ while (page) {
+ BtreeNodeProxy *node = m_btree->get_node_from_page(page);
+ uint64_t right = node->get_right();
+
+ m_visitor(m_context, node);
+
+ /* follow the pointer to the right sibling */
+ if (right)
+ page = env->page_manager()->fetch(m_context, right, pm_flags);
+ else
+ break;
+ }
+ }
+
+ private:
+ BtreeIndex *m_btree;
+ Context *m_context;
+ BtreeVisitor &m_visitor;
+ bool m_visit_internal_nodes;
+};
+
+void
+BtreeIndex::visit_nodes(Context *context, BtreeVisitor &visitor,
+ bool visit_internal_nodes)
+{
+ BtreeVisitAction bva(this, context, visitor, visit_internal_nodes);
+ bva.run();
+}
+
+} // namespace hamsterdb
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_visitor.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_visitor.h
new file mode 100644
index 0000000000..19770a9e70
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/btree_visitor.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_BTREE_VISITOR_H
+#define HAM_BTREE_VISITOR_H
+
+#include "0root/root.h"
+
+#include "ham/hamsterdb_ola.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// The ScanVisitor is the callback implementation for the scan call.
+// It will either receive single keys or multiple keys in an array.
+//
+struct ScanVisitor {
+ // Operates on a single key
+ virtual void operator()(const void *key_data, uint16_t key_size,
+ size_t duplicate_count) = 0;
+
+ // Operates on an array of keys
+ virtual void operator()(const void *key_array, size_t key_count) = 0;
+
+ // Assigns the internal result to |result|
+ virtual void assign_result(hola_result_t *result) = 0;
+};
+
+struct Context;
+class BtreeNodeProxy;
+
+//
+// The BtreeVisitor is the callback implementation for the visit call.
+// It will visit each node instead of each key.
+//
+struct BtreeVisitor {
+ // Specifies if the visitor modifies the node
+ virtual bool is_read_only() const = 0;
+
+ // called for each node
+ virtual void operator()(Context *context, BtreeNodeProxy *node) = 0;
+};
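+
+/*
+ * Minimal read-only visitor (illustrative sketch, not part of this header):
+ *
+ *   struct NodeCounter : public BtreeVisitor {
+ *     size_t count;
+ *     NodeCounter() : count(0) { }
+ *     virtual bool is_read_only() const { return true; }
+ *     virtual void operator()(Context *, BtreeNodeProxy *) { count++; }
+ *   };
+ *
+ * An instance can be passed to BtreeIndex::visit_nodes() (see btree_visit.cc)
+ * to count all nodes of the tree.
+ */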
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_VISITOR_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3btree/upfront_index.h b/plugins/Dbx_kv/src/hamsterdb/src/3btree/upfront_index.h
new file mode 100644
index 0000000000..b8aad1396d
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3btree/upfront_index.h
@@ -0,0 +1,684 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A small index which manages variable length buffers. Used to manage
+ * variable length keys or records.
+ *
+ * The UpfrontIndex manages a range of bytes, organized in variable length
+ * |chunks|, assigned at initialization time when calling |create()|
+ * or |open()|.
+ *
+ * These chunks are organized in |slots|, each slot stores the offset and
+ * the size of the chunk data. The offset is stored as a 16- or 32-bit
+ * integer, depending on the page size. The size is stored in a single byte,
+ * therefore a chunk cannot exceed 255 bytes.
+ *
+ * The number of used slots is not stored in the UpfrontIndex, since it is
+ * already managed in the caller (this is equal to |PBtreeNode::get_count()|).
+ * Therefore you will see a lot of methods receiving a |node_count| parameter.
+ *
+ * Deleted chunks are moved to a |freelist|, which is simply a list of slots
+ * directly following those slots that are in use.
+ *
+ * In addition, the UpfrontIndex keeps track of the unused space at the end
+ * of the range (via |get_next_offset()|), in order to allow a fast
+ * allocation of space.
+ *
+ * The UpfrontIndex stores metadata at the beginning:
+ * [0..3] freelist count
+ * [4..7] next offset
+ * [8..11] capacity
+ *
+ * Data is stored in the following layout:
+ * |metadata|slot1|slot2|...|slotN|free1|free2|...|freeM|data1|data2|...|dataN|
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_BTREE_UPFRONT_INDEX_H
+#define HAM_BTREE_UPFRONT_INDEX_H
+
+#include "0root/root.h"
+
+#include <algorithm>
+#include <vector>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1globals/globals.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+namespace DefLayout {
+
+/*
+ * A helper class to sort ranges; used during validation of the up-front
+ * index in check_index_integrity()
+ */
+struct SortHelper {
+ uint32_t offset;
+ int slot;
+
+ bool operator<(const SortHelper &rhs) const {
+ return (offset < rhs.offset);
+ }
+};
+
+static bool
+sort_by_offset(const SortHelper &lhs, const SortHelper &rhs) {
+ return (lhs.offset < rhs.offset);
+}
+
+class UpfrontIndex
+{
+ enum {
+ // width of the 'size' field
+ kSizeofSize = 1 // 1 byte - max chunk size is 255
+ };
+
+ public:
+ enum {
+ // for freelist_count, next_offset, capacity
+ kPayloadOffset = 12,
+
+ // minimum capacity of the index
+ kMinimumCapacity = 16
+ };
+
+ // Constructor; creates an empty index which needs to be initialized
+ // with |create()| or |open()|.
+ UpfrontIndex(LocalDatabase *db)
+ : m_data(0), m_range_size(0), m_vacuumize_counter(0) {
+ size_t page_size = db->lenv()->config().page_size_bytes;
+ if (page_size <= 64 * 1024)
+ m_sizeof_offset = 2;
+ else
+ m_sizeof_offset = 4;
+ }
+
+ // Initialization routine; sets data pointer, range size and the
+ // initial capacity.
+ void create(uint8_t *data, size_t range_size, size_t capacity) {
+ m_data = data;
+ m_range_size = range_size;
+ set_capacity(capacity);
+ clear();
+ }
+
+ // "Opens" an existing index from memory. This method sets the data
+ // pointer and initializes itself.
+ void open(uint8_t *data, size_t range_size) {
+ m_data = data;
+ m_range_size = range_size;
+ // the vacuumize-counter is not persisted, therefore
+ // pretend that the counter is very high; in the worst case this causes
+ // an unnecessary call to vacuumize(), which is not a problem
+ if (get_freelist_count())
+ m_vacuumize_counter = m_range_size;
+ }
+
+ // Changes the range size and capacity of the index; used to resize the
+ // KeyList or RecordList
+ void change_range_size(size_t node_count, uint8_t *new_data_ptr,
+ size_t new_range_size, size_t new_capacity) {
+ if (!new_data_ptr)
+ new_data_ptr = m_data;
+ if (!new_range_size)
+ new_range_size = m_range_size;
+
+ // get rid of the freelist and collect the garbage
+ if (get_freelist_count() > 0)
+ vacuumize(node_count);
+ ham_assert(get_freelist_count() == 0);
+
+ size_t used_data_size = get_next_offset(node_count);
+ size_t old_capacity = get_capacity();
+ uint8_t *src = &m_data[kPayloadOffset
+ + old_capacity * get_full_index_size()];
+ uint8_t *dst = &new_data_ptr[kPayloadOffset
+ + new_capacity * get_full_index_size()];
+
+ // if old range == new range then leave
+ if (m_range_size == new_range_size
+ && old_capacity == new_capacity
+ && m_data == new_data_ptr )
+ return;
+
+ ham_assert(dst - new_data_ptr + used_data_size <= new_range_size);
+
+ // shift "to the right"? Then first move the data and afterwards
+ // the index
+ if (dst > src) {
+ memmove(dst, src, used_data_size);
+ memmove(new_data_ptr, m_data,
+ kPayloadOffset + new_capacity * get_full_index_size());
+ }
+ // vice versa otherwise
+ else if (dst <= src) {
+ if (new_data_ptr != m_data)
+ memmove(new_data_ptr, m_data,
+ kPayloadOffset + new_capacity * get_full_index_size());
+ memmove(dst, src, used_data_size);
+ }
+
+ m_data = new_data_ptr;
+ m_range_size = new_range_size;
+ set_capacity(new_capacity);
+ set_freelist_count(0);
+ set_next_offset(used_data_size); // depends on get_freelist_count()
+ }
+
+ // Calculates the required size for a range
+ size_t get_required_range_size(size_t node_count) const {
+ return (UpfrontIndex::kPayloadOffset
+ + get_capacity() * get_full_index_size()
+ + get_next_offset(node_count));
+ }
+
+ // Returns the size of a single index entry
+ size_t get_full_index_size() const {
+ return (m_sizeof_offset + kSizeofSize);
+ }
+
+ // Transforms a relative offset of the payload data to an absolute offset
+ // in |m_data|
+ uint32_t get_absolute_offset(uint32_t offset) const {
+ return (offset
+ + kPayloadOffset
+ + get_capacity() * get_full_index_size());
+ }
+
+ // Returns the absolute start offset of a chunk
+ uint32_t get_absolute_chunk_offset(int slot) const {
+ return (get_absolute_offset(get_chunk_offset(slot)));
+ }
+
+ // Returns the relative start offset of a chunk
+ uint32_t get_chunk_offset(int slot) const {
+ uint8_t *p = &m_data[kPayloadOffset + get_full_index_size() * slot];
+ if (m_sizeof_offset == 2)
+ return (*(uint16_t *)p);
+ else {
+ ham_assert(m_sizeof_offset == 4);
+ return (*(uint32_t *)p);
+ }
+ }
+
+ // Returns the size of a chunk
+ uint16_t get_chunk_size(int slot) const {
+ return (m_data[kPayloadOffset + get_full_index_size() * slot
+ + m_sizeof_offset]);
+ }
+
+ // Sets the size of a chunk (does NOT actually resize the chunk!)
+ void set_chunk_size(int slot, uint16_t size) {
+ ham_assert(size <= 255);
+ m_data[kPayloadOffset + get_full_index_size() * slot + m_sizeof_offset]
+ = (uint8_t)size;
+ }
+
+ // Increases the "vacuumize-counter", which indicates whether
+ // rearranging the node makes sense
+ void increase_vacuumize_counter(size_t gap_size) {
+ m_vacuumize_counter += gap_size;
+ }
+
+ // Vacuumizes the index, *if it makes sense*. Returns true if the
+ // index was vacuumized, otherwise false
+ bool maybe_vacuumize(size_t node_count) {
+ if (m_vacuumize_counter > 0 || get_freelist_count() > 0) {
+ vacuumize(node_count);
+ return (true);
+ }
+ return (false);
+ }
+
+ // Returns true if this index has at least one free slot available.
+ // |node_count| is the number of used slots (this is managed by the caller)
+ bool can_insert(size_t node_count) {
+ return (likely(node_count + get_freelist_count() < get_capacity()));
+ }
+
+ // Inserts a slot at the position |slot|. |node_count| is the number of
+ // used slots (this is managed by the caller)
+ void insert(size_t node_count, int slot) {
+ ham_assert(can_insert(node_count) == true);
+
+ size_t slot_size = get_full_index_size();
+ size_t total_count = node_count + get_freelist_count();
+ uint8_t *p = &m_data[kPayloadOffset + slot_size * slot];
+ if (total_count > 0 && slot < (int)total_count) {
+ // create a gap in the index
+ memmove(p + slot_size, p, slot_size * (total_count - slot));
+ }
+
+ // now fill the gap
+ memset(p, 0, slot_size);
+ }
+
+ // Erases a slot at the position |slot|
+ // |node_count| is the number of used slots (this is managed by the caller)
+ void erase(size_t node_count, int slot) {
+ size_t slot_size = get_full_index_size();
+ size_t total_count = node_count + get_freelist_count();
+
+ ham_assert(slot < (int)total_count);
+
+ set_freelist_count(get_freelist_count() + 1);
+
+ size_t chunk_size = get_chunk_size(slot);
+
+ increase_vacuumize_counter(chunk_size);
+
+ // nothing to do if we delete the very last (used) slot; the freelist
+ // counter was already incremented, the used counter is decremented
+ // by the caller
+ if (slot == (int)node_count - 1)
+ return;
+
+ size_t chunk_offset = get_chunk_offset(slot);
+
+ // shift all items to the left
+ uint8_t *p = &m_data[kPayloadOffset + slot_size * slot];
+ memmove(p, p + slot_size, slot_size * (total_count - slot));
+
+ // then copy the deleted chunk to the freelist
+ set_chunk_offset(total_count - 1, chunk_offset);
+ set_chunk_size(total_count - 1, chunk_size);
+ }
+
+ // Adds a chunk to the freelist. Will not do anything if the node
+ // is already full.
+ void add_to_freelist(size_t node_count, uint32_t chunk_offset,
+ uint32_t chunk_size) {
+ size_t total_count = node_count + get_freelist_count();
+ if (likely(total_count < get_capacity())) {
+ set_freelist_count(get_freelist_count() + 1);
+ set_chunk_size(total_count, chunk_size);
+ set_chunk_offset(total_count, chunk_offset);
+ }
+ }
+
+ // Returns true if this page has enough space to store at least |num_bytes|
+ // bytes.
+ bool can_allocate_space(size_t node_count, size_t num_bytes) {
+ // first check if we can append the data; this is the cheapest check,
+ // therefore it comes first
+ if (get_next_offset(node_count) + num_bytes <= get_usable_data_size())
+ return (true);
+
+ // otherwise check the freelist
+ uint32_t total_count = node_count + get_freelist_count();
+ for (uint32_t i = node_count; i < total_count; i++)
+ if (get_chunk_size(i) >= num_bytes)
+ return (true);
+ return (false);
+ }
+
+ // Allocates space for a |slot| and returns the offset of that chunk
+ uint32_t allocate_space(size_t node_count, int slot,
+ size_t num_bytes) {
+ ham_assert(can_allocate_space(node_count, num_bytes));
+
+ size_t next_offset = get_next_offset(node_count);
+
+ // try to allocate space at the end of the node
+ if (next_offset + num_bytes <= get_usable_data_size()) {
+ uint32_t offset = get_chunk_offset(slot);
+ // if this slot's data is at the very end then maybe it can be
+ // resized without actually moving the data
+ if (unlikely(next_offset == offset + get_chunk_size(slot))) {
+ set_next_offset(offset + num_bytes);
+ set_chunk_size(slot, num_bytes);
+ return (offset);
+ }
+ set_next_offset(next_offset + num_bytes);
+ set_chunk_offset(slot, next_offset);
+ set_chunk_size(slot, num_bytes);
+ return (next_offset);
+ }
+
+ size_t slot_size = get_full_index_size();
+
+ // otherwise check the freelist
+ uint32_t total_count = node_count + get_freelist_count();
+ for (uint32_t i = node_count; i < total_count; i++) {
+ uint32_t chunk_size = get_chunk_size(i);
+ uint32_t chunk_offset = get_chunk_offset(i);
+ if (chunk_size >= num_bytes) {
+ // update next_offset?
+ if (unlikely(next_offset == chunk_offset + chunk_size))
+ invalidate_next_offset();
+ else if (unlikely(next_offset == get_chunk_offset(slot)
+ + get_chunk_size(slot)))
+ invalidate_next_offset();
+ // copy the chunk to the new slot
+ set_chunk_size(slot, num_bytes);
+ set_chunk_offset(slot, chunk_offset);
+ // remove from the freelist
+ if (i < total_count - 1) {
+ uint8_t *p = &m_data[kPayloadOffset + slot_size * i];
+ memmove(p, p + slot_size, slot_size * (total_count - i - 1));
+ }
+ set_freelist_count(get_freelist_count() - 1);
+ return (get_chunk_offset(slot));
+ }
+ }
+
+ ham_assert(!"shouldn't be here");
+ throw Exception(HAM_INTERNAL_ERROR);
+ }
+
+ // Returns true if a new chunk of |required_size| bytes cannot be inserted,
+ // either because no free slot is available or because the data area
+ // cannot hold the chunk; the node then requires a split.
+ bool requires_split(size_t node_count, size_t required_size) {
+ return (!can_insert(node_count)
+ || !can_allocate_space(node_count, required_size));
+ }
+
+ // Verifies that there are no overlapping chunks
+ void check_integrity(size_t node_count) const {
+ typedef std::pair<uint32_t, uint32_t> Range;
+ //typedef std::vector<Range> RangeVec;
+ uint32_t total_count = node_count + get_freelist_count();
+
+ ham_assert(node_count > 1
+ ? get_next_offset(node_count) > 0
+ : true);
+
+ if (total_count > get_capacity()) {
+ ham_trace(("integrity violated: total count %u (%u+%u) > capacity %u",
+ total_count, node_count, get_freelist_count(),
+ get_capacity()));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+
+ //RangeVec ranges;
+ //ranges.reserve(total_count);
+ uint32_t next_offset = 0;
+ for (uint32_t i = 0; i < total_count; i++) {
+ Range range = std::make_pair(get_chunk_offset(i), get_chunk_size(i));
+ uint32_t next = range.first + range.second;
+ if (next >= next_offset)
+ next_offset = next;
+ //ranges.push_back(range);
+ }
+
+#if 0
+ std::sort(ranges.begin(), ranges.end());
+
+ if (!ranges.empty()) {
+ for (uint32_t i = 0; i < ranges.size() - 1; i++) {
+ if (ranges[i].first + ranges[i].second > ranges[i + 1].first) {
+ ham_trace(("integrity violated: slot %u/%u overlaps with %lu",
+ ranges[i].first, ranges[i].second,
+ ranges[i + 1].first));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ }
+ }
+#endif
+
+ if (next_offset != get_next_offset(node_count)) {
+ ham_trace(("integrity violated: next offset %d, cached offset %d",
+ next_offset, get_next_offset(node_count)));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ if (next_offset != calc_next_offset(node_count)) {
+ ham_trace(("integrity violated: next offset %d, calculated offset %d",
+ next_offset, calc_next_offset(node_count)));
+ throw Exception(HAM_INTEGRITY_VIOLATED);
+ }
+ }
+
+ // Splits an index and moves all chunks starting from position |pivot|
+ // to the other index.
+ // The other index *must* be empty!
+ void split(UpfrontIndex *other, size_t node_count, int pivot) {
+ other->clear();
+
+ // now copy key by key
+ for (size_t i = pivot; i < node_count; i++) {
+ other->insert(i - pivot, i - pivot);
+ uint32_t size = get_chunk_size(i);
+ uint32_t offset = other->allocate_space(i - pivot, i - pivot, size);
+ memcpy(other->get_chunk_data_by_offset(offset),
+ get_chunk_data_by_offset(get_chunk_offset(i)),
+ size);
+ }
+
+ // this node has lost lots of its data - make sure that it will be
+ // vacuumized as soon as more data is allocated
+ m_vacuumize_counter += node_count;
+ set_freelist_count(0);
+ set_next_offset((uint32_t)-1);
+ }
+
+ // Merges all chunks from the |other| index to this index
+ void merge_from(UpfrontIndex *other, size_t node_count,
+ size_t other_node_count) {
+ vacuumize(node_count);
+
+ for (size_t i = 0; i < other_node_count; i++) {
+ insert(i + node_count, i + node_count);
+ uint32_t size = other->get_chunk_size(i);
+ uint32_t offset = allocate_space(i + node_count, i + node_count, size);
+ memcpy(get_chunk_data_by_offset(offset),
+ other->get_chunk_data_by_offset(other->get_chunk_offset(i)),
+ size);
+ }
+
+ other->clear();
+ }
+
+ // Returns a pointer to the actual data of a chunk
+ uint8_t *get_chunk_data_by_offset(uint32_t offset) {
+ return (&m_data[kPayloadOffset
+ + get_capacity() * get_full_index_size()
+ + offset]);
+ }
+
+ // Returns a pointer to the actual data of a chunk
+ uint8_t *get_chunk_data_by_offset(uint32_t offset) const {
+ return (&m_data[kPayloadOffset
+ + get_capacity() * get_full_index_size()
+ + offset]);
+ }
+
+ // Reduces the capacity of the UpfrontIndex, if required
+ void reduce_capacity(size_t node_count) {
+ size_t old_capacity = get_capacity();
+ if (node_count > 0 && old_capacity > node_count + 4) {
+ size_t new_capacity = old_capacity - (old_capacity - node_count) / 2;
+ if (new_capacity != old_capacity)
+ change_range_size(node_count, m_data, m_range_size, new_capacity);
+ }
+ }
+
+ // Re-arranges the node: moves all keys sequentially to the beginning
+ // of the key space, removes the whole freelist.
+ //
+ // This call is extremely expensive! Try to avoid it as much as possible.
+ void vacuumize(size_t node_count) {
+ if (m_vacuumize_counter < 10) {
+ if (get_freelist_count() > 0) {
+ set_freelist_count(0);
+ invalidate_next_offset();
+ }
+ return;
+ }
+
+ // get rid of the freelist - this node is now completely rewritten,
+ // and the freelist would just complicate things
+ set_freelist_count(0);
+
+ // make a copy of all indices (excluding the freelist)
+ bool requires_sort = false;
+ SortHelper *s = (SortHelper *)::alloca(node_count * sizeof(SortHelper));
+ for (size_t i = 0; i < node_count; i++) {
+ s[i].slot = i;
+ s[i].offset = get_chunk_offset(i);
+ if (i > 0 && s[i].offset < s[i - 1].offset)
+ requires_sort = true;
+ }
+
+ // sort them by offset; this is a very expensive call. only sort if
+ // it's absolutely necessary!
+ if (requires_sort)
+ std::sort(&s[0], &s[node_count], sort_by_offset);
+
+ // shift all keys to the left, get rid of all gaps at the front of the
+ // key data or between the keys
+ uint32_t next_offset = 0;
+ uint32_t start = kPayloadOffset + get_capacity() * get_full_index_size();
+ for (size_t i = 0; i < node_count; i++) {
+ uint32_t offset = s[i].offset;
+ int slot = s[i].slot;
+ uint32_t size = get_chunk_size(slot);
+ if (offset != next_offset) {
+ // shift key to the left
+ memmove(&m_data[start + next_offset],
+ get_chunk_data_by_offset(offset), size);
+ // store the new offset
+ set_chunk_offset(slot, next_offset);
+ }
+ next_offset += size;
+ }
+
+ set_next_offset(next_offset);
+ m_vacuumize_counter = 0;
+ }
+
+ // Invalidates the cached "next offset". In some cases it's necessary
+ // that the caller forces a re-evaluation of the next offset. I *think*
+ // that this method could become private, but the effort is not worth
+ // the gain.
+ void invalidate_next_offset() {
+ set_next_offset((uint32_t)-1);
+ }
+
+ // Same as above, but only if the next_offset equals |new_offset|
+ void maybe_invalidate_next_offset(size_t new_offset) {
+ if (get_next_offset(0) == new_offset)
+ invalidate_next_offset();
+ }
+
+ // Returns the capacity
+ size_t get_capacity() const {
+ return (*(uint32_t *)(m_data + 8));
+ }
+
+ // Returns the offset of the unused space at the end of the page
+ uint32_t get_next_offset(size_t node_count) {
+ uint32_t ret = *(uint32_t *)(m_data + 4);
+ if (unlikely(ret == (uint32_t)-1 && node_count > 0)) {
+ ret = calc_next_offset(node_count);
+ set_next_offset(ret);
+ }
+ return (ret);
+ }
+
+ private:
+ friend class UpfrontIndexFixture;
+
+ // Resets the page
+ void clear() {
+ set_freelist_count(0);
+ set_next_offset(0);
+ m_vacuumize_counter = 0;
+ }
+
+ // Returns the offset of the unused space at the end of the page
+ // (const version)
+ uint32_t get_next_offset(size_t node_count) const {
+ uint32_t ret = *(uint32_t *)(m_data + 4);
+ if (unlikely(ret == (uint32_t)-1))
+ return (calc_next_offset(node_count));
+ return (ret);
+ }
+
+ // Returns the size (in bytes) where payload data can be stored
+ size_t get_usable_data_size() const {
+ return (m_range_size - kPayloadOffset
+ - get_capacity() * get_full_index_size());
+ }
+
+ // Sets the chunk offset of a slot
+ void set_chunk_offset(int slot, uint32_t offset) {
+ uint8_t *p = &m_data[kPayloadOffset + get_full_index_size() * slot];
+ if (m_sizeof_offset == 2)
+ *(uint16_t *)p = (uint16_t)offset;
+ else
+ *(uint32_t *)p = offset;
+ }
+
+ // Returns the number of freelist entries
+ size_t get_freelist_count() const {
+ return (*(uint32_t *)m_data);
+ }
+
+ // Sets the number of freelist entries
+ void set_freelist_count(size_t freelist_count) {
+ ham_assert(freelist_count <= get_capacity());
+ *(uint32_t *)m_data = freelist_count;
+ }
+
+ // Calculates and returns the next offset; does not store it
+ uint32_t calc_next_offset(size_t node_count) const {
+ uint32_t total_count = node_count + get_freelist_count();
+ uint32_t next_offset = 0;
+ for (uint32_t i = 0; i < total_count; i++) {
+ uint32_t next = get_chunk_offset(i) + get_chunk_size(i);
+ if (next >= next_offset)
+ next_offset = next;
+ }
+ return (next_offset);
+ }
+
+ // Sets the offset of the unused space at the end of the page
+ void set_next_offset(uint32_t next_offset) {
+ *(uint32_t *)(m_data + 4) = next_offset;
+ }
+
+ // Sets the capacity (number of slots)
+ void set_capacity(size_t capacity) {
+ ham_assert(capacity > 0);
+ *(uint32_t *)(m_data + 8) = (uint32_t)capacity;
+ }
+
+ // The physical data in the node
+ uint8_t *m_data;
+
+ // The size of the offset; either 16 or 32 bits, depending on page size
+ size_t m_sizeof_offset;
+
+ // The size of the range, in bytes
+ size_t m_range_size;
+
+ // A counter to indicate when rearranging the data makes sense
+ int m_vacuumize_counter;
+};
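+
+/*
+ * An illustrative sketch (not part of hamsterdb): maps the layout described
+ * at the top of this file to byte offsets. The struct and function names
+ * below are hypothetical; the arithmetic mirrors get_full_index_size() and
+ * get_absolute_offset() above. |sizeof_offset| is 2 for page sizes up to
+ * 64kb, otherwise 4; the size field always occupies a single byte.
+ */
+struct UpfrontIndexLayoutSketch {
+ // byte offset of the index entry for |slot|
+ static size_t slot_entry(size_t slot, size_t sizeof_offset) {
+ return (UpfrontIndex::kPayloadOffset + slot * (sizeof_offset + 1));
+ }
+
+ // byte offset at which the chunk data area begins
+ static size_t data_start(size_t capacity, size_t sizeof_offset) {
+ return (UpfrontIndex::kPayloadOffset + capacity * (sizeof_offset + 1));
+ }
+
+ // absolute byte offset of a chunk, given its relative |chunk_offset|
+ static size_t chunk_start(size_t capacity, size_t sizeof_offset,
+ size_t chunk_offset) {
+ return (data_start(capacity, sizeof_offset) + chunk_offset);
+ }
+};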
+
+} // namespace DefLayout
+
+} // namespace hamsterdb
+
+#endif /* HAM_BTREE_UPFRONT_INDEX_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3cache/cache.h b/plugins/Dbx_kv/src/hamsterdb/src/3cache/cache.h
new file mode 100644
index 0000000000..a24daf3828
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3cache/cache.h
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The Cache Manager
+ *
+ * Stores pages in an intrusive hash table (each Page instance keeps
+ * next/previous pointers for the overflow bucket). Can efficiently purge
+ * unused pages, because all pages are also stored in an (intrusive)
+ * linked list, and whenever a page is accessed it is removed and re-inserted
+ * at the head. The tail therefore points to the page which was not used
+ * in a long time, and is the primary candidate for purging.
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_CACHE_H
+#define HAM_CACHE_H
+
+#include "0root/root.h"
+
+#include <vector>
+
+#include "ham/hamsterdb_int.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "2page/page.h"
+#include "2page/page_collection.h"
+#include "2config/env_config.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Cache
+{
+ enum {
+ // The number of buckets should be a prime number or similar, as it
+ // is used in a MODULO hash scheme
+ kBucketSize = 10317,
+ };
+
+ template<typename Purger>
+ struct PurgeIfSelector
+ {
+ PurgeIfSelector(Cache *cache, Purger &purger)
+ : m_cache(cache), m_purger(purger) {
+ }
+
+ bool operator()(Page *page) {
+ if (m_purger(page)) {
+ m_cache->del(page);
+ delete page;
+ }
+ // don't remove page from list; it was already removed above
+ return (false);
+ }
+
+ Cache *m_cache;
+ Purger &m_purger;
+ };
+
+ public:
+ // The default constructor
+ Cache(const EnvironmentConfiguration &config)
+ : m_capacity_bytes(config.flags & HAM_CACHE_UNLIMITED
+ ? 0xffffffffffffffffull
+ : config.cache_size_bytes),
+ m_page_size_bytes(config.page_size_bytes),
+ m_alloc_elements(0), m_totallist(Page::kListCache),
+ m_buckets(kBucketSize, PageCollection(Page::kListBucket)),
+ m_cache_hits(0), m_cache_misses(0) {
+ ham_assert(m_capacity_bytes > 0);
+ }
+
+ // Fills in the current metrics
+ void fill_metrics(ham_env_metrics_t *metrics) const {
+ metrics->cache_hits = m_cache_hits;
+ metrics->cache_misses = m_cache_misses;
+ }
+
+ // Retrieves a page from the cache and moves it to the front of the
+ // LRU list. Returns null if the page was not cached.
+ Page *get(uint64_t address) {
+ size_t hash = calc_hash(address);
+
+ Page *page = m_buckets[hash].get(address);
+ if (!page) {
+ m_cache_misses++;
+ return (0);
+ }
+
+ // Now re-insert the page at the head of the "totallist", and
+ // thus move far away from the tail. The pages at the tail are highest
+ // candidates to be deleted when the cache is purged.
+ m_totallist.del(page);
+ m_totallist.put(page);
+ m_cache_hits++;
+ return (page);
+ }
+
+ // Stores a page in the cache
+ void put(Page *page) {
+ size_t hash = calc_hash(page->get_address());
+ ham_assert(page->get_data());
+
+ /* First remove the page from the cache, if it's already cached
+ *
+ * Then re-insert the page at the head of the list. The tail will
+ * point to the least recently used page.
+ */
+ m_totallist.del(page);
+ m_totallist.put(page);
+
+ if (page->is_allocated())
+ m_alloc_elements++;
+ m_buckets[hash].put(page);
+ }
+
+ // Removes a page from the cache
+ void del(Page *page) {
+ ham_assert(page->get_address() != 0);
+ size_t hash = calc_hash(page->get_address());
+ /* remove the page from the cache buckets */
+ m_buckets[hash].del(page);
+
+ /* remove it from the list of all cached pages */
+ if (m_totallist.del(page) && page->is_allocated())
+ m_alloc_elements--;
+ }
+
+ // Purges the cache. Implements an LRU eviction algorithm. Dirty pages are
+ // forwarded to the |processor()| for flushing.
+ //
+ // Tries to purge as many pages as the cache currently holds in excess
+ // of its configured capacity.
+ template<typename Processor>
+ void purge(Processor &processor, Page *ignore_page) {
+ int limit = int(current_elements()
+ - (m_capacity_bytes / m_page_size_bytes));
+
+ Page *page = m_totallist.tail();
+ for (int i = 0; i < limit && page != 0; i++) {
+ Page *next = page->get_previous(Page::kListCache);
+
+ // dirty pages are flushed by the worker thread
+ if (page->is_dirty()) {
+ processor(page);
+ page = next;
+ continue;
+ }
+ // non-dirty pages are deleted if possible
+ if (!page->is_dirty()
+ && page->cursor_list() == 0
+ && page != ignore_page
+ && page->mutex().try_lock()) {
+ del(page);
+ page->mutex().unlock();
+ delete page;
+ }
+
+ page = next;
+ }
+ }
+
+ // Visits all pages in the "totallist". If |purger| returns true then the
+ // page is removed and deleted. This is used by the Environment
+ // to flush (and delete) pages.
+ template<typename Purger>
+ void purge_if(Purger &purger) {
+ PurgeIfSelector<Purger> selector(this, purger);
+ m_totallist.extract(selector);
+ }
+
+ // Returns true if the capacity limits are exceeded
+ bool is_cache_full() const {
+ return (current_elements() * m_page_size_bytes
+ > m_capacity_bytes);
+ }
+
+ // Returns the capacity (in bytes)
+ uint64_t capacity() const {
+ return (m_capacity_bytes);
+ }
+
+ // Returns the number of currently cached elements
+ size_t current_elements() const {
+ return (m_totallist.size());
+ }
+
+ // Returns the number of currently cached elements (excluding those that
+ // are mmapped)
+ size_t allocated_elements() const {
+ return (m_alloc_elements);
+ }
+
+ private:
+ // Calculates the hash of a page address
+ size_t calc_hash(uint64_t value) const {
+ return ((size_t)(value % Cache::kBucketSize));
+ }
+
+ // the capacity (in bytes)
+ uint64_t m_capacity_bytes;
+
+ // the current page size (in bytes)
+ uint64_t m_page_size_bytes;
+
+ // the current number of cached elements that were allocated (and not
+ // mapped)
+ size_t m_alloc_elements;
+
+ // linked list of ALL cached pages
+ PageCollection m_totallist;
+
+ // The hash table buckets - each is a linked list of Page pointers
+ std::vector<PageCollection> m_buckets;
+
+ // counts the cache hits
+ uint64_t m_cache_hits;
+
+ // counts the cache misses
+ uint64_t m_cache_misses;
+};
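+
+/*
+ * An illustrative sketch (not part of hamsterdb): computes how many pages a
+ * single purge() run will try to evict, mirroring the |limit| calculation
+ * in Cache::purge() above. The function name is hypothetical. A positive
+ * result means the cache holds more pages than its byte capacity allows;
+ * a result <= 0 means purge() has nothing to do.
+ */
+inline int
+cache_purge_limit_sketch(size_t current_elements, uint64_t capacity_bytes,
+ uint64_t page_size_bytes)
+{
+ return (int(current_elements - (capacity_bytes / page_size_bytes)));
+}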
+
+} // namespace hamsterdb
+
+#endif /* HAM_CACHE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3changeset/changeset.cc b/plugins/Dbx_kv/src/hamsterdb/src/3changeset/changeset.cc
new file mode 100644
index 0000000000..2e5ace06f5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3changeset/changeset.cc
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Always verify that a file of level N does not include headers > N!
+#include "1errorinducer/errorinducer.h"
+#include "2device/device.h"
+#include "2page/page.h"
+#include "3changeset/changeset.h"
+#include "3journal/journal.h"
+#include "3page_manager/page_manager.h"
+#include "4db/db.h"
+#include "4env/env_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/* a unittest hook for Changeset::flush() */
+void (*g_CHANGESET_POST_LOG_HOOK)(void);
+
+struct PageCollectionVisitor
+{
+ PageCollectionVisitor(Page **pages)
+ : num_pages(0), pages(pages) {
+ }
+
+ void prepare(size_t size) {
+ }
+
+ bool operator()(Page *page) {
+ if (page->is_dirty() == true) {
+ pages[num_pages] = page;
+ ++num_pages;
+ }
+ // |page| is now removed from the Changeset
+ page->mutex().unlock();
+ return (true);
+ }
+
+ int num_pages;
+ Page **pages;
+};
+
+void
+Changeset::flush(uint64_t lsn)
+{
+ // now flush all modified pages to disk
+ if (m_collection.is_empty())
+ return;
+
+ HAM_INDUCE_ERROR(ErrorInducer::kChangesetFlush);
+
+ // Fetch the pages, ignoring all pages that are not dirty
+ Page **pages = (Page **)::alloca(sizeof(Page *) * m_collection.size());
+ PageCollectionVisitor visitor(pages);
+ m_collection.extract(visitor);
+
+ // TODO sort by address (really?)
+
+ if (visitor.num_pages == 0)
+ return;
+
+ // If only one page is modified then the modification is atomic. The page
+ // is written to the btree (no log required).
+ //
+ // If more than one page is modified then the modification is no longer
+ // atomic. All dirty pages are written to the log.
+ if (visitor.num_pages > 1) {
+ m_env->journal()->append_changeset((const Page **)visitor.pages,
+ visitor.num_pages, lsn);
+ }
+
+ HAM_INDUCE_ERROR(ErrorInducer::kChangesetFlush);
+
+ /* execute a post-log hook; this hook is set by the unittest framework
+ * and can be used to make a backup copy of the logfile */
+ if (g_CHANGESET_POST_LOG_HOOK)
+ g_CHANGESET_POST_LOG_HOOK();
+
+ /* now write all the pages to the file; if any of these writes fail,
+ * we can still recover from the log */
+ for (int i = 0; i < visitor.num_pages; i++) {
+ Page *p = visitor.pages[i];
+ if (p->is_without_header() == false)
+ p->set_lsn(lsn);
+ p->flush();
+
+ HAM_INDUCE_ERROR(ErrorInducer::kChangesetFlush);
+ }
+
+ /* flush the file handle (if required) */
+ if (m_env->get_flags() & HAM_ENABLE_FSYNC)
+ m_env->device()->flush();
+
+ HAM_INDUCE_ERROR(ErrorInducer::kChangesetFlush);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3changeset/changeset.h b/plugins/Dbx_kv/src/hamsterdb/src/3changeset/changeset.h
new file mode 100644
index 0000000000..a21c6f45f9
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3changeset/changeset.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A changeset collects all pages that are modified during a single
+ * operation.
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_CHANGESET_H
+#define HAM_CHANGESET_H
+
+#include "0root/root.h"
+
+#include <stdlib.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "2page/page.h"
+#include "2page/page_collection.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class LocalEnvironment;
+
+class Changeset
+{
+ struct UnlockPage
+ {
+ bool operator()(Page *page) {
+ #ifdef HAM_ENABLE_HELGRIND
+ page->mutex().try_lock();
+ #endif
+ page->mutex().unlock();
+ return (true);
+ }
+ };
+
+ public:
+ Changeset(LocalEnvironment *env)
+ : m_env(env), m_collection(Page::kListChangeset) {
+ }
+
+ /*
+ * Returns a page from the changeset, or NULL if the page is not part
+ * of the changeset
+ */
+ Page *get(uint64_t address) {
+ return (m_collection.get(address));
+ }
+
+ /* Append a new page to the changeset. The page is locked. */
+ void put(Page *page) {
+ if (!has(page)) {
+ page->mutex().lock();
+ }
+ m_collection.put(page);
+ }
+
+ /* Removes a page from the changeset. The page is unlocked. */
+ void del(Page *page) {
+ page->mutex().unlock();
+ m_collection.del(page);
+ }
+
+ /* Check if the page is already part of the changeset */
+ bool has(Page *page) const {
+ return (m_collection.has(page));
+ }
+
+ /* Returns true if the changeset is empty */
+ bool is_empty() const {
+ return (m_collection.is_empty());
+ }
+
+ /* Removes all pages from the changeset. The pages are unlocked. */
+ void clear() {
+ UnlockPage unlocker;
+ m_collection.for_each(unlocker);
+ m_collection.clear();
+ }
+
+ /*
+ * Flush all pages in the changeset - first write them to the log, then
+ * write them to the disk.
+ * On success: will clear the changeset and the journal
+ */
+ void flush(uint64_t lsn);
+
+ private:
+ /* The Environment */
+ LocalEnvironment *m_env;
+
+ /* The pages which were added to this Changeset */
+ PageCollection m_collection;
+};
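+
+/*
+ * An illustrative sketch (not part of hamsterdb): shows the typical
+ * lifecycle of a Changeset at the end of a single operation. The helper
+ * name and the |success| parameter are hypothetical; only the Changeset
+ * methods declared above are used.
+ */
+inline void
+finish_changeset_sketch(Changeset &changeset, uint64_t lsn, bool success)
+{
+ if (success) {
+ // multi-page changesets are appended to the journal first, then all
+ // dirty pages are written to disk; on success the changeset is cleared
+ changeset.flush(lsn);
+ }
+ else {
+ // an aborted operation just unlocks and drops the collected pages
+ changeset.clear();
+ }
+}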
+
+} // namespace hamsterdb
+
+#endif /* HAM_CHANGESET_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal.cc b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal.cc
new file mode 100644
index 0000000000..50e749240f
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal.cc
@@ -0,0 +1,862 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+#ifndef HAM_OS_WIN32
+# include <libgen.h>
+#endif
+
+#include "1base/error.h"
+#include "1errorinducer/errorinducer.h"
+#include "1os/os.h"
+#include "2device/device.h"
+#include "3journal/journal.h"
+#include "3page_manager/page_manager.h"
+#include "4db/db.h"
+#include "4txn/txn_local.h"
+#include "4env/env_local.h"
+#include "4context/context.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+Journal::Journal(LocalEnvironment *env)
+ : m_state(env)
+{
+}
+
+void
+Journal::create()
+{
+ // create the two files
+ for (int i = 0; i < 2; i++) {
+ std::string path = get_path(i);
+ m_state.files[i].create(path.c_str(), 0644);
+ }
+}
+
+void
+Journal::open()
+{
+ // open the two files
+ try {
+ std::string path = get_path(0);
+ m_state.files[0].open(path.c_str(), false);
+ path = get_path(1);
+ m_state.files[1].open(path.c_str(), 0);
+ }
+ catch (Exception &ex) {
+ m_state.files[1].close();
+ m_state.files[0].close();
+ throw ex;
+ }
+}
+
+int
+Journal::switch_files_maybe()
+{
+ int other = m_state.current_fd ? 0 : 1;
+
+ // determine the journal file which is used for this transaction
+ // if the "current" file is not yet full, continue to write to this file
+ if (m_state.open_txn[m_state.current_fd]
+ + m_state.closed_txn[m_state.current_fd]
+ < m_state.threshold)
+ return (m_state.current_fd);
+
+ // If the other file no longer has any open Transactions then
+ // clear it and make it the current file
+ if (m_state.open_txn[other] == 0) {
+ clear_file(other);
+ m_state.current_fd = other;
+ // fall through
+ }
+
+ // Otherwise just continue using the current file
+ return (m_state.current_fd);
+}
+
+void
+Journal::append_txn_begin(LocalTransaction *txn, const char *name, uint64_t lsn)
+{
+ if (m_state.disable_logging)
+ return;
+
+ ham_assert((txn->get_flags() & HAM_TXN_TEMPORARY) == 0);
+
+ PJournalEntry entry;
+ entry.txn_id = txn->get_id();
+ entry.type = kEntryTypeTxnBegin;
+ entry.lsn = lsn;
+ if (name)
+ entry.followup_size = strlen(name) + 1;
+
+ txn->set_log_desc(switch_files_maybe());
+
+ int cur = txn->get_log_desc();
+
+ if (txn->get_name().size())
+ append_entry(cur, (uint8_t *)&entry, (uint32_t)sizeof(entry),
+ (uint8_t *)txn->get_name().c_str(),
+ (uint32_t)txn->get_name().size() + 1);
+ else
+ append_entry(cur, (uint8_t *)&entry, (uint32_t)sizeof(entry));
+ maybe_flush_buffer(cur);
+
+ m_state.open_txn[cur]++;
+
+ // store the fp-index in the journal structure; it's needed for
+ // journal_append_checkpoint() to quickly find out which file is
+ // the newest
+ m_state.current_fd = cur;
+}
+
+void
+Journal::append_txn_abort(LocalTransaction *txn, uint64_t lsn)
+{
+ if (m_state.disable_logging)
+ return;
+
+ ham_assert((txn->get_flags() & HAM_TXN_TEMPORARY) == 0);
+
+ int idx;
+ PJournalEntry entry;
+ entry.lsn = lsn;
+ entry.txn_id = txn->get_id();
+ entry.type = kEntryTypeTxnAbort;
+
+ // update the transaction counters of this logfile
+ idx = txn->get_log_desc();
+ m_state.open_txn[idx]--;
+ m_state.closed_txn[idx]++;
+
+ append_entry(idx, (uint8_t *)&entry, sizeof(entry));
+ maybe_flush_buffer(idx);
+ // no need for fsync - incomplete transactions will be aborted anyway
+}
+
+void
+Journal::append_txn_commit(LocalTransaction *txn, uint64_t lsn)
+{
+ if (m_state.disable_logging)
+ return;
+
+ ham_assert((txn->get_flags() & HAM_TXN_TEMPORARY) == 0);
+
+ PJournalEntry entry;
+ entry.lsn = lsn;
+ entry.txn_id = txn->get_id();
+ entry.type = kEntryTypeTxnCommit;
+
+ // do not yet update the transaction counters of this logfile; just
+ // because the txn was committed does not mean that it will be flushed
+ // immediately. The counters will be modified in transaction_flushed().
+ int idx = txn->get_log_desc();
+
+ append_entry(idx, (uint8_t *)&entry, sizeof(entry));
+
+ // and flush the file
+ flush_buffer(idx, m_state.env->get_flags() & HAM_ENABLE_FSYNC);
+}
+
+void
+Journal::append_insert(Database *db, LocalTransaction *txn,
+ ham_key_t *key, ham_record_t *record, uint32_t flags,
+ uint64_t lsn)
+{
+ if (m_state.disable_logging)
+ return;
+
+ PJournalEntry entry;
+ PJournalEntryInsert insert;
+ uint32_t size = sizeof(PJournalEntryInsert)
+ + key->size
+ + (flags & HAM_PARTIAL
+ ? record->partial_size
+ : record->size)
+ - 1;
+
+ entry.lsn = lsn;
+ entry.dbname = db->name();
+ entry.type = kEntryTypeInsert;
+ entry.followup_size = size;
+
+ int idx;
+ if (txn->get_flags() & HAM_TXN_TEMPORARY) {
+ entry.txn_id = 0;
+ idx = switch_files_maybe();
+ m_state.closed_txn[idx]++;
+ }
+ else {
+ entry.txn_id = txn->get_id();
+ idx = txn->get_log_desc();
+ }
+
+ insert.key_size = key->size;
+ insert.record_size = record->size;
+ insert.record_partial_size = record->partial_size;
+ insert.record_partial_offset = record->partial_offset;
+ insert.insert_flags = flags;
+
+ // append the entry to the logfile
+ append_entry(idx, (uint8_t *)&entry, sizeof(entry),
+ (uint8_t *)&insert, sizeof(PJournalEntryInsert) - 1,
+ (uint8_t *)key->data, key->size,
+ (uint8_t *)record->data, (flags & HAM_PARTIAL
+ ? record->partial_size
+ : record->size));
+ maybe_flush_buffer(idx);
+}
+
+void
+Journal::append_erase(Database *db, LocalTransaction *txn, ham_key_t *key,
+ int duplicate_index, uint32_t flags, uint64_t lsn)
+{
+ if (m_state.disable_logging)
+ return;
+
+ PJournalEntry entry;
+ PJournalEntryErase erase;
+ uint32_t size = sizeof(PJournalEntryErase) + key->size - 1;
+
+ entry.lsn = lsn;
+ entry.dbname = db->name();
+ entry.type = kEntryTypeErase;
+ entry.followup_size = size;
+ erase.key_size = key->size;
+ erase.erase_flags = flags;
+ erase.duplicate = duplicate_index;
+
+ int idx;
+ if (txn->get_flags() & HAM_TXN_TEMPORARY) {
+ entry.txn_id = 0;
+ idx = switch_files_maybe();
+ m_state.closed_txn[idx]++;
+ }
+ else {
+ entry.txn_id = txn->get_id();
+ idx = txn->get_log_desc();
+ }
+
+ // append the entry to the logfile
+ append_entry(idx, (uint8_t *)&entry, sizeof(entry),
+ (uint8_t *)&erase, sizeof(PJournalEntryErase) - 1,
+ (uint8_t *)key->data, key->size);
+ maybe_flush_buffer(idx);
+}
+
+void
+Journal::append_changeset(const Page **pages, int num_pages, uint64_t lsn)
+{
+ if (m_state.disable_logging)
+ return;
+
+ PJournalEntry entry;
+ PJournalEntryChangeset changeset;
+
+ entry.lsn = lsn;
+ entry.dbname = 0;
+ entry.txn_id = 0;
+ entry.type = kEntryTypeChangeset;
+ // followup_size is incomplete - the actual page sizes are added later
+ entry.followup_size = sizeof(PJournalEntryChangeset);
+ changeset.num_pages = num_pages;
+
+ // we need the current position in the file buffer. if compression is enabled
+ // then we do not know the actual followup-size of this entry. it will be
+ // patched in later.
+ uint32_t entry_position = m_state.buffer[m_state.current_fd].get_size();
+
+ // write the data to the file
+ append_entry(m_state.current_fd, (uint8_t *)&entry, sizeof(entry),
+ (uint8_t *)&changeset, sizeof(PJournalEntryChangeset));
+
+ size_t page_size = m_state.env->config().page_size_bytes;
+ for (int i = 0; i < num_pages; i++) {
+ entry.followup_size += append_changeset_page(pages[i], page_size);
+ }
+
+ HAM_INDUCE_ERROR(ErrorInducer::kChangesetFlush);
+
+ // and patch in the followup-size
+ m_state.buffer[m_state.current_fd].overwrite(entry_position,
+ (uint8_t *)&entry, sizeof(entry));
+
+ HAM_INDUCE_ERROR(ErrorInducer::kChangesetFlush);
+
+ // and flush the file
+ flush_buffer(m_state.current_fd, m_state.env->get_flags() & HAM_ENABLE_FSYNC);
+
+ HAM_INDUCE_ERROR(ErrorInducer::kChangesetFlush);
+
+ // if recovery is enabled (w/o transactions) then simulate a "commit" to
+ // make sure that the log files are switched properly
+ m_state.closed_txn[m_state.current_fd]++;
+ (void)switch_files_maybe();
+}
+
+uint32_t
+Journal::append_changeset_page(const Page *page, uint32_t page_size)
+{
+ PJournalEntryPageHeader header(page->get_address());
+
+ append_entry(m_state.current_fd, (uint8_t *)&header, sizeof(header),
+ page->get_raw_payload(), page_size);
+ return (page_size + sizeof(header));
+}
+
+void
+Journal::transaction_flushed(LocalTransaction *txn)
+{
+ ham_assert((txn->get_flags() & HAM_TXN_TEMPORARY) == 0);
+ if (m_state.disable_logging) // ignore this call during recovery
+ return;
+
+ int idx = txn->get_log_desc();
+ ham_assert(m_state.open_txn[idx] > 0);
+ m_state.open_txn[idx]--;
+ m_state.closed_txn[idx]++;
+}
+
+void
+Journal::get_entry(Iterator *iter, PJournalEntry *entry, ByteArray *auxbuffer)
+{
+ uint64_t filesize;
+
+ auxbuffer->clear();
+
+ // if iter->offset is 0, then the iterator was created from scratch
+ // and we start reading from the first (oldest) entry.
+ //
+ // The oldest of the two logfiles is always the "other" one (the one
+ // NOT in current_fd).
+ if (iter->offset == 0) {
+ iter->fdstart = iter->fdidx =
+ m_state.current_fd == 0
+ ? 1
+ : 0;
+ }
+
+ // get the size of the journal file
+ filesize = m_state.files[iter->fdidx].get_file_size();
+
+ // reached EOF? then either skip to the next file or we're done
+ if (filesize == iter->offset) {
+ if (iter->fdstart == iter->fdidx) {
+ iter->fdidx = iter->fdidx == 1 ? 0 : 1;
+ iter->offset = 0;
+ filesize = m_state.files[iter->fdidx].get_file_size();
+ }
+ else {
+ entry->lsn = 0;
+ return;
+ }
+ }
+
+ // second file is also empty? then return
+ if (filesize == iter->offset) {
+ entry->lsn = 0;
+ return;
+ }
+
+ // now try to read the next entry
+ try {
+ m_state.files[iter->fdidx].pread(iter->offset, entry, sizeof(*entry));
+
+ iter->offset += sizeof(*entry);
+
+ // read auxiliary data if it's available
+ if (entry->followup_size) {
+ auxbuffer->resize((uint32_t)entry->followup_size);
+
+ m_state.files[iter->fdidx].pread(iter->offset, auxbuffer->get_ptr(),
+ (size_t)entry->followup_size);
+ iter->offset += entry->followup_size;
+ }
+ }
+ catch (Exception &) {
+ ham_trace(("failed to read journal entry, aborting recovery"));
+ entry->lsn = 0; // this triggers the end of recovery
+ }
+}
+
+void
+Journal::close(bool noclear)
+{
+ int i;
+
+ // the noclear flag is set during testing, for checking whether the files
+ // contain the correct data. Flush the buffers, otherwise the tests will
+ // fail because data is missing
+ if (noclear) {
+ flush_buffer(0);
+ flush_buffer(1);
+ }
+
+ if (!noclear)
+ clear();
+
+ for (i = 0; i < 2; i++) {
+ m_state.files[i].close();
+ m_state.buffer[i].clear();
+ }
+}
+
+Database *
+Journal::get_db(uint16_t dbname)
+{
+ // first check if the Database is already open
+ JournalState::DatabaseMap::iterator it = m_state.database_map.find(dbname);
+ if (it != m_state.database_map.end())
+ return (it->second);
+
+ // not found - open it
+ Database *db = 0;
+ DatabaseConfiguration config;
+ config.db_name = dbname;
+ ham_status_t st = m_state.env->open_db(&db, config, 0);
+ if (st)
+ throw Exception(st);
+ m_state.database_map[dbname] = db;
+ return (db);
+}
+
+Transaction *
+Journal::get_txn(LocalTransactionManager *txn_manager, uint64_t txn_id)
+{
+ Transaction *txn = txn_manager->get_oldest_txn();
+ while (txn) {
+ if (txn->get_id() == txn_id)
+ return (txn);
+ txn = txn->get_next();
+ }
+
+ return (0);
+}
+
+void
+Journal::close_all_databases()
+{
+ ham_status_t st = 0;
+
+ JournalState::DatabaseMap::iterator it = m_state.database_map.begin();
+ while (it != m_state.database_map.end()) {
+ JournalState::DatabaseMap::iterator it2 = it; it++;
+ st = ham_db_close((ham_db_t *)it2->second, HAM_DONT_LOCK);
+ if (st) {
+ ham_log(("ham_db_close() failed w/ error %d (%s)", st, ham_strerror(st)));
+ throw Exception(st);
+ }
+ }
+ m_state.database_map.clear();
+}
+
+void
+Journal::abort_uncommitted_txns(LocalTransactionManager *txn_manager)
+{
+ Transaction *txn = txn_manager->get_oldest_txn();
+
+ while (txn) {
+ if (!txn->is_committed())
+ txn->abort();
+ txn = txn->get_next();
+ }
+}
+
+void
+Journal::recover(LocalTransactionManager *txn_manager)
+{
+ Context context(m_state.env, 0, 0);
+
+ // first re-apply the last changeset
+ uint64_t start_lsn = recover_changeset();
+
+ // load the state of the PageManager; the PageManager state is loaded AFTER
+ // physical recovery because its page might have been restored in
+ // recover_changeset()
+ uint64_t page_manager_blobid = m_state.env->header()->get_page_manager_blobid();
+ if (page_manager_blobid != 0) {
+ m_state.env->page_manager()->initialize(page_manager_blobid);
+ }
+
+ // then start the normal recovery
+ if (m_state.env->get_flags() & HAM_ENABLE_TRANSACTIONS)
+ recover_journal(&context, txn_manager, start_lsn);
+}
+
+uint64_t
+Journal::scan_for_newest_changeset(File *file, uint64_t *position)
+{
+ Iterator it;
+ PJournalEntry entry;
+ ByteArray buffer;
+ uint64_t result = 0;
+
+ // get the next entry
+ try {
+ uint64_t filesize = file->get_file_size();
+
+ while (it.offset < filesize) {
+ file->pread(it.offset, &entry, sizeof(entry));
+
+ if (entry.lsn == 0)
+ break;
+
+ if (entry.type == kEntryTypeChangeset) {
+ *position = it.offset;
+ result = entry.lsn;
+ }
+
+ // increment the offset
+ it.offset += sizeof(entry);
+ if (entry.followup_size)
+ it.offset += entry.followup_size;
+ }
+ }
+ catch (Exception &ex) {
+ ham_log(("exception (error %d) while reading journal", ex.code));
+ }
+
+ return (result);
+}
+
+uint64_t
+Journal::recover_changeset()
+{
+ // scan through both files, look for the file with the newest changeset
+ uint64_t position0, position1, position;
+ uint64_t lsn1 = scan_for_newest_changeset(&m_state.files[0], &position0);
+ uint64_t lsn2 = scan_for_newest_changeset(&m_state.files[1], &position1);
+
+ // both files are empty or do not contain a changeset?
+ if (lsn1 == 0 && lsn2 == 0)
+ return (0);
+
+ // re-apply the newest changeset
+ m_state.current_fd = lsn1 > lsn2 ? 0 : 1;
+ position = lsn1 > lsn2 ? position0 : position1;
+
+ PJournalEntry entry;
+ uint64_t start_lsn = 0;
+
+ try {
+ m_state.files[m_state.current_fd].pread(position, &entry, sizeof(entry));
+ position += sizeof(entry);
+ ham_assert(entry.type == kEntryTypeChangeset);
+
+ // Read the Changeset header
+ PJournalEntryChangeset changeset;
+ m_state.files[m_state.current_fd].pread(position, &changeset,
+ sizeof(changeset));
+ position += sizeof(changeset);
+
+ uint32_t page_size = m_state.env->config().page_size_bytes;
+ ByteArray arena(page_size);
+
+ uint64_t file_size = m_state.env->device()->file_size();
+
+ // for each page in this changeset...
+ for (uint32_t i = 0; i < changeset.num_pages; i++) {
+ PJournalEntryPageHeader page_header;
+ m_state.files[m_state.current_fd].pread(position, &page_header,
+ sizeof(page_header));
+ position += sizeof(page_header);
+ m_state.files[m_state.current_fd].pread(position, arena.get_ptr(),
+ page_size);
+ position += page_size;
+
+ Page *page;
+
+ // now write the page to disk
+ if (page_header.address == file_size) {
+ file_size += page_size;
+
+ page = new Page(m_state.env->device());
+ page->alloc(0);
+ }
+ else if (page_header.address > file_size) {
+ file_size = (size_t)page_header.address + page_size;
+ m_state.env->device()->truncate(file_size);
+
+ page = new Page(m_state.env->device());
+ page->fetch(page_header.address);
+ }
+ else {
+ page = new Page(m_state.env->device());
+ page->fetch(page_header.address);
+ }
+
+ // only overwrite the page data if the page's last modification
+ // is OLDER than the changeset!
+ bool skip = false;
+ if (page->is_without_header() == false) {
+ if (page->get_lsn() > entry.lsn) {
+ skip = true;
+ start_lsn = page->get_lsn();
+ }
+ }
+
+ if (!skip) {
+ // overwrite the page data
+ memcpy(page->get_data(), arena.get_ptr(), page_size);
+
+ ham_assert(page->get_address() == page_header.address);
+
+ // flush the modified page to disk
+ page->set_dirty(true);
+ page->flush();
+ }
+
+ delete page;
+ }
+ }
+ catch (Exception &) {
+ ham_trace(("Exception when applying changeset; skipping changeset"));
+ // fall through
+ }
+
+ return (std::max(start_lsn, entry.lsn));
+}
+
+void
+Journal::recover_journal(Context *context,
+ LocalTransactionManager *txn_manager, uint64_t start_lsn)
+{
+ ham_status_t st = 0;
+ Iterator it;
+ ByteArray buffer;
+
+ /* recovering the journal is rather simple - we iterate over the
+ * files and re-apply EVERY operation (incl. txn_begin and txn_abort),
+ * that was not yet flushed with a Changeset.
+ *
+ * Basically we iterate over both log files and skip everything with
+ * a sequence number (lsn) smaller than the one of the last Changeset.
+ *
+ * When done, auto-abort all transactions that were not yet
+ * committed.
+ */
+
+ // make sure that there are no pending transactions - start with
+ // a clean state!
+ ham_assert(txn_manager->get_oldest_txn() == 0);
+ ham_assert(m_state.env->get_flags() & HAM_ENABLE_TRANSACTIONS);
+ ham_assert(m_state.env->get_flags() & HAM_ENABLE_RECOVERY);
+
+ // do not append to the journal during recovery
+ m_state.disable_logging = true;
+
+ do {
+ PJournalEntry entry;
+
+ // get the next entry
+ get_entry(&it, &entry, &buffer);
+
+ // reached end of logfile?
+ if (!entry.lsn)
+ break;
+
+ // re-apply this operation
+ switch (entry.type) {
+ case kEntryTypeTxnBegin: {
+ Transaction *txn = 0;
+ st = ham_txn_begin((ham_txn_t **)&txn, (ham_env_t *)m_state.env,
+ (const char *)buffer.get_ptr(), 0, HAM_DONT_LOCK);
+ // on success: patch the txn ID
+ if (st == 0) {
+ txn->set_id(entry.txn_id);
+ txn_manager->set_txn_id(entry.txn_id);
+ }
+ break;
+ }
+ case kEntryTypeTxnAbort: {
+ Transaction *txn = get_txn(txn_manager, entry.txn_id);
+ st = ham_txn_abort((ham_txn_t *)txn, HAM_DONT_LOCK);
+ break;
+ }
+ case kEntryTypeTxnCommit: {
+ Transaction *txn = get_txn(txn_manager, entry.txn_id);
+ st = ham_txn_commit((ham_txn_t *)txn, HAM_DONT_LOCK);
+ break;
+ }
+ case kEntryTypeInsert: {
+ PJournalEntryInsert *ins = (PJournalEntryInsert *)buffer.get_ptr();
+ Transaction *txn = 0;
+ Database *db;
+ ham_key_t key = {0};
+ ham_record_t record = {0};
+ if (!ins) {
+ st = HAM_IO_ERROR;
+ goto bail;
+ }
+
+ // do not insert if the key was already flushed to disk
+ if (entry.lsn <= start_lsn)
+ continue;
+
+ key.data = ins->get_key_data();
+ key.size = ins->key_size;
+ record.data = ins->get_record_data();
+ record.size = ins->record_size;
+ record.partial_size = ins->record_partial_size;
+ record.partial_offset = ins->record_partial_offset;
+ if (entry.txn_id)
+ txn = get_txn(txn_manager, entry.txn_id);
+ db = get_db(entry.dbname);
+ st = ham_db_insert((ham_db_t *)db, (ham_txn_t *)txn,
+ &key, &record, ins->insert_flags | HAM_DONT_LOCK);
+ break;
+ }
+ case kEntryTypeErase: {
+ PJournalEntryErase *e = (PJournalEntryErase *)buffer.get_ptr();
+ Transaction *txn = 0;
+ Database *db;
+ ham_key_t key = {0};
+ if (!e) {
+ st = HAM_IO_ERROR;
+ goto bail;
+ }
+
+ // do not erase if the key was already erased from disk
+ if (entry.lsn <= start_lsn)
+ continue;
+
+ if (entry.txn_id)
+ txn = get_txn(txn_manager, entry.txn_id);
+ db = get_db(entry.dbname);
+ key.data = e->get_key_data();
+ key.size = e->key_size;
+ st = ham_db_erase((ham_db_t *)db, (ham_txn_t *)txn, &key,
+ e->erase_flags | HAM_DONT_LOCK);
+ // key might have already been erased when the changeset
+ // was flushed
+ if (st == HAM_KEY_NOT_FOUND)
+ st = 0;
+ break;
+ }
+ case kEntryTypeChangeset: {
+ // skip this; the changeset was already applied
+ break;
+ }
+ default:
+ ham_log(("invalid journal entry type or journal is corrupt"));
+ st = HAM_IO_ERROR;
+ }
+
+ if (st)
+ goto bail;
+ } while (1);
+
+bail:
+ // all transactions which are not yet committed will be aborted
+ abort_uncommitted_txns(txn_manager);
+
+ // also close and delete all open databases - they were created in get_db()
+ close_all_databases();
+
+ // flush all committed transactions
+ if (st == 0)
+ st = m_state.env->flush(HAM_FLUSH_COMMITTED_TRANSACTIONS);
+
+ // re-enable the logging
+ m_state.disable_logging = false;
+
+ if (st)
+ throw Exception(st);
+
+ // clear the journal files
+ clear();
+}
+
+void
+Journal::clear_file(int idx)
+{
+ if (m_state.files[idx].is_open()) {
+ m_state.files[idx].truncate(0);
+
+ // after truncate, the file pointer is far beyond the new end of file;
+ // reset the file pointer, or the next write will resize the file to
+ // the original size
+ m_state.files[idx].seek(0, File::kSeekSet);
+ }
+
+ // clear the transaction counters
+ m_state.open_txn[idx] = 0;
+ m_state.closed_txn[idx] = 0;
+
+ // also clear the buffer with the outstanding data
+ m_state.buffer[idx].clear();
+}
+
+std::string
+Journal::get_path(int i)
+{
+ std::string path;
+
+ if (m_state.env->config().log_filename.empty()) {
+ path = m_state.env->config().filename;
+ }
+ else {
+ path = m_state.env->config().log_filename;
+#ifdef HAM_OS_WIN32
+ path += "\\";
+ char fname[_MAX_FNAME];
+ char ext[_MAX_EXT];
+ _splitpath(m_state.env->config().filename.c_str(), 0, 0, fname, ext);
+ path += fname;
+ path += ext;
+#else
+ path += "/";
+ path += ::basename((char *)m_state.env->config().filename.c_str());
+#endif
+ }
+ if (i == 0)
+ path += ".jrn0";
+ else if (i == 1)
+ path += ".jrn1";
+ else
+ ham_assert(!"invalid index");
+ return (path);
+}
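+
+/*
+ * An illustrative sketch (not part of hamsterdb): demonstrates the naming
+ * scheme produced by get_path() above. For a database "/tmp/test.db" (and
+ * no separate log directory) the two journal files are "/tmp/test.db.jrn0"
+ * and "/tmp/test.db.jrn1". The helper below is hypothetical.
+ */
+static inline std::string
+journal_filename_sketch(const std::string &db_path, int index)
+{
+ return (db_path + (index == 0 ? ".jrn0" : ".jrn1"));
+}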
+
+JournalTest
+Journal::test()
+{
+ return (JournalTest(&m_state));
+}
+
+JournalState::JournalState(LocalEnvironment *env)
+ : env(env), current_fd(0), threshold(env->config().journal_switch_threshold),
+ disable_logging(false), count_bytes_flushed(0),
+ count_bytes_before_compression(0), count_bytes_after_compression(0)
+{
+ if (threshold == 0)
+ threshold = kSwitchTxnThreshold;
+
+ open_txn[0] = 0;
+ open_txn[1] = 0;
+ closed_txn[0] = 0;
+ closed_txn[1] = 0;
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal.h b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal.h
new file mode 100644
index 0000000000..dd55b66fea
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal.h
@@ -0,0 +1,329 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Routines for the journal - writing, reading, recovering
+ *
+ * The journal is a facility for storing logical and physical redo-information.
+ *
+ * The logical information describes the database operation (i.e. insert/erase),
+ * the physical information describes the modified pages.
+ *
+ * "Undo" information is not required because aborted Transactions are never
+ * written to disk. The journal only can "redo" operations.
+ *
+ * The journal is organized in two files. If one of the files grows too large
+ * then all new Transactions are stored in the other file
+ * ("Log file switching"). When all Transactions from file #0 are committed,
+ * and file #1 exceeds a limit, then the files are switched back again.
+ *
+ * For writing, files are buffered. The buffers are flushed when they
+ * exceed a certain threshold, when a Transaction is committed or when a
+ * Changeset is written. In case of a commit or a changeset, an fsync is
+ * also performed if HAM_ENABLE_FSYNC is enabled.
+ *
+ * The physical information is a collection of pages which are modified in
+ * one or more database operations (i.e. ham_db_erase). This collection is
+ * called a "changeset" and implemented in changeset.h/.cc. As soon as the
+ * operation is finished, the changeset is flushed: if the changeset contains
+ * just a single page, then this operation is atomic and is NOT logged.
+ * Otherwise the whole changeset is appended to the journal, and afterwards
+ * the database file is modified.
+ *
+ * For recovery to work, each page stores the lsn of its last modification.
+ *
+ * When recovering, the Journal first extracts the newest/latest entry.
+ * If this entry is a changeset then the changeset is reapplied, because
+ * we assume that there was a crash immediately AFTER the changeset was
+ * written, but BEFORE the database file was modified. (The changeset is
+ * idempotent; if the database file was successfully modified then the
+ * changes are re-applied; this is not a problem.)
+ *
+ * Afterwards, hamsterdb uses the lsns to figure out whether an update
+ * was already applied or not. If the journal's last entry is a changeset then
+ * this changeset's lsn marks the beginning of the sequence. Otherwise the lsn
+ * is fetched from the journal file headers. All journal entries with an lsn
+ * *older* than (or equal to) this start-lsn are skipped; all others are
+ * re-applied.
+ *
+ * In this phase all changesets are skipped because the newest changeset was
+ * already applied, and we know that all older changesets
+ * have already been written successfully to the database file.
+ *
+ * @exception_safe: basic
+ * @thread_safe: no
+ */
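+
+/*
+ * A simplified sketch of the replay rule described above (error handling and
+ * the actual redo calls are omitted; the real loop lives in
+ * Journal::recover_journal()):
+ *
+ *   uint64_t start_lsn = recover_changeset(); // lsn of the newest changeset
+ *   Iterator it;
+ *   PJournalEntry entry;
+ *   ByteArray aux;
+ *   do {
+ *     get_entry(&it, &entry, &aux);
+ *     if (entry.lsn == 0)            // end of the journal
+ *       break;
+ *     if (entry.lsn <= start_lsn)    // already on disk -> skip
+ *       continue;
+ *     // otherwise re-apply the insert/erase/txn entry
+ *   } while (true);
+ */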
+
+#ifndef HAM_JOURNAL_H
+#define HAM_JOURNAL_H
+
+#include "0root/root.h"
+
+#include <map>
+#include <cstdio>
+#include <string>
+
+#include "ham/hamsterdb_int.h" // for metrics
+
+#include "1base/dynamic_array.h"
+#include "1os/file.h"
+#include "1errorinducer/errorinducer.h"
+#include "2page/page_collection.h"
+#include "3journal/journal_entries.h"
+#include "3journal/journal_state.h"
+#include "3journal/journal_test.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+class Page;
+class Database;
+class Transaction;
+class LocalEnvironment;
+class LocalTransaction;
+class LocalTransactionManager;
+
+#include "1base/packstart.h"
+
+//
+// The Journal object
+//
+class Journal
+{
+ public:
+ enum {
+ // marks the start of a new transaction
+ kEntryTypeTxnBegin = 1,
+
+ // marks the end of an aborted transaction
+ kEntryTypeTxnAbort = 2,
+
+ // marks the end of a committed transaction
+ kEntryTypeTxnCommit = 3,
+
+ // marks an insert operation
+ kEntryTypeInsert = 4,
+
+ // marks an erase operation
+ kEntryTypeErase = 5,
+
+ // marks a whole changeset operation (writes modified pages)
+ kEntryTypeChangeset = 6
+ };
+
+ //
+ // An "iterator" structure for traversing the journal files
+ //
+ struct Iterator {
+ Iterator()
+ : fdidx(0), fdstart(0), offset(0) {
+ }
+
+ // selects the file descriptor [0..1]
+ int fdidx;
+
+ // which file descriptor did we start with? [0..1]
+ int fdstart;
+
+ // the offset in the file of the NEXT entry
+ uint64_t offset;
+ };
+
+ // Constructor
+ Journal(LocalEnvironment *env);
+
+ // Creates a new journal
+ void create();
+
+ // Opens an existing journal
+ void open();
+
+ // Returns true if the journal is empty
+ bool is_empty() {
+ if (!m_state.files[0].is_open() && !m_state.files[1].is_open())
+ return (true);
+
+ for (int i = 0; i < 2; i++) {
+ uint64_t size = m_state.files[i].get_file_size();
+ if (size > 0)
+ return (false);
+ }
+
+ return (true);
+ }
+
+ // Appends a journal entry for ham_txn_begin/kEntryTypeTxnBegin
+ void append_txn_begin(LocalTransaction *txn, const char *name,
+ uint64_t lsn);
+
+ // Appends a journal entry for ham_txn_abort/kEntryTypeTxnAbort
+ void append_txn_abort(LocalTransaction *txn, uint64_t lsn);
+
+ // Appends a journal entry for ham_txn_commit/kEntryTypeTxnCommit
+ void append_txn_commit(LocalTransaction *txn, uint64_t lsn);
+
+ // Appends a journal entry for ham_insert/kEntryTypeInsert
+ void append_insert(Database *db, LocalTransaction *txn,
+ ham_key_t *key, ham_record_t *record, uint32_t flags,
+ uint64_t lsn);
+
+ // Appends a journal entry for ham_erase/kEntryTypeErase
+ void append_erase(Database *db, LocalTransaction *txn,
+ ham_key_t *key, int duplicate_index, uint32_t flags,
+ uint64_t lsn);
+
+ // Appends a journal entry for a whole changeset/kEntryTypeChangeset
+ void append_changeset(const Page **pages, int num_pages, uint64_t lsn);
+
+ // Adjusts the transaction counters; called whenever |txn| is flushed.
+ void transaction_flushed(LocalTransaction *txn);
+
+ // Empties the journal, removes all entries
+ void clear() {
+ for (int i = 0; i < 2; i++)
+ clear_file(i);
+ }
+
+ // Closes the journal, frees all allocated resources
+ void close(bool noclear = false);
+
+ // Performs the recovery! All committed Transactions will be re-applied,
+ // all others are automatically aborted
+ void recover(LocalTransactionManager *txn_manager);
+
+ // Fills the metrics
+ void fill_metrics(ham_env_metrics_t *metrics) {
+ metrics->journal_bytes_flushed = m_state.count_bytes_flushed;
+ }
+
+ private:
+ friend struct JournalFixture;
+
+ // Returns a pointer to database. If the database was not yet opened then
+ // it is opened implicitly.
+ Database *get_db(uint16_t dbname);
+
+ // Returns a pointer to a Transaction object.
+ Transaction *get_txn(LocalTransactionManager *txn_manager, uint64_t txn_id);
+
+ // Closes all databases.
+ void close_all_databases();
+
+ // Aborts all transactions which are still active.
+ void abort_uncommitted_txns(LocalTransactionManager *txn_manager);
+
+ // Helper function which adds a single page from the changeset to
+ // the Journal; returns the page size (or compressed size, if compression
+ // was enabled)
+ uint32_t append_changeset_page(const Page *page, uint32_t page_size);
+
+ // Recovers (re-applies) the physical changeset; returns the lsn of
+ // that changeset
+ uint64_t recover_changeset();
+
+ // Scans a file for the newest changeset. Returns the lsn of this
+ // changeset, and the position (offset) in the file
+ uint64_t scan_for_newest_changeset(File *file, uint64_t *position);
+
+ // Recovers the logical journal
+ void recover_journal(Context *context,
+ LocalTransactionManager *txn_manager, uint64_t start_lsn);
+
+ // Switches the log file if necessary; returns the index of the new
+ // log file descriptor
+ int switch_files_maybe();
+
+ // returns the path of the journal file
+ std::string get_path(int i);
+
+ // Sequentially returns the next journal entry, starting with
+ // the oldest entry.
+ //
+ // |iter| must be initialized with zeroes for the first call.
+ // |auxbuffer| returns the auxiliary data of the entry and is either
+ // a structure of type PJournalEntryInsert or PJournalEntryErase.
+ //
+ // Returns an empty entry (lsn is zero) after the last element.
+ void get_entry(Iterator *iter, PJournalEntry *entry,
+ ByteArray *auxbuffer);
+
+ // Appends an entry to the journal
+ void append_entry(int idx,
+ const uint8_t *ptr1 = 0, size_t ptr1_size = 0,
+ const uint8_t *ptr2 = 0, size_t ptr2_size = 0,
+ const uint8_t *ptr3 = 0, size_t ptr3_size = 0,
+ const uint8_t *ptr4 = 0, size_t ptr4_size = 0,
+ const uint8_t *ptr5 = 0, size_t ptr5_size = 0) {
+ if (ptr1_size)
+ m_state.buffer[idx].append(ptr1, ptr1_size);
+ if (ptr2_size)
+ m_state.buffer[idx].append(ptr2, ptr2_size);
+ if (ptr3_size)
+ m_state.buffer[idx].append(ptr3, ptr3_size);
+ if (ptr4_size)
+ m_state.buffer[idx].append(ptr4, ptr4_size);
+ if (ptr5_size)
+ m_state.buffer[idx].append(ptr5, ptr5_size);
+ }
+
+ // flush buffer if size limit is exceeded
+ void maybe_flush_buffer(int idx) {
+ if (m_state.buffer[idx].get_size() >= JournalState::kBufferLimit)
+ flush_buffer(idx);
+ }
+
+ // Flushes a buffer to disk
+ void flush_buffer(int idx, bool fsync = false) {
+ if (m_state.buffer[idx].get_size() > 0) {
+ // error inducer? then write only a part of the buffer and throw
+ if (ErrorInducer::is_active()
+ && ErrorInducer::get_instance()->induce(ErrorInducer::kChangesetFlush)) {
+ m_state.files[idx].write(m_state.buffer[idx].get_ptr(),
+ m_state.buffer[idx].get_size() - 5);
+ throw Exception(HAM_INTERNAL_ERROR);
+ }
+
+ m_state.files[idx].write(m_state.buffer[idx].get_ptr(),
+ m_state.buffer[idx].get_size());
+ m_state.count_bytes_flushed += m_state.buffer[idx].get_size();
+
+ m_state.buffer[idx].clear();
+ if (fsync)
+ m_state.files[idx].flush();
+ }
+ }
+
+ // Clears a single file
+ void clear_file(int idx);
+
+ // Returns the test object
+ JournalTest test();
+
+ private:
+ // The mutable state
+ JournalState m_state;
+};
+
+#include "1base/packstop.h"
+
+} // namespace hamsterdb
+
+#endif /* HAM_JOURNAL_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_entries.h b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_entries.h
new file mode 100644
index 0000000000..b32f53693b
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_entries.h
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * journal entries for insert, erase, begin, commit, abort...
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_JOURNAL_ENTRIES_H
+#define HAM_JOURNAL_ENTRIES_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+#include "1base/packstart.h"
+
+/*
+ * A journal entry for all txn related operations (begin, commit, abort)
+ *
+ * This structure can be followed by one of the structures below
+ * (PJournalEntryInsert or PJournalEntryErase); the field |followup_size|
+ * is the structure size of this follow-up structure.
+ */
+HAM_PACK_0 struct HAM_PACK_1 PJournalEntry {
+ // Constructor - sets all fields to 0
+ PJournalEntry()
+ : lsn(0), followup_size(0), txn_id(0), type(0),
+ dbname(0), _reserved(0) {
+ }
+
+ // the lsn of this entry
+ uint64_t lsn;
+
+ // the size of the follow-up entry in bytes (may be padded)
+ uint64_t followup_size;
+
+ // the transaction id
+ uint64_t txn_id;
+
+ // the type of this entry
+ uint32_t type;
+
+ // the name of the database which is modified by this entry
+ uint16_t dbname;
+
+ // a reserved value - required for padding
+ uint16_t _reserved;
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
+
+
+#include "1base/packstart.h"
+
+//
+// a Journal entry for an 'insert' operation
+//
+HAM_PACK_0 struct HAM_PACK_1 PJournalEntryInsert {
+ // Constructor - sets all fields to 0
+ PJournalEntryInsert()
+ : key_size(0), compressed_key_size(0), record_size(0),
+ compressed_record_size(0), record_partial_size(0),
+ record_partial_offset(0), insert_flags(0) {
+ data[0] = 0;
+ }
+
+ // key size
+ uint16_t key_size;
+
+ // PRO: compressed key size
+ uint16_t compressed_key_size;
+
+ // record size
+ uint32_t record_size;
+
+ // PRO: compressed record size
+ uint32_t compressed_record_size;
+
+ // record partial size
+ uint32_t record_partial_size;
+
+ // record partial offset
+ uint32_t record_partial_offset;
+
+ // flags of ham_insert(), ham_cursor_insert()
+ uint32_t insert_flags;
+
+ // data follows here - first |key_size| bytes for the key, then
+ // |record_size| bytes for the record (and maybe some padding)
+ //
+ // PRO: this data can be compressed
+ uint8_t data[1];
+
+ // Returns a pointer to the key data
+ uint8_t *get_key_data() {
+ return (&data[0]);
+ }
+
+ // Returns a pointer to the record data
+ uint8_t *get_record_data() {
+ return (&data[key_size]);
+ }
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
+
+
+#include "1base/packstart.h"
+
+//
+// a Journal entry for 'erase' operations
+//
+HAM_PACK_0 struct HAM_PACK_1 PJournalEntryErase {
+ // Constructor - sets all fields to 0
+ PJournalEntryErase()
+ : key_size(0), compressed_key_size(0), erase_flags(0), duplicate(0) {
+ data[0] = 0;
+ }
+
+ // key size
+ uint16_t key_size;
+
+ // PRO: compressed key size
+ uint16_t compressed_key_size;
+
+ // flags of ham_erase(), ham_cursor_erase()
+ uint32_t erase_flags;
+
+ // which duplicate to erase
+ int duplicate;
+
+ // the key data
+ //
+ // PRO: this data can be compressed
+ uint8_t data[1];
+
+ // Returns a pointer to the key data
+ uint8_t *get_key_data() {
+ return (&data[0]);
+ }
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
+
+
+#include "1base/packstart.h"
+
+//
+// a Journal entry for a 'changeset' group
+//
+HAM_PACK_0 struct HAM_PACK_1 PJournalEntryChangeset {
+ // Constructor - sets all fields to 0
+ PJournalEntryChangeset()
+ : num_pages(0) {
+ }
+
+ // number of pages in this changeset
+ uint32_t num_pages;
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
+
+
+#include "1base/packstart.h"
+
+//
+// a Journal entry for a single page
+//
+HAM_PACK_0 struct HAM_PACK_1 PJournalEntryPageHeader {
+ // Constructor - sets all fields to 0
+ PJournalEntryPageHeader(uint64_t _address = 0)
+ : address(_address), compressed_size(0) {
+ }
+
+ // the page address
+ uint64_t address;
+
+ // PRO: the compressed size, if compression is enabled
+ uint32_t compressed_size;
+} HAM_PACK_2;
+
+#include "1base/packstop.h"
+
+} // namespace hamsterdb
+
+#endif /* HAM_JOURNAL_ENTRIES_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_state.h b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_state.h
new file mode 100644
index 0000000000..817fcac1d5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_state.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The Journal's state
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_JOURNAL_STATE_H
+#define HAM_JOURNAL_STATE_H
+
+#include "0root/root.h"
+
+#include <map>
+#include <string>
+
+#include "ham/hamsterdb_int.h" // for metrics
+
+#include "1base/dynamic_array.h"
+#include "1os/file.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Database;
+class LocalEnvironment;
+
+struct JournalState
+{
+ enum {
+ // switch log file after |kSwitchTxnThreshold| transactions
+ kSwitchTxnThreshold = 32,
+
+ // flush buffers if this limit is exceeded
+ kBufferLimit = 1024 * 1024 // 1 mb
+ };
+
+ JournalState(LocalEnvironment *env);
+
+ // References the Environment this journal file is for
+ LocalEnvironment *env;
+
+ // The index of the file descriptor we are currently writing to (0 or 1)
+ uint32_t current_fd;
+
+ // The two file descriptors
+ File files[2];
+
+ // Buffers for writing data to the files
+ ByteArray buffer[2];
+
+ // For counting all open transactions in the files
+ size_t open_txn[2];
+
+ // For counting all closed transactions in the files
+ size_t closed_txn[2];
+
+ // The lsn of the previous checkpoint
+ uint64_t last_cp_lsn;
+
+ // When one file holds more than this number of Transactions,
+ // the files are swapped
+ size_t threshold;
+
+ // Set to true to disable logging; used during recovery
+ bool disable_logging;
+
+ // Counting the flushed bytes (for ham_env_get_metrics)
+ uint64_t count_bytes_flushed;
+
+ // Counting the bytes before compression (for ham_env_get_metrics)
+ uint64_t count_bytes_before_compression;
+
+ // Counting the bytes after compression (for ham_env_get_metrics)
+ uint64_t count_bytes_after_compression;
+
+ // A map of all opened Databases
+ typedef std::map<uint16_t, Database *> DatabaseMap;
+ DatabaseMap database_map;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_JOURNAL_STATE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_test.h b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_test.h
new file mode 100644
index 0000000000..464d8fa43c
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3journal/journal_test.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Test gateway for the Journal
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_JOURNAL_TEST_H
+#define HAM_JOURNAL_TEST_H
+
+#include "0root/root.h"
+
+#include "ham/hamsterdb_int.h" // for metrics
+
+#include "3journal/journal_state.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class JournalTest
+{
+ public:
+ JournalTest(JournalState *state)
+ : m_state(state) {
+ }
+
+ // Returns the state
+ JournalState *state() { return (m_state); }
+
+ private:
+ // The journal's state
+ JournalState *m_state;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_JOURNAL_TEST_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager.cc b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager.cc
new file mode 100644
index 0000000000..bec3cc32e0
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager.cc
@@ -0,0 +1,798 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/dynamic_array.h"
+#include "1base/pickle.h"
+#include "2page/page.h"
+#include "2device/device.h"
+#include "2queue/queue.h"
+#include "3page_manager/page_manager.h"
+#include "3page_manager/page_manager_worker.h"
+#include "3page_manager/page_manager_test.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_node_proxy.h"
+#include "4context/context.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+enum {
+ kPurgeAtLeast = 20
+};
+
+PageManagerState::PageManagerState(LocalEnvironment *env)
+ : config(env->config()), header(env->header()),
+ device(env->device()), lsn_manager(env->lsn_manager()),
+ cache(env->config()), needs_flush(false), purge_cache_pending(false),
+ state_page(0), last_blob_page(0), last_blob_page_id(0),
+ page_count_fetched(0), page_count_index(0), page_count_blob(0),
+ page_count_page_manager(0), cache_hits(0), cache_misses(0),
+ freelist_hits(0), freelist_misses(0)
+{
+}
+
+PageManager::PageManager(LocalEnvironment *env)
+ : m_state(env)
+{
+ /* start the worker thread */
+ m_worker.reset(new PageManagerWorker(&m_state.cache));
+}
+
+void
+PageManager::initialize(uint64_t pageid)
+{
+ Context context(0, 0, 0);
+
+ m_state.free_pages.clear();
+ if (m_state.state_page)
+ delete m_state.state_page;
+ m_state.state_page = new Page(m_state.device);
+ m_state.state_page->fetch(pageid);
+
+ Page *page = m_state.state_page;
+ uint32_t page_size = m_state.config.page_size_bytes;
+
+ // the first page stores the page ID of the last blob
+ m_state.last_blob_page_id = *(uint64_t *)page->get_payload();
+
+ while (1) {
+ ham_assert(page->get_type() == Page::kTypePageManager);
+ uint8_t *p = page->get_payload();
+ // skip m_state.last_blob_page_id?
+ if (page == m_state.state_page)
+ p += sizeof(uint64_t);
+
+ // get the overflow address
+ uint64_t overflow = *(uint64_t *)p;
+ p += 8;
+
+ // get the number of stored elements
+ uint32_t counter = *(uint32_t *)p;
+ p += 4;
+
+ // now read all pages
+ for (uint32_t i = 0; i < counter; i++) {
+ // 4 bits page_counter, 4 bits for number of following bytes
+ int page_counter = (*p & 0xf0) >> 4;
+ int num_bytes = *p & 0x0f;
+ ham_assert(page_counter > 0);
+ ham_assert(num_bytes <= 8);
+ p += 1;
+
+ uint64_t id = Pickle::decode_u64(num_bytes, p);
+ p += num_bytes;
+
+ m_state.free_pages[id * page_size] = page_counter;
+ }
+
+ // load the overflow page
+ if (overflow)
+ page = fetch(&context, overflow, 0);
+ else
+ break;
+ }
+}
+
+Page *
+PageManager::fetch(Context *context, uint64_t address, uint32_t flags)
+{
+ /* fetch the page from the cache */
+ Page *page;
+
+ if (address == 0)
+ page = m_state.header->get_header_page();
+ else
+ page = m_state.cache.get(address);
+
+ if (page) {
+ if (flags & PageManager::kNoHeader)
+ page->set_without_header(true);
+ return (safely_lock_page(context, page, true));
+ }
+
+ if ((flags & PageManager::kOnlyFromCache)
+ || m_state.config.flags & HAM_IN_MEMORY)
+ return (0);
+
+ page = new Page(m_state.device, context->db);
+ try {
+ page->fetch(address);
+ }
+ catch (Exception &ex) {
+ delete page;
+ throw ex;
+ }
+
+ ham_assert(page->get_data());
+
+ /* store the page in the list */
+ m_state.cache.put(page);
+
+ /* write to disk (if necessary) */
+ if (!(flags & PageManager::kDisableStoreState)
+ && !(flags & PageManager::kReadOnly))
+ maybe_store_state(context, false);
+
+ if (flags & PageManager::kNoHeader)
+ page->set_without_header(true);
+
+ m_state.page_count_fetched++;
+ return (safely_lock_page(context, page, false));
+}
+
+Page *
+PageManager::alloc(Context *context, uint32_t page_type, uint32_t flags)
+{
+ uint64_t address = 0;
+ Page *page = 0;
+ uint32_t page_size = m_state.config.page_size_bytes;
+ bool allocated = false;
+
+ /* first check the internal list for a free page */
+ if ((flags & PageManager::kIgnoreFreelist) == 0
+ && !m_state.free_pages.empty()) {
+ PageManagerState::FreeMap::iterator it = m_state.free_pages.begin();
+
+ address = it->first;
+ ham_assert(address % page_size == 0);
+ /* remove the page from the freelist */
+ m_state.free_pages.erase(it);
+ m_state.needs_flush = true;
+
+ m_state.freelist_hits++;
+
+ /* try to fetch the page from the cache */
+ page = m_state.cache.get(address);
+ if (page)
+ goto done;
+ /* allocate a new page structure and read the page from disk */
+ page = new Page(m_state.device, context->db);
+ page->fetch(address);
+ goto done;
+ }
+
+ m_state.freelist_misses++;
+
+ try {
+ if (!page) {
+ allocated = true;
+ page = new Page(m_state.device, context->db);
+ }
+
+ page->alloc(page_type);
+ }
+ catch (Exception &ex) {
+ if (allocated)
+ delete page;
+ throw ex;
+ }
+
+done:
+ /* clear the page with zeroes? */
+ if (flags & PageManager::kClearWithZero)
+ memset(page->get_data(), 0, page_size);
+
+ /* initialize the page; also set the 'dirty' flag to force logging */
+ page->set_type(page_type);
+ page->set_dirty(true);
+ page->set_db(context->db);
+
+ if (page->get_node_proxy()) {
+ delete page->get_node_proxy();
+ page->set_node_proxy(0);
+ }
+
+ /* store the page in the cache and the Changeset */
+ m_state.cache.put(page);
+ safely_lock_page(context, page, false);
+
+ /* write to disk (if necessary) */
+ if (!(flags & PageManager::kDisableStoreState)
+ && !(flags & PageManager::kReadOnly))
+ maybe_store_state(context, false);
+
+ switch (page_type) {
+ case Page::kTypeBindex:
+ case Page::kTypeBroot: {
+ memset(page->get_payload(), 0, sizeof(PBtreeNode));
+ m_state.page_count_index++;
+ break;
+ }
+ case Page::kTypePageManager:
+ m_state.page_count_page_manager++;
+ break;
+ case Page::kTypeBlob:
+ m_state.page_count_blob++;
+ break;
+ default:
+ break;
+ }
+
+ return (page);
+}
+
+Page *
+PageManager::alloc_multiple_blob_pages(Context *context, size_t num_pages)
+{
+ // allocate only one page? then use the normal ::alloc() method
+ if (num_pages == 1)
+ return (alloc(context, Page::kTypeBlob, 0));
+
+ Page *page = 0;
+ uint32_t page_size = m_state.config.page_size_bytes;
+
+ // Now check the freelist
+ if (!m_state.free_pages.empty()) {
+ for (PageManagerState::FreeMap::iterator it = m_state.free_pages.begin();
+ it != m_state.free_pages.end();
+ it++) {
+ if (it->second >= num_pages) {
+ for (size_t i = 0; i < num_pages; i++) {
+ if (i == 0) {
+ page = fetch(context, it->first, 0);
+ page->set_type(Page::kTypeBlob);
+ page->set_without_header(false);
+ }
+ else {
+ Page *p = fetch(context, it->first + (i * page_size), 0);
+ p->set_type(Page::kTypeBlob);
+ p->set_without_header(true);
+ }
+ }
+ if (it->second > num_pages) {
+ m_state.free_pages[it->first + num_pages * page_size]
+ = it->second - num_pages;
+ }
+ m_state.free_pages.erase(it);
+ return (page);
+ }
+ }
+ }
+
+ // Freelist lookup was not successful -> allocate new pages. Only the first
+ // page is a regular page; all others do not have page headers.
+ //
+ // disable "store state": the PageManager otherwise could alloc overflow
+ // pages in the middle of our blob sequence.
+ uint32_t flags = PageManager::kIgnoreFreelist
+ | PageManager::kDisableStoreState;
+ for (size_t i = 0; i < num_pages; i++) {
+ if (page == 0)
+ page = alloc(context, Page::kTypeBlob, flags);
+ else {
+ Page *p = alloc(context, Page::kTypeBlob, flags);
+ p->set_without_header(true);
+ }
+ }
+
+ // now store the state
+ maybe_store_state(context, false);
+ return (page);
+}
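+
+// Example (illustrative): a request for a 5-page blob returns the first page,
+// which keeps its regular page header; the four pages that follow are marked
+// with set_without_header(true) so the blob data can span them contiguously.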
+
+void
+PageManager::fill_metrics(ham_env_metrics_t *metrics) const
+{
+ metrics->page_count_fetched = m_state.page_count_fetched;
+ metrics->page_count_flushed = Page::ms_page_count_flushed;
+ metrics->page_count_type_index = m_state.page_count_index;
+ metrics->page_count_type_blob = m_state.page_count_blob;
+ metrics->page_count_type_page_manager = m_state.page_count_page_manager;
+ metrics->freelist_hits = m_state.freelist_hits;
+ metrics->freelist_misses = m_state.freelist_misses;
+ m_state.cache.fill_metrics(metrics);
+}
+
+struct FlushAllPagesPurger
+{
+ FlushAllPagesPurger(bool delete_pages)
+ : delete_pages(delete_pages) {
+ }
+
+ bool operator()(Page *page) {
+ ScopedSpinlock lock(page->mutex());
+ page->flush();
+ return (delete_pages);
+ }
+
+ bool delete_pages;
+};
+
+void
+PageManager::flush(bool delete_pages)
+{
+ FlushAllPagesPurger purger(delete_pages);
+ m_state.cache.purge_if(purger);
+
+ if (m_state.state_page) {
+ ScopedSpinlock lock(m_state.state_page->mutex());
+ m_state.state_page->flush();
+ }
+}
+
+// Returns true if the page can be purged: page must use allocated
+// memory instead of an mmapped pointer; page must not be in use (= in
+// a changeset) and not have cursors attached
+struct PurgeProcessor
+{
+ PurgeProcessor(Page *last_blob_page, FlushPageMessage *message)
+ : last_blob_page(last_blob_page), message(message) {
+ }
+
+ bool operator()(Page *page) {
+ // the lock in here will be unlocked by the worker thread
+ if (page == last_blob_page || !page->mutex().try_lock())
+ return (false);
+ message->list.push_back(page);
+ return (true);
+ }
+
+ Page *last_blob_page;
+ FlushPageMessage *message;
+};
+
+void
+PageManager::purge_cache(Context *context)
+{
+ // do NOT purge the cache if any of the following is true:
+ // 1. this is an in-memory Environment
+ // 2. there's still a "purge cache" operation pending
+ // 3. the cache is not full
+ if (m_state.config.flags & HAM_IN_MEMORY
+ || m_state.purge_cache_pending
+ || !m_state.cache.is_cache_full())
+ return;
+
+ // Purge as many pages as possible to get memory usage down to the
+ // cache's limit.
+ FlushPageMessage *message = new FlushPageMessage();
+ PurgeProcessor processor(m_state.last_blob_page, message);
+ m_state.cache.purge(processor, m_state.last_blob_page);
+
+ if (message->list.size())
+ m_worker->add_to_queue(message);
+ else
+ delete message;
+}
+
+void
+PageManager::reclaim_space(Context *context)
+{
+ if (m_state.last_blob_page) {
+ m_state.last_blob_page_id = m_state.last_blob_page->get_address();
+ m_state.last_blob_page = 0;
+ }
+ ham_assert(!(m_state.config.flags & HAM_DISABLE_RECLAIM_INTERNAL));
+
+ bool do_truncate = false;
+ size_t file_size = m_state.device->file_size();
+ uint32_t page_size = m_state.config.page_size_bytes;
+
+ while (m_state.free_pages.size() > 1) {
+ PageManagerState::FreeMap::iterator fit =
+ m_state.free_pages.find(file_size - page_size);
+ if (fit != m_state.free_pages.end()) {
+ Page *page = m_state.cache.get(fit->first);
+ if (page) {
+ m_state.cache.del(page);
+ delete page;
+ }
+ file_size -= page_size;
+ do_truncate = true;
+ m_state.free_pages.erase(fit);
+ continue;
+ }
+ break;
+ }
+
+ if (do_truncate) {
+ m_state.needs_flush = true;
+ maybe_store_state(context, true);
+ m_state.device->truncate(file_size);
+ }
+}
+
+struct DbClosePurger
+{
+ DbClosePurger(LocalDatabase *db)
+ : m_db(db) {
+ }
+
+ bool operator()(Page *page) {
+ if (page->get_db() == m_db && page->get_address() != 0) {
+ ScopedSpinlock lock(page->mutex());
+ ham_assert(page->cursor_list() == 0);
+ page->flush();
+ return (true);
+ }
+ return (false);
+ }
+
+ LocalDatabase *m_db;
+};
+
+void
+PageManager::close_database(Context *context, LocalDatabase *db)
+{
+ if (m_state.last_blob_page) {
+ m_state.last_blob_page_id = m_state.last_blob_page->get_address();
+ m_state.last_blob_page = 0;
+ }
+
+ context->changeset.clear();
+
+ DbClosePurger purger(db);
+ m_state.cache.purge_if(purger);
+}
+
+void
+PageManager::del(Context *context, Page *page, size_t page_count)
+{
+ ham_assert(page_count > 0);
+
+ if (m_state.config.flags & HAM_IN_MEMORY)
+ return;
+
+ // remove all pages from the changeset, otherwise they won't be unlocked
+ context->changeset.del(page);
+ if (page_count > 1) {
+ uint32_t page_size = m_state.config.page_size_bytes;
+ for (size_t i = 1; i < page_count; i++) {
+ Page *p = m_state.cache.get(page->get_address() + i * page_size);
+ if (p && context->changeset.has(p))
+ context->changeset.del(p);
+ }
+ }
+
+ m_state.needs_flush = true;
+ m_state.free_pages[page->get_address()] = page_count;
+ ham_assert(page->get_address() % m_state.config.page_size_bytes == 0);
+
+ if (page->get_node_proxy()) {
+ delete page->get_node_proxy();
+ page->set_node_proxy(0);
+ }
+
+ // do not call maybe_store_state() - this change in the m_state is not
+ // relevant for logging.
+}
+
+void
+PageManager::reset(Context *context)
+{
+ close(context);
+
+ /* start the worker thread */
+ m_worker.reset(new PageManagerWorker(&m_state.cache));
+}
+
+void
+PageManager::close(Context *context)
+{
+ /* wait for the worker thread to stop */
+ if (m_worker.get())
+ m_worker->stop_and_join();
+
+ // store the state of the PageManager
+ if ((m_state.config.flags & HAM_IN_MEMORY) == 0
+ && (m_state.config.flags & HAM_READ_ONLY) == 0) {
+ maybe_store_state(context, true);
+ }
+
+ // reclaim unused disk space
+ // if logging is enabled: also flush the changeset to write back the
+ // modified freelist pages
+ bool try_reclaim = m_state.config.flags & HAM_DISABLE_RECLAIM_INTERNAL
+ ? false
+ : true;
+
+#ifdef WIN32
+ // Win32: it's not possible to truncate the file while there's an active
+ // mapping, therefore only reclaim if memory mapped I/O is disabled
+ if (!(m_state.config.flags & HAM_DISABLE_MMAP))
+ try_reclaim = false;
+#endif
+
+ if (try_reclaim) {
+ reclaim_space(context);
+ }
+
+ // clear the Changeset because flush() will delete all Page pointers
+ context->changeset.clear();
+
+ // flush all dirty pages to disk, then delete them
+ flush(true);
+
+ delete m_state.state_page;
+ m_state.state_page = 0;
+ m_state.last_blob_page = 0;
+}
+
+Page *
+PageManager::get_last_blob_page(Context *context)
+{
+ if (m_state.last_blob_page)
+ return (safely_lock_page(context, m_state.last_blob_page, true));
+ if (m_state.last_blob_page_id)
+ return (fetch(context, m_state.last_blob_page_id, 0));
+ return (0);
+}
+
+void
+PageManager::set_last_blob_page(Page *page)
+{
+ m_state.last_blob_page_id = 0;
+ m_state.last_blob_page = page;
+}
+
+uint64_t
+PageManager::store_state(Context *context)
+{
+ // no modifications? then simply return the old blobid
+ if (!m_state.needs_flush)
+ return (m_state.state_page ? m_state.state_page->get_address() : 0);
+
+ m_state.needs_flush = false;
+
+ // no freelist pages, no freelist state? then don't store anything
+ if (!m_state.state_page && m_state.free_pages.empty())
+ return (0);
+
+ // otherwise allocate a new page, if required
+ if (!m_state.state_page) {
+ m_state.state_page = new Page(m_state.device);
+ m_state.state_page->alloc(Page::kTypePageManager,
+ Page::kInitializeWithZeroes);
+ }
+
+ // don't bother locking the state page
+ context->changeset.put(m_state.state_page);
+
+ uint32_t page_size = m_state.config.page_size_bytes;
+
+ // make sure that the page is logged
+ Page *page = m_state.state_page;
+ page->set_dirty(true);
+
+ uint8_t *p = page->get_payload();
+
+ // store page-ID of the last allocated blob
+ *(uint64_t *)p = m_state.last_blob_page_id;
+ p += sizeof(uint64_t);
+
+ // reset the overflow pointer and the counter
+ // TODO here we lose a whole chain of overflow pointers if there was such
+ // a chain. We only save the first. That's not critical but also not nice.
+ uint64_t next_pageid = *(uint64_t *)p;
+ if (next_pageid) {
+ m_state.free_pages[next_pageid] = 1;
+ ham_assert(next_pageid % page_size == 0);
+ }
+
+ // No freelist entries? then we're done. Make sure that there's no
+ // overflow pointer or other garbage in the page!
+ if (m_state.free_pages.empty()) {
+ *(uint64_t *)p = 0;
+ p += sizeof(uint64_t);
+ *(uint32_t *)p = 0;
+ return (m_state.state_page->get_address());
+ }
+
+ PageManagerState::FreeMap::const_iterator it = m_state.free_pages.begin();
+ while (it != m_state.free_pages.end()) {
+ // this is where we will store the data
+ p = page->get_payload();
+ // skip m_state.last_blob_page_id?
+ if (page == m_state.state_page)
+ p += sizeof(uint64_t);
+ p += 8; // leave room for the pointer to the next page
+ p += 4; // leave room for the counter
+
+ uint32_t counter = 0;
+
+ while (it != m_state.free_pages.end()) {
+ // 9 bytes is the maximum amount of storage that we will need for a
+ // new entry; if it does not fit then break
+ if ((p + 9) - page->get_payload()
+ >= (ptrdiff_t)(m_state.config.page_size_bytes
+ - Page::kSizeofPersistentHeader))
+ break;
+
+ // ... and check if the next entry (and the following) are directly
+ // next to the current page
+ uint32_t page_counter = 1;
+ uint64_t base = it->first;
+ ham_assert(base % page_size == 0);
+ uint64_t current = it->first;
+
+ // move to the next entry
+ it++;
+
+ for (; it != m_state.free_pages.end() && page_counter < 16 - 1; it++) {
+ if (it->first != current + page_size)
+ break;
+ current += page_size;
+ page_counter++;
+ }
+
+ // now |base| is the start of a sequence of free pages, and the
+ // sequence has |page_counter| pages
+ //
+ // This is encoded as
+ // - 1 byte header
+ // - 4 bits for |page_counter|
+ // - 4 bits for the number of bytes following ("n")
+ // - n byte page-id (div page_size)
+ ham_assert(page_counter < 16);
+ int num_bytes = Pickle::encode_u64(p + 1, base / page_size);
+ *p = (page_counter << 4) | num_bytes;
+ p += 1 + num_bytes;
+
+ counter++;
+ }
+
+ p = page->get_payload();
+ if (page == m_state.state_page) // skip m_state.last_blob_page_id?
+ p += sizeof(uint64_t);
+ uint64_t next_pageid = *(uint64_t *)p;
+ *(uint64_t *)p = 0;
+ p += 8; // overflow page
+
+ // now store the counter
+ *(uint32_t *)p = counter;
+
+ // are we done? if not then continue with the next page
+ if (it != m_state.free_pages.end()) {
+ // allocate (or fetch) an overflow page
+ if (!next_pageid) {
+ Page *new_page = alloc(context, Page::kTypePageManager,
+ PageManager::kIgnoreFreelist);
+ // patch the overflow pointer in the old (current) page
+ p = page->get_payload();
+ if (page == m_state.state_page) // skip m_state.last_blob_page_id?
+ p += sizeof(uint64_t);
+ *(uint64_t *)p = new_page->get_address();
+
+ // reset the overflow pointer in the new page
+ page = new_page;
+ p = page->get_payload();
+ *(uint64_t *)p = 0;
+ }
+ else
+ page = fetch(context, next_pageid, 0);
+
+ // make sure that the page is logged
+ page->set_dirty(true);
+ }
+ }
+
+ return (m_state.state_page->get_address());
+}
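+
+// Worked example of the freelist encoding used above (assuming that
+// Pickle::encode_u64() emits the minimal number of bytes): a run of three
+// consecutive free pages whose first page has the address 16 * page_size is
+// stored in two bytes -
+//   header byte: (3 << 4) | 1    (page_counter = 3, one byte follows)
+//   value byte : 16              (page-id divided by page_size)
+// Longer runs are split into chunks of at most 15 pages because only four
+// bits are available for |page_counter|.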
+
+void
+PageManager::maybe_store_state(Context *context, bool force)
+{
+ if (force || (m_state.config.flags & HAM_ENABLE_RECOVERY)) {
+ uint64_t new_blobid = store_state(context);
+ if (new_blobid != m_state.header->get_page_manager_blobid()) {
+ m_state.header->set_page_manager_blobid(new_blobid);
+ // don't bother to lock the header page
+ m_state.header->get_header_page()->set_dirty(true);
+ context->changeset.put(m_state.header->get_header_page());
+ }
+ }
+}
+
+Page *
+PageManager::safely_lock_page(Context *context, Page *page,
+ bool allow_recursive_lock)
+{
+ context->changeset.put(page);
+
+ ham_assert(page->mutex().try_lock() == false);
+
+ // fetch contents again?
+ if (!page->get_data()) {
+ page->fetch(page->get_address());
+ }
+
+ return (page);
+}
+
+PageManagerTest
+PageManager::test()
+{
+ return (PageManagerTest(this));
+}
+
+PageManagerTest::PageManagerTest(PageManager *page_manager)
+ : m_sut(page_manager)
+{
+}
+
+uint64_t
+PageManagerTest::store_state()
+{
+ Context context(0, 0, 0);
+ return (m_sut->store_state(&context));
+}
+
+void
+PageManagerTest::remove_page(Page *page)
+{
+ m_sut->m_state.cache.del(page);
+}
+
+bool
+PageManagerTest::is_page_free(uint64_t pageid)
+{
+ return (m_sut->m_state.free_pages.find(pageid)
+ != m_sut->m_state.free_pages.end());
+}
+
+Page *
+PageManagerTest::fetch_page(uint64_t id)
+{
+ return (m_sut->m_state.cache.get(id));
+}
+
+void
+PageManagerTest::store_page(Page *page)
+{
+ m_sut->m_state.cache.put(page);
+}
+
+bool
+PageManagerTest::is_cache_full()
+{
+ return (m_sut->m_state.cache.is_cache_full());
+}
+
+PageManagerState *
+PageManagerTest::state()
+{
+ return (&m_sut->m_state);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager.h b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager.h
new file mode 100644
index 0000000000..a6593e39ae
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The PageManager allocates, fetches and frees pages. It manages the
+ * list of all pages (free and not free), and maps their virtual ID to
+ * their physical address in the file.
+ *
+ * @exception_safe: basic
+ * @thread_safe: no
+ */
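+
+/*
+ * A usage sketch (names taken from the declarations below; error handling
+ * omitted, not a prescriptive example):
+ *
+ *   Context context(env, txn, db);
+ *   Page *page = page_manager->alloc(&context, Page::kTypeBlob,
+ *                   PageManager::kClearWithZero);        // new, zeroed page
+ *   Page *other = page_manager->fetch(&context, address); // existing page
+ *   page_manager->del(&context, page);                    // back to freelist
+ */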
+
+#ifndef HAM_PAGE_MANAGER_H
+#define HAM_PAGE_MANAGER_H
+
+#include "0root/root.h"
+
+#include <map>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/scoped_ptr.h"
+#include "3page_manager/page_manager_state.h"
+#include "3page_manager/page_manager_test.h"
+#include "3page_manager/page_manager_worker.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+class LocalDatabase;
+class LocalEnvironment;
+
+class PageManager
+{
+ public:
+ enum {
+ // flag for alloc(): Clear the full page with zeroes
+ kClearWithZero = 1,
+
+ // flag for alloc(): Ignores the freelist
+ kIgnoreFreelist = 2,
+
+ // flag for alloc(): Do not persist the PageManager state to disk
+ kDisableStoreState = 4,
+
+ // Flag for fetch(): only fetches from cache, not from disk
+ kOnlyFromCache = 1,
+
+ // Flag for fetch(): does not add page to the Changeset
+ kReadOnly = 2,
+
+ // Flag for fetch(): page is part of a multi-page blob, has no header
+ kNoHeader = 4
+ };
+
+ // Constructor
+ PageManager(LocalEnvironment *env);
+
+ // Loads the state from a blob
+ void initialize(uint64_t blobid);
+
+ // Fills in the current metrics for the PageManager, the Cache and the
+ // Freelist
+ void fill_metrics(ham_env_metrics_t *metrics) const;
+
+ // Fetches a page from disk. |flags| are bitwise OR'd: kOnlyFromCache,
+ // kReadOnly, kNoHeader...
+ // The page is locked and stored in |context->changeset|.
+ Page *fetch(Context *context, uint64_t address, uint32_t flags = 0);
+
+ // Allocates a new page. |page_type| is one of Page::kType* in page.h.
+ // |flags| are either 0 or kClearWithZero
+ // The page is locked and stored in |context->changeset|.
+ Page *alloc(Context *context, uint32_t page_type, uint32_t flags = 0);
+
+ // Allocates multiple adjacent pages.
+ // Used by the BlobManager to store blobs that span multiple pages
+ // Returns the first page in the list of pages
+ // The pages are locked and stored in |context->changeset|.
+ Page *alloc_multiple_blob_pages(Context *context, size_t num_pages);
+
+ // Flushes all pages to disk and deletes them if |delete_pages| is true
+ void flush(bool delete_pages);
+
+ // Asks the worker thread to purge the cache if the cache limits are
+ // exceeded
+ void purge_cache(Context *context);
+
+ // Reclaim file space; truncates unused file space at the end of the file.
+ void reclaim_space(Context *context);
+
+ // Flushes and closes all pages of a database
+ void close_database(Context *context, LocalDatabase *db);
+
+ // Schedules one (or many sequential) pages for deletion and adds them
+ // to the Freelist. Will not do anything if the Environment is in-memory.
+ void del(Context *context, Page *page, size_t page_count = 1);
+
+ // Resets the PageManager; calls clear(), then starts a new worker thread
+ void reset(Context *context);
+
+ // Closes the PageManager; flushes all dirty pages
+ void close(Context *context);
+
+ // Returns the Page pointer where we can add more blobs
+ Page *get_last_blob_page(Context *context);
+
+ // Sets the Page pointer where we can add more blobs
+ void set_last_blob_page(Page *page);
+
+ // Returns additional testing interfaces
+ PageManagerTest test();
+
+ private:
+ friend struct Purger;
+ friend class PageManagerTest;
+ friend class PageManagerWorker;
+
+ // Persists the PageManager's state in the file
+ uint64_t store_state(Context *context);
+
+ // Calls store_state() whenever it makes sense
+ void maybe_store_state(Context *context, bool force);
+
+ // Locks a page, fetches contents from disk if they were flushed in
+ // the meantime
+ Page *safely_lock_page(Context *context, Page *page,
+ bool allow_recursive_lock);
+
+ // The worker thread which flushes dirty pages
+ ScopedPtr<PageManagerWorker> m_worker;
+
+ // The state
+ PageManagerState m_state;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_PAGE_MANAGER_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_state.h b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_state.h
new file mode 100644
index 0000000000..dc02b02b79
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_state.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The PageManager allocates, fetches and frees pages. It manages the
+ * list of all pages (free and not free), and maps their virtual ID to
+ * their physical address in the file.
+ *
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_PAGE_MANAGER_STATE_H
+#define HAM_PAGE_MANAGER_STATE_H
+
+#include "0root/root.h"
+
+#include <map>
+#include <boost/atomic.hpp>
+
+// Always verify that a file of level N does not include headers > N!
+#include "2config/env_config.h"
+#include "3cache/cache.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Device;
+class EnvironmentHeader;
+class LocalDatabase;
+class LocalEnvironment;
+class LsnManager;
+
+/*
+ * The internal state of the PageManager
+ */
+struct PageManagerState
+{
+ // The freelist maps page-id to number of free pages (usually 1)
+ typedef std::map<uint64_t, size_t> FreeMap;
+
+ PageManagerState(LocalEnvironment *env);
+
+ // Copy of the Environment's configuration
+ const EnvironmentConfiguration config;
+
+ // The Environment's header
+ EnvironmentHeader *header;
+
+ // The Device
+ Device *device;
+
+ // The lsn manager
+ LsnManager *lsn_manager;
+
+ // The cache
+ Cache cache;
+
+ // The map with free pages
+ FreeMap free_pages;
+
+ // Whether |free_pages| must be flushed or not
+ bool needs_flush;
+
+ // Whether a "purge cache" operation is pending
+ boost::atomic<bool> purge_cache_pending;
+
+ // Page with the persisted state data. If multiple pages are allocated
+ // then these pages form a linked list, with |state_page| being the head
+ Page *state_page;
+
+ // Cached page where to add more blobs
+ Page *last_blob_page;
+
+ // Page-id where to add more blobs - used if |last_blob_page| was flushed
+ uint64_t last_blob_page_id;
+
+ // tracks number of fetched pages
+ uint64_t page_count_fetched;
+
+ // tracks number of index pages
+ uint64_t page_count_index;
+
+ // tracks number of blob pages
+ uint64_t page_count_blob;
+
+ // tracks number of page manager pages
+ uint64_t page_count_page_manager;
+
+ // tracks number of cache hits
+ uint64_t cache_hits;
+
+ // tracks number of cache misses
+ uint64_t cache_misses;
+
+ // number of successful freelist hits
+ uint64_t freelist_hits;
+
+ // number of freelist misses
+ uint64_t freelist_misses;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_PAGE_MANAGER_STATE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_test.h b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_test.h
new file mode 100644
index 0000000000..741cbc8390
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_test.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A test gateway for the PageManager
+ *
+ * @exception_safe: no
+ * @thread_safe: no
+ */
+
+#ifndef HAM_PAGE_MANAGER_TEST_H
+#define HAM_PAGE_MANAGER_TEST_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "3page_manager/page_manager_state.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Page;
+class PageManager;
+
+class PageManagerTest
+{
+ public:
+ // Constructor
+ PageManagerTest(PageManager *page_manager);
+
+ // Stores the local PageManager state to disk; returns the blob id
+ uint64_t store_state();
+
+ // Removes a page from the list; only for testing.
+ void remove_page(Page *page);
+
+ // Returns true if a page is free. Ignores multi-pages; only for
+ // testing and integrity checks
+ bool is_page_free(uint64_t pageid);
+
+ // Fetches a page from the cache
+ Page *fetch_page(uint64_t id);
+
+ // Stores a page in the cache
+ void store_page(Page *page);
+
+ // Returns true if the cache is full
+ bool is_cache_full();
+
+ // Returns the state
+ PageManagerState *state();
+
+ private:
+ // Reference of the PageManager instance
+ PageManager *m_sut;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_PAGE_MANAGER_TEST_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_worker.h b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_worker.h
new file mode 100644
index 0000000000..2a66189765
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/3page_manager/page_manager_worker.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The worker thread for the PageManager
+ */
+
+#ifndef HAM_PAGE_MANAGER_WORKER_H
+#define HAM_PAGE_MANAGER_WORKER_H
+
+#include "0root/root.h"
+
+#include <vector>
+#include <boost/thread.hpp>
+#include <boost/atomic.hpp>
+
+// Always verify that a file of level N does not include headers > N!
+#include "2device/device.h"
+#include "2queue/queue.h"
+#include "2worker/worker.h"
+#include "3cache/cache.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct FlushPageMessage : public MessageBase
+{
+ // The available message types
+ enum {
+ kFlushPage = 1,
+ };
+
+ FlushPageMessage()
+ : MessageBase(kFlushPage, 0) {
+ }
+
+ std::vector<Page *> list;
+};
+
+
+class PageManagerWorker : public Worker
+{
+ public:
+ PageManagerWorker(Cache *cache)
+ : Worker(), m_cache(cache) {
+ }
+
+ private:
+ virtual void handle_message(MessageBase *message) {
+ switch (message->type) {
+ case FlushPageMessage::kFlushPage: {
+ FlushPageMessage *fpm = (FlushPageMessage *)message;
+ for (std::vector<Page *>::iterator it = fpm->list.begin();
+ it != fpm->list.end();
+ ++it) {
+ Page *page = *it;
+ ham_assert(page != 0);
+ ham_assert(page->mutex().try_lock() == false);
+ try {
+ page->flush();
+ }
+ catch (Exception &) {
+ page->mutex().unlock();
+ throw;
+ }
+ page->mutex().unlock();
+ }
+ break;
+ }
+ default:
+ ham_assert(!"shouldn't be here");
+ }
+ }
+
+ // The PageManager's cache
+ Cache *m_cache;
+};
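+
+// Note on the locking handshake implemented above: pages are locked by the
+// PurgeProcessor (see page_manager.cc) before they are queued in a
+// FlushPageMessage; handle_message() flushes each page and then releases the
+// lock, even if flush() throws.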
+
+} // namespace hamsterdb
+
+#endif // HAM_PAGE_MANAGER_WORKER_H
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4context/context.h b/plugins/Dbx_kv/src/hamsterdb/src/4context/context.h
new file mode 100644
index 0000000000..7a88aa211e
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4context/context.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_CONTEXT_H
+#define HAM_CONTEXT_H
+
+#include "0root/root.h"
+
+#include "3changeset/changeset.h"
+
+namespace hamsterdb {
+
+class Cursor;
+class LocalDatabase;
+class LocalEnvironment;
+class LocalTransaction;
+
+struct Context
+{
+ Context(LocalEnvironment *env, LocalTransaction *txn = 0,
+ LocalDatabase *db = 0)
+ : env(env), txn(txn), db(db), changeset(env) {
+ }
+
+ ~Context() {
+ changeset.clear();
+ }
+
+ LocalEnvironment *env;
+ LocalTransaction *txn;
+ LocalDatabase *db;
+
+ // Each operation has its own changeset which stores all locked pages
+ Changeset changeset;
+};
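+
+// Illustrative usage (hypothetical caller): operations create a Context on
+// the stack for the duration of a single call; all pages tracked in
+// |changeset| are released when the Context goes out of scope.
+//
+//   void example_operation(LocalEnvironment *env, LocalTransaction *txn,
+//                   LocalDatabase *db) {
+//     Context context(env, txn, db);
+//     // ... fetch/alloc pages; they are recorded in context.changeset
+//   } // changeset.clear() runs in ~Context()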
+
+} // namespace hamsterdb
+
+#endif /* HAM_CONTEXT_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4cursor/cursor.cc b/plugins/Dbx_kv/src/hamsterdb/src/4cursor/cursor.cc
new file mode 100644
index 0000000000..57cc80a6f6
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4cursor/cursor.cc
@@ -0,0 +1,1119 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "3btree/btree_cursor.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_node_proxy.h"
+#include "4cursor/cursor.h"
+#include "4env/env_local.h"
+#include "4txn/txn_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+Cursor::Cursor(LocalDatabase *db, Transaction *txn, uint32_t flags)
+ : m_db(db), m_txn(txn), m_txn_cursor(this), m_btree_cursor(this),
+ m_remote_handle(0), m_next(0), m_previous(0), m_dupecache_index(0),
+ m_lastop(0), m_last_cmp(0), m_flags(flags), m_is_first_use(true)
+{
+}
+
+Cursor::Cursor(Cursor &other)
+ : m_db(other.m_db), m_txn_cursor(this), m_btree_cursor(this)
+{
+ m_txn = other.m_txn;
+ m_remote_handle = other.m_remote_handle;
+ m_next = other.m_next;
+ m_previous = other.m_previous;
+ m_dupecache_index = other.m_dupecache_index;
+ m_lastop = other.m_lastop;
+ m_last_cmp = other.m_last_cmp;
+ m_flags = other.m_flags;
+ m_is_first_use = other.m_is_first_use;
+
+ m_btree_cursor.clone(&other.m_btree_cursor);
+ m_txn_cursor.clone(&other.m_txn_cursor);
+
+ if (m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS)
+ other.m_dupecache.clone(&m_dupecache);
+}
+
+void
+Cursor::append_btree_duplicates(Context *context, BtreeCursor *btc,
+ DupeCache *dc)
+{
+ uint32_t count = btc->get_record_count(context, 0);
+ for (uint32_t i = 0; i < count; i++)
+ dc->append(DupeCacheLine(true, i));
+}
+
+void
+Cursor::update_dupecache(Context *context, uint32_t what)
+{
+ if (!(m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS))
+ return;
+
+ /* if the cache already exists: no need to continue, it should be
+ * up to date */
+ if (m_dupecache.get_count() != 0)
+ return;
+
+ if ((what & kBtree) && (what & kTxn)) {
+ if (is_nil(kBtree) && !is_nil(kTxn)) {
+ bool equal_keys;
+ sync(context, 0, &equal_keys);
+ if (!equal_keys)
+ set_to_nil(kBtree);
+ }
+ }
+
+ /* first collect all duplicates from the btree. They're already sorted,
+ * therefore we can just append them to our duplicate-cache. */
+ if ((what & kBtree) && !is_nil(kBtree))
+ append_btree_duplicates(context, &m_btree_cursor, &m_dupecache);
+
+ /* read duplicates from the txn-cursor? */
+ if ((what & kTxn) && !is_nil(kTxn)) {
+ TransactionOperation *op = m_txn_cursor.get_coupled_op();
+ TransactionNode *node = op->get_node();
+
+ if (!node)
+ return;
+
+ /* now start integrating the items from the transactions */
+ op = node->get_oldest_op();
+ while (op) {
+ Transaction *optxn = op->get_txn();
+ /* collect all ops that are valid (even those that are
+ * from conflicting transactions) */
+ if (!optxn->is_aborted()) {
+ /* a normal (overwriting) insert will overwrite ALL dupes,
+ * but an overwrite of a duplicate will only overwrite
+ * an entry in the dupecache */
+ if (op->get_flags() & TransactionOperation::kInsert) {
+ /* all existing dupes are overwritten */
+ m_dupecache.clear();
+ m_dupecache.append(DupeCacheLine(false, op));
+ }
+ else if (op->get_flags() & TransactionOperation::kInsertOverwrite) {
+ uint32_t ref = op->get_referenced_dupe();
+ if (ref) {
+ ham_assert(ref <= m_dupecache.get_count());
+ DupeCacheLine *e = m_dupecache.get_first_element();
+ (&e[ref - 1])->set_txn_op(op);
+ }
+ else {
+ /* all existing dupes are overwritten */
+ m_dupecache.clear();
+ m_dupecache.append(DupeCacheLine(false, op));
+ }
+ }
+ /* insert a duplicate key */
+ else if (op->get_flags() & TransactionOperation::kInsertDuplicate) {
+ uint32_t of = op->get_orig_flags();
+ uint32_t ref = op->get_referenced_dupe() - 1;
+ DupeCacheLine dcl(false, op);
+ if (of & HAM_DUPLICATE_INSERT_FIRST)
+ m_dupecache.insert(0, dcl);
+ else if (of & HAM_DUPLICATE_INSERT_BEFORE) {
+ m_dupecache.insert(ref, dcl);
+ }
+ else if (of & HAM_DUPLICATE_INSERT_AFTER) {
+ if (ref + 1 >= m_dupecache.get_count())
+ m_dupecache.append(dcl);
+ else
+ m_dupecache.insert(ref + 1, dcl);
+ }
+ else /* default is HAM_DUPLICATE_INSERT_LAST */
+ m_dupecache.append(dcl);
+ }
+ /* a normal erase will erase ALL duplicate keys */
+ else if (op->get_flags() & TransactionOperation::kErase) {
+ uint32_t ref = op->get_referenced_dupe();
+ if (ref) {
+ ham_assert(ref <= m_dupecache.get_count());
+ m_dupecache.erase(ref - 1);
+ }
+ else {
+ /* all existing dupes are erased */
+ m_dupecache.clear();
+ }
+ }
+ else {
+ /* everything else is a bug! */
+ ham_assert(op->get_flags() == TransactionOperation::kNop);
+ }
+ }
+
+ /* continue with the previous/older operation */
+ op = op->get_next_in_node();
+ }
+ }
+}
+
+void
+Cursor::couple_to_dupe(uint32_t dupe_id)
+{
+ DupeCacheLine *e = 0;
+
+ ham_assert(m_dupecache.get_count() >= dupe_id);
+ ham_assert(dupe_id >= 1);
+
+ /* dupe-id is a 1-based index! */
+ e = m_dupecache.get_element(dupe_id - 1);
+ if (e->use_btree()) {
+ couple_to_btree();
+ m_btree_cursor.set_duplicate_index((uint32_t)e->get_btree_dupe_idx());
+ }
+ else {
+ ham_assert(e->get_txn_op() != 0);
+ m_txn_cursor.couple_to_op(e->get_txn_op());
+ couple_to_txnop();
+ }
+ set_dupecache_index(dupe_id);
+}
+
+ham_status_t
+Cursor::check_if_btree_key_is_erased_or_overwritten(Context *context)
+{
+ ham_key_t key = {0};
+ TransactionOperation *op;
+ // TODO not threadsafe - will leak if an exception is thrown
+ Cursor *clone = get_db()->cursor_clone_impl(this);
+
+ ham_status_t st = m_btree_cursor.move(context, &key,
+ &get_db()->key_arena(get_txn()), 0, 0, 0);
+  if (st) {
+    get_db()->cursor_close_impl(clone);
+    delete clone;
+    return (st);
+  }
+
+ st = clone->m_txn_cursor.find(&key, 0);
+ if (st) {
+ get_db()->cursor_close_impl(clone);
+ delete clone;
+ return (st);
+ }
+
+ op = clone->m_txn_cursor.get_coupled_op();
+ if (op->get_flags() & TransactionOperation::kInsertDuplicate)
+ st = HAM_KEY_NOT_FOUND;
+ get_db()->cursor_close_impl(clone);
+ delete clone;
+ return (st);
+}
+
+void
+Cursor::sync(Context *context, uint32_t flags, bool *equal_keys)
+{
+ if (equal_keys)
+ *equal_keys = false;
+
+ if (is_nil(kBtree)) {
+ if (!m_txn_cursor.get_coupled_op())
+ return;
+ ham_key_t *key = m_txn_cursor.get_coupled_op()->get_node()->get_key();
+
+ if (!(flags & kSyncOnlyEqualKeys))
+ flags = flags | ((flags & HAM_CURSOR_NEXT)
+ ? HAM_FIND_GEQ_MATCH
+ : HAM_FIND_LEQ_MATCH);
+ /* the flag |kSyncDontLoadKey| does not load the key if there's an
+ * approx match - it only positions the cursor */
+ ham_status_t st = m_btree_cursor.find(context, key, 0, 0, 0,
+ kSyncDontLoadKey | flags);
+ /* if we had a direct hit instead of an approx. match then
+     * set |equal_keys| to true; otherwise Cursor::move()
+ * will move the btree cursor again */
+ if (st == 0 && equal_keys && !ham_key_get_approximate_match_type(key))
+ *equal_keys = true;
+ }
+ else if (is_nil(kTxn)) {
+ // TODO not threadsafe - will leak if an exception is thrown
+ Cursor *clone = get_db()->cursor_clone_impl(this);
+ clone->m_btree_cursor.uncouple_from_page(context);
+ ham_key_t *key = clone->m_btree_cursor.get_uncoupled_key();
+ if (!(flags & kSyncOnlyEqualKeys))
+ flags = flags | ((flags & HAM_CURSOR_NEXT)
+ ? HAM_FIND_GEQ_MATCH
+ : HAM_FIND_LEQ_MATCH);
+
+ ham_status_t st = m_txn_cursor.find(key, kSyncDontLoadKey | flags);
+ /* if we had a direct hit instead of an approx. match then
+     * set |equal_keys| to true; otherwise Cursor::move()
+ * will move the btree cursor again */
+ if (st == 0 && equal_keys && !ham_key_get_approximate_match_type(key))
+ *equal_keys = true;
+ get_db()->cursor_close_impl(clone);
+ delete clone;
+ }
+}
+
+ham_status_t
+Cursor::move_next_dupe(Context *context)
+{
+ if (get_dupecache_index()) {
+ if (get_dupecache_index() < m_dupecache.get_count()) {
+ set_dupecache_index(get_dupecache_index() + 1);
+ couple_to_dupe(get_dupecache_index());
+ return (0);
+ }
+ }
+ return (HAM_LIMITS_REACHED);
+}
+
+ham_status_t
+Cursor::move_previous_dupe(Context *context)
+{
+ if (get_dupecache_index()) {
+ if (get_dupecache_index() > 1) {
+ set_dupecache_index(get_dupecache_index() - 1);
+ couple_to_dupe(get_dupecache_index());
+ return (0);
+ }
+ }
+ return (HAM_LIMITS_REACHED);
+}
+
+ham_status_t
+Cursor::move_first_dupe(Context *context)
+{
+ if (m_dupecache.get_count()) {
+ set_dupecache_index(1);
+ couple_to_dupe(get_dupecache_index());
+ return (0);
+ }
+ return (HAM_LIMITS_REACHED);
+}
+
+ham_status_t
+Cursor::move_last_dupe(Context *context)
+{
+ if (m_dupecache.get_count()) {
+ set_dupecache_index(m_dupecache.get_count());
+ couple_to_dupe(get_dupecache_index());
+ return (0);
+ }
+ return (HAM_LIMITS_REACHED);
+}
+
+static bool
+__txn_cursor_is_erase(TransactionCursor *txnc)
+{
+ TransactionOperation *op = txnc->get_coupled_op();
+ return (op
+ ? (op->get_flags() & TransactionOperation::kErase) != 0
+ : false);
+}
+
+int
+Cursor::compare(Context *context)
+{
+ BtreeCursor *btrc = get_btree_cursor();
+ BtreeIndex *btree = get_db()->btree_index();
+
+ TransactionNode *node = m_txn_cursor.get_coupled_op()->get_node();
+ ham_key_t *txnk = node->get_key();
+
+ ham_assert(!is_nil(0));
+ ham_assert(!m_txn_cursor.is_nil());
+
+ if (btrc->get_state() == BtreeCursor::kStateCoupled) {
+ Page *page;
+ int slot;
+ btrc->get_coupled_key(&page, &slot, 0);
+ m_last_cmp = btree->get_node_from_page(page)->compare(context, txnk, slot);
+
+ // need to fix the sort order - we compare txnk vs page[slot], but the
+ // caller expects m_last_cmp to be the comparison of page[slot] vs txnk
+ if (m_last_cmp < 0)
+ m_last_cmp = +1;
+ else if (m_last_cmp > 0)
+ m_last_cmp = -1;
+
+ return (m_last_cmp);
+ }
+ else if (btrc->get_state() == BtreeCursor::kStateUncoupled) {
+ m_last_cmp = btree->compare_keys(btrc->get_uncoupled_key(), txnk);
+ return (m_last_cmp);
+ }
+
+ ham_assert(!"shouldn't be here");
+ return (0);
+}
+
+ham_status_t
+Cursor::move_next_key_singlestep(Context *context)
+{
+ ham_status_t st = 0;
+ BtreeCursor *btrc = get_btree_cursor();
+
+ /* if both cursors point to the same key: move next with both */
+ if (m_last_cmp == 0) {
+ if (!is_nil(kBtree)) {
+ st = btrc->move(context, 0, 0, 0, 0,
+ HAM_CURSOR_NEXT | HAM_SKIP_DUPLICATES);
+ if (st == HAM_KEY_NOT_FOUND || st == HAM_CURSOR_IS_NIL) {
+        set_to_nil(kBtree); // TODO must be removed
+ if (m_txn_cursor.is_nil())
+ return (HAM_KEY_NOT_FOUND);
+ else {
+ couple_to_txnop();
+ m_last_cmp = 1;
+ }
+ }
+ }
+ if (!m_txn_cursor.is_nil()) {
+ st = m_txn_cursor.move(HAM_CURSOR_NEXT);
+ if (st == HAM_KEY_NOT_FOUND || st==HAM_CURSOR_IS_NIL) {
+        set_to_nil(kTxn); // TODO must be removed
+ if (is_nil(kBtree))
+ return (HAM_KEY_NOT_FOUND);
+ else {
+ couple_to_btree();
+ m_last_cmp = -1;
+
+ ham_status_t st2 = check_if_btree_key_is_erased_or_overwritten(context);
+ if (st2 == HAM_TXN_CONFLICT)
+ st = st2;
+ }
+ }
+ }
+ }
+ /* if the btree-key is smaller: move it next */
+ else if (m_last_cmp < 0) {
+ st = btrc->move(context, 0, 0, 0, 0, HAM_CURSOR_NEXT | HAM_SKIP_DUPLICATES);
+ if (st == HAM_KEY_NOT_FOUND) {
+      set_to_nil(kBtree); // TODO this must be removed!
+ if (m_txn_cursor.is_nil())
+ return (st);
+ couple_to_txnop();
+ m_last_cmp = +1;
+ }
+ else {
+ ham_status_t st2 = check_if_btree_key_is_erased_or_overwritten(context);
+ if (st2 == HAM_TXN_CONFLICT)
+ st = st2;
+ }
+ if (m_txn_cursor.is_nil())
+ m_last_cmp = -1;
+ }
+  /* otherwise the txn-key is smaller: move it next
+   * (the txn-key is chronologically newer) */
+ else {
+ st = m_txn_cursor.move(HAM_CURSOR_NEXT);
+ if (st == HAM_KEY_NOT_FOUND) {
+      set_to_nil(kTxn); // TODO this must be removed!
+ if (is_nil(kBtree))
+ return (st);
+ couple_to_btree();
+ m_last_cmp = -1;
+ }
+ if (is_nil(kBtree))
+ m_last_cmp = 1;
+ }
+
+ /* compare keys again */
+ if (!is_nil(kBtree) && !m_txn_cursor.is_nil())
+ compare(context);
+
+ /* if there's a txn conflict: move next */
+ if (st == HAM_TXN_CONFLICT)
+ return (move_next_key_singlestep(context));
+
+ /* btree-key is smaller */
+ if (m_last_cmp < 0 || m_txn_cursor.is_nil()) {
+ couple_to_btree();
+ update_dupecache(context, kBtree);
+ return (0);
+ }
+ /* txn-key is smaller */
+ else if (m_last_cmp > 0 || btrc->get_state() == BtreeCursor::kStateNil) {
+ couple_to_txnop();
+ update_dupecache(context, kTxn);
+ return (0);
+ }
+ /* both keys are equal */
+ else {
+ couple_to_txnop();
+ update_dupecache(context, kTxn | kBtree);
+ return (0);
+ }
+}
+
+ham_status_t
+Cursor::move_next_key(Context *context, uint32_t flags)
+{
+ ham_status_t st;
+
+ /* are we in the middle of a duplicate list? if yes then move to the
+ * next duplicate */
+ if (get_dupecache_index() > 0 && !(flags & HAM_SKIP_DUPLICATES)) {
+ st = move_next_dupe(context);
+ if (st != HAM_LIMITS_REACHED)
+ return (st);
+ else if (st == HAM_LIMITS_REACHED && (flags & HAM_ONLY_DUPLICATES))
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ clear_dupecache();
+
+ /* either there were no duplicates or we've reached the end of the
+   * duplicate list. move next until we find a new candidate */
+ while (1) {
+ st = move_next_key_singlestep(context);
+ if (st)
+ return (st);
+
+ /* check for duplicates. the dupecache was already updated in
+ * move_next_key_singlestep() */
+ if (m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS) {
+ /* are there any duplicates? if not then they were all erased and
+       * we move to the next key */
+ if (!has_duplicates())
+ continue;
+
+ /* otherwise move to the first duplicate */
+ return (move_first_dupe(context));
+ }
+
+ /* no duplicates - make sure that we've not coupled to an erased
+ * item */
+ if (is_coupled_to_txnop()) {
+ if (__txn_cursor_is_erase(&m_txn_cursor))
+ continue;
+ else
+ return (0);
+ }
+ if (is_coupled_to_btree()) {
+ st = check_if_btree_key_is_erased_or_overwritten(context);
+ if (st == HAM_KEY_ERASED_IN_TXN)
+ continue;
+ else if (st == 0) {
+ couple_to_txnop();
+ return (0);
+ }
+ else if (st == HAM_KEY_NOT_FOUND)
+ return (0);
+ else
+ return (st);
+ }
+ else
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ ham_assert(!"should never reach this");
+ return (HAM_INTERNAL_ERROR);
+}
+
+ham_status_t
+Cursor::move_previous_key_singlestep(Context *context)
+{
+ ham_status_t st = 0;
+ BtreeCursor *btrc = get_btree_cursor();
+
+ /* if both cursors point to the same key: move previous with both */
+ if (m_last_cmp == 0) {
+ if (!is_nil(kBtree)) {
+ st = btrc->move(context, 0, 0, 0, 0,
+ HAM_CURSOR_PREVIOUS | HAM_SKIP_DUPLICATES);
+ if (st == HAM_KEY_NOT_FOUND || st == HAM_CURSOR_IS_NIL) {
+        set_to_nil(kBtree); // TODO must be removed
+ if (m_txn_cursor.is_nil())
+ return (HAM_KEY_NOT_FOUND);
+ else {
+ couple_to_txnop();
+ m_last_cmp = -1;
+ }
+ }
+ }
+ if (!m_txn_cursor.is_nil()) {
+ st = m_txn_cursor.move(HAM_CURSOR_PREVIOUS);
+ if (st == HAM_KEY_NOT_FOUND || st==HAM_CURSOR_IS_NIL) {
+        set_to_nil(kTxn); // TODO must be removed
+ if (is_nil(kBtree))
+ return (HAM_KEY_NOT_FOUND);
+ else {
+ couple_to_btree();
+ m_last_cmp = 1;
+ }
+ }
+ }
+ }
+ /* if the btree-key is greater: move previous */
+ else if (m_last_cmp > 0) {
+ st = btrc->move(context, 0, 0, 0, 0,
+ HAM_CURSOR_PREVIOUS | HAM_SKIP_DUPLICATES);
+ if (st == HAM_KEY_NOT_FOUND) {
+      set_to_nil(kBtree); // TODO this must be removed!
+ if (m_txn_cursor.is_nil())
+ return (st);
+ couple_to_txnop();
+ m_last_cmp = -1;
+ }
+ else {
+ ham_status_t st2 = check_if_btree_key_is_erased_or_overwritten(context);
+ if (st2 == HAM_TXN_CONFLICT)
+ st = st2;
+ }
+ if (m_txn_cursor.is_nil())
+ m_last_cmp = 1;
+ }
+  /* otherwise the txn-key is greater: move it previous
+   * (the txn-key is chronologically newer) */
+ else {
+ st = m_txn_cursor.move(HAM_CURSOR_PREVIOUS);
+ if (st == HAM_KEY_NOT_FOUND) {
+      set_to_nil(kTxn); // TODO this must be removed!
+ if (is_nil(kBtree))
+ return (st);
+ couple_to_btree();
+ m_last_cmp = 1;
+
+ ham_status_t st2 = check_if_btree_key_is_erased_or_overwritten(context);
+ if (st2 == HAM_TXN_CONFLICT)
+ st = st2;
+ }
+ if (is_nil(kBtree))
+ m_last_cmp = -1;
+ }
+
+ /* compare keys again */
+ if (!is_nil(kBtree) && !m_txn_cursor.is_nil())
+ compare(context);
+
+ /* if there's a txn conflict: move previous */
+ if (st == HAM_TXN_CONFLICT)
+ return (move_previous_key_singlestep(context));
+
+ /* btree-key is greater */
+ if (m_last_cmp > 0 || m_txn_cursor.is_nil()) {
+ couple_to_btree();
+ update_dupecache(context, kBtree);
+ return (0);
+ }
+ /* txn-key is greater */
+ else if (m_last_cmp < 0 || btrc->get_state() == BtreeCursor::kStateNil) {
+ couple_to_txnop();
+ update_dupecache(context, kTxn);
+ return (0);
+ }
+ /* both keys are equal */
+ else {
+ couple_to_txnop();
+ update_dupecache(context, kTxn | kBtree);
+ return (0);
+ }
+}
+
+ham_status_t
+Cursor::move_previous_key(Context *context, uint32_t flags)
+{
+ ham_status_t st;
+
+ /* are we in the middle of a duplicate list? if yes then move to the
+ * previous duplicate */
+ if (get_dupecache_index() > 0 && !(flags & HAM_SKIP_DUPLICATES)) {
+ st = move_previous_dupe(context);
+ if (st != HAM_LIMITS_REACHED)
+ return (st);
+ else if (st == HAM_LIMITS_REACHED && (flags & HAM_ONLY_DUPLICATES))
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ clear_dupecache();
+
+ /* either there were no duplicates or we've reached the end of the
+   * duplicate list. move previous until we find a new candidate */
+ while (!is_nil(kBtree) || !m_txn_cursor.is_nil()) {
+ st = move_previous_key_singlestep(context);
+ if (st)
+ return (st);
+
+ /* check for duplicates. the dupecache was already updated in
+ * move_previous_key_singlestep() */
+ if (m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS) {
+ /* are there any duplicates? if not then they were all erased and
+ * we move to the previous key */
+ if (!has_duplicates())
+ continue;
+
+ /* otherwise move to the last duplicate */
+ return (move_last_dupe(context));
+ }
+
+ /* no duplicates - make sure that we've not coupled to an erased
+ * item */
+ if (is_coupled_to_txnop()) {
+ if (__txn_cursor_is_erase(&m_txn_cursor))
+ continue;
+ else
+ return (0);
+ }
+ if (is_coupled_to_btree()) {
+ st = check_if_btree_key_is_erased_or_overwritten(context);
+ if (st == HAM_KEY_ERASED_IN_TXN)
+ continue;
+ else if (st == 0) {
+ couple_to_txnop();
+ return (0);
+ }
+ else if (st == HAM_KEY_NOT_FOUND)
+ return (0);
+ else
+ return (st);
+ }
+ else
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ return (HAM_KEY_NOT_FOUND);
+}
+
+ham_status_t
+Cursor::move_first_key_singlestep(Context *context)
+{
+ ham_status_t btrs, txns;
+ BtreeCursor *btrc = get_btree_cursor();
+
+ /* fetch the smallest key from the transaction tree. */
+ txns = m_txn_cursor.move(HAM_CURSOR_FIRST);
+  /* fetch the smallest key from the btree. */
+ btrs = btrc->move(context, 0, 0, 0, 0,
+ HAM_CURSOR_FIRST | HAM_SKIP_DUPLICATES);
+ /* now consolidate - if both trees are empty then return */
+ if (btrs == HAM_KEY_NOT_FOUND && txns == HAM_KEY_NOT_FOUND) {
+ return (HAM_KEY_NOT_FOUND);
+ }
+ /* if btree is empty but txn-tree is not: couple to txn */
+ else if (btrs == HAM_KEY_NOT_FOUND && txns != HAM_KEY_NOT_FOUND) {
+ if (txns == HAM_TXN_CONFLICT)
+ return (txns);
+ couple_to_txnop();
+ update_dupecache(context, kTxn);
+ return (0);
+ }
+ /* if txn-tree is empty but btree is not: couple to btree */
+ else if (txns == HAM_KEY_NOT_FOUND && btrs != HAM_KEY_NOT_FOUND) {
+ couple_to_btree();
+ update_dupecache(context, kBtree);
+ return (0);
+ }
+ /* if both trees are not empty then compare them and couple to the
+ * smaller one */
+ else {
+ ham_assert(btrs == 0 && (txns == 0
+ || txns == HAM_KEY_ERASED_IN_TXN
+ || txns == HAM_TXN_CONFLICT));
+ compare(context);
+
+ /* both keys are equal - couple to txn; it's chronologically
+ * newer */
+ if (m_last_cmp == 0) {
+ if (txns && txns != HAM_KEY_ERASED_IN_TXN)
+ return (txns);
+ couple_to_txnop();
+ update_dupecache(context, kBtree | kTxn);
+ }
+ /* couple to txn */
+ else if (m_last_cmp > 0) {
+ if (txns && txns != HAM_KEY_ERASED_IN_TXN)
+ return (txns);
+ couple_to_txnop();
+ update_dupecache(context, kTxn);
+ }
+ /* couple to btree */
+ else {
+ couple_to_btree();
+ update_dupecache(context, kBtree);
+ }
+ return (0);
+ }
+}
+
+ham_status_t
+Cursor::move_first_key(Context *context, uint32_t flags)
+{
+ ham_status_t st = 0;
+
+ /* move to the very very first key */
+ st = move_first_key_singlestep(context);
+ if (st)
+ return (st);
+
+ /* check for duplicates. the dupecache was already updated in
+ * move_first_key_singlestep() */
+ if (m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS) {
+ /* are there any duplicates? if not then they were all erased and we
+     * move to the next key */
+ if (!has_duplicates())
+ return (move_next_key(context, flags));
+
+ /* otherwise move to the first duplicate */
+ return (move_first_dupe(context));
+ }
+
+ /* no duplicates - make sure that we've not coupled to an erased
+ * item */
+ if (is_coupled_to_txnop()) {
+ if (__txn_cursor_is_erase(&m_txn_cursor))
+ return (move_next_key(context, flags));
+ else
+ return (0);
+ }
+ if (is_coupled_to_btree()) {
+ st = check_if_btree_key_is_erased_or_overwritten(context);
+ if (st == HAM_KEY_ERASED_IN_TXN)
+ return (move_next_key(context, flags));
+ else if (st == 0) {
+ couple_to_txnop();
+ return (0);
+ }
+ else if (st == HAM_KEY_NOT_FOUND)
+ return (0);
+ else
+ return (st);
+ }
+ else
+ return (HAM_KEY_NOT_FOUND);
+}
+
+ham_status_t
+Cursor::move_last_key_singlestep(Context *context)
+{
+ ham_status_t btrs, txns;
+ BtreeCursor *btrc = get_btree_cursor();
+
+ /* fetch the largest key from the transaction tree. */
+ txns = m_txn_cursor.move(HAM_CURSOR_LAST);
+  /* fetch the largest key from the btree. */
+ btrs = btrc->move(context, 0, 0, 0, 0, HAM_CURSOR_LAST | HAM_SKIP_DUPLICATES);
+ /* now consolidate - if both trees are empty then return */
+ if (btrs == HAM_KEY_NOT_FOUND && txns == HAM_KEY_NOT_FOUND) {
+ return (HAM_KEY_NOT_FOUND);
+ }
+ /* if btree is empty but txn-tree is not: couple to txn */
+ else if (btrs == HAM_KEY_NOT_FOUND && txns != HAM_KEY_NOT_FOUND) {
+ if (txns == HAM_TXN_CONFLICT)
+ return (txns);
+ couple_to_txnop();
+ update_dupecache(context, kTxn);
+ return (0);
+ }
+ /* if txn-tree is empty but btree is not: couple to btree */
+ else if (txns == HAM_KEY_NOT_FOUND && btrs != HAM_KEY_NOT_FOUND) {
+ couple_to_btree();
+ update_dupecache(context, kBtree);
+ return (0);
+ }
+ /* if both trees are not empty then compare them and couple to the
+ * greater one */
+ else {
+ ham_assert(btrs == 0 && (txns == 0
+ || txns == HAM_KEY_ERASED_IN_TXN
+ || txns == HAM_TXN_CONFLICT));
+ compare(context);
+
+ /* both keys are equal - couple to txn; it's chronologically
+ * newer */
+ if (m_last_cmp == 0) {
+ if (txns && txns != HAM_KEY_ERASED_IN_TXN)
+ return (txns);
+ couple_to_txnop();
+ update_dupecache(context, kBtree | kTxn);
+ }
+ /* couple to txn */
+ else if (m_last_cmp < 1) {
+ if (txns && txns != HAM_KEY_ERASED_IN_TXN)
+ return (txns);
+ couple_to_txnop();
+ update_dupecache(context, kTxn);
+ }
+ /* couple to btree */
+ else {
+ couple_to_btree();
+ update_dupecache(context, kBtree);
+ }
+ return (0);
+ }
+}
+
+ham_status_t
+Cursor::move_last_key(Context *context, uint32_t flags)
+{
+ ham_status_t st = 0;
+
+ /* move to the very very last key */
+ st = move_last_key_singlestep(context);
+ if (st)
+ return (st);
+
+ /* check for duplicates. the dupecache was already updated in
+ * move_last_key_singlestep() */
+ if (m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS) {
+ /* are there any duplicates? if not then they were all erased and we
+ * move to the previous key */
+ if (!has_duplicates())
+ return (move_previous_key(context, flags));
+
+ /* otherwise move to the last duplicate */
+ return (move_last_dupe(context));
+ }
+
+ /* no duplicates - make sure that we've not coupled to an erased
+ * item */
+ if (is_coupled_to_txnop()) {
+ if (__txn_cursor_is_erase(&m_txn_cursor))
+ return (move_previous_key(context, flags));
+ else
+ return (0);
+ }
+ if (is_coupled_to_btree()) {
+ st = check_if_btree_key_is_erased_or_overwritten(context);
+ if (st == HAM_KEY_ERASED_IN_TXN)
+ return (move_previous_key(context, flags));
+ else if (st == 0) {
+ couple_to_txnop();
+ return (0);
+ }
+ else if (st == HAM_KEY_NOT_FOUND)
+ return (0);
+ else
+ return (st);
+ }
+ else
+ return (HAM_KEY_NOT_FOUND);
+}
+
+ham_status_t
+Cursor::move(Context *context, ham_key_t *key, ham_record_t *record,
+ uint32_t flags)
+{
+ ham_status_t st = 0;
+ bool changed_dir = false;
+ BtreeCursor *btrc = get_btree_cursor();
+
+ /* no movement requested? directly retrieve key/record */
+ if (!flags)
+ goto retrieve_key_and_record;
+
+ /* synchronize the btree and transaction cursor if the last operation was
+ * not a move next/previous OR if the direction changed */
+ if ((m_lastop == HAM_CURSOR_PREVIOUS) && (flags & HAM_CURSOR_NEXT))
+ changed_dir = true;
+ else if ((m_lastop == HAM_CURSOR_NEXT) && (flags & HAM_CURSOR_PREVIOUS))
+ changed_dir = true;
+ if (((flags & HAM_CURSOR_NEXT) || (flags & HAM_CURSOR_PREVIOUS))
+ && (m_lastop == Cursor::kLookupOrInsert
+ || changed_dir)) {
+ if (is_coupled_to_txnop())
+ set_to_nil(kBtree);
+ else
+ set_to_nil(kTxn);
+ (void)sync(context, flags, 0);
+
+ if (!m_txn_cursor.is_nil() && !is_nil(kBtree))
+ compare(context);
+ }
+
+ /* we have either skipped duplicates or reached the end of the duplicate
+ * list. btree cursor and txn cursor are synced and as close to
+ * each other as possible. Move the cursor in the requested direction. */
+ if (flags & HAM_CURSOR_NEXT) {
+ st = move_next_key(context, flags);
+ }
+ else if (flags & HAM_CURSOR_PREVIOUS) {
+ st = move_previous_key(context, flags);
+ }
+ else if (flags & HAM_CURSOR_FIRST) {
+ clear_dupecache();
+ st = move_first_key(context, flags);
+ }
+ else {
+ ham_assert(flags & HAM_CURSOR_LAST);
+ clear_dupecache();
+ st = move_last_key(context, flags);
+ }
+
+ if (st)
+ return (st);
+
+retrieve_key_and_record:
+ /* retrieve key/record, if requested */
+ if (st == 0) {
+ if (is_coupled_to_txnop()) {
+#ifdef HAM_DEBUG
+ TransactionOperation *op = m_txn_cursor.get_coupled_op();
+ ham_assert(!(op->get_flags() & TransactionOperation::kErase));
+#endif
+ try {
+ if (key)
+ m_txn_cursor.copy_coupled_key(key);
+ if (record)
+ m_txn_cursor.copy_coupled_record(record);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+ }
+ else {
+ st = btrc->move(context, key, &get_db()->key_arena(get_txn()),
+ record, &get_db()->record_arena(get_txn()), 0);
+ }
+ }
+
+ return (st);
+}
+
+bool
+Cursor::is_nil(int what)
+{
+ switch (what) {
+ case kBtree:
+ return (m_btree_cursor.get_state() == BtreeCursor::kStateNil);
+ case kTxn:
+ return (m_txn_cursor.is_nil());
+ default:
+ ham_assert(what == 0);
+ return (m_btree_cursor.get_state() == BtreeCursor::kStateNil
+ && m_txn_cursor.is_nil());
+ }
+}
+
+void
+Cursor::set_to_nil(int what)
+{
+ switch (what) {
+ case kBtree:
+ m_btree_cursor.set_to_nil();
+ break;
+ case kTxn:
+ m_txn_cursor.set_to_nil();
+ couple_to_btree(); /* reset flag */
+ break;
+ default:
+ ham_assert(what == 0);
+ m_btree_cursor.set_to_nil();
+ m_txn_cursor.set_to_nil();
+ couple_to_btree(); /* reset flag */
+ m_is_first_use = true;
+ break;
+ }
+}
+
+uint32_t
+Cursor::get_record_count(Context *context, uint32_t flags)
+{
+ if (is_nil())
+ throw Exception(HAM_CURSOR_IS_NIL);
+
+ if (m_txn || is_coupled_to_txnop()) {
+ if (m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS) {
+ bool dummy;
+ sync(context, 0, &dummy);
+ update_dupecache(context, kTxn | kBtree);
+ return (m_dupecache.get_count());
+ }
+ else {
+ /* obviously the key exists, since the cursor is coupled */
+ return (1);
+ }
+ }
+
+ return (m_btree_cursor.get_record_count(context, flags));
+}
+
+uint64_t
+Cursor::get_record_size(Context *context)
+{
+ if (is_nil())
+    throw Exception(HAM_CURSOR_IS_NIL);
+
+ if (is_coupled_to_txnop())
+ return (m_txn_cursor.get_record_size());
+ else
+ return (m_btree_cursor.get_record_size(context));
+}
+
+uint32_t
+Cursor::get_duplicate_position()
+{
+ if (is_nil())
+ throw Exception(HAM_CURSOR_IS_NIL);
+
+ // use btree cursor?
+ if (m_txn_cursor.is_nil())
+ return (m_btree_cursor.get_duplicate_index());
+
+ // otherwise return the index in the duplicate cache
+ return (get_dupecache_index() - 1);
+}
+
+ham_status_t
+Cursor::overwrite(Context *context, Transaction *htxn,
+ ham_record_t *record, uint32_t flags)
+{
+ ham_status_t st = 0;
+ LocalTransaction *txn = dynamic_cast<LocalTransaction *>(htxn);
+ ham_assert(context->txn == txn);
+
+ /*
+ * if we're in transactional mode then just append an "insert/OW" operation
+ * to the txn-tree.
+ *
+ * if the txn_cursor is already coupled to a txn-op, then we can use
+ * txn_cursor_overwrite(). Otherwise we have to call db_insert_txn().
+ *
+ * If transactions are disabled then overwrite the item in the btree.
+ */
+ if (txn) {
+ if (m_txn_cursor.is_nil() && !(is_nil(0))) {
+ m_btree_cursor.uncouple_from_page(context);
+ st = m_db->insert_txn(context,
+ m_btree_cursor.get_uncoupled_key(),
+ record, flags | HAM_OVERWRITE, get_txn_cursor());
+ }
+ else {
+ // TODO also calls db->insert_txn()
+ st = m_txn_cursor.overwrite(context, txn, record);
+ }
+
+ if (st == 0)
+ couple_to_txnop();
+ }
+ else {
+ m_btree_cursor.overwrite(context, record, flags);
+ couple_to_btree();
+ }
+
+ return (st);
+}
+
+void
+Cursor::close()
+{
+ m_btree_cursor.close();
+ m_dupecache.clear();
+}
+
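To summarize update_dupecache() above, a hedged illustration of the merge order: the btree duplicates are appended first (they are already sorted), then the node's txn operations are replayed from oldest to newest on top of that list. The counter and the |op| pointer below are placeholders, not values from this patch.

  DupeCache dc;
  for (uint32_t i = 0; i < btree_duplicate_count; i++)   // placeholder count
    dc.append(DupeCacheLine(true, i));                    // btree duplicates
  // a plain (overwriting) kInsert op wipes the cache and leaves one txn entry:
  dc.clear();
  dc.append(DupeCacheLine(false, op));                    // |op| is a placeholder
  // kInsertDuplicate instead inserts at first/before/after/last depending on
  // the original HAM_DUPLICATE_INSERT_* flag, and kErase removes entries.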
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4cursor/cursor.h b/plugins/Dbx_kv/src/hamsterdb/src/4cursor/cursor.h
new file mode 100644
index 0000000000..0adf400ab3
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4cursor/cursor.h
@@ -0,0 +1,555 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A Cursor is an object which is used to traverse a Database.
+ *
+ * A Cursor structure is separated into 3 components:
+ * 1. The btree cursor
+ * This cursor can traverse btrees. It is described and implemented
+ * in btree_cursor.h.
+ * 2. The txn cursor
+ * This cursor can traverse txn-trees. It is described and implemented
+ * in txn_cursor.h.
+ * 3. The upper layer
+ * This layer acts as a kind of dispatcher for both cursors. If
+ * Transactions are used, then it also uses a duplicate cache for
+ * consolidating the duplicate keys from both cursors. This layer is
+ * described and implemented in cursor.h (this file).
+ *
+ * A Cursor can have several states. It can be
+ * 1. NIL (not in list) - this is the default state, meaning that the Cursor
+ * does not point to any key. If the Cursor was initialized, then it's
+ * "NIL". If the Cursor was erased (i.e. with ham_cursor_erase) then it's
+ * also "NIL".
+ *
+ * relevant functions:
+ * Cursor::is_nil
+ * Cursor::set_to_nil
+ *
+ * 2. Coupled to the txn-cursor - meaning that the Cursor points to a key
+ * that is modified in a Transaction. Technically, the txn-cursor points
+ * to a TransactionOperation structure.
+ *
+ * relevant functions:
+ * Cursor::is_coupled_to_txnop
+ * Cursor::couple_to_txnop
+ *
+ * 3. Coupled to the btree-cursor - meaning that the Cursor points to a key
+ * that is stored in a Btree. A Btree cursor itself can then be coupled
+ * (it directly points to a page in the cache) or uncoupled, meaning that
+ * the page was purged from the cache and has to be fetched from disk when
+ * the Cursor is used again. This is described in btree_cursor.h.
+ *
+ * relevant functions:
+ * Cursor::is_coupled_to_btree
+ * Cursor::couple_to_btree
+ *
+ * The dupecache is used when information from the btree and the txn-tree
+ * is merged. Each Cursor has its own private dupecache. The dupecache
+ * increases performance (and complexity).
+ *
+ * The cursor interface is used in db_local.cc. Many of the functions use
+ * a high-level cursor interface (e.g. @ref cursor_create, @ref cursor_clone)
+ * while some directly use the low-level interfaces of btree_cursor.h and
+ * txn_cursor.h. Over time I will clean this up, trying to maintain a clear
+ * separation of the 3 layers, and only accessing the top-level layer in
+ * cursor.h. This is work in progress.
+ *
+ * In order to speed up Cursor::move() we keep track of the last compare
+ * between the two cursors: if the btree cursor is currently pointing to
+ * a larger key than the txn-cursor, the 'lastcmp' field is > 0, etc.
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_CURSORS_H
+#define HAM_CURSORS_H
+
+#include "0root/root.h"
+
+#include <vector>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "4txn/txn_cursor.h"
+#include "3btree/btree_cursor.h"
+#include "3blob_manager/blob_manager.h"
+#include "4db/db_local.h"
+#include "4env/env.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+// A helper structure; ham_cursor_t is declared in ham/hamsterdb.h as an
+// opaque C structure, but internally we use a C++ class. The ham_cursor_t
+// struct satisfies the C compiler, and internally we just cast the pointers.
+struct ham_cursor_t
+{
+ bool _dummy;
+};
+
+namespace hamsterdb {
+
+struct Context;
+
+// A single line in the dupecache structure - can reference a btree
+// record or a txn-op
+class DupeCacheLine
+{
+ public:
+ DupeCacheLine(bool use_btree = true, uint64_t btree_dupeidx = 0)
+ : m_btree_dupeidx(btree_dupeidx), m_op(0), m_use_btree(use_btree) {
+ ham_assert(use_btree == true);
+ }
+
+ DupeCacheLine(bool use_btree, TransactionOperation *op)
+ : m_btree_dupeidx(0), m_op(op), m_use_btree(use_btree) {
+ ham_assert(use_btree == false);
+ }
+
+ // Returns true if this cache entry is a duplicate in the btree index
+ // (otherwise it's a duplicate in the transaction index)
+ bool use_btree() const {
+ return (m_use_btree);
+ }
+
+ // Returns the btree duplicate index
+ uint64_t get_btree_dupe_idx() {
+ ham_assert(m_use_btree == true);
+ return (m_btree_dupeidx);
+ }
+
+ // Sets the btree duplicate index
+ void set_btree_dupe_idx(uint64_t idx) {
+ m_use_btree = true;
+ m_btree_dupeidx = idx;
+ m_op = 0;
+ }
+
+ // Returns the txn-op duplicate
+ TransactionOperation *get_txn_op() {
+ ham_assert(m_use_btree == false);
+ return (m_op);
+ }
+
+ // Sets the txn-op duplicate
+ void set_txn_op(TransactionOperation *op) {
+ m_use_btree = false;
+ m_op = op;
+ m_btree_dupeidx = 0;
+ }
+
+ private:
+ // The btree duplicate index (of the original btree dupe table)
+ uint64_t m_btree_dupeidx;
+
+ // The txn op structure that we refer to
+ TransactionOperation *m_op;
+
+ // using btree or txn duplicates?
+ bool m_use_btree;
+};
+
+//
+// The dupecache is a cache for duplicate keys
+//
+class DupeCache {
+ public:
+ // default constructor - creates an empty dupecache with room for 8
+ // duplicates
+ DupeCache() {
+ m_elements.reserve(8);
+ }
+
+ // Returns the number of elements in the cache
+ uint32_t get_count() const {
+ return ((uint32_t)m_elements.size());
+ }
+
+ // Returns an element from the cache
+ DupeCacheLine *get_element(unsigned idx) {
+ return (&m_elements[idx]);
+ }
+
+ // Returns a pointer to the first element from the cache
+ DupeCacheLine *get_first_element() {
+ return (&m_elements[0]);
+ }
+
+ // Clones this dupe-cache into 'other'
+ void clone(DupeCache *other) {
+ other->m_elements = m_elements;
+ }
+
+ // Inserts a new item somewhere in the cache; resizes the
+ // cache if necessary
+ void insert(unsigned position, const DupeCacheLine &dcl) {
+ m_elements.insert(m_elements.begin() + position, dcl);
+ }
+
+ // Append an element to the dupecache
+ void append(const DupeCacheLine &dcl) {
+ m_elements.push_back(dcl);
+ }
+
+ // Erases an item
+ void erase(uint32_t position) {
+ m_elements.erase(m_elements.begin() + position);
+ }
+
+ // Clears the cache; frees all resources
+ void clear() {
+ m_elements.resize(0);
+ }
+
+ private:
+ // The cached elements
+ std::vector<DupeCacheLine> m_elements;
+};
+
+
+//
+// the Database Cursor
+//
+class Cursor
+{
+ public:
+ // The flags have ranges:
+ // 0 - 0x1000000-1: btree_cursor
+ // > 0x1000000: cursor
+ enum {
+ // Flags for set_to_nil, is_nil
+ kBoth = 0,
+ kBtree = 1,
+ kTxn = 2,
+
+ // Flag for sync(): do not use approx matching if the key
+ // is not available
+ kSyncOnlyEqualKeys = 0x200000,
+
+ // Flag for sync(): do not load the key if there's an approx.
+ // match. Only positions the cursor.
+ kSyncDontLoadKey = 0x100000,
+
+ // Cursor flag: cursor is coupled to the txn-cursor
+ kCoupledToTxn = 0x1000000,
+
+ // Flag for set_lastop()
+ kLookupOrInsert = 0x10000
+ };
+
+ public:
+ // Constructor; retrieves pointer to db and txn, initializes all members
+ Cursor(LocalDatabase *db, Transaction *txn = 0, uint32_t flags = 0);
+
+ // Copy constructor; used for cloning a Cursor
+ Cursor(Cursor &other);
+
+ // Destructor; sets cursor to nil
+ ~Cursor() {
+ set_to_nil();
+ }
+
+ // Returns the Database
+ LocalDatabase *get_db() {
+ return (m_db);
+ }
+
+ // Returns the Transaction handle
+ Transaction *get_txn() {
+ return (m_txn);
+ }
+
+ // Sets the Transaction handle; often used to assign a temporary
+ // Transaction to this cursor
+ void set_txn(Transaction *txn) {
+ m_txn = txn;
+ }
+
+ // Sets the cursor to nil
+ void set_to_nil(int what = kBoth);
+
+ // Returns true if a cursor is nil (Not In List - does not point to any
+ // key)
+ // |what| is one of the flags kBoth, kTxn, kBtree
+ bool is_nil(int what = kBoth);
+
+ // Couples the cursor to the btree key
+ void couple_to_btree() {
+ m_flags &= ~kCoupledToTxn;
+ }
+
+ // Returns true if a cursor is coupled to the btree
+ bool is_coupled_to_btree() const {
+ return (!(m_flags & kCoupledToTxn));
+ }
+
+ // Couples the cursor to the txn-op
+ void couple_to_txnop() {
+ m_flags |= kCoupledToTxn;
+ }
+
+ // Returns true if a cursor is coupled to a txn-op
+ bool is_coupled_to_txnop() const {
+ return ((m_flags & kCoupledToTxn) ? true : false);
+ }
+
+ // Retrieves the number of duplicates of the current key
+ uint32_t get_record_count(Context *context, uint32_t flags);
+
+ // Retrieves the duplicate position of a cursor
+ uint32_t get_duplicate_position();
+
+ // Retrieves the size of the current record
+ uint64_t get_record_size(Context *context);
+
+ // Overwrites the record of the current key
+ //
+ // The Transaction is passed as a separate pointer since it might be a
+ // local/temporary Transaction that was created only for this single
+ // operation.
+ ham_status_t overwrite(Context *context, Transaction *txn,
+ ham_record_t *record, uint32_t flags);
+
+ // Moves a Cursor (ham_cursor_move)
+ ham_status_t move(Context *context, ham_key_t *key, ham_record_t *record,
+ uint32_t flags);
+
+ // Closes an existing cursor (ham_cursor_close)
+ void close();
+
+ // Updates (or builds) the dupecache for a cursor
+ //
+ // The |what| parameter specifies if the dupecache is initialized from
+ // btree (kBtree), from txn (kTxn) or both.
+ void update_dupecache(Context *context, uint32_t what);
+
+ // Appends the duplicates of the BtreeCursor to the duplicate cache.
+ void append_btree_duplicates(Context *context, BtreeCursor *btc,
+ DupeCache *dc);
+
+    // Clears the dupecache and disconnects the Cursor from any duplicate key
+ void clear_dupecache() {
+ m_dupecache.clear();
+ set_dupecache_index(0);
+ }
+
+ // Couples the cursor to a duplicate in the dupe table
+    // |dupe_id| is a 1-based index!
+ void couple_to_dupe(uint32_t dupe_id);
+
+ // Synchronizes txn- and btree-cursor
+ //
+ // If txn-cursor is nil then try to move the txn-cursor to the same key
+ // as the btree cursor.
+ // If btree-cursor is nil then try to move the btree-cursor to the same key
+ // as the txn cursor.
+ // If both are nil, or both are valid, then nothing happens
+ //
+    // |equal_keys| is set to true if the keys in both cursors are equal.
+ void sync(Context *context, uint32_t flags, bool *equal_keys);
+
+ // Returns the number of duplicates in the duplicate cache
+ // The duplicate cache is updated if necessary
+ uint32_t get_dupecache_count(Context *context) {
+ if (!(m_db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS))
+ return (0);
+
+ TransactionCursor *txnc = get_txn_cursor();
+ if (txnc->get_coupled_op())
+ update_dupecache(context, kBtree | kTxn);
+ else
+ update_dupecache(context, kBtree);
+ return (m_dupecache.get_count());
+ }
+
+ // Get the 'next' Cursor in this Database
+ Cursor *get_next() {
+ return (m_next);
+ }
+
+ // Set the 'next' Cursor in this Database
+ void set_next(Cursor *next) {
+ m_next = next;
+ }
+
+ // Get the 'previous' Cursor in this Database
+ Cursor *get_previous() {
+ return (m_previous);
+ }
+
+ // Set the 'previous' Cursor in this Database
+ void set_previous(Cursor *previous) {
+ m_previous = previous;
+ }
+
+ // Returns the Transaction cursor
+ // TODO required?
+ TransactionCursor *get_txn_cursor() {
+ return (&m_txn_cursor);
+ }
+
+ // Returns the Btree cursor
+ // TODO required?
+ BtreeCursor *get_btree_cursor() {
+ return (&m_btree_cursor);
+ }
+
+ // Returns the remote Cursor handle
+ uint64_t get_remote_handle() {
+ return (m_remote_handle);
+ }
+
+ // Returns the remote Cursor handle
+ void set_remote_handle(uint64_t handle) {
+ m_remote_handle = handle;
+ }
+
+ // Returns a pointer to the duplicate cache
+ // TODO really required?
+ DupeCache *get_dupecache() {
+ return (&m_dupecache);
+ }
+
+ // Returns a pointer to the duplicate cache
+ // TODO really required?
+ const DupeCache *get_dupecache() const {
+ return (&m_dupecache);
+ }
+
+ // Returns the current index in the dupe cache
+ uint32_t get_dupecache_index() const {
+ return (m_dupecache_index);
+ }
+
+ // Sets the current index in the dupe cache
+ void set_dupecache_index(uint32_t index) {
+ m_dupecache_index = index;
+ }
+
+ // Returns true if this cursor was never used before
+ // TODO this is identical to is_nil()??
+ bool is_first_use() const {
+ return (m_is_first_use);
+ }
+
+ // Stores the current operation; needed for ham_cursor_move
+ // TODO should be private
+ void set_lastop(uint32_t lastop) {
+ m_lastop = lastop;
+ m_is_first_use = false;
+ }
+
+ private:
+ // Checks if a btree cursor points to a key that was overwritten or erased
+ // in the txn-cursor
+ //
+ // This is needed when moving the cursor backwards/forwards
+ // and consolidating the btree and the txn-tree
+ ham_status_t check_if_btree_key_is_erased_or_overwritten(Context *context);
+
+ // Compares btree and txn-cursor; stores result in lastcmp
+ int compare(Context *context);
+
+ // Returns true if this key has duplicates
+ bool has_duplicates() const {
+ return (m_dupecache.get_count() > 0);
+ }
+
+ // Moves cursor to the first duplicate
+ ham_status_t move_first_dupe(Context *context);
+
+ // Moves cursor to the last duplicate
+ ham_status_t move_last_dupe(Context *context);
+
+ // Moves cursor to the next duplicate
+ ham_status_t move_next_dupe(Context *context);
+
+ // Moves cursor to the previous duplicate
+ ham_status_t move_previous_dupe(Context *context);
+
+ // Moves cursor to the first key
+ ham_status_t move_first_key(Context *context, uint32_t flags);
+
+ // Moves cursor to the last key
+ ham_status_t move_last_key(Context *context, uint32_t flags);
+
+ // Moves cursor to the next key
+ ham_status_t move_next_key(Context *context, uint32_t flags);
+
+ // Moves cursor to the previous key
+ ham_status_t move_previous_key(Context *context, uint32_t flags);
+
+ // Moves cursor to the first key - helper function
+ ham_status_t move_first_key_singlestep(Context *context);
+
+ // Moves cursor to the last key - helper function
+ ham_status_t move_last_key_singlestep(Context *context);
+
+ // Moves cursor to the next key - helper function
+ ham_status_t move_next_key_singlestep(Context *context);
+
+ // Moves cursor to the previous key - helper function
+ ham_status_t move_previous_key_singlestep(Context *context);
+
+ // Pointer to the Database object
+ LocalDatabase *m_db;
+
+ // Pointer to the Transaction
+ Transaction *m_txn;
+
+ // A Cursor which can walk over Transaction trees
+ TransactionCursor m_txn_cursor;
+
+ // A Cursor which can walk over B+trees
+ BtreeCursor m_btree_cursor;
+
+ // The remote database handle
+ uint64_t m_remote_handle;
+
+ // Linked list of all Cursors in this Database
+ Cursor *m_next, *m_previous;
+
+    // A cache for all duplicates of the current key. Needed for
+ // ham_cursor_move, ham_find and other functions. The cache is
+ // used to consolidate all duplicates of btree and txn.
+ DupeCache m_dupecache;
+
+ /** The current position of the cursor in the cache. This is a
+ * 1-based index. 0 means that the cache is not in use. */
+ uint32_t m_dupecache_index;
+
+ // The last operation (insert/find or move); needed for
+ // ham_cursor_move. Values can be HAM_CURSOR_NEXT,
+    // HAM_CURSOR_PREVIOUS or kLookupOrInsert
+ uint32_t m_lastop;
+
+ // The result of the last compare operation
+ int m_last_cmp;
+
+ // Cursor flags
+ uint32_t m_flags;
+
+ // true if this cursor was never used
+ bool m_is_first_use;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_CURSORS_H */
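A short usage sketch of the coupling flags described in the header comment above; it only demonstrates the flag mechanics, and the setup of |db| is assumed, not part of this patch.

  Cursor cursor(db);                    // |db| is a LocalDatabase *, assumed
  cursor.couple_to_txnop();             // dispatch key/record access to the txn-cursor
  if (cursor.is_coupled_to_txnop()) {
    // ... read through m_txn_cursor ...
  }
  cursor.set_to_nil(Cursor::kTxn);      // the txn part becomes nil ...
  ham_assert(cursor.is_coupled_to_btree());  // ... and the flag falls back to btree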
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4db/db.cc b/plugins/Dbx_kv/src/hamsterdb/src/4db/db.cc
new file mode 100644
index 0000000000..7d6cd82929
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4db/db.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "4db/db.h"
+#include "4cursor/cursor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+Database::Database(Environment *env, DatabaseConfiguration &config)
+ : m_env(env), m_config(config), m_error(0), m_context(0), m_cursor_list(0)
+{
+}
+
+ham_status_t
+Database::cursor_create(Cursor **pcursor, Transaction *txn, uint32_t flags)
+{
+ try {
+ Cursor *cursor = cursor_create_impl(txn, flags);
+
+ /* fix the linked list of cursors */
+ cursor->set_next(m_cursor_list);
+ if (m_cursor_list)
+ m_cursor_list->set_previous(cursor);
+ m_cursor_list = cursor;
+
+ if (txn)
+ txn->increase_cursor_refcount();
+
+ *pcursor = cursor;
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Database::cursor_clone(Cursor **pdest, Cursor *src)
+{
+ try {
+ Cursor *dest = cursor_clone_impl(src);
+
+ // fix the linked list of cursors
+ dest->set_previous(0);
+ dest->set_next(m_cursor_list);
+ ham_assert(m_cursor_list != 0);
+ m_cursor_list->set_previous(dest);
+ m_cursor_list = dest;
+
+ // initialize the remaining fields
+ if (src->get_txn())
+ src->get_txn()->increase_cursor_refcount();
+
+ *pdest = dest;
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Database::cursor_close(Cursor *cursor)
+{
+ try {
+ Cursor *p, *n;
+
+ // first close the cursor
+ cursor_close_impl(cursor);
+
+ // decrease the transaction refcount; the refcount specifies how many
+ // cursors are attached to the transaction
+ if (cursor->get_txn())
+ cursor->get_txn()->decrease_cursor_refcount();
+
+ // fix the linked list of cursors
+ p = cursor->get_previous();
+ n = cursor->get_next();
+
+ if (p)
+ p->set_next(n);
+ else
+ m_cursor_list = n;
+
+ if (n)
+ n->set_previous(p);
+
+ cursor->set_next(0);
+ cursor->set_previous(0);
+
+ delete cursor;
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+// No need to catch Exceptions - they're caught in Environment::close_db
+ham_status_t
+Database::close(uint32_t flags)
+{
+ // auto-cleanup cursors?
+ if (flags & HAM_AUTO_CLEANUP) {
+ Cursor *cursor;
+ while ((cursor = m_cursor_list))
+ cursor_close(cursor);
+ }
+ else if (m_cursor_list) {
+ ham_trace(("cannot close Database if Cursors are still open"));
+ return (set_error(HAM_CURSOR_STILL_OPEN));
+ }
+
+ // the derived classes can now do the bulk of the work
+ ham_status_t st = close_impl(flags);
+ if (st)
+ return (set_error(st));
+
+ m_env = 0;
+ return (0);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4db/db.h b/plugins/Dbx_kv/src/hamsterdb/src/4db/db.h
new file mode 100644
index 0000000000..0290cc86b0
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4db/db.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: nothrow
+ * @thread_safe: no
+ */
+
+#ifndef HAM_DB_H
+#define HAM_DB_H
+
+#include "0root/root.h"
+
+#include "ham/hamsterdb_int.h"
+#include "ham/hamsterdb_ola.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/dynamic_array.h"
+#include "2config/db_config.h"
+#include "4env/env.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+// A helper structure; ham_db_t is declared in ham/hamsterdb.h as an
+// opaque C structure, but internally we use a C++ class. The ham_db_t
+// struct satisfies the C compiler, and internally we just cast the pointers.
+struct ham_db_t {
+ int dummy;
+};
+
+namespace hamsterdb {
+
+class Cursor;
+struct ScanVisitor;
+
+/*
+ * An abstract base class for a Database; it is subclassed by the local and
+ * remote implementations
+ */
+class Database
+{
+ public:
+ // Constructor
+ Database(Environment *env, DatabaseConfiguration &config);
+
+ virtual ~Database() {
+ }
+
+ // Returns the Environment pointer
+ Environment *get_env() {
+ return (m_env);
+ }
+
+ // Returns the Database's configuration
+ const DatabaseConfiguration &config() const {
+ return (m_config);
+ }
+
+ // Returns the runtime-flags - the flags are "mixed" with the flags from
+ // the Environment
+ uint32_t get_flags() {
+ return (m_env->get_flags() | m_config.flags);
+ }
+
+ // Returns the database name
+ uint16_t name() const {
+ return (m_config.db_name);
+ }
+
+ // Sets the database name
+ void set_name(uint16_t name) {
+ m_config.db_name = name;
+ }
+
+ // Fills in the current metrics
+ virtual void fill_metrics(ham_env_metrics_t *metrics) = 0;
+
+ // Returns Database parameters (ham_db_get_parameters)
+ virtual ham_status_t get_parameters(ham_parameter_t *param) = 0;
+
+ // Checks Database integrity (ham_db_check_integrity)
+ virtual ham_status_t check_integrity(uint32_t flags) = 0;
+
+ // Returns the number of keys (ham_db_get_key_count)
+ virtual ham_status_t count(Transaction *txn, bool distinct,
+ uint64_t *pcount) = 0;
+
+ // Scans the whole database, applies a processor function
+ virtual ham_status_t scan(Transaction *txn, ScanVisitor *visitor,
+ bool distinct) = 0;
+
+ // Inserts a key/value pair (ham_db_insert, ham_cursor_insert)
+ virtual ham_status_t insert(Cursor *cursor, Transaction *txn,
+ ham_key_t *key, ham_record_t *record, uint32_t flags) = 0;
+
+ // Erase a key/value pair (ham_db_erase, ham_cursor_erase)
+ virtual ham_status_t erase(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ uint32_t flags) = 0;
+
+ // Lookup of a key/value pair (ham_db_find, ham_cursor_find)
+ virtual ham_status_t find(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags) = 0;
+
+ // Creates a cursor (ham_cursor_create)
+ virtual ham_status_t cursor_create(Cursor **pcursor, Transaction *txn,
+ uint32_t flags);
+
+ // Clones a cursor (ham_cursor_clone)
+ virtual ham_status_t cursor_clone(Cursor **pdest, Cursor *src);
+
+ // Returns number of duplicates (ham_cursor_get_record_count)
+ virtual ham_status_t cursor_get_record_count(Cursor *cursor,
+ uint32_t flags, uint32_t *pcount) = 0;
+
+ // Returns position in duplicate list (ham_cursor_get_duplicate_position)
+ virtual ham_status_t cursor_get_duplicate_position(Cursor *cursor,
+ uint32_t *pposition) = 0;
+
+ // Get current record size (ham_cursor_get_record_size)
+ virtual ham_status_t cursor_get_record_size(Cursor *cursor,
+ uint64_t *psize) = 0;
+
+ // Overwrites the record of a cursor (ham_cursor_overwrite)
+ virtual ham_status_t cursor_overwrite(Cursor *cursor,
+ ham_record_t *record, uint32_t flags) = 0;
+
+ // Moves a cursor, returns key and/or record (ham_cursor_move)
+ virtual ham_status_t cursor_move(Cursor *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags) = 0;
+
+ // Closes a cursor (ham_cursor_close)
+ ham_status_t cursor_close(Cursor *cursor);
+
+ // Closes the Database (ham_db_close)
+ ham_status_t close(uint32_t flags);
+
+ // Returns the last error code
+ ham_status_t get_error() const {
+ return (m_error);
+ }
+
+ // Sets the last error code
+ ham_status_t set_error(ham_status_t e) {
+ return ((m_error = e));
+ }
+
+ // Returns the user-provided context pointer (ham_get_context_data)
+ void *get_context_data() {
+ return (m_context);
+ }
+
+ // Sets the user-provided context pointer (ham_set_context_data)
+ void set_context_data(void *ctxt) {
+ m_context = ctxt;
+ }
+
+ // Returns the head of the linked list with all cursors
+ Cursor *cursor_list() {
+ return (m_cursor_list);
+ }
+
+ // Returns the memory buffer for the key data: the per-database buffer
+ // if |txn| is null or temporary, otherwise the buffer from the |txn|
+ ByteArray &key_arena(Transaction *txn) {
+ return ((txn == 0 || (txn->get_flags() & HAM_TXN_TEMPORARY))
+ ? m_key_arena
+ : txn->key_arena());
+ }
+
+ // Returns the memory buffer for the record data: the per-database buffer
+ // if |txn| is null or temporary, otherwise the buffer from the |txn|
+ ByteArray &record_arena(Transaction *txn) {
+ return ((txn == 0 || (txn->get_flags() & HAM_TXN_TEMPORARY))
+ ? m_record_arena
+ : txn->record_arena());
+ }
+
+ protected:
+ // Creates a cursor; this is the actual implementation
+ virtual Cursor *cursor_create_impl(Transaction *txn, uint32_t flags) = 0;
+
+ // Clones a cursor; this is the actual implementation
+ virtual Cursor *cursor_clone_impl(Cursor *src) = 0;
+
+ // Closes a cursor; this is the actual implementation
+ virtual void cursor_close_impl(Cursor *c) = 0;
+
+ // Closes a database; this is the actual implementation
+ virtual ham_status_t close_impl(uint32_t flags) = 0;
+
+ // the current Environment
+ Environment *m_env;
+
+ // the configuration settings
+ DatabaseConfiguration m_config;
+
+ // the last error code
+ ham_status_t m_error;
+
+ // the user-provided context data
+ void *m_context;
+
+ // linked list of all cursors
+ Cursor *m_cursor_list;
+
+ // This is where key->data points to when returning a
+ // key to the user; used if Transactions are disabled
+ ByteArray m_key_arena;
+
+ // This is where record->data points to when returning a
+ // record to the user; used if Transactions are disabled
+ ByteArray m_record_arena;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_DB_H */
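A brief illustration of the arena selection in key_arena()/record_arena() above; |db| and |txn| are assumed to exist and are not part of this patch.

  ByteArray &arena = db->key_arena(txn);
  // txn == 0 or (txn->get_flags() & HAM_TXN_TEMPORARY)  -> per-database m_key_arena
  // otherwise                                           -> txn->key_arena()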
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4db/db_local.cc b/plugins/Dbx_kv/src/hamsterdb/src/4db/db_local.cc
new file mode 100644
index 0000000000..849eb4e7aa
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4db/db_local.cc
@@ -0,0 +1,1776 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <boost/scope_exit.hpp>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1mem/mem.h"
+#include "1os/os.h"
+#include "2page/page.h"
+#include "2device/device.h"
+#include "3page_manager/page_manager.h"
+#include "3journal/journal.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_index_factory.h"
+#include "3btree/btree_cursor.h"
+#include "3btree/btree_stats.h"
+#include "4db/db_local.h"
+#include "4context/context.h"
+#include "4cursor/cursor.h"
+#include "4txn/txn_local.h"
+#include "4txn/txn_cursor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+ham_status_t
+LocalDatabase::check_insert_conflicts(Context *context, TransactionNode *node,
+ ham_key_t *key, uint32_t flags)
+{
+ TransactionOperation *op = 0;
+
+ /*
+ * pick the tree_node of this key, and walk through each operation
+ * in reverse chronological order (from newest to oldest):
+ * - is this op part of an aborted txn? then skip it
+ * - is this op part of a committed txn? then look at the
+ * operation in detail
+   *    - is this op part of a txn which is still active? return an error
+ * because we've found a conflict
+ * - if a committed txn has erased the item then there's no need
+ * to continue checking older, committed txns
+ */
+ op = node->get_newest_op();
+ while (op) {
+ LocalTransaction *optxn = op->get_txn();
+ if (optxn->is_aborted())
+ ; /* nop */
+ else if (optxn->is_committed() || context->txn == optxn) {
+ /* if key was erased then it doesn't exist and can be
+ * inserted without problems */
+ if (op->get_flags() & TransactionOperation::kIsFlushed)
+ ; /* nop */
+ else if (op->get_flags() & TransactionOperation::kErase)
+ return (0);
+ /* if the key already exists then we can only continue if
+ * we're allowed to overwrite it or to insert a duplicate */
+ else if ((op->get_flags() & TransactionOperation::kInsert)
+ || (op->get_flags() & TransactionOperation::kInsertOverwrite)
+ || (op->get_flags() & TransactionOperation::kInsertDuplicate)) {
+ if ((flags & HAM_OVERWRITE) || (flags & HAM_DUPLICATE))
+ return (0);
+ else
+ return (HAM_DUPLICATE_KEY);
+ }
+ else if (!(op->get_flags() & TransactionOperation::kNop)) {
+ ham_assert(!"shouldn't be here");
+ return (HAM_DUPLICATE_KEY);
+ }
+ }
+ else { /* txn is still active */
+ return (HAM_TXN_CONFLICT);
+ }
+
+ op = op->get_previous_in_node();
+ }
+
+ /*
+ * we've successfully checked all un-flushed transactions and there
+ * were no conflicts. Now check all transactions which are already
+ * flushed - basically that's identical to a btree lookup.
+ *
+ * however we can skip this check if we do not care about duplicates.
+ */
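+  // (record number databases generate monotonically increasing keys, so a
+  // duplicate in the btree is not possible and the lookup can be skipped
+  // for them as well)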
+ if ((flags & HAM_OVERWRITE)
+ || (flags & HAM_DUPLICATE)
+ || (get_flags() & (HAM_RECORD_NUMBER32 | HAM_RECORD_NUMBER64)))
+ return (0);
+
+ ham_status_t st = m_btree_index->find(context, 0, key, 0, 0, 0, flags);
+ switch (st) {
+ case HAM_KEY_NOT_FOUND:
+ return (0);
+ case HAM_SUCCESS:
+ return (HAM_DUPLICATE_KEY);
+ default:
+ return (st);
+ }
+}
+
+ham_status_t
+LocalDatabase::check_erase_conflicts(Context *context, TransactionNode *node,
+ ham_key_t *key, uint32_t flags)
+{
+ TransactionOperation *op = 0;
+
+ /*
+ * pick the tree_node of this key, and walk through each operation
+ * in reverse chronological order (from newest to oldest):
+ * - is this op part of an aborted txn? then skip it
+ * - is this op part of a committed txn? then look at the
+ * operation in detail
+   *    - is this op part of a txn which is still active? return an error
+ * because we've found a conflict
+ * - if a committed txn has erased the item then there's no need
+ * to continue checking older, committed txns
+ */
+ op = node->get_newest_op();
+ while (op) {
+ Transaction *optxn = op->get_txn();
+ if (optxn->is_aborted())
+ ; /* nop */
+ else if (optxn->is_committed() || context->txn == optxn) {
+ if (op->get_flags() & TransactionOperation::kIsFlushed)
+ ; /* nop */
+ /* if key was erased then it doesn't exist and we fail with
+ * an error */
+ else if (op->get_flags() & TransactionOperation::kErase)
+ return (HAM_KEY_NOT_FOUND);
+ /* if the key exists then we're successful */
+ else if ((op->get_flags() & TransactionOperation::kInsert)
+ || (op->get_flags() & TransactionOperation::kInsertOverwrite)
+ || (op->get_flags() & TransactionOperation::kInsertDuplicate)) {
+ return (0);
+ }
+ else if (!(op->get_flags() & TransactionOperation::kNop)) {
+ ham_assert(!"shouldn't be here");
+ return (HAM_KEY_NOT_FOUND);
+ }
+ }
+ else { /* txn is still active */
+ return (HAM_TXN_CONFLICT);
+ }
+
+ op = op->get_previous_in_node();
+ }
+
+ /*
+ * we've successfully checked all un-flushed transactions and there
+ * were no conflicts. Now check all transactions which are already
+ * flushed - basically that's identical to a btree lookup.
+ */
+ return (m_btree_index->find(context, 0, key, 0, 0, 0, flags));
+}
+
+ham_status_t
+LocalDatabase::insert_txn(Context *context, ham_key_t *key,
+ ham_record_t *record, uint32_t flags, TransactionCursor *cursor)
+{
+ ham_status_t st = 0;
+ TransactionOperation *op;
+ bool node_created = false;
+
+ /* get (or create) the node for this key */
+ TransactionNode *node = m_txn_index->get(key, 0);
+ if (!node) {
+ node = new TransactionNode(this, key);
+ node_created = true;
+ // TODO only store when the operation is successful?
+ m_txn_index->store(node);
+ }
+
+ // check for conflicts of this key
+ //
+ // !!
+ // afterwards, clear the changeset; check_insert_conflicts()
+ // checks if a key already exists, and this fills the changeset
+ st = check_insert_conflicts(context, node, key, flags);
+ if (st) {
+ if (node_created) {
+ m_txn_index->remove(node);
+ delete node;
+ }
+ return (st);
+ }
+
+ // append a new operation to this node
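+  // the txn-op type is derived from the insert flags: HAM_DUPLICATE maps to
+  // kInsertDuplicate, HAM_OVERWRITE to kInsertOverwrite, everything else to
+  // kInsert; the HAM_PARTIAL bit is passed through unchanged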
+ op = node->append(context->txn, flags,
+ (flags & HAM_PARTIAL) |
+ ((flags & HAM_DUPLICATE)
+ ? TransactionOperation::kInsertDuplicate
+ : (flags & HAM_OVERWRITE)
+ ? TransactionOperation::kInsertOverwrite
+ : TransactionOperation::kInsert),
+ lenv()->next_lsn(), key, record);
+
+ // if there's a cursor then couple it to the op; also store the
+  // dupecache-index in the op (it's needed for DUPLICATE_INSERT_BEFORE/NEXT)
+ if (cursor) {
+ Cursor *c = cursor->get_parent();
+ if (c->get_dupecache_index())
+ op->set_referenced_dupe(c->get_dupecache_index());
+
+ cursor->couple_to_op(op);
+
+ // all other cursors need to increment their dupe index, if their
+ // index is > this cursor's index
+ increment_dupe_index(context, node, c, c->get_dupecache_index());
+ }
+
+ // append journal entry
+ if (m_env->get_flags() & HAM_ENABLE_RECOVERY
+ && m_env->get_flags() & HAM_ENABLE_TRANSACTIONS) {
+ Journal *j = lenv()->journal();
+ j->append_insert(this, context->txn, key, record,
+ flags & HAM_DUPLICATE ? flags : flags | HAM_OVERWRITE,
+ op->get_lsn());
+ }
+
+ ham_assert(st == 0);
+ return (0);
+}
+
+ham_status_t
+LocalDatabase::find_txn(Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags)
+{
+ ham_status_t st = 0;
+ TransactionOperation *op = 0;
+ bool first_loop = true;
+ bool exact_is_erased = false;
+
+ ByteArray *pkey_arena = &key_arena(context->txn);
+ ByteArray *precord_arena = &record_arena(context->txn);
+
+ ham_key_set_intflags(key,
+ (ham_key_get_intflags(key) & (~BtreeKey::kApproximate)));
+
+ /* get the node for this key (but don't create a new one if it does
+ * not yet exist) */
+ TransactionNode *node = m_txn_index->get(key, flags);
+
+ /*
+ * pick the node of this key, and walk through each operation
+ * in reverse chronological order (from newest to oldest):
+ * - is this op part of an aborted txn? then skip it
+ * - is this op part of a committed txn? then look at the
+ * operation in detail
+   *    - is this op part of a txn which is still active? return an error
+ * because we've found a conflict
+ * - if a committed txn has erased the item then there's no need
+ * to continue checking older, committed txns
+ */
+retry:
+ if (node)
+ op = node->get_newest_op();
+ while (op) {
+ Transaction *optxn = op->get_txn();
+ if (optxn->is_aborted())
+ ; /* nop */
+ else if (optxn->is_committed() || context->txn == optxn) {
+ if (op->get_flags() & TransactionOperation::kIsFlushed)
+ ; /* nop */
+ /* if key was erased then it doesn't exist and we can return
+ * immediately
+ *
+ * if an approximate match is requested then move to the next
+ * or previous node
+ */
+ else if (op->get_flags() & TransactionOperation::kErase) {
+ if (first_loop
+ && !(ham_key_get_intflags(key) & BtreeKey::kApproximate))
+ exact_is_erased = true;
+ first_loop = false;
+ if (flags & HAM_FIND_LT_MATCH) {
+ node = node->get_previous_sibling();
+ if (!node)
+ break;
+ ham_key_set_intflags(key,
+ (ham_key_get_intflags(key) | BtreeKey::kApproximate));
+ goto retry;
+ }
+ else if (flags & HAM_FIND_GT_MATCH) {
+ node = node->get_next_sibling();
+ if (!node)
+ break;
+ ham_key_set_intflags(key,
+ (ham_key_get_intflags(key) | BtreeKey::kApproximate));
+ goto retry;
+ }
+ /* if a duplicate was deleted then check if there are other duplicates
+ * left */
+ st = HAM_KEY_NOT_FOUND;
+ // TODO merge both calls
+ if (cursor) {
+ cursor->get_txn_cursor()->couple_to_op(op);
+ cursor->couple_to_txnop();
+ }
+ if (op->get_referenced_dupe() > 1) {
+ // not the first dupe - there are other dupes
+ st = 0;
+ }
+ else if (op->get_referenced_dupe() == 1) {
+ // check if there are other dupes
+ bool is_equal;
+ (void)cursor->sync(context, Cursor::kSyncOnlyEqualKeys, &is_equal);
+ if (!is_equal) // TODO merge w/ line above?
+ cursor->set_to_nil(Cursor::kBtree);
+ st = cursor->get_dupecache_count(context) ? 0 : HAM_KEY_NOT_FOUND;
+ }
+ return (st);
+ }
+ /* if the key already exists then return its record; do not
+ * return pointers to TransactionOperation::get_record, because it may be
+ * flushed and the user's pointers would be invalid */
+ else if ((op->get_flags() & TransactionOperation::kInsert)
+ || (op->get_flags() & TransactionOperation::kInsertOverwrite)
+ || (op->get_flags() & TransactionOperation::kInsertDuplicate)) {
+ if (cursor) { // TODO merge those calls
+ cursor->get_txn_cursor()->couple_to_op(op);
+ cursor->couple_to_txnop();
+ }
+ // approx match? leave the loop and continue
+ // with the btree
+ if (ham_key_get_intflags(key) & BtreeKey::kApproximate)
+ break;
+ // otherwise copy the record and return
+ if (record)
+ return (LocalDatabase::copy_record(this, context->txn, op, record));
+ return (0);
+ }
+ else if (!(op->get_flags() & TransactionOperation::kNop)) {
+ ham_assert(!"shouldn't be here");
+ return (HAM_KEY_NOT_FOUND);
+ }
+ }
+ else { /* txn is still active */
+ return (HAM_TXN_CONFLICT);
+ }
+
+ op = op->get_previous_in_node();
+ }
+
+ /*
+ * if there was an approximate match: check if the btree provides
+ * a better match
+ *
+ * TODO use alloca or ByteArray instead of Memory::allocate()
+ */
+ if (op && ham_key_get_intflags(key) & BtreeKey::kApproximate) {
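+    // keep a private copy of the txn key: the btree lookup below overwrites
+    // |key|, and the txn key may still be needed if it turns out to be the
+    // closer match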
+ ham_key_t txnkey = {0};
+ ham_key_t *k = op->get_node()->get_key();
+ txnkey.size = k->size;
+ txnkey._flags = BtreeKey::kApproximate;
+ txnkey.data = Memory::allocate<uint8_t>(txnkey.size);
+ memcpy(txnkey.data, k->data, txnkey.size);
+
+ ham_key_set_intflags(key, 0);
+
+ // the "exact match" key was erased? then don't fetch it again
+ if (exact_is_erased)
+ flags = flags & (~HAM_FIND_EXACT_MATCH);
+
+ // now lookup in the btree
+ if (cursor)
+ cursor->set_to_nil(Cursor::kBtree);
+ st = m_btree_index->find(context, cursor, key, pkey_arena, record,
+ precord_arena, flags);
+ if (st == HAM_KEY_NOT_FOUND) {
+ if (!(key->flags & HAM_KEY_USER_ALLOC) && txnkey.data) {
+ pkey_arena->resize(txnkey.size);
+ key->data = pkey_arena->get_ptr();
+ }
+ if (txnkey.data) {
+ ::memcpy(key->data, txnkey.data, txnkey.size);
+ Memory::release(txnkey.data);
+ }
+ key->size = txnkey.size;
+ key->_flags = txnkey._flags;
+
+ if (cursor) { // TODO merge those calls
+ cursor->get_txn_cursor()->couple_to_op(op);
+ cursor->couple_to_txnop();
+ }
+ if (record)
+ return (LocalDatabase::copy_record(this, context->txn, op, record));
+ return (0);
+ }
+ else if (st)
+ return (st);
+ // the btree key is a direct match? then return it
+ if ((!(ham_key_get_intflags(key) & BtreeKey::kApproximate))
+ && (flags & HAM_FIND_EXACT_MATCH)) {
+ Memory::release(txnkey.data);
+ if (cursor)
+ cursor->couple_to_btree();
+ return (0);
+ }
+ // if there's an approx match in the btree: compare both keys and
+ // use the one that is closer. if the btree is closer: make sure
+ // that it was not erased or overwritten in a transaction
+ int cmp = m_btree_index->compare_keys(key, &txnkey);
+ bool use_btree = false;
+ if (flags & HAM_FIND_GT_MATCH) {
+ if (cmp < 0)
+ use_btree = true;
+ }
+ else if (flags & HAM_FIND_LT_MATCH) {
+ if (cmp > 0)
+ use_btree = true;
+ }
+ else
+ ham_assert(!"shouldn't be here");
+
+ if (use_btree) {
+ Memory::release(txnkey.data);
+ // lookup again, with the same flags and the btree key.
+ // this will check if the key was erased or overwritten
+ // in a transaction
+ st = find_txn(context, cursor, key, record, flags | HAM_FIND_EXACT_MATCH);
+ if (st == 0)
+ ham_key_set_intflags(key,
+ (ham_key_get_intflags(key) | BtreeKey::kApproximate));
+ return (st);
+ }
+ else { // use txn
+ if (!(key->flags & HAM_KEY_USER_ALLOC) && txnkey.data) {
+ pkey_arena->resize(txnkey.size);
+ key->data = pkey_arena->get_ptr();
+ }
+ if (txnkey.data) {
+ ::memcpy(key->data, txnkey.data, txnkey.size);
+ Memory::release(txnkey.data);
+ }
+ key->size = txnkey.size;
+ key->_flags = txnkey._flags;
+
+ if (cursor) { // TODO merge those calls
+ cursor->get_txn_cursor()->couple_to_op(op);
+ cursor->couple_to_txnop();
+ }
+ if (record)
+ return (LocalDatabase::copy_record(this, context->txn, op, record));
+ return (0);
+ }
+ }
+
+ /*
+ * no approximate match:
+ *
+ * we've successfully checked all un-flushed transactions and there
+ * were no conflicts, and we have not found the key: now try to
+ * lookup the key in the btree.
+ */
+ return (m_btree_index->find(context, cursor, key, pkey_arena, record,
+ precord_arena, flags));
+}
+
+ham_status_t
+LocalDatabase::erase_txn(Context *context, ham_key_t *key, uint32_t flags,
+ TransactionCursor *cursor)
+{
+ ham_status_t st = 0;
+ TransactionOperation *op;
+ bool node_created = false;
+ Cursor *pc = 0;
+ if (cursor)
+ pc = cursor->get_parent();
+
+ /* get (or create) the node for this key */
+ TransactionNode *node = m_txn_index->get(key, 0);
+ if (!node) {
+ node = new TransactionNode(this, key);
+ node_created = true;
+ // TODO only store when the operation is successful?
+ m_txn_index->store(node);
+ }
+
+  /* check for conflicts of this key - but only if we're not erasing a
+   * duplicate key. dupes are checked for conflicts in _local_cursor_move
+   * (TODO: that function no longer exists) */
+ if (!pc || (!pc->get_dupecache_index())) {
+ st = check_erase_conflicts(context, node, key, flags);
+ if (st) {
+ if (node_created) {
+ m_txn_index->remove(node);
+ delete node;
+ }
+ return (st);
+ }
+ }
+
+ /* append a new operation to this node */
+ op = node->append(context->txn, flags, TransactionOperation::kErase,
+ lenv()->next_lsn(), key, 0);
+
+ /* is this function called through ham_cursor_erase? then add the
+ * duplicate ID */
+ if (cursor) {
+ if (pc->get_dupecache_index())
+ op->set_referenced_dupe(pc->get_dupecache_index());
+ }
+
+  /* the current op has no cursors attached; but if there are other ops
+   * in this node and in this transaction, then their cursors have to
+   * be set to nil. This only nils txn-cursors! */
+ nil_all_cursors_in_node(context->txn, pc, node);
+
+ /* in addition we nil all btree cursors which are coupled to this key */
+ nil_all_cursors_in_btree(context, pc, node->get_key());
+
+ /* append journal entry */
+ if (m_env->get_flags() & HAM_ENABLE_RECOVERY
+ && m_env->get_flags() & HAM_ENABLE_TRANSACTIONS) {
+ Journal *j = lenv()->journal();
+ j->append_erase(this, context->txn, key, 0,
+ flags | HAM_ERASE_ALL_DUPLICATES, op->get_lsn());
+ }
+
+ ham_assert(st == 0);
+ return (0);
+}
+
+ham_status_t
+LocalDatabase::create(Context *context, PBtreeHeader *btree_header)
+{
+ /* set the flags; strip off run-time (per session) flags for the btree */
+ uint32_t persistent_flags = get_flags();
+ persistent_flags &= ~(HAM_CACHE_UNLIMITED
+ | HAM_DISABLE_MMAP
+ | HAM_ENABLE_FSYNC
+ | HAM_READ_ONLY
+ | HAM_ENABLE_RECOVERY
+ | HAM_AUTO_RECOVERY
+ | HAM_ENABLE_TRANSACTIONS);
+
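+  // fixed-width key types imply a fixed key size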
+ switch (m_config.key_type) {
+ case HAM_TYPE_UINT8:
+ m_config.key_size = 1;
+ break;
+ case HAM_TYPE_UINT16:
+ m_config.key_size = 2;
+ break;
+ case HAM_TYPE_REAL32:
+ case HAM_TYPE_UINT32:
+ m_config.key_size = 4;
+ break;
+ case HAM_TYPE_REAL64:
+ case HAM_TYPE_UINT64:
+ m_config.key_size = 8;
+ break;
+ }
+
+ // if we cannot fit at least 10 keys in a page then refuse to continue
+ if (m_config.key_size != HAM_KEY_SIZE_UNLIMITED) {
+ if (lenv()->config().page_size_bytes / (m_config.key_size + 8) < 10) {
+ ham_trace(("key size too large; either increase page_size or decrease "
+ "key size"));
+ return (HAM_INV_KEY_SIZE);
+ }
+ }
+
+  // fixed length records:
+  //
+  // if records are <= 8 bytes, OR if they are small enough (see
+  // kInlineRecordThreshold) and more than 500 key/record pairs fit into
+  // a leaf page, then store the records inline in the leaf;
+  // otherwise they're allocated as blobs
+ if (m_config.record_size != HAM_RECORD_SIZE_UNLIMITED) {
+ if (m_config.record_size <= 8
+ || (m_config.record_size <= kInlineRecordThreshold
+ && lenv()->config().page_size_bytes
+ / (m_config.key_size + m_config.record_size) > 500)) {
+ persistent_flags |= HAM_FORCE_RECORDS_INLINE;
+ m_config.flags |= HAM_FORCE_RECORDS_INLINE;
+ }
+ }
+
+ // create the btree
+ m_btree_index.reset(new BtreeIndex(this, btree_header, persistent_flags,
+ m_config.key_type, m_config.key_size));
+
+ /* initialize the btree */
+ m_btree_index->create(context, m_config.key_type, m_config.key_size,
+ m_config.record_size);
+
+ /* the header page is now dirty */
+ Page *header = lenv()->page_manager()->fetch(context, 0);
+ header->set_dirty(true);
+
+ /* and the TransactionIndex */
+ m_txn_index.reset(new TransactionIndex(this));
+
+ return (0);
+}
+
+ham_status_t
+LocalDatabase::open(Context *context, PBtreeHeader *btree_header)
+{
+ /*
+ * set the database flags; strip off the persistent flags that may have been
+ * set by the caller, before mixing in the persistent flags as obtained
+ * from the btree.
+ */
+ uint32_t flags = get_flags();
+ flags &= ~(HAM_CACHE_UNLIMITED
+ | HAM_DISABLE_MMAP
+ | HAM_ENABLE_FSYNC
+ | HAM_READ_ONLY
+ | HAM_ENABLE_RECOVERY
+ | HAM_AUTO_RECOVERY
+ | HAM_ENABLE_TRANSACTIONS);
+
+ m_config.key_type = btree_header->get_key_type();
+ m_config.key_size = btree_header->get_key_size();
+
+ /* create the BtreeIndex */
+ m_btree_index.reset(new BtreeIndex(this, btree_header,
+ flags | btree_header->get_flags(),
+ btree_header->get_key_type(),
+ btree_header->get_key_size()));
+
+ ham_assert(!(m_btree_index->get_flags() & HAM_CACHE_UNLIMITED));
+ ham_assert(!(m_btree_index->get_flags() & HAM_DISABLE_MMAP));
+ ham_assert(!(m_btree_index->get_flags() & HAM_ENABLE_FSYNC));
+ ham_assert(!(m_btree_index->get_flags() & HAM_READ_ONLY));
+ ham_assert(!(m_btree_index->get_flags() & HAM_ENABLE_RECOVERY));
+ ham_assert(!(m_btree_index->get_flags() & HAM_AUTO_RECOVERY));
+ ham_assert(!(m_btree_index->get_flags() & HAM_ENABLE_TRANSACTIONS));
+
+ /* initialize the btree */
+ m_btree_index->open();
+
+ /* create the TransactionIndex - TODO only if txn's are enabled? */
+ m_txn_index.reset(new TransactionIndex(this));
+
+ /* merge the non-persistent database flag with the persistent flags from
+ * the btree index */
+ m_config.flags = config().flags | m_btree_index->get_flags();
+ m_config.key_size = m_btree_index->get_key_size();
+ m_config.key_type = m_btree_index->get_key_type();
+ m_config.record_size = m_btree_index->get_record_size();
+
+ // fetch the current record number
+ if ((get_flags() & (HAM_RECORD_NUMBER32 | HAM_RECORD_NUMBER64))) {
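+    // the largest existing key is the last used record number; an empty
+    // database (HAM_KEY_NOT_FOUND) simply leaves m_recno at 0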
+ ham_key_t key = {};
+ Cursor *c = new Cursor(this, 0, 0);
+ ham_status_t st = cursor_move_impl(context, c, &key, 0, HAM_CURSOR_LAST);
+ cursor_close(c);
+ if (st)
+ return (st == HAM_KEY_NOT_FOUND ? 0 : st);
+
+ if (get_flags() & HAM_RECORD_NUMBER32)
+ m_recno = *(uint32_t *)key.data;
+ else
+ m_recno = *(uint64_t *)key.data;
+ }
+
+ return (0);
+}
+
+struct MetricsVisitor : public BtreeVisitor {
+ MetricsVisitor(ham_env_metrics_t *metrics)
+ : m_metrics(metrics) {
+ }
+
+ // Specifies if the visitor modifies the node
+ virtual bool is_read_only() const {
+ return (true);
+ }
+
+ // called for each node
+ virtual void operator()(Context *context, BtreeNodeProxy *node) {
+ if (node->is_leaf())
+ node->fill_metrics(&m_metrics->btree_leaf_metrics);
+ else
+ node->fill_metrics(&m_metrics->btree_internal_metrics);
+ }
+
+ ham_env_metrics_t *m_metrics;
+};
+
+void
+LocalDatabase::fill_metrics(ham_env_metrics_t *metrics)
+{
+ metrics->btree_leaf_metrics.database_name = name();
+ metrics->btree_internal_metrics.database_name = name();
+
+ try {
+ MetricsVisitor visitor(metrics);
+ Context context(lenv(), 0, this);
+ m_btree_index->visit_nodes(&context, visitor, true);
+
+ // calculate the "avg" values
+ BtreeStatistics::finalize_metrics(&metrics->btree_leaf_metrics);
+ BtreeStatistics::finalize_metrics(&metrics->btree_internal_metrics);
+ }
+ catch (Exception &) {
+ }
+}
+
+ham_status_t
+LocalDatabase::get_parameters(ham_parameter_t *param)
+{
+ try {
+ Context context(lenv(), 0, this);
+
+ Page *page = 0;
+ ham_parameter_t *p = param;
+
+ if (p) {
+ for (; p->name; p++) {
+ switch (p->name) {
+ case HAM_PARAM_KEY_SIZE:
+ p->value = m_config.key_size;
+ break;
+ case HAM_PARAM_KEY_TYPE:
+ p->value = m_config.key_type;
+ break;
+ case HAM_PARAM_RECORD_SIZE:
+ p->value = m_config.record_size;
+ break;
+ case HAM_PARAM_FLAGS:
+ p->value = (uint64_t)get_flags();
+ break;
+ case HAM_PARAM_DATABASE_NAME:
+ p->value = (uint64_t)name();
+ break;
+ case HAM_PARAM_MAX_KEYS_PER_PAGE:
+ p->value = 0;
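+            // estimate how many keys fit into a page by inspecting the
+            // btree's root node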
+ page = lenv()->page_manager()->fetch(&context,
+ m_btree_index->get_root_address(),
+ PageManager::kReadOnly);
+ if (page) {
+ BtreeNodeProxy *node = m_btree_index->get_node_from_page(page);
+ p->value = node->estimate_capacity();
+ }
+ break;
+ case HAM_PARAM_RECORD_COMPRESSION:
+ p->value = 0;
+ break;
+ case HAM_PARAM_KEY_COMPRESSION:
+ p->value = 0;
+ break;
+ default:
+ ham_trace(("unknown parameter %d", (int)p->name));
+ throw Exception(HAM_INV_PARAMETER);
+ }
+ }
+ }
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+ return (0);
+}
+
+ham_status_t
+LocalDatabase::check_integrity(uint32_t flags)
+{
+ try {
+ Context context(lenv(), 0, this);
+
+ /* purge cache if necessary */
+ lenv()->page_manager()->purge_cache(&context);
+
+ /* call the btree function */
+ m_btree_index->check_integrity(&context, flags);
+
+ /* call the txn function */
+ //m_txn_index->check_integrity(flags);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+ return (0);
+}
+
+ham_status_t
+LocalDatabase::count(Transaction *htxn, bool distinct, uint64_t *pcount)
+{
+ LocalTransaction *txn = dynamic_cast<LocalTransaction *>(htxn);
+
+ try {
+ Context context(lenv(), txn, this);
+
+ /* purge cache if necessary */
+ lenv()->page_manager()->purge_cache(&context);
+
+ /*
+ * call the btree function - this will retrieve the number of keys
+ * in the btree
+ */
+ uint64_t keycount = m_btree_index->count(&context, distinct);
+
+ /*
+ * if transactions are enabled, then also sum up the number of keys
+ * from the transaction tree
+ */
+ if (get_flags() & HAM_ENABLE_TRANSACTIONS)
+ keycount += m_txn_index->count(&context, txn, distinct);
+
+ *pcount = keycount;
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::scan(Transaction *txn, ScanVisitor *visitor, bool distinct)
+{
+ ham_status_t st = 0;
+
+ try {
+ Context context(lenv(), (LocalTransaction *)txn, this);
+
+ Page *page;
+ ham_key_t key = {0};
+
+ /* purge cache if necessary */
+ lenv()->page_manager()->purge_cache(&context);
+
+ /* create a cursor, move it to the first key */
+ Cursor *cursor = cursor_create_impl(txn, 0);
+
+ st = cursor_move_impl(&context, cursor, &key, 0, HAM_CURSOR_FIRST);
+ if (st)
+ goto bail;
+
+ /* only transaction keys? then use a regular cursor */
+ if (!cursor->is_coupled_to_btree()) {
+ do {
+ /* process the key */
+ (*visitor)(key.data, key.size, distinct
+ ? cursor->get_record_count(&context, 0)
+ : 1);
+ } while ((st = cursor_move_impl(&context, cursor, &key,
+ 0, HAM_CURSOR_NEXT)) == 0);
+ goto bail;
+ }
+
+ /* only btree keys? then traverse page by page */
+ if (!(get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_assert(cursor->is_coupled_to_btree());
+
+ do {
+ // get the coupled page
+ cursor->get_btree_cursor()->get_coupled_key(&page);
+ BtreeNodeProxy *node = m_btree_index->get_node_from_page(page);
+ // and let the btree node perform the remaining work
+ node->scan(&context, visitor, 0, distinct);
+ } while (cursor->get_btree_cursor()->move_to_next_page(&context) == 0);
+
+ goto bail;
+ }
+
+ /* mixed txn/btree load? if there are btree nodes which are NOT modified
+ * in transactions then move the scan to the btree node. Otherwise use
+ * a regular cursor */
+ while (true) {
+ if (!cursor->is_coupled_to_btree())
+ break;
+
+ int slot;
+ cursor->get_btree_cursor()->get_coupled_key(&page, &slot);
+ BtreeNodeProxy *node = m_btree_index->get_node_from_page(page);
+
+ /* are transactions present? then check if the next txn key is >= btree[0]
+ * and <= btree[n] */
+ ham_key_t *txnkey = 0;
+ if (cursor->get_txn_cursor()->get_coupled_op())
+ txnkey = cursor->get_txn_cursor()->get_coupled_op()->get_node()->get_key();
+ // no (more) transactional keys left - process the current key, then
+ // scan the remaining keys directly in the btree
+ if (!txnkey) {
+ /* process the key */
+ (*visitor)(key.data, key.size, distinct
+ ? cursor->get_record_count(&context, 0)
+ : 1);
+ break;
+ }
+
+ /* if yes: use the cursor to traverse the page */
+ if (node->compare(&context, txnkey, 0) >= 0
+ && node->compare(&context, txnkey, node->get_count() - 1) <= 0) {
+ do {
+ Page *new_page = 0;
+ if (cursor->is_coupled_to_btree())
+ cursor->get_btree_cursor()->get_coupled_key(&new_page);
+ /* break the loop if we've reached the next page */
+ if (new_page && new_page != page) {
+ page = new_page;
+ break;
+ }
+ /* process the key */
+ (*visitor)(key.data, key.size, distinct
+ ? cursor->get_record_count(&context, 0)
+ : 1);
+ } while ((st = cursor_move_impl(&context, cursor, &key,
+ 0, HAM_CURSOR_NEXT)) == 0);
+
+ if (st != HAM_SUCCESS)
+ goto bail;
+ }
+ else {
+ /* Otherwise traverse directly in the btree page. This is the fastest
+ * code path. */
+ node->scan(&context, visitor, slot, distinct);
+ /* and then move to the next page */
+ if (cursor->get_btree_cursor()->move_to_next_page(&context) != 0)
+ break;
+ }
+ }
+
+ /* pick up the remaining transactional keys */
+ while ((st = cursor_move_impl(&context, cursor, &key,
+ 0, HAM_CURSOR_NEXT)) == 0) {
+ (*visitor)(key.data, key.size, distinct
+ ? cursor->get_record_count(&context, 0)
+ : 1);
+ }
+
+bail:
+ if (cursor)
+ cursor_close_impl(cursor);
+ return (st == HAM_KEY_NOT_FOUND ? 0 : st);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::insert(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ Context context(lenv(), (LocalTransaction *)txn, this);
+
+ try {
+ if (m_config.flags & (HAM_RECORD_NUMBER32 | HAM_RECORD_NUMBER64)) {
+ if (key->size == 0 && key->data == 0) {
+ // ok!
+ }
+ else if (key->size == 0 && key->data != 0) {
+ ham_trace(("for record number keys set key size to 0, "
+ "key->data to null"));
+ return (HAM_INV_PARAMETER);
+ }
+ else if (key->size != m_config.key_size) {
+ ham_trace(("invalid key size (%u instead of %u)",
+ key->size, m_config.key_size));
+ return (HAM_INV_KEY_SIZE);
+ }
+ }
+ else if (m_config.key_size != HAM_KEY_SIZE_UNLIMITED
+ && key->size != m_config.key_size) {
+ ham_trace(("invalid key size (%u instead of %u)",
+ key->size, m_config.key_size));
+ return (HAM_INV_KEY_SIZE);
+ }
+ if (m_config.record_size != HAM_RECORD_SIZE_UNLIMITED
+ && record->size != m_config.record_size) {
+ ham_trace(("invalid record size (%u instead of %u)",
+ record->size, m_config.record_size));
+ return (HAM_INV_RECORD_SIZE);
+ }
+
+ ByteArray *arena = &key_arena(txn);
+
+ /*
+ * record number: make sure that we have a valid key structure,
+ * and lazy load the last used record number
+ *
+ * TODO TODO
+ * too much duplicated code
+ */
+ uint64_t recno = 0;
+ if (get_flags() & HAM_RECORD_NUMBER64) {
+ if (flags & HAM_OVERWRITE) {
+ ham_assert(key->size == sizeof(uint64_t));
+ ham_assert(key->data != 0);
+ recno = *(uint64_t *)key->data;
+ }
+ else {
+ /* get the record number and increment it */
+ recno = next_record_number();
+ }
+
+ /* allocate memory for the key */
+ if (!key->data) {
+ arena->resize(sizeof(uint64_t));
+ key->data = arena->get_ptr();
+ }
+ key->size = sizeof(uint64_t);
+ *(uint64_t *)key->data = recno;
+
+ /* A recno key is always appended sequentially */
+ flags |= HAM_HINT_APPEND;
+ }
+ else if (get_flags() & HAM_RECORD_NUMBER32) {
+ if (flags & HAM_OVERWRITE) {
+ ham_assert(key->size == sizeof(uint32_t));
+ ham_assert(key->data != 0);
+ recno = *(uint32_t *)key->data;
+ }
+ else {
+ /* get the record number and increment it */
+ recno = next_record_number();
+ }
+
+ /* allocate memory for the key */
+ if (!key->data) {
+ arena->resize(sizeof(uint32_t));
+ key->data = arena->get_ptr();
+ }
+ key->size = sizeof(uint32_t);
+ *(uint32_t *)key->data = (uint32_t)recno;
+
+ /* A recno key is always appended sequentially */
+ flags |= HAM_HINT_APPEND;
+ }
+
+ ham_status_t st = 0;
+ LocalTransaction *local_txn = 0;
+
+ /* purge cache if necessary */
+ if (!txn && (get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ local_txn = begin_temp_txn();
+ context.txn = local_txn;
+ }
+
+ st = insert_impl(&context, cursor, key, record, flags);
+ return (finalize(&context, st, local_txn));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::erase(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ uint32_t flags)
+{
+ Context context(lenv(), (LocalTransaction *)txn, this);
+
+ try {
+ ham_status_t st = 0;
+ LocalTransaction *local_txn = 0;
+
+ if (cursor) {
+ if (cursor->is_nil())
+ throw Exception(HAM_CURSOR_IS_NIL);
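+      // the key to erase is taken from the cursor's position: either from
+      // the coupled txn-op, or (key remains 0) implicitly from the btree
+      // cursor in erase_impl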
+ if (cursor->is_coupled_to_txnop()) // TODO rewrite the next line, it's ugly
+ key = cursor->get_txn_cursor()->get_coupled_op()->get_node()->get_key();
+ else // cursor->is_coupled_to_btree()
+ key = 0;
+ }
+
+ if (key) {
+ if (m_config.key_size != HAM_KEY_SIZE_UNLIMITED
+ && key->size != m_config.key_size) {
+ ham_trace(("invalid key size (%u instead of %u)",
+ key->size, m_config.key_size));
+ return (HAM_INV_KEY_SIZE);
+ }
+ }
+
+ if (!txn && (get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ local_txn = begin_temp_txn();
+ context.txn = local_txn;
+ }
+
+ st = erase_impl(&context, cursor, key, flags);
+ return (finalize(&context, st, local_txn));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::find(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ Context context(lenv(), (LocalTransaction *)txn, this);
+
+ try {
+ ham_status_t st = 0;
+
+ /* Duplicates AND Transactions require a Cursor because only
+ * Cursors can build lists of duplicates.
+ * TODO not exception safe - if find() throws then the cursor is not closed
+ */
+ if (!cursor
+ && (get_flags() & (HAM_ENABLE_DUPLICATE_KEYS|HAM_ENABLE_TRANSACTIONS))) {
+ Cursor *c = cursor_create_impl(txn, 0);
+ st = find(c, txn, key, record, flags);
+ cursor_close_impl(c);
+ delete c;
+ return (st);
+ }
+
+ if (m_config.key_size != HAM_KEY_SIZE_UNLIMITED
+ && key->size != m_config.key_size) {
+ ham_trace(("invalid key size (%u instead of %u)",
+ key->size, m_config.key_size));
+ return (HAM_INV_KEY_SIZE);
+ }
+
+ // cursor: reset the dupecache, set to nil
+ // TODO merge both calls, only set to nil if find() was successful
+ if (cursor) {
+ cursor->clear_dupecache();
+ cursor->set_to_nil(Cursor::kBoth);
+ }
+
+ st = find_impl(&context, cursor, key, record, flags);
+ if (st)
+ return (finalize(&context, st, 0));
+
+ if (cursor) {
+ // make sure that txn-cursor and btree-cursor point to the same keys
+ if (get_flags() & HAM_ENABLE_TRANSACTIONS) {
+ bool is_equal;
+ (void)cursor->sync(&context, Cursor::kSyncOnlyEqualKeys, &is_equal);
+ if (!is_equal && cursor->is_coupled_to_txnop())
+ cursor->set_to_nil(Cursor::kBtree);
+ }
+
+ /* if the key has duplicates: build a duplicate table, then couple to the
+ * first/oldest duplicate */
+ if (get_flags() & HAM_ENABLE_DUPLICATES)
+ cursor->clear_dupecache();
+
+ if (cursor->get_dupecache_count(&context)) {
+ DupeCacheLine *e = cursor->get_dupecache()->get_first_element();
+ if (e->use_btree())
+ cursor->couple_to_btree();
+ else
+ cursor->couple_to_txnop();
+ cursor->couple_to_dupe(1); // 1-based index!
+ if (record) { // TODO don't copy record if it was already
+ // copied in find_impl
+ if (cursor->is_coupled_to_txnop())
+ cursor->get_txn_cursor()->copy_coupled_record(record);
+ else {
+ Transaction *txn = cursor->get_txn();
+ st = cursor->get_btree_cursor()->move(&context, 0, 0, record,
+ &record_arena(txn), 0);
+ }
+ }
+ }
+
+ /* set a flag that the cursor just completed an Insert-or-find
+ * operation; this information is needed in ham_cursor_move */
+ cursor->set_lastop(Cursor::kLookupOrInsert);
+ }
+
+ return (finalize(&context, st, 0));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+Cursor *
+LocalDatabase::cursor_create_impl(Transaction *txn, uint32_t flags)
+{
+ return (new Cursor(this, txn, flags));
+}
+
+Cursor *
+LocalDatabase::cursor_clone_impl(Cursor *src)
+{
+ return (new Cursor(*src));
+}
+
+ham_status_t
+LocalDatabase::cursor_get_record_count(Cursor *cursor, uint32_t flags,
+ uint32_t *pcount)
+{
+ try {
+ Context context(lenv(), (LocalTransaction *)cursor->get_txn(), this);
+ *pcount = cursor->get_record_count(&context, flags);
+ return (0);
+ }
+ catch (Exception &ex) {
+ *pcount = 0;
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::cursor_get_duplicate_position(Cursor *cursor,
+ uint32_t *pposition)
+{
+ try {
+ *pposition = cursor->get_duplicate_position();
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::cursor_get_record_size(Cursor *cursor, uint64_t *psize)
+{
+ try {
+ Context context(lenv(), (LocalTransaction *)cursor->get_txn(), this);
+ *psize = cursor->get_record_size(&context);
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::cursor_overwrite(Cursor *cursor,
+ ham_record_t *record, uint32_t flags)
+{
+ Context context(lenv(), (LocalTransaction *)cursor->get_txn(), this);
+
+ try {
+ ham_status_t st = 0;
+ Transaction *local_txn = 0;
+
+ /* purge cache if necessary */
+ lenv()->page_manager()->purge_cache(&context);
+
+ /* if user did not specify a transaction, but transactions are enabled:
+ * create a temporary one */
+ if (!cursor->get_txn() && (get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ local_txn = begin_temp_txn();
+ context.txn = (LocalTransaction *)local_txn;
+ }
+
+ /* this function will do all the work */
+ st = cursor->overwrite(&context, cursor->get_txn()
+ ? cursor->get_txn()
+ : local_txn,
+ record, flags);
+ return (finalize(&context, st, local_txn));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::cursor_move(Cursor *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ try {
+ Context context(lenv(), (LocalTransaction *)cursor->get_txn(),
+ this);
+
+ return (cursor_move_impl(&context, cursor, key, record, flags));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+LocalDatabase::cursor_move_impl(Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags)
+{
+ /* purge cache if necessary */
+ lenv()->page_manager()->purge_cache(context);
+
+ /*
+ * if the cursor was never used before and the user requests a NEXT then
+ * move the cursor to FIRST; if the user requests a PREVIOUS we set it
+   * to LAST instead.
+ *
+ * if the cursor was already used but is nil then we've reached EOF,
+ * and a NEXT actually tries to move to the LAST key (and PREVIOUS
+ * moves to FIRST)
+ *
+ * TODO the btree-cursor has identical code which can be removed
+ */
+ if (cursor->is_nil(0)) {
+ if (flags & HAM_CURSOR_NEXT) {
+ flags &= ~HAM_CURSOR_NEXT;
+ if (cursor->is_first_use())
+ flags |= HAM_CURSOR_FIRST;
+ else
+ flags |= HAM_CURSOR_LAST;
+ }
+ else if (flags & HAM_CURSOR_PREVIOUS) {
+ flags &= ~HAM_CURSOR_PREVIOUS;
+ if (cursor->is_first_use())
+ flags |= HAM_CURSOR_LAST;
+ else
+ flags |= HAM_CURSOR_FIRST;
+ }
+ }
+
+ ham_status_t st = 0;
+
+ /* in non-transactional mode - just call the btree function and return */
+ if (!(get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ return (cursor->get_btree_cursor()->move(context,
+ key, &key_arena(context->txn),
+ record, &record_arena(context->txn), flags));
+ }
+
+ /* everything else is handled by the cursor function */
+ st = cursor->move(context, key, record, flags);
+
+ /* store the direction */
+ if (flags & HAM_CURSOR_NEXT)
+ cursor->set_lastop(HAM_CURSOR_NEXT);
+ else if (flags & HAM_CURSOR_PREVIOUS)
+ cursor->set_lastop(HAM_CURSOR_PREVIOUS);
+ else
+ cursor->set_lastop(0);
+
+ if (st) {
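+    // a key that was erased in a not-yet-flushed transaction is reported to
+    // the caller as HAM_KEY_NOT_FOUND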
+ if (st == HAM_KEY_ERASED_IN_TXN)
+ st = HAM_KEY_NOT_FOUND;
+ /* trigger a sync when the function is called again */
+ cursor->set_lastop(0);
+ return (st);
+ }
+
+ return (0);
+}
+
+void
+LocalDatabase::cursor_close_impl(Cursor *cursor)
+{
+ cursor->close();
+}
+
+ham_status_t
+LocalDatabase::close_impl(uint32_t flags)
+{
+ Context context(lenv(), 0, this);
+
+ /* check if this database is modified by an active transaction */
+ if (m_txn_index) {
+ TransactionNode *node = m_txn_index->get_first();
+ while (node) {
+ TransactionOperation *op = node->get_newest_op();
+ while (op) {
+ Transaction *optxn = op->get_txn();
+ if (!optxn->is_committed() && !optxn->is_aborted()) {
+ ham_trace(("cannot close a Database that is modified by "
+ "a currently active Transaction"));
+ return (set_error(HAM_TXN_STILL_OPEN));
+ }
+ op = op->get_previous_in_node();
+ }
+ node = node->get_next_sibling();
+ }
+ }
+
+ /* in-memory-database: free all allocated blobs */
+ if (m_btree_index && m_env->get_flags() & HAM_IN_MEMORY)
+ m_btree_index->release(&context);
+
+ /*
+ * flush all pages of this database (but not the header page,
+ * it's still required and will be flushed below)
+ */
+ lenv()->page_manager()->close_database(&context, this);
+
+ return (0);
+}
+
+void
+LocalDatabase::increment_dupe_index(Context *context, TransactionNode *node,
+ Cursor *skip, uint32_t start)
+{
+ Cursor *c = m_cursor_list;
+
+ while (c) {
+ bool hit = false;
+
+ if (c == skip || c->is_nil(0))
+ goto next;
+
+ /* if cursor is coupled to an op in the same node: increment
+ * duplicate index (if required) */
+ if (c->is_coupled_to_txnop()) {
+ TransactionCursor *txnc = c->get_txn_cursor();
+ TransactionNode *n = txnc->get_coupled_op()->get_node();
+ if (n == node)
+ hit = true;
+ }
+ /* if cursor is coupled to the same key in the btree: increment
+ * duplicate index (if required) */
+ else if (c->get_btree_cursor()->points_to(context, node->get_key())) {
+ hit = true;
+ }
+
+ if (hit) {
+ if (c->get_dupecache_index() > start)
+ c->set_dupecache_index(c->get_dupecache_index() + 1);
+ }
+
+next:
+ c = c->get_next();
+ }
+}
+
+void
+LocalDatabase::nil_all_cursors_in_node(LocalTransaction *txn, Cursor *current,
+ TransactionNode *node)
+{
+ TransactionOperation *op = node->get_newest_op();
+ while (op) {
+ TransactionCursor *cursor = op->cursor_list();
+ while (cursor) {
+ Cursor *parent = cursor->get_parent();
+      // is the current cursor coupled to a duplicate? then adjust the
+ // coupled duplicate index of all cursors which point to a duplicate
+ if (current) {
+ if (current->get_dupecache_index()) {
+ if (current->get_dupecache_index() < parent->get_dupecache_index()) {
+ parent->set_dupecache_index(parent->get_dupecache_index() - 1);
+ cursor = cursor->get_coupled_next();
+ continue;
+ }
+ else if (current->get_dupecache_index() > parent->get_dupecache_index()) {
+ cursor = cursor->get_coupled_next();
+ continue;
+ }
+ // else fall through
+ }
+ }
+ parent->couple_to_btree(); // TODO merge these two lines
+ parent->set_to_nil(Cursor::kTxn);
+ // set a flag that the cursor just completed an Insert-or-find
+ // operation; this information is needed in ham_cursor_move
+ // (in this aspect, an erase is the same as insert/find)
+ parent->set_lastop(Cursor::kLookupOrInsert);
+
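+      // set_to_nil(kTxn) removes |parent| from this op's cursor list, so
+      // restart the iteration from the (new) head of the list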
+ cursor = op->cursor_list();
+ }
+
+ op = op->get_previous_in_node();
+ }
+}
+
+ham_status_t
+LocalDatabase::copy_record(LocalDatabase *db, Transaction *txn,
+ TransactionOperation *op, ham_record_t *record)
+{
+ ByteArray *arena = &db->record_arena(txn);
+
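+  // use the database's (or the txn's) record arena unless the caller
+  // supplied its own buffer (HAM_RECORD_USER_ALLOC)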
+ if (!(record->flags & HAM_RECORD_USER_ALLOC)) {
+ arena->resize(op->get_record()->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, op->get_record()->data, op->get_record()->size);
+ record->size = op->get_record()->size;
+ return (0);
+}
+
+void
+LocalDatabase::nil_all_cursors_in_btree(Context *context, Cursor *current,
+ ham_key_t *key)
+{
+ Cursor *c = m_cursor_list;
+
+  /* foreach cursor in this database:
+   *  if it's nil or coupled to the txn: skip it
+   *  if its btree cursor is coupled to a page (or carries an uncoupled
+   *    key): compare the keys; set the cursor to nil if they are identical
+   *    (TODO - improve performance by nil'ing all other cursors from the
+   *    same btree page)
+   *
+   * do NOT nil the current cursor - it's coupled to the key, and the
+   * coupled key is still needed by the caller
+   */
+ while (c) {
+ if (c->is_nil(0) || c == current)
+ goto next;
+ if (c->is_coupled_to_txnop())
+ goto next;
+
+ if (c->get_btree_cursor()->points_to(context, key)) {
+      /* is the current cursor coupled to a duplicate? then adjust the
+ * coupled duplicate index of all cursors which point to a
+ * duplicate */
+ if (current) {
+ if (current->get_dupecache_index()) {
+ if (current->get_dupecache_index() < c->get_dupecache_index()) {
+ c->set_dupecache_index(c->get_dupecache_index() - 1);
+ goto next;
+ }
+ else if (current->get_dupecache_index() > c->get_dupecache_index()) {
+ goto next;
+ }
+ /* else fall through */
+ }
+ }
+ c->set_to_nil(0);
+ }
+next:
+ c = c->get_next();
+ }
+}
+
+ham_status_t
+LocalDatabase::flush_txn_operation(Context *context, LocalTransaction *txn,
+ TransactionOperation *op)
+{
+ ham_status_t st = 0;
+ TransactionNode *node = op->get_node();
+
+ /*
+ * depending on the type of the operation: actually perform the
+ * operation on the btree
+ *
+ * if the txn-op has a cursor attached, then all (txn)cursors
+ * which are coupled to this op have to be uncoupled, and their
+ * parent (btree) cursor must be coupled to the btree item instead.
+ */
+ if ((op->get_flags() & TransactionOperation::kInsert)
+ || (op->get_flags() & TransactionOperation::kInsertOverwrite)
+ || (op->get_flags() & TransactionOperation::kInsertDuplicate)) {
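+    // a kInsertDuplicate op is replayed with HAM_DUPLICATE; all other insert
+    // ops are replayed with HAM_OVERWRITE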
+ uint32_t additional_flag =
+ (op->get_flags() & TransactionOperation::kInsertDuplicate)
+ ? HAM_DUPLICATE
+ : HAM_OVERWRITE;
+ if (!op->cursor_list()) {
+ st = m_btree_index->insert(context, 0, node->get_key(), op->get_record(),
+ op->get_orig_flags() | additional_flag);
+ }
+ else {
+ TransactionCursor *tc1 = op->cursor_list();
+ Cursor *c1 = tc1->get_parent();
+ /* pick the first cursor, get the parent/btree cursor and
+ * insert the key/record pair in the btree. The btree cursor
+ * then will be coupled to this item. */
+ st = m_btree_index->insert(context, c1, node->get_key(), op->get_record(),
+ op->get_orig_flags() | additional_flag);
+ if (!st) {
+ /* uncouple the cursor from the txn-op, and remove it */
+ c1->couple_to_btree(); // TODO merge these two calls
+ c1->set_to_nil(Cursor::kTxn);
+
+ /* all other (btree) cursors need to be coupled to the same
+ * item as the first one. */
+ TransactionCursor *tc2;
+ while ((tc2 = op->cursor_list())) {
+ Cursor *c2 = tc2->get_parent();
+ c2->get_btree_cursor()->clone(c1->get_btree_cursor());
+ c2->couple_to_btree(); // TODO merge these two calls
+ c2->set_to_nil(Cursor::kTxn);
+ }
+ }
+ }
+ }
+ else if (op->get_flags() & TransactionOperation::kErase) {
+ st = m_btree_index->erase(context, 0, node->get_key(),
+ op->get_referenced_dupe(), op->get_flags());
+ if (st == HAM_KEY_NOT_FOUND)
+ st = 0;
+ }
+
+ return (st);
+}
+
+ham_status_t
+LocalDatabase::drop(Context *context)
+{
+ m_btree_index->release(context);
+ return (0);
+}
+
+ham_status_t
+LocalDatabase::insert_impl(Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags)
+{
+ ham_status_t st = 0;
+
+ lenv()->page_manager()->purge_cache(context);
+
+ /*
+ * if transactions are enabled: only insert the key/record pair into
+ * the Transaction structure. Otherwise immediately write to the btree.
+ */
+ if (context->txn || m_env->get_flags() & HAM_ENABLE_TRANSACTIONS)
+ st = insert_txn(context, key, record, flags, cursor
+ ? cursor->get_txn_cursor()
+ : 0);
+ else
+ st = m_btree_index->insert(context, cursor, key, record, flags);
+
+ // couple the cursor to the inserted key
+ if (st == 0 && cursor) {
+ if (m_env->get_flags() & HAM_ENABLE_TRANSACTIONS) {
+ DupeCache *dc = cursor->get_dupecache();
+ // TODO required? should have happened in insert_txn
+ cursor->couple_to_txnop();
+ /* the cursor is coupled to the txn-op; nil the btree-cursor to
+ * trigger a sync() call when fetching the duplicates */
+ // TODO merge with the line above
+ cursor->set_to_nil(Cursor::kBtree);
+
+ /* reset the dupecache, otherwise cursor->get_dupecache_count()
+ * does not update the dupecache correctly */
+ dc->clear();
+
+ /* if duplicate keys are enabled: set the duplicate index of
+ * the new key */
+ if (st == 0 && cursor->get_dupecache_count(context)) {
+ TransactionOperation *op = cursor->get_txn_cursor()->get_coupled_op();
+ ham_assert(op != 0);
+
+ for (uint32_t i = 0; i < dc->get_count(); i++) {
+ DupeCacheLine *l = dc->get_element(i);
+ if (!l->use_btree() && l->get_txn_op() == op) {
+ cursor->set_dupecache_index(i + 1);
+ break;
+ }
+ }
+ }
+ }
+ else {
+ // TODO required? should have happened in BtreeInsertAction
+ cursor->couple_to_btree();
+ }
+
+ /* set a flag that the cursor just completed an Insert-or-find
+ * operation; this information is needed in ham_cursor_move */
+ cursor->set_lastop(Cursor::kLookupOrInsert);
+ }
+
+ return (st);
+}
+
+ham_status_t
+LocalDatabase::find_impl(Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags)
+{
+ /* purge cache if necessary */
+ lenv()->page_manager()->purge_cache(context);
+
+ /*
+ * if transactions are enabled: read keys from transaction trees,
+ * otherwise read immediately from disk
+ */
+ if (context->txn || m_env->get_flags() & HAM_ENABLE_TRANSACTIONS)
+ return (find_txn(context, cursor, key, record, flags));
+
+ return (m_btree_index->find(context, cursor, key, &key_arena(context->txn),
+ record, &record_arena(context->txn), flags));
+}
+
+ham_status_t
+LocalDatabase::erase_impl(Context *context, Cursor *cursor, ham_key_t *key,
+ uint32_t flags)
+{
+ ham_status_t st = 0;
+
+ /*
+   * if transactions are enabled: append an 'erase key' operation into
+ * the txn tree; otherwise immediately erase the key from disk
+ */
+ if (context->txn || m_env->get_flags() & HAM_ENABLE_TRANSACTIONS) {
+ if (cursor) {
+ /*
+ * !!
+ * we have two cases:
+ *
+ * 1. the cursor is coupled to a btree item (or uncoupled, but not nil)
+ * and the txn_cursor is nil; in that case, we have to
+ * - uncouple the btree cursor
+ * - insert the erase-op for the key which is used by the btree cursor
+ *
+ * 2. the cursor is coupled to a txn-op; in this case, we have to
+ * - insert the erase-op for the key which is used by the txn-op
+ *
+ * TODO clean up this whole mess. code should be like
+ *
+ * if (txn)
+ * erase_txn(txn, cursor->get_key(), 0, cursor->get_txn_cursor());
+ */
+ /* case 1 described above */
+ if (cursor->is_coupled_to_btree()) {
+ cursor->set_to_nil(Cursor::kTxn);
+ cursor->get_btree_cursor()->uncouple_from_page(context);
+ st = erase_txn(context, cursor->get_btree_cursor()->get_uncoupled_key(),
+ 0, cursor->get_txn_cursor());
+ }
+ /* case 2 described above */
+ else {
+ // TODO this line is ugly
+ st = erase_txn(context,
+ cursor->get_txn_cursor()->get_coupled_op()->get_key(),
+ 0, cursor->get_txn_cursor());
+ }
+ }
+ else {
+ st = erase_txn(context, key, flags, 0);
+ }
+ }
+ else {
+ st = m_btree_index->erase(context, cursor, key, 0, flags);
+ }
+
+ /* on success: verify that cursor is now nil */
+ if (cursor && st == 0) {
+ cursor->set_to_nil(0);
+ cursor->couple_to_btree(); // TODO why?
+ ham_assert(cursor->get_txn_cursor()->is_nil());
+ ham_assert(cursor->is_nil(0));
+ cursor->clear_dupecache(); // TODO merge with set_to_nil()
+ }
+
+ return (st);
+}
+
+ham_status_t
+LocalDatabase::finalize(Context *context, ham_status_t status,
+ Transaction *local_txn)
+{
+ LocalEnvironment *env = lenv();
+
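+  // a failed operation aborts the temporary transaction (if one was created);
+  // a successful one commits it, or - when recovery is enabled without
+  // transactions - flushes the collected changeset to the log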
+ if (status) {
+ if (local_txn) {
+ context->changeset.clear();
+ env->txn_manager()->abort(local_txn);
+ }
+ return (status);
+ }
+
+ if (local_txn) {
+ context->changeset.clear();
+ env->txn_manager()->commit(local_txn);
+ }
+ else if (env->get_flags() & HAM_ENABLE_RECOVERY
+ && !(env->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ context->changeset.flush(env->next_lsn());
+ }
+ return (0);
+}
+
+LocalTransaction *
+LocalDatabase::begin_temp_txn()
+{
+ LocalTransaction *txn;
+ ham_status_t st = lenv()->txn_begin((Transaction **)&txn, 0,
+ HAM_TXN_TEMPORARY | HAM_DONT_LOCK);
+ if (st)
+ throw Exception(st);
+ return (txn);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4db/db_local.h b/plugins/Dbx_kv/src/hamsterdb/src/4db/db_local.h
new file mode 100644
index 0000000000..0d08bd79ed
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4db/db_local.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: no
+ */
+
+#ifndef HAM_DB_LOCAL_H
+#define HAM_DB_LOCAL_H
+
+#include "0root/root.h"
+
+#include <limits>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/scoped_ptr.h"
+#include "3btree/btree_index.h"
+#include "4txn/txn_local.h"
+#include "4db/db.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class TransactionNode;
+class TransactionIndex;
+class TransactionCursor;
+class TransactionOperation;
+class LocalEnvironment;
+class LocalTransaction;
+
+template<typename T>
+class RecordNumberFixture;
+
+//
+// The database implementation for local file access
+//
+class LocalDatabase : public Database {
+ public:
+ enum {
+ // The default threshold for inline records
+ kInlineRecordThreshold = 32
+ };
+
+ // Constructor
+ LocalDatabase(Environment *env, DatabaseConfiguration &config)
+ : Database(env, config), m_recno(0), m_cmp_func(0) {
+ }
+
+ // Returns the btree index
+ BtreeIndex *btree_index() {
+ return (m_btree_index.get());
+ }
+
+ // Returns the transactional index
+ TransactionIndex *txn_index() {
+ return (m_txn_index.get());
+ }
+
+ // Returns the LocalEnvironment instance
+ LocalEnvironment *lenv() {
+ return ((LocalEnvironment *)m_env);
+ }
+
+ // Creates a new Database
+ ham_status_t create(Context *context, PBtreeHeader *btree_header);
+
+ // Opens an existing Database
+ ham_status_t open(Context *context, PBtreeHeader *btree_header);
+
+ // Erases this Database
+ ham_status_t drop(Context *context);
+
+ // Fills in the current metrics
+ virtual void fill_metrics(ham_env_metrics_t *metrics);
+
+ // Returns Database parameters (ham_db_get_parameters)
+ virtual ham_status_t get_parameters(ham_parameter_t *param);
+
+ // Checks Database integrity (ham_db_check_integrity)
+ virtual ham_status_t check_integrity(uint32_t flags);
+
+ // Returns the number of keys
+ virtual ham_status_t count(Transaction *txn, bool distinct,
+ uint64_t *pcount);
+
+ // Scans the whole database, applies a processor function
+ virtual ham_status_t scan(Transaction *txn, ScanVisitor *visitor,
+ bool distinct);
+
+ // Inserts a key/value pair (ham_db_insert, ham_cursor_insert)
+ virtual ham_status_t insert(Cursor *cursor, Transaction *txn,
+ ham_key_t *key, ham_record_t *record, uint32_t flags);
+
+  // Erases a key/value pair (ham_db_erase, ham_cursor_erase)
+ virtual ham_status_t erase(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ uint32_t flags);
+
+ // Lookup of a key/value pair (ham_db_find, ham_cursor_find)
+ virtual ham_status_t find(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
+ // Returns number of duplicates (ham_cursor_get_record_count)
+ virtual ham_status_t cursor_get_record_count(Cursor *cursor, uint32_t flags,
+ uint32_t *pcount);
+
+ // Returns position in duplicate list (ham_cursor_get_duplicate_position)
+ virtual ham_status_t cursor_get_duplicate_position(Cursor *cursor,
+ uint32_t *pposition);
+
+ // Get current record size (ham_cursor_get_record_size)
+ virtual ham_status_t cursor_get_record_size(Cursor *cursor,
+ uint64_t *psize);
+
+ // Overwrites the record of a cursor (ham_cursor_overwrite)
+ virtual ham_status_t cursor_overwrite(Cursor *cursor,
+ ham_record_t *record, uint32_t flags);
+
+ // Moves a cursor, returns key and/or record (ham_cursor_move)
+ virtual ham_status_t cursor_move(Cursor *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
+ // Inserts a key/record pair in a txn node; if cursor is not NULL it will
+ // be attached to the new txn_op structure
+ // TODO this should be private
+ ham_status_t insert_txn(Context *context, ham_key_t *key,
+ ham_record_t *record, uint32_t flags,
+ TransactionCursor *cursor);
+
+ // Returns the default comparison function
+ ham_compare_func_t compare_func() {
+ return (m_cmp_func);
+ }
+
+ // Sets the default comparison function (ham_db_set_compare_func)
+ ham_status_t set_compare_func(ham_compare_func_t f) {
+ if (m_config.key_type != HAM_TYPE_CUSTOM) {
+ ham_trace(("ham_set_compare_func only allowed for HAM_TYPE_CUSTOM "
+ "databases!"));
+ return (HAM_INV_PARAMETER);
+ }
+ m_cmp_func = f;
+ return (0);
+ }
+
+ // Flushes a TransactionOperation to the btree
+ // TODO should be private
+ ham_status_t flush_txn_operation(Context *context, LocalTransaction *txn,
+ TransactionOperation *op);
+
+ protected:
+ friend class Cursor;
+
+ // Copies the ham_record_t structure from |op| into |record|
+ static ham_status_t copy_record(LocalDatabase *db, Transaction *txn,
+ TransactionOperation *op, ham_record_t *record);
+
+ // Creates a cursor; this is the actual implementation
+ virtual Cursor *cursor_create_impl(Transaction *txn, uint32_t flags);
+
+ // Clones a cursor; this is the actual implementation
+ virtual Cursor *cursor_clone_impl(Cursor *src);
+
+ // Closes a cursor; this is the actual implementation
+ virtual void cursor_close_impl(Cursor *c);
+
+ // Closes a database; this is the actual implementation
+ virtual ham_status_t close_impl(uint32_t flags);
+
+ private:
+ friend struct DbFixture;
+ friend struct HamsterdbFixture;
+ friend struct ExtendedKeyFixture;
+ friend class RecordNumberFixture<uint32_t>;
+ friend class RecordNumberFixture<uint64_t>;
+
+ // Erases a key/record pair from a txn; on success, cursor will be set to
+ // nil
+ ham_status_t erase_txn(Context *context, ham_key_t *key, uint32_t flags,
+ TransactionCursor *cursor);
+
+ // Lookup of a key/record pair in the Transaction index and in the btree,
+ // if transactions are disabled/not successful; copies the
+ // record into |record|. Also performs approx. matching.
+ ham_status_t find_txn(Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags);
+
+ // Moves a cursor, returns key and/or record (ham_cursor_move)
+ ham_status_t cursor_move_impl(Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags);
+
+ // The actual implementation of insert()
+ ham_status_t insert_impl(Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags);
+
+ // The actual implementation of find()
+ ham_status_t find_impl(Context *context, Cursor *cursor,
+ ham_key_t *key, ham_record_t *record, uint32_t flags);
+
+ // The actual implementation of erase()
+ ham_status_t erase_impl(Context *context, Cursor *cursor,
+ ham_key_t *key, uint32_t flags);
+
+ // Finalizes an operation by committing or aborting the |local_txn|
+ // and clearing or flushing the Changeset.
+ // Returns |status|.
+ ham_status_t finalize(Context *context, ham_status_t status,
+ Transaction *local_txn);
+
+ // Begins a new temporary Transaction
+ LocalTransaction *begin_temp_txn();
+
+    // Returns the next record number; throws HAM_LIMITS_REACHED on overflow
+ uint64_t next_record_number() {
+ m_recno++;
+ if (m_config.flags & HAM_RECORD_NUMBER32
+ && m_recno > std::numeric_limits<uint32_t>::max())
+ throw Exception(HAM_LIMITS_REACHED);
+ else if (m_recno == 0)
+ throw Exception(HAM_LIMITS_REACHED);
+ return (m_recno);
+ }
+
+ // Checks if an insert operation conflicts with another txn; this is the
+ // case if the same key is modified by another active txn.
+ ham_status_t check_insert_conflicts(Context *context, TransactionNode *node,
+ ham_key_t *key, uint32_t flags);
+
+ // Checks if an erase operation conflicts with another txn; this is the
+ // case if the same key is modified by another active txn.
+ ham_status_t check_erase_conflicts(Context *context, TransactionNode *node,
+ ham_key_t *key, uint32_t flags);
+
+ // Increments dupe index of all cursors with a dupe index > |start|;
+ // only cursor |skip| is ignored
+ void increment_dupe_index(Context *context, TransactionNode *node,
+ Cursor *skip, uint32_t start);
+
+ // Sets all cursors attached to a TransactionNode to nil
+ void nil_all_cursors_in_node(LocalTransaction *txn, Cursor *current,
+ TransactionNode *node);
+
+ // Sets all cursors to nil if they point to |key| in the btree index
+ void nil_all_cursors_in_btree(Context *context, Cursor *current,
+ ham_key_t *key);
+
+ // the current record number
+ uint64_t m_recno;
+
+ // the btree index
+ ScopedPtr<BtreeIndex> m_btree_index;
+
+ // the transaction index
+ ScopedPtr<TransactionIndex> m_txn_index;
+
+ // the comparison function
+ ham_compare_func_t m_cmp_func;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_DB_LOCAL_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4db/db_remote.cc b/plugins/Dbx_kv/src/hamsterdb/src/4db/db_remote.cc
new file mode 100644
index 0000000000..58bd49f4db
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4db/db_remote.cc
@@ -0,0 +1,635 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef HAM_ENABLE_REMOTE
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/scoped_ptr.h"
+#include "2protobuf/protocol.h"
+#include "4db/db_remote.h"
+#include "4env/env_remote.h"
+#include "4txn/txn_remote.h"
+#include "4cursor/cursor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+ham_status_t
+RemoteDatabase::get_parameters(ham_parameter_t *param)
+{
+ try {
+ RemoteEnvironment *env = renv();
+
+ Protocol request(Protocol::DB_GET_PARAMETERS_REQUEST);
+ request.mutable_db_get_parameters_request()->set_db_handle(m_remote_handle);
+
+ ham_parameter_t *p = param;
+ if (p) {
+ for (; p->name; p++)
+ request.mutable_db_get_parameters_request()->add_names(p->name);
+ }
+
+ ScopedPtr<Protocol> reply(env->perform_request(&request));
+
+ ham_assert(reply->has_db_get_parameters_reply());
+
+ ham_status_t st = reply->db_get_parameters_reply().status();
+ if (st)
+ throw Exception(st);
+
+ p = param;
+ while (p && p->name) {
+ switch (p->name) {
+ case HAM_PARAM_FLAGS:
+ ham_assert(reply->db_get_parameters_reply().has_flags());
+ p->value = reply->db_get_parameters_reply().flags();
+ break;
+ case HAM_PARAM_KEY_SIZE:
+ ham_assert(reply->db_get_parameters_reply().has_key_size());
+ p->value = reply->db_get_parameters_reply().key_size();
+ break;
+ case HAM_PARAM_RECORD_SIZE:
+ ham_assert(reply->db_get_parameters_reply().has_record_size());
+ p->value = reply->db_get_parameters_reply().record_size();
+ break;
+ case HAM_PARAM_KEY_TYPE:
+ ham_assert(reply->db_get_parameters_reply().has_key_type());
+ p->value = reply->db_get_parameters_reply().key_type();
+ break;
+ case HAM_PARAM_DATABASE_NAME:
+ ham_assert(reply->db_get_parameters_reply().has_dbname());
+ p->value = reply->db_get_parameters_reply().dbname();
+ break;
+ case HAM_PARAM_MAX_KEYS_PER_PAGE:
+ ham_assert(reply->db_get_parameters_reply().has_keys_per_page());
+ p->value = reply->db_get_parameters_reply().keys_per_page();
+ break;
+ default:
+ ham_trace(("unknown parameter %d", (int)p->name));
+ break;
+ }
+ p++;
+ }
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::check_integrity(uint32_t flags)
+{
+ try {
+ RemoteEnvironment *env = renv();
+
+ Protocol request(Protocol::DB_CHECK_INTEGRITY_REQUEST);
+ request.mutable_db_check_integrity_request()->set_db_handle(m_remote_handle);
+ request.mutable_db_check_integrity_request()->set_flags(flags);
+
+    ScopedPtr<Protocol> reply(env->perform_request(&request));
+
+ ham_assert(reply->has_db_check_integrity_reply());
+
+ return (reply->db_check_integrity_reply().status());
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::count(Transaction *htxn, bool distinct, uint64_t *pcount)
+{
+ try {
+ RemoteEnvironment *env = renv();
+ RemoteTransaction *txn = dynamic_cast<RemoteTransaction *>(htxn);
+
+ SerializedWrapper request;
+ request.id = kDbGetKeyCountRequest;
+ request.db_count_request.db_handle = m_remote_handle;
+ request.db_count_request.txn_handle = txn
+ ? txn->get_remote_handle()
+ : 0;
+ request.db_count_request.distinct = distinct;
+
+ SerializedWrapper reply;
+ env->perform_request(&request, &reply);
+
+ ham_assert(reply.id == kDbGetKeyCountReply);
+
+ ham_status_t st = reply.db_count_reply.status;
+ if (st)
+ return (st);
+
+ *pcount = reply.db_count_reply.keycount;
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::insert(Cursor *cursor, Transaction *htxn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ try {
+ bool send_key = true;
+ RemoteEnvironment *env = renv();
+ RemoteTransaction *txn = dynamic_cast<RemoteTransaction *>(htxn);
+
+ ByteArray *arena = &key_arena(txn);
+
+ /* recno: do not send the key */
+ if (get_flags() & HAM_RECORD_NUMBER32) {
+ send_key = false;
+ if (!key->data) {
+ arena->resize(sizeof(uint32_t));
+ key->data = arena->get_ptr();
+ key->size = sizeof(uint32_t);
+ }
+ }
+ else if (get_flags() & HAM_RECORD_NUMBER64) {
+ send_key = false;
+ if (!key->data) {
+ arena->resize(sizeof(uint64_t));
+ key->data = arena->get_ptr();
+ key->size = sizeof(uint64_t);
+ }
+ }
+
+ SerializedWrapper request;
+ SerializedWrapper reply;
+
+ if (cursor) {
+ request.id = kCursorInsertRequest;
+ request.cursor_insert_request.cursor_handle = cursor->get_remote_handle();
+ request.cursor_insert_request.flags = flags;
+ if (send_key) {
+ request.cursor_insert_request.has_key = true;
+ request.cursor_insert_request.key.has_data = true;
+ request.cursor_insert_request.key.data.size = key->size;
+ request.cursor_insert_request.key.data.value = (uint8_t *)key->data;
+ request.cursor_insert_request.key.flags = key->flags;
+ request.cursor_insert_request.key.intflags = key->_flags;
+ }
+ if (record) {
+ request.cursor_insert_request.has_record = true;
+ request.cursor_insert_request.record.has_data = true;
+ request.cursor_insert_request.record.data.size = record->size;
+ request.cursor_insert_request.record.data.value = (uint8_t *)record->data;
+ request.cursor_insert_request.record.flags = record->flags;
+ request.cursor_insert_request.record.partial_size = record->partial_size;
+ request.cursor_insert_request.record.partial_offset = record->partial_offset;
+ }
+
+ env->perform_request(&request, &reply);
+
+ ham_assert(reply.id == kCursorInsertReply);
+
+ ham_status_t st = reply.cursor_insert_reply.status;
+ if (st)
+ return (st);
+
+ if (reply.cursor_insert_reply.has_key) {
+ ham_assert(key->size == reply.cursor_insert_reply.key.data.size);
+ ham_assert(key->data != 0);
+ ::memcpy(key->data, reply.cursor_insert_reply.key.data.value, key->size);
+ }
+ }
+ else {
+ request.id = kDbInsertRequest;
+ request.db_insert_request.db_handle = m_remote_handle;
+ request.db_insert_request.txn_handle = txn ? txn->get_remote_handle() : 0;
+ request.db_insert_request.flags = flags;
+ if (key && !(get_flags() & (HAM_RECORD_NUMBER32 | HAM_RECORD_NUMBER64))) {
+ request.db_insert_request.has_key = true;
+ request.db_insert_request.key.has_data = true;
+ request.db_insert_request.key.data.size = key->size;
+ request.db_insert_request.key.data.value = (uint8_t *)key->data;
+ request.db_insert_request.key.flags = key->flags;
+ request.db_insert_request.key.intflags = key->_flags;
+ }
+ if (record) {
+ request.db_insert_request.has_record = true;
+ request.db_insert_request.record.has_data = true;
+ request.db_insert_request.record.data.size = record->size;
+ request.db_insert_request.record.data.value = (uint8_t *)record->data;
+ request.db_insert_request.record.flags = record->flags;
+ request.db_insert_request.record.partial_size = record->partial_size;
+ request.db_insert_request.record.partial_offset = record->partial_offset;
+ }
+
+ env->perform_request(&request, &reply);
+
+ ham_assert(reply.id == kDbInsertReply);
+
+ ham_status_t st = reply.db_insert_reply.status;
+ if (st)
+ return (st);
+
+ if (reply.db_insert_reply.has_key) {
+ ham_assert(key->data != 0);
+ ham_assert(key->size == reply.db_insert_reply.key.data.size);
+ ::memcpy(key->data, reply.db_insert_reply.key.data.value, key->size);
+ }
+ }
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::erase(Cursor *cursor, Transaction *htxn, ham_key_t *key,
+ uint32_t flags)
+{
+ try {
+ if (cursor) {
+ SerializedWrapper request;
+ request.id = kCursorEraseRequest;
+ request.cursor_erase_request.cursor_handle = cursor->get_remote_handle();
+ request.cursor_erase_request.flags = flags;
+
+ SerializedWrapper reply;
+ renv()->perform_request(&request, &reply);
+ ham_assert(reply.id == kCursorEraseReply);
+ return (reply.cursor_erase_reply.status);
+ }
+
+ RemoteEnvironment *env = renv();
+ RemoteTransaction *txn = dynamic_cast<RemoteTransaction *>(htxn);
+
+ SerializedWrapper request;
+ request.id = kDbEraseRequest;
+ request.db_erase_request.db_handle = m_remote_handle;
+ request.db_erase_request.txn_handle = txn ? txn->get_remote_handle() : 0;
+ request.db_erase_request.flags = flags;
+ request.db_erase_request.key.has_data = true;
+ request.db_erase_request.key.data.size = key->size;
+ request.db_erase_request.key.data.value = (uint8_t *)key->data;
+ request.db_erase_request.key.flags = key->flags;
+ request.db_erase_request.key.intflags = key->_flags;
+
+ SerializedWrapper reply;
+ env->perform_request(&request, &reply);
+
+ ham_assert(reply.id == kDbEraseReply);
+
+ return (reply.db_erase_reply.status);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::find(Cursor *cursor, Transaction *htxn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ try {
+ if (cursor && !htxn)
+ htxn = cursor->get_txn();
+
+ RemoteEnvironment *env = renv();
+ RemoteTransaction *txn = dynamic_cast<RemoteTransaction *>(htxn);
+
+ SerializedWrapper request;
+ request.id = kDbFindRequest;
+ request.db_find_request.db_handle = m_remote_handle;
+ request.db_find_request.cursor_handle = cursor ? cursor->get_remote_handle() : 0;
+ request.db_find_request.txn_handle = txn ? txn->get_remote_handle() : 0;
+ request.db_find_request.flags = flags;
+ request.db_find_request.key.has_data = true;
+ request.db_find_request.key.data.size = key->size;
+ request.db_find_request.key.data.value = (uint8_t *)key->data;
+ request.db_find_request.key.flags = key->flags;
+ request.db_find_request.key.intflags = key->_flags;
+ if (record) {
+ request.db_find_request.has_record = true;
+ request.db_find_request.record.has_data = true;
+ request.db_find_request.record.data.size = record->size;
+ request.db_find_request.record.data.value = (uint8_t *)record->data;
+ request.db_find_request.record.flags = record->flags;
+ request.db_find_request.record.partial_size = record->partial_size;
+ request.db_find_request.record.partial_offset = record->partial_offset;
+ }
+
+ SerializedWrapper reply;
+ env->perform_request(&request, &reply);
+ ham_assert(reply.id == kDbFindReply);
+
+ ByteArray *pkey_arena = &key_arena(txn);
+ ByteArray *rec_arena = &record_arena(txn);
+
+ ham_status_t st = reply.db_find_reply.status;
+ if (st == 0) {
+ /* approx. matching: need to copy the _flags and the key data! */
+ if (reply.db_find_reply.has_key) {
+ ham_assert(key);
+ key->_flags = reply.db_find_reply.key.intflags;
+ key->size = (uint16_t)reply.db_find_reply.key.data.size;
+ if (!(key->flags & HAM_KEY_USER_ALLOC)) {
+ pkey_arena->resize(key->size);
+ key->data = pkey_arena->get_ptr();
+ }
+ ::memcpy(key->data, (void *)reply.db_find_reply.key.data.value,
+ key->size);
+ }
+ if (record && reply.db_find_reply.has_record) {
+ record->size = reply.db_find_reply.record.data.size;
+ if (!(record->flags & HAM_RECORD_USER_ALLOC)) {
+ rec_arena->resize(record->size);
+ record->data = rec_arena->get_ptr();
+ }
+ ::memcpy(record->data, (void *)reply.db_find_reply.record.data.value,
+ record->size);
+ }
+ }
+ return (st);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+Cursor *
+RemoteDatabase::cursor_create_impl(Transaction *htxn, uint32_t flags)
+{
+ RemoteTransaction *txn = dynamic_cast<RemoteTransaction *>(htxn);
+
+ SerializedWrapper request;
+ request.id = kCursorCreateRequest;
+ request.cursor_create_request.db_handle = m_remote_handle;
+ request.cursor_create_request.txn_handle = txn
+ ? txn->get_remote_handle()
+ : 0;
+ request.cursor_create_request.flags = flags;
+
+ SerializedWrapper reply;
+ renv()->perform_request(&request, &reply);
+ ham_assert(reply.id == kCursorCreateReply);
+ ham_status_t st = reply.cursor_create_reply.status;
+ if (st)
+ return (0);
+
+ Cursor *c = new Cursor((LocalDatabase *)this); // TODO this cast is evil!!
+ c->set_remote_handle(reply.cursor_create_reply.cursor_handle);
+ return (c);
+}
+
+Cursor *
+RemoteDatabase::cursor_clone_impl(Cursor *src)
+{
+ SerializedWrapper request;
+ request.id = kCursorCloneRequest;
+ request.cursor_clone_request.cursor_handle = src->get_remote_handle();
+
+ SerializedWrapper reply;
+ renv()->perform_request(&request, &reply);
+ ham_assert(reply.id == kCursorCloneReply);
+ ham_status_t st = reply.cursor_clone_reply.status;
+ if (st)
+ return (0);
+
+ Cursor *c = new Cursor(src->get_db());
+ c->set_remote_handle(reply.cursor_clone_reply.cursor_handle);
+ return (c);
+}
+
+ham_status_t
+RemoteDatabase::cursor_get_record_count(Cursor *cursor, uint32_t flags,
+ uint32_t *pcount)
+{
+ try {
+ RemoteEnvironment *env = renv();
+
+ SerializedWrapper request;
+ request.id = kCursorGetRecordCountRequest;
+ request.cursor_get_record_count_request.cursor_handle =
+ cursor->get_remote_handle();
+ request.cursor_get_record_count_request.flags = flags;
+
+ SerializedWrapper reply;
+ env->perform_request(&request, &reply);
+ ham_assert(reply.id == kCursorGetRecordCountReply);
+
+ ham_status_t st = reply.cursor_get_record_count_reply.status;
+ if (st == 0)
+ *pcount = reply.cursor_get_record_count_reply.count;
+ else
+ *pcount = 0;
+ return (st);
+ }
+ catch (Exception &ex) {
+ *pcount = 0;
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::cursor_get_duplicate_position(Cursor *cursor,
+ uint32_t *pposition)
+{
+ try {
+ RemoteEnvironment *env = renv();
+
+ SerializedWrapper request;
+ request.id = kCursorGetDuplicatePositionRequest;
+ request.cursor_get_duplicate_position_request.cursor_handle =
+ cursor->get_remote_handle();
+
+ SerializedWrapper reply;
+ env->perform_request(&request, &reply);
+ ham_assert(reply.id == kCursorGetDuplicatePositionReply);
+
+ ham_status_t st = reply.cursor_get_duplicate_position_reply.status;
+ if (st == 0)
+ *pposition = reply.cursor_get_duplicate_position_reply.position;
+ return (st);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::cursor_get_record_size(Cursor *cursor, uint64_t *psize)
+{
+ try {
+ RemoteEnvironment *env = renv();
+
+ SerializedWrapper request;
+ request.id = kCursorGetRecordSizeRequest;
+ request.cursor_get_record_size_request.cursor_handle =
+ cursor->get_remote_handle();
+
+ SerializedWrapper reply;
+ env->perform_request(&request, &reply);
+ ham_assert(reply.id == kCursorGetRecordSizeReply);
+
+ ham_status_t st = reply.cursor_get_record_size_reply.status;
+ if (st == 0)
+ *psize = reply.cursor_get_record_size_reply.size;
+    return (st);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::cursor_overwrite(Cursor *cursor,
+ ham_record_t *record, uint32_t flags)
+{
+ try {
+ RemoteEnvironment *env = renv();
+
+ SerializedWrapper request;
+ request.id = kCursorOverwriteRequest;
+ request.cursor_overwrite_request.cursor_handle = cursor->get_remote_handle();
+ request.cursor_overwrite_request.flags = flags;
+
+ if (record->size > 0) {
+ request.cursor_overwrite_request.record.has_data = true;
+ request.cursor_overwrite_request.record.data.size = record->size;
+ request.cursor_overwrite_request.record.data.value = (uint8_t *)record->data;
+ }
+ request.cursor_overwrite_request.record.flags = record->flags;
+ request.cursor_overwrite_request.record.partial_size = record->partial_size;
+ request.cursor_overwrite_request.record.partial_offset = record->partial_offset;
+
+ SerializedWrapper reply;
+ env->perform_request(&request, &reply);
+ ham_assert(reply.id == kCursorOverwriteReply);
+
+ return (reply.cursor_overwrite_reply.status);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+RemoteDatabase::cursor_move(Cursor *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ try {
+ RemoteEnvironment *env = renv();
+
+ RemoteTransaction *txn = dynamic_cast<RemoteTransaction *>(cursor->get_txn());
+ ByteArray *pkey_arena = &key_arena(txn);
+ ByteArray *prec_arena = &record_arena(txn);
+
+ Protocol request(Protocol::CURSOR_MOVE_REQUEST);
+ request.mutable_cursor_move_request()->set_cursor_handle(cursor->get_remote_handle());
+ request.mutable_cursor_move_request()->set_flags(flags);
+ if (key)
+ Protocol::assign_key(request.mutable_cursor_move_request()->mutable_key(),
+ key, false);
+ if (record)
+ Protocol::assign_record(request.mutable_cursor_move_request()->mutable_record(),
+ record, false);
+
+ ScopedPtr<Protocol> reply(env->perform_request(&request));
+
+ ham_assert(reply->has_cursor_move_reply() != 0);
+
+ ham_status_t st = reply->cursor_move_reply().status();
+ if (st)
+ return (st);
+
+ /* modify key/record, but make sure that USER_ALLOC is respected! */
+ if (reply->cursor_move_reply().has_key()) {
+ ham_assert(key);
+ key->_flags = reply->cursor_move_reply().key().intflags();
+ key->size = (uint16_t)reply->cursor_move_reply().key().data().size();
+ if (!(key->flags & HAM_KEY_USER_ALLOC)) {
+ pkey_arena->resize(key->size);
+ key->data = pkey_arena->get_ptr();
+ }
+ memcpy(key->data, (void *)&reply->cursor_move_reply().key().data()[0],
+ key->size);
+ }
+
+ /* same for the record */
+ if (reply->cursor_move_reply().has_record()) {
+ ham_assert(record);
+ record->size = reply->cursor_move_reply().record().data().size();
+ if (!(record->flags & HAM_RECORD_USER_ALLOC)) {
+ prec_arena->resize(record->size);
+ record->data = prec_arena->get_ptr();
+ }
+ memcpy(record->data, (void *)&reply->cursor_move_reply().record().data()[0],
+ record->size);
+ }
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+void
+RemoteDatabase::cursor_close_impl(Cursor *cursor)
+{
+ SerializedWrapper request;
+ request.id = kCursorCloseRequest;
+ request.cursor_close_request.cursor_handle = cursor->get_remote_handle();
+
+ SerializedWrapper reply;
+ renv()->perform_request(&request, &reply);
+ ham_assert(reply.id == kCursorCloseReply);
+}
+
+ham_status_t
+RemoteDatabase::close_impl(uint32_t flags)
+{
+ RemoteEnvironment *env = renv();
+
+ // do not set HAM_DONT_LOCK over the network
+ flags &= ~HAM_DONT_LOCK;
+
+ Protocol request(Protocol::DB_CLOSE_REQUEST);
+ request.mutable_db_close_request()->set_db_handle(m_remote_handle);
+ request.mutable_db_close_request()->set_flags(flags);
+
+ ScopedPtr<Protocol> reply(env->perform_request(&request));
+
+ ham_assert(reply->has_db_close_reply());
+
+ ham_status_t st = reply->db_close_reply().status();
+ if (st == 0)
+ m_remote_handle = 0;
+
+ return (st);
+}
+
+
+} // namespace hamsterdb
+
+#endif // HAM_ENABLE_REMOTE
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4db/db_remote.h b/plugins/Dbx_kv/src/hamsterdb/src/4db/db_remote.h
new file mode 100644
index 0000000000..1a492418bc
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4db/db_remote.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_DB_REMOTE_H
+#define HAM_DB_REMOTE_H
+
+#ifdef HAM_ENABLE_REMOTE
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "4db/db.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+class Environment;
+class RemoteEnvironment;
+
+/*
+ * The database implementation for remote file access
+ */
+class RemoteDatabase : public Database
+{
+ public:
+ RemoteDatabase(Environment *env, DatabaseConfiguration config,
+ uint64_t remote_handle)
+ : Database(env, config), m_remote_handle(remote_handle) {
+ }
+
+ // Fills in the current metrics
+ virtual void fill_metrics(ham_env_metrics_t *metrics) { }
+
+ // Returns Database parameters (ham_db_get_parameters)
+ virtual ham_status_t get_parameters(ham_parameter_t *param);
+
+ // Checks Database integrity (ham_db_check_integrity)
+ virtual ham_status_t check_integrity(uint32_t flags);
+
+ // Returns the number of keys
+ virtual ham_status_t count(Transaction *txn, bool distinct,
+ uint64_t *pcount);
+
+ // Scans the whole database, applies a processor function
+ virtual ham_status_t scan(Transaction *txn, ScanVisitor *visitor,
+ bool distinct) {
+ return (HAM_NOT_IMPLEMENTED);
+ }
+
+ // Inserts a key/value pair (ham_db_insert, ham_cursor_insert)
+ virtual ham_status_t insert(Cursor *cursor, Transaction *txn,
+ ham_key_t *key, ham_record_t *record, uint32_t flags);
+
+ // Erase a key/value pair (ham_db_erase, ham_cursor_erase)
+ virtual ham_status_t erase(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ uint32_t flags);
+
+ // Lookup of a key/value pair (ham_db_find, ham_cursor_find)
+ virtual ham_status_t find(Cursor *cursor, Transaction *txn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
+ // Returns number of duplicates (ham_cursor_get_record_count)
+ virtual ham_status_t cursor_get_record_count(Cursor *cursor, uint32_t flags,
+ uint32_t *pcount);
+
+ // Returns position in duplicate list (ham_cursor_get_duplicate_position)
+ virtual ham_status_t cursor_get_duplicate_position(Cursor *cursor,
+ uint32_t *pposition);
+
+ // Get current record size (ham_cursor_get_record_size)
+ virtual ham_status_t cursor_get_record_size(Cursor *cursor,
+ uint64_t *psize);
+
+ // Overwrites the record of a cursor (ham_cursor_overwrite)
+ virtual ham_status_t cursor_overwrite(Cursor *cursor,
+ ham_record_t *record, uint32_t flags);
+
+ // Moves a cursor, returns key and/or record (ham_cursor_move)
+ virtual ham_status_t cursor_move(Cursor *cursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags);
+
+ protected:
+ // Creates a cursor; this is the actual implementation
+ virtual Cursor *cursor_create_impl(Transaction *txn, uint32_t flags);
+
+ // Clones a cursor; this is the actual implementation
+ virtual Cursor *cursor_clone_impl(Cursor *src);
+
+ // Closes a cursor; this is the actual implementation
+ virtual void cursor_close_impl(Cursor *c);
+
+ // Closes a database; this is the actual implementation
+ virtual ham_status_t close_impl(uint32_t flags);
+
+ private:
+ // Returns the RemoteEnvironment instance
+ RemoteEnvironment *renv() {
+ return ((RemoteEnvironment *)m_env);
+ }
+
+ // the remote database handle
+ uint64_t m_remote_handle;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_ENABLE_REMOTE */
+
+#endif /* HAM_DB_REMOTE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env.cc b/plugins/Dbx_kv/src/hamsterdb/src/4env/env.cc
new file mode 100644
index 0000000000..6e3a494f6d
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env.cc
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "4db/db.h"
+#include "4env/env.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+namespace hamsterdb {
+
+ham_status_t
+Environment::create()
+{
+ try {
+ return (do_create());
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::open()
+{
+ try {
+ return (do_open());
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::get_database_names(uint16_t *names, uint32_t *count)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ return (do_get_database_names(names, count));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::get_parameters(ham_parameter_t *param)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ return (do_get_parameters(param));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::flush(uint32_t flags)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ return (do_flush(flags));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::create_db(Database **pdb, DatabaseConfiguration &config,
+ const ham_parameter_t *param)
+{
+ try {
+ ScopedLock lock(m_mutex);
+
+ ham_status_t st = do_create_db(pdb, config, param);
+
+ // on success: store the open database in the environment's list of
+ // opened databases
+ if (st == 0) {
+ m_database_map[config.db_name] = *pdb;
+ /* flush the environment to make sure that the header page is written
+ * to disk */
+      st = do_flush(0);
+ }
+ else {
+ if (*pdb)
+ (void)ham_db_close((ham_db_t *)*pdb, HAM_DONT_LOCK);
+ }
+ return (st);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::open_db(Database **pdb, DatabaseConfiguration &config,
+ const ham_parameter_t *param)
+{
+ try {
+ ScopedLock lock(m_mutex);
+
+ /* make sure that this database is not yet open */
+ if (m_database_map.find(config.db_name) != m_database_map.end())
+ return (HAM_DATABASE_ALREADY_OPEN);
+
+ ham_status_t st = do_open_db(pdb, config, param);
+
+ // on success: store the open database in the environment's list of
+ // opened databases
+ if (st == 0)
+ m_database_map[config.db_name] = *pdb;
+ else {
+ if (*pdb)
+ (void)ham_db_close((ham_db_t *)*pdb, HAM_DONT_LOCK);
+ }
+ return (st);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::rename_db(uint16_t oldname, uint16_t newname, uint32_t flags)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ return (do_rename_db(oldname, newname, flags));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::erase_db(uint16_t dbname, uint32_t flags)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ return (do_erase_db(dbname, flags));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::close_db(Database *db, uint32_t flags)
+{
+ ham_status_t st = 0;
+
+ try {
+ ScopedLock lock;
+ if (!(flags & HAM_DONT_LOCK))
+ lock = ScopedLock(m_mutex);
+
+ uint16_t dbname = db->name();
+
+ // flush committed Transactions
+ st = do_flush(HAM_FLUSH_COMMITTED_TRANSACTIONS);
+ if (st)
+ return (st);
+
+ st = db->close(flags);
+ if (st)
+ return (st);
+
+ m_database_map.erase(dbname);
+ delete db;
+
+ /* in-memory database: make sure that a database with the same name
+ * can be re-created */
+ if (m_config.flags & HAM_IN_MEMORY)
+ do_erase_db(dbname, 0);
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::txn_begin(Transaction **ptxn, const char *name, uint32_t flags)
+{
+ try {
+ ScopedLock lock;
+ if (!(flags & HAM_DONT_LOCK))
+ lock = ScopedLock(m_mutex);
+
+ if (!(m_config.flags & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("transactions are disabled (see HAM_ENABLE_TRANSACTIONS)"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ *ptxn = do_txn_begin(name, flags);
+ return (0);
+ }
+ catch (Exception &ex) {
+ *ptxn = 0;
+ return (ex.code);
+ }
+}
+
+std::string
+Environment::txn_get_name(Transaction *txn)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ return (txn->get_name());
+ }
+ catch (Exception &) {
+ return ("");
+ }
+}
+
+ham_status_t
+Environment::txn_commit(Transaction *txn, uint32_t flags)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ return (do_txn_commit(txn, flags));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::txn_abort(Transaction *txn, uint32_t flags)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ return (do_txn_abort(txn, flags));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::close(uint32_t flags)
+{
+ ham_status_t st = 0;
+
+ try {
+ ScopedLock lock(m_mutex);
+
+ /* auto-abort (or commit) all pending transactions */
+ if (m_txn_manager.get()) {
+ Transaction *t;
+
+ while ((t = m_txn_manager->get_oldest_txn())) {
+ if (!t->is_aborted() && !t->is_committed()) {
+ if (flags & HAM_TXN_AUTO_COMMIT)
+ st = m_txn_manager->commit(t, 0);
+ else /* if (flags & HAM_TXN_AUTO_ABORT) */
+ st = m_txn_manager->abort(t, 0);
+ if (st)
+ return (st);
+ }
+
+ m_txn_manager->flush_committed_txns();
+ }
+ }
+
+ /* flush all remaining transactions */
+ if (m_txn_manager)
+ m_txn_manager->flush_committed_txns();
+
+ /* close all databases */
+ Environment::DatabaseMap::iterator it = m_database_map.begin();
+ while (it != m_database_map.end()) {
+ Environment::DatabaseMap::iterator it2 = it; it++;
+ Database *db = it2->second;
+ if (flags & HAM_AUTO_CLEANUP)
+ st = close_db(db, flags | HAM_DONT_LOCK);
+ else
+ st = db->close(flags);
+ if (st)
+ return (st);
+ }
+ m_database_map.clear();
+
+ return (do_close(flags));
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+ham_status_t
+Environment::fill_metrics(ham_env_metrics_t *metrics)
+{
+ try {
+ ScopedLock lock(m_mutex);
+ do_fill_metrics(metrics);
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+EnvironmentTest
+Environment::test()
+{
+ return (EnvironmentTest(m_config));
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env.h b/plugins/Dbx_kv/src/hamsterdb/src/4env/env.h
new file mode 100644
index 0000000000..c0841151df
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: nothrow
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_ENV_H
+#define HAM_ENV_H
+
+#include "0root/root.h"
+
+#include <map>
+#include <string>
+
+#include "ham/hamsterdb_int.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1base/mutex.h"
+#include "1base/scoped_ptr.h"
+#include "2config/db_config.h"
+#include "2config/env_config.h"
+#include "4txn/txn.h"
+#include "4env/env_test.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+// A helper structure; ham_env_t is declared in ham/hamsterdb.h as an
+// opaque C structure, but internally we use a C++ class. The ham_env_t
+// struct satisfies the C compiler, and internally we just cast the pointers.
+struct ham_env_t {
+ int dummy;
+};
+
+namespace hamsterdb {
+
+class Database;
+class Transaction;
+
+//
+// The Environment is the "root" of all hamsterdb objects. It's a container
+// for multiple databases and transactions.
+//
+// This class provides exception handling and locking mechanisms, then
+// dispatches all calls to LocalEnvironment or RemoteEnvironment.
+//
+class Environment
+{
+ public:
+ // Constructor
+ Environment(EnvironmentConfiguration &config)
+ : m_config(config) {
+ }
+
+ virtual ~Environment() {
+ }
+
+ // Returns the flags which were set when creating/opening the Environment
+ uint32_t get_flags() const {
+ return (m_config.flags);
+ }
+
+ // Returns the Environment's configuration
+ const EnvironmentConfiguration &config() const {
+ return (m_config);
+ }
+
+ // Returns this Environment's mutex
+ Mutex &mutex() {
+ return (m_mutex);
+ }
+
+ // Creates a new Environment (ham_env_create)
+ ham_status_t create();
+
+ // Opens a new Environment (ham_env_open)
+ ham_status_t open();
+
+ // Returns all database names (ham_env_get_database_names)
+ ham_status_t get_database_names(uint16_t *names, uint32_t *count);
+
+ // Returns environment parameters and flags (ham_env_get_parameters)
+ ham_status_t get_parameters(ham_parameter_t *param);
+
+ // Flushes the environment and its databases to disk (ham_env_flush)
+ ham_status_t flush(uint32_t flags);
+
+ // Creates a new database in the environment (ham_env_create_db)
+ ham_status_t create_db(Database **db, DatabaseConfiguration &config,
+ const ham_parameter_t *param);
+
+ // Opens an existing database in the environment (ham_env_open_db)
+ ham_status_t open_db(Database **db, DatabaseConfiguration &config,
+ const ham_parameter_t *param);
+
+ // Renames a database in the Environment (ham_env_rename_db)
+ ham_status_t rename_db(uint16_t oldname, uint16_t newname, uint32_t flags);
+
+ // Erases (deletes) a database from the Environment (ham_env_erase_db)
+ ham_status_t erase_db(uint16_t name, uint32_t flags);
+
+ // Closes an existing database in the environment (ham_db_close)
+ ham_status_t close_db(Database *db, uint32_t flags);
+
+ // Begins a new transaction (ham_txn_begin)
+ ham_status_t txn_begin(Transaction **ptxn, const char *name,
+ uint32_t flags);
+
+ // Returns the name of a Transaction
+ std::string txn_get_name(Transaction *txn);
+
+ // Commits a transaction (ham_txn_commit)
+ ham_status_t txn_commit(Transaction *txn, uint32_t flags);
+
+    // Aborts a transaction (ham_txn_abort)
+ ham_status_t txn_abort(Transaction *txn, uint32_t flags);
+
+ // Closes the Environment (ham_env_close)
+ ham_status_t close(uint32_t flags);
+
+ // Fills in the current metrics
+ ham_status_t fill_metrics(ham_env_metrics_t *metrics);
+
+ // Returns a test object
+ EnvironmentTest test();
+
+ protected:
+ // Creates a new Environment (ham_env_create)
+ virtual ham_status_t do_create() = 0;
+
+ // Opens a new Environment (ham_env_open)
+ virtual ham_status_t do_open() = 0;
+
+ // Returns all database names (ham_env_get_database_names)
+ virtual ham_status_t do_get_database_names(uint16_t *names,
+ uint32_t *count) = 0;
+
+ // Returns environment parameters and flags (ham_env_get_parameters)
+ virtual ham_status_t do_get_parameters(ham_parameter_t *param) = 0;
+
+ // Flushes the environment and its databases to disk (ham_env_flush)
+ virtual ham_status_t do_flush(uint32_t flags) = 0;
+
+ // Creates a new database in the environment (ham_env_create_db)
+ virtual ham_status_t do_create_db(Database **db,
+ DatabaseConfiguration &config,
+ const ham_parameter_t *param) = 0;
+
+ // Opens an existing database in the environment (ham_env_open_db)
+ virtual ham_status_t do_open_db(Database **db,
+ DatabaseConfiguration &config,
+ const ham_parameter_t *param) = 0;
+
+ // Renames a database in the Environment (ham_env_rename_db)
+ virtual ham_status_t do_rename_db(uint16_t oldname, uint16_t newname,
+ uint32_t flags) = 0;
+
+ // Erases (deletes) a database from the Environment (ham_env_erase_db)
+ virtual ham_status_t do_erase_db(uint16_t name, uint32_t flags) = 0;
+
+ // Begins a new transaction (ham_txn_begin)
+ virtual Transaction *do_txn_begin(const char *name, uint32_t flags) = 0;
+
+ // Commits a transaction (ham_txn_commit)
+ virtual ham_status_t do_txn_commit(Transaction *txn, uint32_t flags) = 0;
+
+    // Aborts a transaction (ham_txn_abort)
+ virtual ham_status_t do_txn_abort(Transaction *txn, uint32_t flags) = 0;
+
+ // Closes the Environment (ham_env_close)
+ virtual ham_status_t do_close(uint32_t flags) = 0;
+
+ // Fills in the current metrics
+ virtual void do_fill_metrics(ham_env_metrics_t *metrics) const = 0;
+
+ protected:
+ // A mutex to serialize access to this Environment
+ Mutex m_mutex;
+
+ // The Environment's configuration
+ EnvironmentConfiguration m_config;
+
+ // The Transaction manager; can be 0
+ ScopedPtr<TransactionManager> m_txn_manager;
+
+ // A map of all opened Databases
+ typedef std::map<uint16_t, Database *> DatabaseMap;
+ DatabaseMap m_database_map;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_ENV_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env_header.h b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_header.h
new file mode 100644
index 0000000000..56c5a5fcb8
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_header.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_ENV_HEADER_H
+#define HAM_ENV_HEADER_H
+
+#include "0root/root.h"
+
+#include <map>
+#include <string>
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "2page/page.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+#include "1base/packstart.h"
+
+/**
+ * the persistent file header
+ */
+typedef HAM_PACK_0 struct HAM_PACK_1
+{
+ /** magic cookie - always "ham\0" */
+ uint8_t _magic[4];
+
+ /** version information - major, minor, rev, file */
+ uint8_t _version[4];
+
+ /** reserved */
+ uint64_t _reserved1;
+
+ /** size of the page */
+ uint32_t _page_size;
+
+ /** maximum number of databases for this environment */
+ uint16_t _max_databases;
+
+ /** PRO: for storing journal compression algorithm */
+ uint8_t _journal_compression;
+
+ /** reserved */
+ uint8_t _reserved3;
+
+ /** blob id of the PageManager's state */
+ uint64_t _page_manager_blobid;
+
+ /*
+ * following here:
+ *
+ * 1. the private data of the index btree(s)
+ * -> see get_btree_header()
+ */
+} HAM_PACK_2 PEnvironmentHeader;
+
+#include "1base/packstop.h"
+
+class EnvironmentHeader
+{
+ public:
+ // Constructor
+ EnvironmentHeader(Page *page)
+ : m_header_page(page) {
+ }
+
+ // Sets the 'magic' field of a file header
+ void set_magic(uint8_t m1, uint8_t m2, uint8_t m3, uint8_t m4) {
+ get_header()->_magic[0] = m1;
+ get_header()->_magic[1] = m2;
+ get_header()->_magic[2] = m3;
+ get_header()->_magic[3] = m4;
+ }
+
+ // Returns true if the magic matches
+ bool verify_magic(uint8_t m1, uint8_t m2, uint8_t m3, uint8_t m4) {
+ if (get_header()->_magic[0] != m1)
+ return (false);
+ if (get_header()->_magic[1] != m2)
+ return (false);
+ if (get_header()->_magic[2] != m3)
+ return (false);
+ if (get_header()->_magic[3] != m4)
+ return (false);
+ return (true);
+ }
+
+ // Returns byte |i| of the 'version'-header
+ uint8_t get_version(int i) {
+ return (get_header()->_version[i]);
+ }
+
+ // Sets the version of a file header
+ void set_version(uint8_t major, uint8_t minor, uint8_t revision,
+ uint8_t file) {
+ get_header()->_version[0] = major;
+ get_header()->_version[1] = minor;
+ get_header()->_version[2] = revision;
+ get_header()->_version[3] = file;
+ }
+
+    // Returns the maximum number of databases for this file
+ uint16_t get_max_databases() {
+ return (get_header()->_max_databases);
+ }
+
+ // Sets the maximum number of databases for this file
+ void set_max_databases(uint16_t max_databases) {
+ get_header()->_max_databases = max_databases;
+ }
+
+ // Returns the page size from the header page
+ uint32_t page_size() {
+ return (get_header()->_page_size);
+ }
+
+ // Sets the page size in the header page
+ void set_page_size(uint32_t page_size) {
+ get_header()->_page_size = page_size;
+ }
+
+ // Returns the PageManager's blob id
+ uint64_t get_page_manager_blobid() {
+ return (get_header()->_page_manager_blobid);
+ }
+
+    // Sets the PageManager's blob id
+ void set_page_manager_blobid(uint64_t blobid) {
+ get_header()->_page_manager_blobid = blobid;
+ }
+
+ // Returns the Journal compression configuration
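+    // (the algorithm is stored in the upper 4 bits, the level in the
+    // lower 4 bits - see set_journal_compression below)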
+ int get_journal_compression(int *level) {
+ *level = get_header()->_journal_compression & 0x0f;
+ return (get_header()->_journal_compression >> 4);
+ }
+
+ // Sets the Journal compression configuration
+ void set_journal_compression(int algorithm, int level) {
+ get_header()->_journal_compression = (algorithm << 4) | level;
+ }
+
+ // Returns the header page with persistent configuration settings
+ Page *get_header_page() {
+ return (m_header_page);
+ }
+
+ private:
+ // Returns a pointer to the header data
+ PEnvironmentHeader *get_header() {
+ return ((PEnvironmentHeader *)(m_header_page->get_payload()));
+ }
+
+ // The header page of the Environment
+ Page *m_header_page;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_ENV_HEADER_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local.cc b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local.cc
new file mode 100644
index 0000000000..7ba0280d7a
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local.cc
@@ -0,0 +1,760 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1os/os.h"
+#include "2device/device_factory.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_stats.h"
+#include "3blob_manager/blob_manager_factory.h"
+#include "3journal/journal.h"
+#include "3page_manager/page_manager.h"
+#include "4db/db.h"
+#include "4txn/txn.h"
+#include "4txn/txn_local.h"
+#include "4env/env_local.h"
+#include "4cursor/cursor.h"
+#include "4context/context.h"
+#include "4txn/txn_cursor.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+namespace hamsterdb {
+
+LocalEnvironment::LocalEnvironment(EnvironmentConfiguration &config)
+ : Environment(config)
+{
+}
+
+void
+LocalEnvironment::recover(uint32_t flags)
+{
+ Context context(this);
+
+ ham_status_t st = 0;
+ m_journal.reset(new Journal(this));
+
+ ham_assert(get_flags() & HAM_ENABLE_RECOVERY);
+
+ try {
+ m_journal->open();
+ }
+ catch (Exception &ex) {
+ if (ex.code == HAM_FILE_NOT_FOUND) {
+ m_journal->create();
+ return;
+ }
+ }
+
+ /* success - check if we need recovery */
+ if (!m_journal->is_empty()) {
+ if (flags & HAM_AUTO_RECOVERY) {
+ m_journal->recover((LocalTransactionManager *)m_txn_manager.get());
+ }
+ else {
+ st = HAM_NEED_RECOVERY;
+ goto bail;
+ }
+ }
+
+bail:
+ /* in case of errors: close log and journal, but do not delete the files */
+ if (st) {
+ m_journal->close(true);
+ throw Exception(st);
+ }
+
+ /* reset the page manager */
+ m_page_manager->reset(&context);
+}
+
+PBtreeHeader *
+LocalEnvironment::btree_header(int i)
+{
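+  // the PBtreeHeader array is stored in the header page's payload,
+  // directly behind the PEnvironmentHeader structure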
+ PBtreeHeader *d = (PBtreeHeader *)
+ (m_header->get_header_page()->get_payload()
+ + sizeof(PEnvironmentHeader));
+ return (d + i);
+}
+
+LocalEnvironmentTest
+LocalEnvironment::test()
+{
+ return (LocalEnvironmentTest(this));
+}
+
+ham_status_t
+LocalEnvironment::do_create()
+{
+ if (m_config.flags & HAM_IN_MEMORY)
+ m_config.flags |= HAM_DISABLE_RECLAIM_INTERNAL;
+
+ /* initialize the device if it does not yet exist */
+ m_blob_manager.reset(BlobManagerFactory::create(this, m_config.flags));
+ m_device.reset(DeviceFactory::create(m_config));
+ if (m_config.flags & HAM_ENABLE_TRANSACTIONS)
+ m_txn_manager.reset(new LocalTransactionManager(this));
+
+ /* create the file */
+ m_device->create();
+
+ /* allocate the header page */
+ Page *page = new Page(m_device.get());
+ page->alloc(Page::kTypeHeader, m_config.page_size_bytes);
+ ::memset(page->get_data(), 0, m_config.page_size_bytes);
+ page->set_type(Page::kTypeHeader);
+ page->set_dirty(true);
+
+ m_header.reset(new EnvironmentHeader(page));
+
+ /* initialize the header */
+ m_header->set_magic('H', 'A', 'M', '\0');
+ m_header->set_version(HAM_VERSION_MAJ, HAM_VERSION_MIN, HAM_VERSION_REV,
+ HAM_FILE_VERSION);
+ m_header->set_page_size(m_config.page_size_bytes);
+ m_header->set_max_databases(m_config.max_databases);
+
+ /* load page manager after setting up the blobmanager and the device! */
+ m_page_manager.reset(new PageManager(this));
+
+ /* create a logfile and a journal (if requested) */
+ if (get_flags() & HAM_ENABLE_RECOVERY) {
+ m_journal.reset(new Journal(this));
+ m_journal->create();
+ }
+
+ /* flush the header page - this will write through disk if logging is
+ * enabled */
+ if (get_flags() & HAM_ENABLE_RECOVERY)
+ m_header->get_header_page()->flush();
+
+ return (0);
+}
+
+ham_status_t
+LocalEnvironment::do_open()
+{
+ ham_status_t st = 0;
+
+ Context context(this);
+
+ /* Initialize the device if it does not yet exist. The page size will
+ * be filled in later (at this point in time, it's still unknown) */
+ m_blob_manager.reset(BlobManagerFactory::create(this, m_config.flags));
+ m_device.reset(DeviceFactory::create(m_config));
+
+ /* open the file */
+ m_device->open();
+
+ if (m_config.flags & HAM_ENABLE_TRANSACTIONS)
+ m_txn_manager.reset(new LocalTransactionManager(this));
+
+ /*
+ * read the database header
+ *
+ * !!!
+ * now this is an ugly problem - the database header spans one page, but
+ * what's the size of this page? chances are good that it's the default
+ * page-size, but we really can't be sure.
+ *
+   * read 512 bytes and extract the "real" page size, then read
+ * the real page.
+ */
+ {
+ Page *page = 0;
+ uint8_t hdrbuf[512];
+
+ /*
+ * in here, we're going to set up a faked headerpage for the
+ * duration of this call; BE VERY CAREFUL: we MUST clean up
+ * at the end of this section or we'll be in BIG trouble!
+ */
+ Page fakepage(m_device.get());
+ fakepage.set_data((PPageData *)hdrbuf);
+
+ /* create the configuration object */
+ m_header.reset(new EnvironmentHeader(&fakepage));
+
+ /*
+     * now fetch the header data we need to get an estimate of what the
+     * database really contains.
+ */
+ m_device->read(0, hdrbuf, sizeof(hdrbuf));
+
+ m_config.page_size_bytes = m_header->page_size();
+
+ /** check the file magic */
+ if (!m_header->verify_magic('H', 'A', 'M', '\0')) {
+ ham_log(("invalid file type"));
+ st = HAM_INV_FILE_HEADER;
+ goto fail_with_fake_cleansing;
+ }
+
+ /* check the database version; everything with a different file version
+ * is incompatible */
+ if (m_header->get_version(3) != HAM_FILE_VERSION) {
+ ham_log(("invalid file version"));
+ st = HAM_INV_FILE_VERSION;
+ goto fail_with_fake_cleansing;
+ }
+ else if (m_header->get_version(0) == 1 &&
+ m_header->get_version(1) == 0 &&
+ m_header->get_version(2) <= 9) {
+ ham_log(("invalid file version; < 1.0.9 is not supported"));
+ st = HAM_INV_FILE_VERSION;
+ goto fail_with_fake_cleansing;
+ }
+
+ st = 0;
+
+fail_with_fake_cleansing:
+
+ /* undo the headerpage fake first! */
+ fakepage.set_data(0);
+ m_header.reset(0);
+
+ /* exit when an error was signaled */
+ if (st) {
+ if (m_device->is_open())
+ m_device->close();
+ return (st);
+ }
+
+ /* now read the "real" header page and store it in the Environment */
+ page = new Page(m_device.get());
+ page->fetch(0);
+ m_header.reset(new EnvironmentHeader(page));
+ }
+
+ /* load page manager after setting up the blobmanager and the device! */
+ m_page_manager.reset(new PageManager(this));
+
+ /* check if recovery is required */
+ if (get_flags() & HAM_ENABLE_RECOVERY)
+ recover(m_config.flags);
+
+ /* load the state of the PageManager */
+ if (m_header->get_page_manager_blobid() != 0)
+ m_page_manager->initialize(m_header->get_page_manager_blobid());
+
+ return (0);
+}
+
+ham_status_t
+LocalEnvironment::do_get_database_names(uint16_t *names, uint32_t *count)
+{
+ uint16_t name;
+ uint32_t i = 0;
+ uint32_t max_names = 0;
+
+ max_names = *count;
+ *count = 0;
+
+ /* copy each database name to the array */
+ ham_assert(m_header->get_max_databases() > 0);
+ for (i = 0; i < m_header->get_max_databases(); i++) {
+ name = btree_header(i)->get_dbname();
+ if (name == 0)
+ continue;
+
+ if (*count >= max_names)
+ return (HAM_LIMITS_REACHED);
+
+ names[(*count)++] = name;
+ }
+
+ return 0;
+}
+
+ham_status_t
+LocalEnvironment::do_get_parameters(ham_parameter_t *param)
+{
+ ham_parameter_t *p = param;
+
+ if (p) {
+ for (; p->name; p++) {
+ switch (p->name) {
+ case HAM_PARAM_CACHE_SIZE:
+ p->value = m_config.cache_size_bytes;
+ break;
+ case HAM_PARAM_PAGE_SIZE:
+ p->value = m_config.page_size_bytes;
+ break;
+ case HAM_PARAM_MAX_DATABASES:
+ p->value = m_header->get_max_databases();
+ break;
+ case HAM_PARAM_FLAGS:
+ p->value = get_flags();
+ break;
+ case HAM_PARAM_FILEMODE:
+ p->value = m_config.file_mode;
+ break;
+ case HAM_PARAM_FILENAME:
+ if (m_config.filename.size())
+ p->value = (uint64_t)(PTR_TO_U64(m_config.filename.c_str()));
+ else
+ p->value = 0;
+ break;
+ case HAM_PARAM_LOG_DIRECTORY:
+ if (m_config.log_filename.size())
+ p->value = (uint64_t)(PTR_TO_U64(m_config.log_filename.c_str()));
+ else
+ p->value = 0;
+ break;
+ case HAM_PARAM_JOURNAL_SWITCH_THRESHOLD:
+ p->value = m_config.journal_switch_threshold;
+ break;
+ case HAM_PARAM_JOURNAL_COMPRESSION:
+ p->value = 0;
+ break;
+ case HAM_PARAM_POSIX_FADVISE:
+ p->value = m_config.posix_advice;
+ break;
+ default:
+ ham_trace(("unknown parameter %d", (int)p->name));
+ return (HAM_INV_PARAMETER);
+ }
+ }
+ }
+
+ return (0);
+}
+
+ham_status_t
+LocalEnvironment::do_flush(uint32_t flags)
+{
+ Context context(this, 0, 0);
+
+ /* flush all committed transactions */
+ if (m_txn_manager)
+ m_txn_manager->flush_committed_txns(&context);
+
+ if (flags & HAM_FLUSH_COMMITTED_TRANSACTIONS || get_flags() & HAM_IN_MEMORY)
+ return (0);
+
+ /* flush the header page */
+ m_header->get_header_page()->flush();
+
+ /* flush all open pages to disk */
+ m_page_manager->flush(false);
+
+ /* flush the device - this usually causes a fsync() */
+ m_device->flush();
+ return (0);
+}
+
+ham_status_t
+LocalEnvironment::do_create_db(Database **pdb, DatabaseConfiguration &config,
+ const ham_parameter_t *param)
+{
+ if (get_flags() & HAM_READ_ONLY) {
+ ham_trace(("cannot create database in a read-only environment"));
+ return (HAM_WRITE_PROTECTED);
+ }
+
+ if (param) {
+ for (; param->name; param++) {
+ switch (param->name) {
+ case HAM_PARAM_RECORD_COMPRESSION:
+ ham_trace(("Record compression is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ case HAM_PARAM_KEY_COMPRESSION:
+ ham_trace(("Key compression is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ case HAM_PARAM_KEY_TYPE:
+ config.key_type = (uint16_t)param->value;
+ break;
+ case HAM_PARAM_KEY_SIZE:
+ if (param->value != 0) {
+ if (param->value > 0xffff) {
+              ham_trace(("invalid key size %u - must be < 0xffff",
+                        (unsigned)param->value));
+ return (HAM_INV_KEY_SIZE);
+ }
+ if (config.flags & HAM_RECORD_NUMBER32) {
+ if (param->value > 0 && param->value != sizeof(uint32_t)) {
+ ham_trace(("invalid key size %u - must be 4 for "
+ "HAM_RECORD_NUMBER32 databases",
+ (unsigned)param->value));
+ return (HAM_INV_KEY_SIZE);
+ }
+ }
+ if (config.flags & HAM_RECORD_NUMBER64) {
+ if (param->value > 0 && param->value != sizeof(uint64_t)) {
+ ham_trace(("invalid key size %u - must be 8 for "
+ "HAM_RECORD_NUMBER64 databases",
+ (unsigned)param->value));
+ return (HAM_INV_KEY_SIZE);
+ }
+ }
+ config.key_size = (uint16_t)param->value;
+ }
+ break;
+ case HAM_PARAM_RECORD_SIZE:
+ config.record_size = (uint32_t)param->value;
+ break;
+ default:
+ ham_trace(("invalid parameter 0x%x (%d)", param->name, param->name));
+ return (HAM_INV_PARAMETER);
+ }
+ }
+ }
+
+ if (config.flags & HAM_RECORD_NUMBER32) {
+ if (config.key_type == HAM_TYPE_UINT8
+ || config.key_type == HAM_TYPE_UINT16
+ || config.key_type == HAM_TYPE_UINT64
+ || config.key_type == HAM_TYPE_REAL32
+ || config.key_type == HAM_TYPE_REAL64) {
+ ham_trace(("HAM_RECORD_NUMBER32 not allowed in combination with "
+ "fixed length type"));
+ return (HAM_INV_PARAMETER);
+ }
+ config.key_type = HAM_TYPE_UINT32;
+ }
+ else if (config.flags & HAM_RECORD_NUMBER64) {
+ if (config.key_type == HAM_TYPE_UINT8
+ || config.key_type == HAM_TYPE_UINT16
+ || config.key_type == HAM_TYPE_UINT32
+ || config.key_type == HAM_TYPE_REAL32
+ || config.key_type == HAM_TYPE_REAL64) {
+ ham_trace(("HAM_RECORD_NUMBER64 not allowed in combination with "
+ "fixed length type"));
+ return (HAM_INV_PARAMETER);
+ }
+ config.key_type = HAM_TYPE_UINT64;
+ }
+
+ uint32_t mask = HAM_FORCE_RECORDS_INLINE
+ | HAM_FLUSH_WHEN_COMMITTED
+ | HAM_ENABLE_DUPLICATE_KEYS
+ | HAM_RECORD_NUMBER32
+ | HAM_RECORD_NUMBER64;
+ if (config.flags & ~mask) {
+ ham_trace(("invalid flags(s) 0x%x", config.flags & ~mask));
+ return (HAM_INV_PARAMETER);
+ }
+
+ /* create a new Database object */
+ LocalDatabase *db = new LocalDatabase(this, config);
+
+ Context context(this, 0, db);
+
+ /* check if this database name is unique */
+ uint16_t dbi;
+ for (uint32_t i = 0; i < m_header->get_max_databases(); i++) {
+ uint16_t name = btree_header(i)->get_dbname();
+ if (!name)
+ continue;
+ if (name == config.db_name) {
+ delete db;
+ return (HAM_DATABASE_ALREADY_EXISTS);
+ }
+ }
+
+ /* find a free slot in the PBtreeHeader array and store the name */
+ for (dbi = 0; dbi < m_header->get_max_databases(); dbi++) {
+ uint16_t name = btree_header(dbi)->get_dbname();
+ if (!name) {
+ btree_header(dbi)->set_dbname(config.db_name);
+ break;
+ }
+ }
+ if (dbi == m_header->get_max_databases()) {
+ delete db;
+ return (HAM_LIMITS_REACHED);
+ }
+
+ mark_header_page_dirty(&context);
+
+ /* initialize the Database */
+ ham_status_t st = db->create(&context, btree_header(dbi));
+ if (st) {
+ delete db;
+ return (st);
+ }
+
+ /* force-flush the changeset */
+ if (get_flags() & HAM_ENABLE_RECOVERY)
+ context.changeset.flush(next_lsn());
+
+ *pdb = db;
+ return (0);
+}
+
+ham_status_t
+LocalEnvironment::do_open_db(Database **pdb, DatabaseConfiguration &config,
+ const ham_parameter_t *param)
+{
+ *pdb = 0;
+
+ uint32_t mask = HAM_FORCE_RECORDS_INLINE
+ | HAM_FLUSH_WHEN_COMMITTED
+ | HAM_READ_ONLY;
+ if (config.flags & ~mask) {
+ ham_trace(("invalid flags(s) 0x%x", config.flags & ~mask));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (param) {
+ for (; param->name; param++) {
+ switch (param->name) {
+ case HAM_PARAM_RECORD_COMPRESSION:
+ ham_trace(("Record compression is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ case HAM_PARAM_KEY_COMPRESSION:
+ ham_trace(("Key compression is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ default:
+ ham_trace(("invalid parameter 0x%x (%d)", param->name, param->name));
+ return (HAM_INV_PARAMETER);
+ }
+ }
+ }
+
+ /* create a new Database object */
+ LocalDatabase *db = new LocalDatabase(this, config);
+
+ Context context(this, 0, db);
+
+ ham_assert(0 != m_header->get_header_page());
+
+ /* search for a database with this name */
+ uint16_t dbi;
+ for (dbi = 0; dbi < m_header->get_max_databases(); dbi++) {
+ uint16_t name = btree_header(dbi)->get_dbname();
+ if (!name)
+ continue;
+ if (config.db_name == name)
+ break;
+ }
+
+ if (dbi == m_header->get_max_databases()) {
+ delete db;
+ return (HAM_DATABASE_NOT_FOUND);
+ }
+
+ /* open the database */
+ ham_status_t st = db->open(&context, btree_header(dbi));
+ if (st) {
+ delete db;
+ ham_trace(("Database could not be opened"));
+ return (st);
+ }
+
+ *pdb = db;
+ return (0);
+}
+
+ham_status_t
+LocalEnvironment::do_rename_db(uint16_t oldname, uint16_t newname,
+ uint32_t flags)
+{
+ Context context(this);
+
+ /*
+ * check if a database with the new name already exists; also search
+ * for the database with the old name
+ */
+ uint16_t max = m_header->get_max_databases();
+ uint16_t slot = max;
+ ham_assert(max > 0);
+ for (uint16_t dbi = 0; dbi < max; dbi++) {
+ uint16_t name = btree_header(dbi)->get_dbname();
+ if (name == newname)
+ return (HAM_DATABASE_ALREADY_EXISTS);
+ if (name == oldname)
+ slot = dbi;
+ }
+
+ if (slot == max)
+ return (HAM_DATABASE_NOT_FOUND);
+
+ /* replace the database name with the new name */
+ btree_header(slot)->set_dbname(newname);
+ mark_header_page_dirty(&context);
+
+ /* if the database with the old name is currently open: notify it */
+ Environment::DatabaseMap::iterator it = m_database_map.find(oldname);
+ if (it != m_database_map.end()) {
+ Database *db = it->second;
+ it->second->set_name(newname);
+ m_database_map.erase(oldname);
+ m_database_map.insert(DatabaseMap::value_type(newname, db));
+ }
+
+ return (0);
+}
+
+ham_status_t
+LocalEnvironment::do_erase_db(uint16_t name, uint32_t flags)
+{
+ /* check if this database is still open */
+ if (m_database_map.find(name) != m_database_map.end())
+ return (HAM_DATABASE_ALREADY_OPEN);
+
+ /*
+ * if it's an in-memory environment then it's enough to purge the
+ * database from the environment header
+ */
+ if (get_flags() & HAM_IN_MEMORY) {
+ for (uint16_t dbi = 0; dbi < m_header->get_max_databases(); dbi++) {
+ PBtreeHeader *desc = btree_header(dbi);
+ if (name == desc->get_dbname()) {
+ desc->set_dbname(0);
+ return (0);
+ }
+ }
+ return (HAM_DATABASE_NOT_FOUND);
+ }
+
+ /* temporarily load the database */
+ LocalDatabase *db;
+ DatabaseConfiguration config;
+ config.db_name = name;
+ ham_status_t st = do_open_db((Database **)&db, config, 0);
+ if (st)
+ return (st);
+
+ Context context(this, 0, db);
+
+ /*
+ * delete all blobs and extended keys, also from the cache and
+ * the extkey-cache
+ *
+ * also delete all pages and move them to the freelist; if they're
+ * cached, delete them from the cache
+ */
+ st = db->drop(&context);
+ if (st)
+ return (st);
+
+ /* now set database name to 0 and set the header page to dirty */
+ for (uint16_t dbi = 0; dbi < m_header->get_max_databases(); dbi++) {
+ PBtreeHeader *desc = btree_header(dbi);
+ if (name == desc->get_dbname()) {
+ desc->set_dbname(0);
+ break;
+ }
+ }
+
+ mark_header_page_dirty(&context);
+ context.changeset.clear();
+
+ (void)ham_db_close((ham_db_t *)db, HAM_DONT_LOCK);
+
+ return (0);
+}
+
+Transaction *
+LocalEnvironment::do_txn_begin(const char *name, uint32_t flags)
+{
+ Transaction *txn = new LocalTransaction(this, name, flags);
+ m_txn_manager->begin(txn);
+ return (txn);
+}
+
+ham_status_t
+LocalEnvironment::do_txn_commit(Transaction *txn, uint32_t flags)
+{
+ return (m_txn_manager->commit(txn, flags));
+}
+
+ham_status_t
+LocalEnvironment::do_txn_abort(Transaction *txn, uint32_t flags)
+{
+ return (m_txn_manager->abort(txn, flags));
+}
+
+ham_status_t
+LocalEnvironment::do_close(uint32_t flags)
+{
+ Context context(this);
+
+ /* flush all committed transactions */
+ if (m_txn_manager)
+ m_txn_manager->flush_committed_txns(&context);
+
+ /* flush all pages and the freelist, reduce the file size */
+ if (m_page_manager)
+ m_page_manager->close(&context);
+
+  /* if we're not in read-only mode, this is not an in-memory environment
+   * and the header page is dirty: flush the header page to disk */
+ if (m_header && m_header->get_header_page() && !(get_flags() & HAM_IN_MEMORY)
+ && m_device.get() && m_device.get()->is_open()
+ && (!(get_flags() & HAM_READ_ONLY))) {
+ m_header->get_header_page()->flush();
+ }
+
+ /* close the header page */
+ if (m_header && m_header->get_header_page()) {
+ Page *page = m_header->get_header_page();
+ if (page->get_data())
+ m_device->free_page(page);
+ delete page;
+ m_header.reset();
+ }
+
+ /* close the device */
+ if (m_device) {
+ if (m_device->is_open()) {
+ if (!(get_flags() & HAM_READ_ONLY))
+ m_device->flush();
+ m_device->close();
+ }
+ }
+
+ /* close the log and the journal */
+ if (m_journal)
+ m_journal->close(!!(flags & HAM_DONT_CLEAR_LOG));
+
+ return (0);
+}
+
+void
+LocalEnvironment::do_fill_metrics(ham_env_metrics_t *metrics) const
+{
+ // PageManager metrics (incl. cache and freelist)
+ m_page_manager->fill_metrics(metrics);
+ // the BlobManagers
+ m_blob_manager->fill_metrics(metrics);
+ // the Journal (if available)
+ if (m_journal)
+ m_journal->fill_metrics(metrics);
+ // the (first) database
+ if (!m_database_map.empty()) {
+ LocalDatabase *db = (LocalDatabase *)m_database_map.begin()->second;
+ db->fill_metrics(metrics);
+ }
+ // and of the btrees
+ BtreeIndex::fill_metrics(metrics);
+ // SIMD support enabled?
+ metrics->simd_lane_width = os_get_simd_lane_width();
+}
+
+void
+LocalEnvironmentTest::set_journal(Journal *journal)
+{
+ m_env->m_journal.reset(journal);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local.h b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local.h
new file mode 100644
index 0000000000..7800ee37de
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_ENV_LOCAL_H
+#define HAM_ENV_LOCAL_H
+
+#include "ham/hamsterdb.h"
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/scoped_ptr.h"
+#include "2lsn_manager/lsn_manager.h"
+#include "3journal/journal.h"
+#include "4env/env.h"
+#include "4env/env_header.h"
+#include "4env/env_local_test.h"
+#include "4context/context.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class PBtreeHeader;
+class PFreelistPayload;
+class Journal;
+class PageManager;
+class BlobManager;
+class LocalTransaction;
+struct MessageBase;
+
+//
+// The Environment implementation for local file access
+//
+class LocalEnvironment : public Environment
+{
+ public:
+ LocalEnvironment(EnvironmentConfiguration &config);
+
+ // Returns the Device object
+ Device *device() {
+ return (m_device.get());
+ }
+
+ // Returns the Environment's header object with the persistent configuration
+ EnvironmentHeader *header() {
+ return (m_header.get());
+ }
+
+ // Returns the blob manager
+ BlobManager *blob_manager() {
+ return (m_blob_manager.get());
+ }
+
+ // Returns the PageManager instance
+ PageManager *page_manager() {
+ return (m_page_manager.get());
+ }
+
+ // Returns the Journal
+ Journal *journal() {
+ return (m_journal.get());
+ }
+
+ // Returns the lsn manager
+ LsnManager *lsn_manager() {
+ return (&m_lsn_manager);
+ }
+
+    // Returns the transaction manager
+ TransactionManager *txn_manager() {
+ return (m_txn_manager.get());
+ }
+
+ // Increments the lsn and returns the incremented value
+ uint64_t next_lsn() {
+ return (m_lsn_manager.next());
+ }
+
+ // Returns a test gateway
+ LocalEnvironmentTest test();
+
+ protected:
+ // Creates a new Environment (ham_env_create)
+ virtual ham_status_t do_create();
+
+ // Opens a new Environment (ham_env_open)
+ virtual ham_status_t do_open();
+
+ // Returns all database names (ham_env_get_database_names)
+ virtual ham_status_t do_get_database_names(uint16_t *names,
+ uint32_t *count);
+
+ // Returns environment parameters and flags (ham_env_get_parameters)
+ virtual ham_status_t do_get_parameters(ham_parameter_t *param);
+
+ // Flushes the environment and its databases to disk (ham_env_flush)
+ virtual ham_status_t do_flush(uint32_t flags);
+
+ // Creates a new database in the environment (ham_env_create_db)
+ virtual ham_status_t do_create_db(Database **db,
+ DatabaseConfiguration &config,
+ const ham_parameter_t *param);
+
+ // Opens an existing database in the environment (ham_env_open_db)
+ virtual ham_status_t do_open_db(Database **db,
+ DatabaseConfiguration &config,
+ const ham_parameter_t *param);
+
+ // Renames a database in the Environment (ham_env_rename_db)
+ virtual ham_status_t do_rename_db(uint16_t oldname, uint16_t newname,
+ uint32_t flags);
+
+ // Erases (deletes) a database from the Environment (ham_env_erase_db)
+ virtual ham_status_t do_erase_db(uint16_t name, uint32_t flags);
+
+ // Begins a new transaction (ham_txn_begin)
+ virtual Transaction *do_txn_begin(const char *name, uint32_t flags);
+
+ // Commits a transaction (ham_txn_commit)
+ virtual ham_status_t do_txn_commit(Transaction *txn, uint32_t flags);
+
+    // Aborts a transaction (ham_txn_abort)
+ virtual ham_status_t do_txn_abort(Transaction *txn, uint32_t flags);
+
+ // Closes the Environment (ham_env_close)
+ virtual ham_status_t do_close(uint32_t flags);
+
+ // Fills in the current metrics
+ virtual void do_fill_metrics(ham_env_metrics_t *metrics) const;
+
+ private:
+ friend class LocalEnvironmentTest;
+
+ // Runs the recovery process
+ void recover(uint32_t flags);
+
+ // Get the btree configuration of the database #i, where |i| is a
+ // zero-based index
+ PBtreeHeader *btree_header(int i);
+
+ // Sets the dirty-flag of the header page and adds the header page
+ // to the Changeset (if recovery is enabled)
+ void mark_header_page_dirty(Context *context) {
+ Page *page = m_header->get_header_page();
+ page->set_dirty(true);
+ if (get_flags() & HAM_ENABLE_RECOVERY)
+ context->changeset.put(page);
+ }
+
+ // The Environment's header page/configuration
+ ScopedPtr<EnvironmentHeader> m_header;
+
+ // The device instance (either a file or an in-memory-db)
+ ScopedPtr<Device> m_device;
+
+ // The BlobManager instance
+ ScopedPtr<BlobManager> m_blob_manager;
+
+ // The PageManager instance
+ ScopedPtr<PageManager> m_page_manager;
+
+ // The logical journal
+ ScopedPtr<Journal> m_journal;
+
+ // The lsn manager
+ LsnManager m_lsn_manager;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_ENV_LOCAL_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local_test.h b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local_test.h
new file mode 100644
index 0000000000..ea045e18dc
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_local_test.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: no
+ * @thread_safe: no
+ */
+
+#ifndef HAM_ENV_LOCAL_TEST_H
+#define HAM_ENV_LOCAL_TEST_H
+
+#include "ham/hamsterdb.h"
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Journal;
+class LocalEnvironment;
+
+class LocalEnvironmentTest
+{
+ public:
+ LocalEnvironmentTest(LocalEnvironment *env)
+ : m_env(env) {
+ }
+
+ // Sets a new journal object
+ void set_journal(Journal *journal);
+
+ private:
+ LocalEnvironment *m_env;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_ENV_LOCAL_TEST_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env_remote.cc b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_remote.cc
new file mode 100644
index 0000000000..6e53543c8b
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_remote.cc
@@ -0,0 +1,445 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef HAM_ENABLE_REMOTE
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1os/os.h"
+#include "1base/scoped_ptr.h"
+#include "2protobuf/protocol.h"
+#include "4cursor/cursor.h"
+#include "4db/db_remote.h"
+#include "4env/env_remote.h"
+#include "4txn/txn_remote.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+RemoteEnvironment::RemoteEnvironment(EnvironmentConfiguration config)
+ : Environment(config), m_remote_handle(0), m_buffer(1024 * 4)
+{
+}
+
+Protocol *
+RemoteEnvironment::perform_request(Protocol *request)
+{
+ // use ByteArray to avoid frequent reallocs!
+ m_buffer.clear();
+
+ if (!request->pack(&m_buffer)) {
+ ham_log(("protoype Protocol::pack failed"));
+ throw Exception(HAM_INTERNAL_ERROR);
+ }
+
+ m_socket.send((uint8_t *)m_buffer.get_ptr(), m_buffer.get_size());
+
+ // now block and wait for the reply; first read the header, then the
+ // remaining data
+ m_socket.recv((uint8_t *)m_buffer.get_ptr(), 8);
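+  // (the 8 header bytes hold the magic followed by the size of the payload
+  // that still has to be received below)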
+
+ // no need to check the magic; it's verified in Protocol::unpack
+ uint32_t size = *(uint32_t *)((char *)m_buffer.get_ptr() + 4);
+ m_buffer.resize(size + 8);
+ m_socket.recv((uint8_t *)m_buffer.get_ptr() + 8, size);
+
+ return (Protocol::unpack((const uint8_t *)m_buffer.get_ptr(), size + 8));
+}
+
+void
+RemoteEnvironment::perform_request(SerializedWrapper *request,
+ SerializedWrapper *reply)
+{
+ int size_left = (int)request->get_size();
+ request->size = size_left;
+ request->magic = HAM_TRANSFER_MAGIC_V2;
+ m_buffer.resize(request->size);
+
+ uint8_t *ptr = (uint8_t *)m_buffer.get_ptr();
+ request->serialize(&ptr, &size_left);
+ ham_assert(size_left == 0);
+
+ m_socket.send((uint8_t *)m_buffer.get_ptr(), request->size);
+
+ // now block and wait for the reply; first read the header, then the
+ // remaining data
+ m_socket.recv((uint8_t *)m_buffer.get_ptr(), 8);
+
+ // now check the magic and receive the remaining data
+ uint32_t magic = *(uint32_t *)((char *)m_buffer.get_ptr() + 0);
+ if (magic != HAM_TRANSFER_MAGIC_V2)
+ throw Exception(HAM_INTERNAL_ERROR);
+ // TODO check the magic
+ int size = (int)*(uint32_t *)((char *)m_buffer.get_ptr() + 4);
+ m_buffer.resize(size);
+ m_socket.recv((uint8_t *)m_buffer.get_ptr() + 8, size - 8);
+
+ ptr = (uint8_t *)m_buffer.get_ptr();
+ reply->deserialize(&ptr, &size);
+ ham_assert(size == 0);
+}
+
+ham_status_t
+RemoteEnvironment::do_create()
+{
+ // the 'create' operation is identical to 'open'
+ return (do_open());
+}
+
+ham_status_t
+RemoteEnvironment::do_open()
+{
+ m_socket.close();
+
+ const char *url = m_config.filename.c_str();
+
+ ham_assert(url != 0);
+ ham_assert(::strstr(url, "ham://") == url);
+ const char *ip = url + 6;
+ const char *port_str = strstr(ip, ":");
+ if (!port_str) {
+ ham_trace(("remote uri does not include port - expected "
+ "`ham://<ip>:<port>`"));
+ return (HAM_INV_PARAMETER);
+ }
+ uint16_t port = (uint16_t)atoi(port_str + 1);
+ if (!port) {
+ ham_trace(("remote uri includes invalid port - expected "
+ "`ham://<ip>:<port>`"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ const char *filename = strstr(port_str, "/");
+
+ std::string hostname(ip, port_str);
+ m_socket.connect(hostname.c_str(), port, m_config.remote_timeout_sec);
+
+ Protocol request(Protocol::CONNECT_REQUEST);
+ request.mutable_connect_request()->set_path(filename);
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ ham_assert(reply->type() == Protocol::CONNECT_REPLY);
+
+ ham_status_t st = reply->connect_reply().status();
+ if (st == 0) {
+ m_config.flags |= reply->connect_reply().env_flags();
+ m_remote_handle = reply->connect_reply().env_handle();
+
+ if (get_flags() & HAM_ENABLE_TRANSACTIONS)
+ m_txn_manager.reset(new RemoteTransactionManager(this));
+ }
+
+ return (st);
+}
+
+ham_status_t
+RemoteEnvironment::do_get_database_names(uint16_t *names, uint32_t *count)
+{
+ Protocol request(Protocol::ENV_GET_DATABASE_NAMES_REQUEST);
+ request.mutable_env_get_database_names_request();
+ request.mutable_env_get_database_names_request()->set_env_handle(m_remote_handle);
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ ham_assert(reply->has_env_get_database_names_reply());
+
+ ham_status_t st = reply->env_get_database_names_reply().status();
+ if (st)
+ return (st);
+
+ /* copy the retrieved names */
+ uint32_t i;
+ for (i = 0;
+ i < (uint32_t)reply->env_get_database_names_reply().names_size()
+ && i < *count;
+ i++) {
+ names[i] = (uint16_t)*(reply->mutable_env_get_database_names_reply()->mutable_names()->mutable_data() + i);
+ }
+
+ *count = i;
+ return (0);
+}
+
+ham_status_t
+RemoteEnvironment::do_get_parameters(ham_parameter_t *param)
+{
+ static char filename[1024]; // TODO not threadsafe!!
+ ham_parameter_t *p = param;
+
+ Protocol request(Protocol::ENV_GET_PARAMETERS_REQUEST);
+ request.mutable_env_get_parameters_request()->set_env_handle(m_remote_handle);
+ while (p && p->name != 0) {
+ request.mutable_env_get_parameters_request()->add_names(p->name);
+ p++;
+ }
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ ham_assert(reply->has_env_get_parameters_reply());
+
+ ham_status_t st = reply->env_get_parameters_reply().status();
+ if (st)
+ return (st);
+
+ p = param;
+ while (p && p->name) {
+ switch (p->name) {
+ case HAM_PARAM_CACHESIZE:
+ ham_assert(reply->env_get_parameters_reply().has_cache_size());
+ p->value = reply->env_get_parameters_reply().cache_size();
+ break;
+ case HAM_PARAM_PAGESIZE:
+ ham_assert(reply->env_get_parameters_reply().has_page_size());
+ p->value = reply->env_get_parameters_reply().page_size();
+ break;
+ case HAM_PARAM_MAX_DATABASES:
+ ham_assert(reply->env_get_parameters_reply().has_max_env_databases());
+ p->value = reply->env_get_parameters_reply().max_env_databases();
+ break;
+ case HAM_PARAM_FLAGS:
+ ham_assert(reply->env_get_parameters_reply().has_flags());
+ p->value = reply->env_get_parameters_reply().flags();
+ break;
+ case HAM_PARAM_FILEMODE:
+ ham_assert(reply->env_get_parameters_reply().has_filemode());
+ p->value = reply->env_get_parameters_reply().filemode();
+ break;
+ case HAM_PARAM_FILENAME:
+ if (reply->env_get_parameters_reply().has_filename()) {
+ strncpy(filename, reply->env_get_parameters_reply().filename().c_str(),
+ sizeof(filename) - 1);
+ filename[sizeof(filename) - 1] = 0;
+ p->value = (uint64_t)(&filename[0]);
+ }
+ break;
+ default:
+ ham_trace(("unknown parameter %d", (int)p->name));
+ break;
+ }
+ p++;
+ }
+ return (0);
+}
+
+ham_status_t
+RemoteEnvironment::do_flush(uint32_t flags)
+{
+ Protocol request(Protocol::ENV_FLUSH_REQUEST);
+ request.mutable_env_flush_request()->set_flags(flags);
+ request.mutable_env_flush_request()->set_env_handle(m_remote_handle);
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ ham_assert(reply->has_env_flush_reply());
+
+ return (reply->env_flush_reply().status());
+}
+
+ham_status_t
+RemoteEnvironment::do_create_db(Database **pdb, DatabaseConfiguration &config,
+ const ham_parameter_t *param)
+{
+ Protocol request(Protocol::ENV_CREATE_DB_REQUEST);
+ request.mutable_env_create_db_request()->set_env_handle(m_remote_handle);
+ request.mutable_env_create_db_request()->set_dbname(config.db_name);
+ request.mutable_env_create_db_request()->set_flags(config.flags);
+
+ const ham_parameter_t *p = param;
+ if (p) {
+ for (; p->name; p++) {
+ request.mutable_env_create_db_request()->add_param_names(p->name);
+ request.mutable_env_create_db_request()->add_param_values(p->value);
+ }
+ }
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ ham_assert(reply->has_env_create_db_reply());
+
+ ham_status_t st = reply->env_create_db_reply().status();
+ if (st)
+ return (st);
+
+ config.flags = reply->env_create_db_reply().db_flags();
+ RemoteDatabase *rdb = new RemoteDatabase(this, config,
+ reply->env_create_db_reply().db_handle());
+
+ *pdb = rdb;
+ return (0);
+}
+
+ham_status_t
+RemoteEnvironment::do_open_db(Database **pdb, DatabaseConfiguration &config,
+ const ham_parameter_t *param)
+{
+ Protocol request(Protocol::ENV_OPEN_DB_REQUEST);
+ request.mutable_env_open_db_request()->set_env_handle(m_remote_handle);
+ request.mutable_env_open_db_request()->set_dbname(config.db_name);
+ request.mutable_env_open_db_request()->set_flags(config.flags);
+
+ const ham_parameter_t *p = param;
+ if (p) {
+ for (; p->name; p++) {
+ request.mutable_env_open_db_request()->add_param_names(p->name);
+ request.mutable_env_open_db_request()->add_param_values(p->value);
+ }
+ }
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ ham_assert(reply->has_env_open_db_reply());
+
+ ham_status_t st = reply->env_open_db_reply().status();
+ if (st)
+ return (st);
+
+ config.flags = reply->env_open_db_reply().db_flags();
+ RemoteDatabase *rdb = new RemoteDatabase(this, config,
+ reply->env_open_db_reply().db_handle());
+
+ *pdb = rdb;
+ return (0);
+}
+
+ham_status_t
+RemoteEnvironment::do_rename_db(uint16_t oldname, uint16_t newname,
+ uint32_t flags)
+{
+ Protocol request(Protocol::ENV_RENAME_REQUEST);
+ request.mutable_env_rename_request()->set_env_handle(m_remote_handle);
+ request.mutable_env_rename_request()->set_oldname(oldname);
+ request.mutable_env_rename_request()->set_newname(newname);
+ request.mutable_env_rename_request()->set_flags(flags);
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ ham_assert(reply->has_env_rename_reply());
+
+ return (reply->env_rename_reply().status());
+}
+
+ham_status_t
+RemoteEnvironment::do_erase_db(uint16_t name, uint32_t flags)
+{
+ Protocol request(Protocol::ENV_ERASE_DB_REQUEST);
+ request.mutable_env_erase_db_request()->set_env_handle(m_remote_handle);
+ request.mutable_env_erase_db_request()->set_name(name);
+ request.mutable_env_erase_db_request()->set_flags(flags);
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ ham_assert(reply->has_env_erase_db_reply());
+
+ return (reply->env_erase_db_reply().status());
+}
+
+Transaction *
+RemoteEnvironment::do_txn_begin(const char *name, uint32_t flags)
+{
+ SerializedWrapper request;
+ request.id = kTxnBeginRequest;
+ request.txn_begin_request.env_handle = m_remote_handle;
+ request.txn_begin_request.flags = flags;
+ if (name) {
+ request.txn_begin_request.name.value = (uint8_t *)name;
+ request.txn_begin_request.name.size = strlen(name) + 1;
+ }
+
+ SerializedWrapper reply;
+ perform_request(&request, &reply);
+ ham_assert(reply.id == kTxnBeginReply);
+
+ ham_status_t st = reply.txn_begin_reply.status;
+ if (st)
+ throw Exception(st);
+
+ Transaction *txn = new RemoteTransaction(this, name, flags,
+ reply.txn_begin_reply.txn_handle);
+ m_txn_manager->begin(txn);
+ return (txn);
+}
+
+ham_status_t
+RemoteEnvironment::do_txn_commit(Transaction *txn, uint32_t flags)
+{
+ RemoteTransaction *rtxn = dynamic_cast<RemoteTransaction *>(txn);
+
+ SerializedWrapper request;
+ request.id = kTxnCommitRequest;
+ request.txn_commit_request.txn_handle = rtxn->get_remote_handle();
+ request.txn_commit_request.flags = flags;
+
+ SerializedWrapper reply;
+ perform_request(&request, &reply);
+ ham_assert(reply.id == kTxnCommitReply);
+
+ ham_status_t st = reply.txn_commit_reply.status;
+ if (st)
+ return (st);
+
+ return (m_txn_manager->commit(txn, flags));
+}
+
+ham_status_t
+RemoteEnvironment::do_txn_abort(Transaction *txn, uint32_t flags)
+{
+ RemoteTransaction *rtxn = dynamic_cast<RemoteTransaction *>(txn);
+
+ SerializedWrapper request;
+ request.id = kTxnAbortRequest;
+ request.txn_abort_request.txn_handle = rtxn->get_remote_handle();
+ request.txn_abort_request.flags = flags;
+
+ SerializedWrapper reply;
+ perform_request(&request, &reply);
+ ham_assert(reply.id == kTxnAbortReply);
+ ham_status_t st = reply.txn_abort_reply.status;
+ if (st)
+ return (st);
+
+ return (m_txn_manager->abort(txn, flags));
+}
+
+ham_status_t
+RemoteEnvironment::do_close(uint32_t flags)
+{
+ Protocol request(Protocol::DISCONNECT_REQUEST);
+ request.mutable_disconnect_request()->set_env_handle(m_remote_handle);
+
+ ScopedPtr<Protocol> reply(perform_request(&request));
+
+ // ignore the reply
+
+ m_socket.close();
+ m_remote_handle = 0;
+ return (0);
+}
+
+void
+RemoteEnvironment::do_fill_metrics(ham_env_metrics_t *metrics) const
+{
+ throw Exception(HAM_NOT_IMPLEMENTED);
+}
+
+} // namespace hamsterdb
+
+#endif // HAM_ENABLE_REMOTE
+
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env_remote.h b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_remote.h
new file mode 100644
index 0000000000..c45fd5b222
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_remote.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_ENV_REMOTE_H
+#define HAM_ENV_REMOTE_H
+
+#ifdef HAM_ENABLE_REMOTE
+
+#include "0root/root.h"
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1os/socket.h"
+#include "1base/dynamic_array.h"
+#include "2protobuf/protocol.h"
+#include "2protoserde/messages.h"
+#include "4env/env.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+//
+// The Environment implementation for remote file access
+//
+class RemoteEnvironment : public Environment
+{
+ public:
+ // Constructor
+ RemoteEnvironment(EnvironmentConfiguration config);
+
+ // Sends a |request| message with the Google Protocol Buffers API. Blocks
+    // until the reply has been fully received. Returns the reply structure.
+ Protocol *perform_request(Protocol *request);
+
+    // Sends a |request| message with the builtin Serde API. Blocks until the
+    // reply has been fully received. Fills |reply| with the received data.
+ void perform_request(SerializedWrapper *request, SerializedWrapper *reply);
+
+ protected:
+ // Creates a new Environment (ham_env_create)
+ virtual ham_status_t do_create();
+
+ // Opens a new Environment (ham_env_open)
+ virtual ham_status_t do_open();
+
+ // Returns all database names (ham_env_get_database_names)
+ virtual ham_status_t do_get_database_names(uint16_t *names,
+ uint32_t *count);
+
+ // Returns environment parameters and flags (ham_env_get_parameters)
+ virtual ham_status_t do_get_parameters(ham_parameter_t *param);
+
+ // Flushes the environment and its databases to disk (ham_env_flush)
+ virtual ham_status_t do_flush(uint32_t flags);
+
+ // Creates a new database in the environment (ham_env_create_db)
+ virtual ham_status_t do_create_db(Database **db,
+ DatabaseConfiguration &config,
+ const ham_parameter_t *param);
+
+ // Opens an existing database in the environment (ham_env_open_db)
+ virtual ham_status_t do_open_db(Database **db,
+ DatabaseConfiguration &config,
+ const ham_parameter_t *param);
+
+ // Renames a database in the Environment (ham_env_rename_db)
+ virtual ham_status_t do_rename_db(uint16_t oldname, uint16_t newname,
+ uint32_t flags);
+
+ // Erases (deletes) a database from the Environment (ham_env_erase_db)
+ virtual ham_status_t do_erase_db(uint16_t name, uint32_t flags);
+
+ // Begins a new transaction (ham_txn_begin)
+ virtual Transaction *do_txn_begin(const char *name, uint32_t flags);
+
+ // Commits a transaction (ham_txn_commit)
+ virtual ham_status_t do_txn_commit(Transaction *txn, uint32_t flags);
+
+    // Aborts a transaction (ham_txn_abort)
+ virtual ham_status_t do_txn_abort(Transaction *txn, uint32_t flags);
+
+ // Closes the Environment (ham_env_close)
+ virtual ham_status_t do_close(uint32_t flags);
+
+ // Fills in the current metrics
+ virtual void do_fill_metrics(ham_env_metrics_t *metrics) const;
+
+ private:
+ // the remote handle
+ uint64_t m_remote_handle;
+
+ // the socket
+ Socket m_socket;
+
+ // a buffer to avoid frequent memory allocations
+ ByteArray m_buffer;
+};
+
+} // namespace hamsterdb
+
+#endif // HAM_ENABLE_REMOTE
+
+#endif /* HAM_ENV_REMOTE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4env/env_test.h b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_test.h
new file mode 100644
index 0000000000..0d9fa76cec
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4env/env_test.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: no
+ * @thread_safe: no
+ */
+
+#ifndef HAM_ENV_TEST_H
+#define HAM_ENV_TEST_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "4env/env.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class EnvironmentTest
+{
+ public:
+ // Constructor
+ EnvironmentTest(EnvironmentConfiguration &config)
+ : m_config(config) {
+ }
+
+ // Returns the Environment's configuration
+ EnvironmentConfiguration &config() {
+ return (m_config);
+ }
+
+ void set_filename(const std::string &filename) {
+ m_config.filename = filename;
+ }
+
+ private:
+ // Reference to the Environment's configuration
+ EnvironmentConfiguration &m_config;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_ENV_TEST_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn.h b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn.h
new file mode 100644
index 0000000000..e38e6155dc
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn.h
@@ -0,0 +1,298 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * The hamsterdb Transaction implementation
+ *
+ * hamsterdb stores Transactions in volatile RAM (with an append-only journal
+ * in case the RAM is lost). Each Transaction and each modification *in* a
+ * Transaction is stored in a complex data structure.
+ *
+ * When a Database is created, it contains a BtreeIndex for persistent
+ * (committed and flushed) data, and a TransactionIndex for active Transactions
+ * and those Transactions which were committed but not yet flushed to disk.
+ * This TransactionIndex is implemented as a binary search tree (see rb.h).
+ *
+ * Each node in the TransactionTree is implemented by TransactionNode. Each
+ * node is identified by its database key, and groups all modifications of this
+ * key (of all Transactions!).
+ *
+ * Each modification in the node is implemented by TransactionOperation. There
+ * is one such TransactionOperation for 'insert', 'erase' etc. The
+ * TransactionOperations form two linked lists - one stored in the Transaction
+ * ("all operations from this Transaction") and another one stored in the
+ * TransactionNode ("all operations on the same key").
+ *
+ * All Transactions in an Environment form a linked list, where the tail is
+ * the chronologically newest Transaction and the head is the oldest
+ * (see Transaction::get_newer and Transaction::get_older).
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
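+
+/*
+ * A rough sketch of the layout described above, with purely illustrative
+ * keys and Transaction ids:
+ *
+ *   TransactionIndex (one per Database; a red-black tree keyed by db key)
+ *    +-- TransactionNode for key "foo"
+ *    |     newest op --> [erase, txn 7] --> [insert, txn 3] <-- oldest op
+ *    +-- TransactionNode for key "bar"
+ *          newest op --> [insert w/ overwrite, txn 7] --> [insert, txn 2]
+ *
+ * In addition, every TransactionOperation is linked into its Transaction;
+ * txn 7 above therefore also chains its operations on "foo" and "bar".
+ */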
+
+#ifndef HAM_TXN_H
+#define HAM_TXN_H
+
+#include "0root/root.h"
+
+#include <string>
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/dynamic_array.h"
+#include "1base/error.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+//
+// A helper structure; ham_txn_t is declared in ham/hamsterdb.h as an
+// opaque C structure, but internally we use a C++ class. The ham_txn_t
+// struct satisfies the C compiler, and internally we just cast the pointers.
+//
+struct ham_txn_t
+{
+ int dummy;
+};
+
+namespace hamsterdb {
+
+struct Context;
+class Environment;
+
+//
+// An abstract base class for a Transaction. Overwritten for local and
+// remote implementations
+//
+class Transaction
+{
+ protected:
+ enum {
+ // Transaction was aborted
+ kStateAborted = 0x10000,
+
+ // Transaction was committed
+ kStateCommitted = 0x20000
+ };
+
+ public:
+ // Constructor; "begins" the Transaction
+ // supported flags: HAM_TXN_READ_ONLY, HAM_TXN_TEMPORARY
+ Transaction(Environment *env, const char *name, uint32_t flags)
+ : m_id(0), m_env(env), m_flags(flags), m_next(0), m_cursor_refcount(0) {
+ if (name)
+ m_name = name;
+ }
+
+ // Destructor
+ virtual ~Transaction() { }
+
+ // Commits the Transaction
+ virtual void commit(uint32_t flags = 0) = 0;
+
+ // Aborts the Transaction
+ virtual void abort(uint32_t flags = 0) = 0;
+
+ // Returns true if the Transaction was aborted
+ bool is_aborted() const {
+ return (m_flags & kStateAborted) != 0;
+ }
+
+ // Returns true if the Transaction was committed
+ bool is_committed() const {
+ return (m_flags & kStateCommitted) != 0;
+ }
+
+ // Returns the unique id of this Transaction
+ uint64_t get_id() const {
+ return (m_id);
+ }
+
+ // Returns the environment pointer
+ Environment *get_env() const {
+ return (m_env);
+ }
+
+ // Returns the txn name
+ const std::string &get_name() const {
+ return (m_name);
+ }
+
+ // Returns the flags
+ uint32_t get_flags() const {
+ return (m_flags);
+ }
+
+    // Returns the cursor refcount (number of Cursors using this Transaction)
+ uint32_t get_cursor_refcount() const {
+ return (m_cursor_refcount);
+ }
+
+    // Increases the cursor refcount (number of Cursors using this Transaction)
+ void increase_cursor_refcount() {
+ m_cursor_refcount++;
+ }
+
+    // Decreases the cursor refcount (number of Cursors using this Transaction)
+ void decrease_cursor_refcount() {
+ ham_assert(m_cursor_refcount > 0);
+ m_cursor_refcount--;
+ }
+
+ // Returns the memory buffer for the key data.
+ // Used to allocate array in ham_find, ham_cursor_move etc. which is
+ // then returned to the user.
+ ByteArray &key_arena() {
+ return (m_key_arena);
+ }
+
+ // Returns the memory buffer for the record data.
+ // Used to allocate array in ham_find, ham_cursor_move etc. which is
+ // then returned to the user.
+ ByteArray &record_arena() {
+ return (m_record_arena);
+ }
+
+    // Returns the next Transaction in the linked list
+ Transaction *get_next() const {
+ return (m_next);
+ }
+
+    // Sets the next Transaction in the linked list
+ void set_next(Transaction *n) {
+ m_next = n;
+ }
+
+ protected:
+ // the id of this Transaction
+ uint64_t m_id;
+
+ // the Environment pointer
+ Environment *m_env;
+
+ // flags for this Transaction
+ uint32_t m_flags;
+
+ // the Transaction name
+ std::string m_name;
+
+ // the linked list of all transactions
+ Transaction *m_next;
+
+ // reference counter for cursors (number of cursors attached to this txn)
+ uint32_t m_cursor_refcount;
+
+ // this is where key->data points to when returning a key to the user
+ ByteArray m_key_arena;
+
+ // this is where record->data points to when returning a record to the user
+ ByteArray m_record_arena;
+
+ private:
+ friend class Journal;
+
+ // Sets the unique id of this Transaction; the journal needs this to patch
+ // in the id when recovering a Transaction
+ void set_id(uint64_t id) {
+ m_id = id;
+ }
+};
+
+
+//
+// An abstract base class for the TransactionManager. Overwritten for local and
+// remote implementations.
+//
+// The TransactionManager is part of the Environment and manages all
+// Transactions.
+//
+class TransactionManager
+{
+ public:
+ // Constructor
+ TransactionManager(Environment *env)
+ : m_env(env), m_oldest_txn(0), m_newest_txn(0) {
+ }
+
+ // Destructor
+ virtual ~TransactionManager() { }
+
+ // Begins a new Transaction
+ virtual void begin(Transaction *txn) = 0;
+
+ // Commits a Transaction; the derived subclass has to take care of
+ // flushing and/or releasing memory
+ virtual ham_status_t commit(Transaction *txn, uint32_t flags = 0) = 0;
+
+ // Aborts a Transaction; the derived subclass has to take care of
+ // flushing and/or releasing memory
+ virtual ham_status_t abort(Transaction *txn, uint32_t flags = 0) = 0;
+
+ // Flushes committed (queued) transactions
+ virtual void flush_committed_txns(Context *context = 0) = 0;
+
+    // Returns the oldest transaction which was not yet flushed to disk
+ Transaction *get_oldest_txn() {
+ return (m_oldest_txn);
+ }
+
+    // Returns the newest transaction which was not yet flushed to disk
+ Transaction *get_newest_txn() {
+ return (m_newest_txn);
+ }
+
+ protected:
+ // Adds a new transaction to this Environment
+ void append_txn_at_tail(Transaction *txn) {
+ if (!m_newest_txn) {
+ ham_assert(m_oldest_txn == 0);
+ m_oldest_txn = txn;
+ m_newest_txn = txn;
+ }
+ else {
+ m_newest_txn->set_next(txn);
+ m_newest_txn = txn;
+ /* if there's no oldest txn (this means: all txn's but the
+ * current one were already flushed) then set this txn as
+ * the oldest txn */
+ if (!m_oldest_txn)
+ m_oldest_txn = txn;
+ }
+ }
+
+ // Removes a transaction from this Environment
+ void remove_txn_from_head(Transaction *txn) {
+ if (m_newest_txn == txn)
+ m_newest_txn = 0;
+
+ ham_assert(m_oldest_txn == txn);
+ m_oldest_txn = txn->get_next();
+ }
+
+ // The Environment which created this TransactionManager
+ Environment *m_env;
+
+ // The head of the transaction list (the oldest transaction)
+ Transaction *m_oldest_txn;
+
+ // The tail of the transaction list (the youngest/newest transaction)
+ Transaction *m_newest_txn;
+};
+
+} // namespace hamsterdb
+
+#endif /* HAM_TXN_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_cursor.cc b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_cursor.cc
new file mode 100644
index 0000000000..b91469239f
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_cursor.cc
@@ -0,0 +1,368 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "3btree/btree_cursor.h"
+#include "4db/db.h"
+#include "4txn/txn.h"
+#include "4txn/txn_cursor.h"
+#include "4txn/txn_local.h"
+#include "4env/env.h"
+#include "4cursor/cursor.h"
+#include "4context/context.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+void
+TransactionCursor::clone(const TransactionCursor *other)
+{
+ m_coupled_op = 0;
+ m_coupled_next = 0;
+ m_coupled_previous = 0;
+
+ if (!other->is_nil())
+ couple_to_op(other->get_coupled_op());
+}
+
+void
+TransactionCursor::set_to_nil()
+{
+ /* uncoupled cursor? remove from the txn_op structure */
+ if (!is_nil()) {
+ TransactionOperation *op = get_coupled_op();
+ if (op)
+ remove_cursor_from_op(op);
+ m_coupled_op = 0;
+ }
+
+ /* otherwise cursor is already nil */
+}
+
+void
+TransactionCursor::couple_to_op(TransactionOperation *op)
+{
+ set_to_nil();
+ m_coupled_op = op;
+
+ m_coupled_next = op->cursor_list();
+ m_coupled_previous = 0;
+
+ if (op->cursor_list()) {
+ TransactionCursor *old = op->cursor_list();
+ old->m_coupled_previous = this;
+ }
+
+ op->set_cursor_list(this);
+}
+
+ham_status_t
+TransactionCursor::overwrite(Context *context, LocalTransaction *txn,
+ ham_record_t *record)
+{
+ ham_assert(context->txn == txn);
+
+ if (is_nil())
+ return (HAM_CURSOR_IS_NIL);
+
+ TransactionNode *node = m_coupled_op->get_node();
+
+ /* an overwrite is actually an insert w/ HAM_OVERWRITE of the
+ * current key */
+ return (((LocalDatabase *)get_db())->insert_txn(context, node->get_key(),
+ record, HAM_OVERWRITE, this));
+}
+
+ham_status_t
+TransactionCursor::move_top_in_node(TransactionNode *node,
+ TransactionOperation *op, bool ignore_conflicts, uint32_t flags)
+{
+ Transaction *optxn = 0;
+
+ if (!op)
+ op = node->get_newest_op();
+ else
+ goto next;
+
+ while (op) {
+ optxn = op->get_txn();
+ /* only look at ops from the current transaction and from
+ * committed transactions */
+ if (optxn == m_parent->get_txn() || optxn->is_committed()) {
+ /* a normal (overwriting) insert will return this key */
+ if ((op->get_flags() & TransactionOperation::kInsert)
+ || (op->get_flags() & TransactionOperation::kInsertOverwrite)) {
+ couple_to_op(op);
+ return (0);
+ }
+ /* retrieve a duplicate key */
+ if (op->get_flags() & TransactionOperation::kInsertDuplicate) {
+ /* the duplicates are handled by the caller. here we only
+ * couple to the first op */
+ couple_to_op(op);
+ return (0);
+ }
+ /* a normal erase will return an error (but we still couple the
+ * cursor because the caller might need to know WHICH key was
+ * deleted!) */
+ if (op->get_flags() & TransactionOperation::kErase) {
+ couple_to_op(op);
+ return (HAM_KEY_ERASED_IN_TXN);
+ }
+ /* everything else is a bug! */
+ ham_assert(op->get_flags() == TransactionOperation::kNop);
+ }
+ else if (optxn->is_aborted())
+ ; /* nop */
+ else if (!ignore_conflicts) {
+ /* we still have to couple, because higher-level functions
+ * will need to know about the op when consolidating the trees */
+ couple_to_op(op);
+ return (HAM_TXN_CONFLICT);
+ }
+
+next:
+ m_parent->set_dupecache_index(0);
+ op = op->get_previous_in_node();
+ }
+
+ return (HAM_KEY_NOT_FOUND);
+}
+
+ham_status_t
+TransactionCursor::move(uint32_t flags)
+{
+ ham_status_t st;
+ TransactionNode *node;
+
+ if (flags & HAM_CURSOR_FIRST) {
+ /* first set cursor to nil */
+ set_to_nil();
+
+ node = get_db()->txn_index()->get_first();
+ if (!node)
+ return (HAM_KEY_NOT_FOUND);
+ return (move_top_in_node(node, 0, false, flags));
+ }
+ else if (flags & HAM_CURSOR_LAST) {
+ /* first set cursor to nil */
+ set_to_nil();
+
+ node = get_db()->txn_index()->get_last();
+ if (!node)
+ return (HAM_KEY_NOT_FOUND);
+ return (move_top_in_node(node, 0, false, flags));
+ }
+ else if (flags & HAM_CURSOR_NEXT) {
+ if (is_nil())
+ return (HAM_CURSOR_IS_NIL);
+
+ node = m_coupled_op->get_node();
+
+ ham_assert(!is_nil());
+
+ /* first move to the next key in the current node; if we fail,
+ * then move to the next node. repeat till we've found a key or
+ * till we've reached the end of the tree */
+ while (1) {
+ node = node->get_next_sibling();
+ if (!node)
+ return (HAM_KEY_NOT_FOUND);
+ st = move_top_in_node(node, 0, true, flags);
+ if (st == HAM_KEY_NOT_FOUND)
+ continue;
+ return (st);
+ }
+ }
+ else if (flags & HAM_CURSOR_PREVIOUS) {
+ if (is_nil())
+ return (HAM_CURSOR_IS_NIL);
+
+ node = m_coupled_op->get_node();
+
+ ham_assert(!is_nil());
+
+ /* first move to the previous key in the current node; if we fail,
+ * then move to the previous node. repeat till we've found a key or
+ * till we've reached the end of the tree */
+ while (1) {
+ node = node->get_previous_sibling();
+ if (!node)
+ return (HAM_KEY_NOT_FOUND);
+ st = move_top_in_node(node, 0, true, flags);
+ if (st == HAM_KEY_NOT_FOUND)
+ continue;
+ return (st);
+ }
+ }
+ else {
+ ham_assert(!"this flag is not yet implemented");
+ }
+
+ return (0);
+}
+
+ham_status_t
+TransactionCursor::find(ham_key_t *key, uint32_t flags)
+{
+ TransactionNode *node = 0;
+
+ /* first set cursor to nil */
+ set_to_nil();
+
+ /* then lookup the node */
+ if (get_db()->txn_index())
+ node = get_db()->txn_index()->get(key, flags);
+ if (!node)
+ return (HAM_KEY_NOT_FOUND);
+
+ while (1) {
+ /* and then move to the newest insert*-op */
+ ham_status_t st = move_top_in_node(node, 0, false, 0);
+ if (st != HAM_KEY_ERASED_IN_TXN)
+ return (st);
+
+ /* if the key was erased and approx. matching is enabled, then move
+ * next/prev till we found a valid key. */
+ if (flags & HAM_FIND_GT_MATCH)
+ node = node->get_next_sibling();
+ else if (flags & HAM_FIND_LT_MATCH)
+ node = node->get_previous_sibling();
+ else
+ return (st);
+
+ if (!node)
+ return (HAM_KEY_NOT_FOUND);
+ }
+
+ ham_assert(!"should never reach this");
+ return (0);
+}
+
+void
+TransactionCursor::copy_coupled_key(ham_key_t *key)
+{
+ Transaction *txn = m_parent->get_txn();
+ ham_key_t *source = 0;
+
+ ByteArray *arena = &get_db()->key_arena(txn);
+
+ /* coupled cursor? get key from the txn_op structure */
+ if (!is_nil()) {
+ TransactionNode *node = m_coupled_op->get_node();
+
+ ham_assert(get_db() == node->get_db());
+ source = node->get_key();
+
+ key->size = source->size;
+ if (source->data && source->size) {
+ if (!(key->flags & HAM_KEY_USER_ALLOC)) {
+ arena->resize(source->size);
+ key->data = arena->get_ptr();
+ }
+ memcpy(key->data, source->data, source->size);
+ }
+ else
+ key->data = 0;
+ return;
+ }
+
+ /* otherwise cursor is nil and we cannot return a key */
+ throw Exception(HAM_CURSOR_IS_NIL);
+}
+
+void
+TransactionCursor::copy_coupled_record(ham_record_t *record)
+{
+ ham_record_t *source = 0;
+ Transaction *txn = m_parent->get_txn();
+
+ ByteArray *arena = &get_db()->record_arena(txn);
+
+ /* coupled cursor? get record from the txn_op structure */
+ if (!is_nil()) {
+ source = m_coupled_op->get_record();
+
+ record->size = source->size;
+ if (source->data && source->size) {
+ if (!(record->flags & HAM_RECORD_USER_ALLOC)) {
+ arena->resize(source->size);
+ record->data = arena->get_ptr();
+ }
+ memcpy(record->data, source->data, source->size);
+ }
+ else
+ record->data = 0;
+ return;
+ }
+
+  /* otherwise cursor is nil and we cannot return a record */
+ throw Exception(HAM_CURSOR_IS_NIL);
+}
+
+uint64_t
+TransactionCursor::get_record_size()
+{
+ /* coupled cursor? get record from the txn_op structure */
+ if (!is_nil())
+ return (m_coupled_op->get_record()->size);
+
+  /* otherwise cursor is nil and we cannot return a record size */
+ throw Exception(HAM_CURSOR_IS_NIL);
+}
+
+LocalDatabase *
+TransactionCursor::get_db()
+{
+ return (m_parent->get_db());
+}
+
+ham_status_t
+TransactionCursor::test_insert(ham_key_t *key, ham_record_t *record,
+ uint32_t flags)
+{
+ LocalTransaction *txn = dynamic_cast<LocalTransaction *>(m_parent->get_txn());
+ Context context(get_db()->lenv(), txn, get_db());
+
+ return (get_db()->insert_txn(&context, key, record, flags, this));
+}
+
+void
+TransactionCursor::remove_cursor_from_op(TransactionOperation *op)
+{
+ ham_assert(!is_nil());
+
+ if (op->cursor_list() == this) {
+ op->set_cursor_list(m_coupled_next);
+ if (m_coupled_next)
+ m_coupled_next->m_coupled_previous = 0;
+ }
+ else {
+ if (m_coupled_next)
+ m_coupled_next->m_coupled_previous = m_coupled_previous;
+ if (m_coupled_previous)
+ m_coupled_previous->m_coupled_next = m_coupled_next;
+ }
+ m_coupled_next = 0;
+ m_coupled_previous = 0;
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_cursor.h b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_cursor.h
new file mode 100644
index 0000000000..d2f4462f76
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_cursor.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A cursor which can iterate over transaction nodes and operations
+ *
+ * A Transaction Cursor can walk over Transaction trees (TransactionIndex).
+ *
+ * Transaction Cursors are only used as part of the Cursor structure as defined
+ * in cursor.h. Like all Transaction operations it is in-memory only,
+ * traversing the red-black tree that is implemented in txn.h, and
+ * consolidating multiple operations in a node (i.e. if a Transaction first
+ * overwrites a record, and another transaction then erases the key).
+ *
+ * The Transaction Cursor has two states: either it is coupled to a
+ * Transaction operation (TransactionOperation) or it is unused.
+ *
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
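+
+/*
+ * A short sketch of the two states (purely illustrative):
+ *
+ *   nil:     m_coupled_op is null; the key/record accessors signal
+ *            HAM_CURSOR_IS_NIL and the caller falls back to the btree.
+ *   coupled: m_coupled_op points to a TransactionOperation; all cursors
+ *            coupled to the same operation form a doubly linked list
+ *            (m_coupled_next/m_coupled_previous).
+ */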
+
+#ifndef HAM_TXN_CURSOR_H
+#define HAM_TXN_CURSOR_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "4txn/txn_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+class Cursor;
+struct Context;
+
+//
+// A cursor which can iterate over Transaction nodes
+//
+class TransactionCursor
+{
+ public:
+ // Constructor
+ TransactionCursor(Cursor *parent)
+ : m_parent(parent) {
+ m_coupled_op = 0;
+ m_coupled_next = 0;
+ m_coupled_previous = 0;
+ }
+
+ // Destructor; asserts that the cursor is nil
+ ~TransactionCursor() {
+ ham_assert(is_nil());
+ }
+
+ // Clones another TransactionCursor
+ void clone(const TransactionCursor *other);
+
+ // Returns the parent cursor
+ // TODO this should be private
+ Cursor *get_parent() {
+ return (m_parent);
+ }
+
+ // Couples this cursor to a TransactionOperation structure
+ void couple_to_op(TransactionOperation *op);
+
+ // Returns the pointer to the coupled TransactionOperation
+ TransactionOperation *get_coupled_op() const {
+ return (m_coupled_op);
+ }
+
+ // Sets the cursor to nil
+ void set_to_nil();
+
+ // Returns true if the cursor is nil (does not point to any item)
+ bool is_nil() const {
+ return (m_coupled_op == 0);
+ }
+
+ // Retrieves the key from the current item; creates a deep copy.
+ //
+    // If the cursor is uncoupled, HAM_CURSOR_IS_NIL is returned. This
+ // means that the item was already flushed to the btree, and the caller has
+ // to use the btree lookup function to retrieve the key.
+ void copy_coupled_key(ham_key_t *key);
+
+ // Retrieves the record from the current item; creates a deep copy.
+ //
+    // If the cursor is uncoupled, HAM_CURSOR_IS_NIL will be returned. This
+ // means that the item was already flushed to the btree, and the caller has
+ // to use the btree lookup function to retrieve the record.
+ void copy_coupled_record(ham_record_t *record);
+
+ // Moves the cursor to first, last, previous or next
+ ham_status_t move(uint32_t flags);
+
+ // Overwrites the record of a cursor
+ ham_status_t overwrite(Context *context, LocalTransaction *txn,
+ ham_record_t *record);
+
+ // Looks up an item, places the cursor
+ ham_status_t find(ham_key_t *key, uint32_t flags);
+
+ // Retrieves the record size of the current item
+ uint64_t get_record_size();
+
+ // Returns the pointer to the next cursor in the linked list of coupled
+ // cursors
+ TransactionCursor *get_coupled_next() {
+ return (m_coupled_next);
+ }
+
+ // Closes the cursor
+ void close() {
+ set_to_nil();
+ }
+
+ private:
+ friend struct TxnCursorFixture;
+
+ // Removes this cursor from this TransactionOperation
+ void remove_cursor_from_op(TransactionOperation *op);
+
+ // Inserts an item, places the cursor on the new item.
+ // This function is only used in the unittests.
+ ham_status_t test_insert(ham_key_t *key, ham_record_t *record,
+ uint32_t flags);
+
+ // Returns the database pointer
+ LocalDatabase *get_db();
+
+ // Moves the cursor to the first valid Operation in a Node
+ ham_status_t move_top_in_node(TransactionNode *node,
+ TransactionOperation *op, bool ignore_conflicts,
+ uint32_t flags);
+
+ // The parent cursor
+ Cursor *m_parent;
+
+ // A Cursor can either be coupled or nil ("not in list"). If it's
+ // coupled, it directly points to a TransactionOperation structure.
+ // If it's nil then |m_coupled_op| is null.
+ //
+ // the txn operation to which we're pointing
+ TransactionOperation *m_coupled_op;
+
+ // a doubly linked list with other cursors that are coupled
+ // to the same Operation
+ TransactionCursor *m_coupled_next, *m_coupled_previous;
+};
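+
+//
+// Usage sketch (illustrative only; assumes an existing parent Cursor
+// |parent| and a TransactionOperation |op| to couple to):
+//
+//     TransactionCursor tc(parent);
+//     ham_assert(tc.is_nil());    // a new cursor starts out nil
+//     tc.couple_to_op(op);        // now coupled to |op|
+//     // ... move/find/overwrite through the cursor ...
+//     tc.close();                 // back to nil; required before destruction
+//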
+
+} // namespace hamsterdb
+
+#endif /* HAM_TXN_CURSOR_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_factory.h b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_factory.h
new file mode 100644
index 0000000000..2738f1b4d7
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_factory.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * A factory to create TransactionOperation and TransactionNode instances.
+ *
+ * @exception_safe: strong
+ * @thread_safe: yes
+ */
+
+#ifndef HAM_TXN_FACTORY_H
+#define HAM_TXN_FACTORY_H
+
+#include "0root/root.h"
+
+#include "ham/types.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1mem/mem.h"
+#include "4txn/txn.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct TransactionFactory
+{
+ // Creates a new TransactionOperation
+ static TransactionOperation *create_operation(LocalTransaction *txn,
+ TransactionNode *node, uint32_t flags, uint32_t orig_flags,
+ uint64_t lsn, ham_key_t *key, ham_record_t *record) {
+ TransactionOperation *op;
+ op = Memory::allocate<TransactionOperation>(sizeof(*op)
+ + (record ? record->size : 0)
+ + (key ? key->size : 0));
+ op->initialize(txn, node, flags, orig_flags, lsn, key, record);
+ return (op);
+ }
+
+ // Destroys a TransactionOperation
+ static void destroy_operation(TransactionOperation *op) {
+ op->destroy();
+ }
+};
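+
+//
+// Usage sketch (illustrative only; assumes a LocalTransaction |txn|, a
+// TransactionNode |node|, an lsn value and initialized |key|/|record|
+// structures). Note that create_operation() performs a single allocation
+// which also covers the inline key and record data:
+//
+//     TransactionOperation *op = TransactionFactory::create_operation(
+//                     txn, node, TransactionOperation::kInsert, 0,
+//                     lsn, &key, &record);
+//     // ... use the operation ...
+//     TransactionFactory::destroy_operation(op);
+//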
+
+} // namespace hamsterdb
+
+#endif /* HAM_TXN_FACTORY_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_local.cc b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_local.cc
new file mode 100644
index 0000000000..8014b6330f
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_local.cc
@@ -0,0 +1,676 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "3btree/btree_index.h"
+#include "3journal/journal.h"
+#include "4txn/txn_local.h"
+#include "4txn/txn_factory.h"
+#include "4txn/txn_cursor.h"
+#include "4env/env_local.h"
+#include "4cursor/cursor.h"
+#include "4context/context.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+/* stuff for rb.h */
+#ifndef __ssize_t_defined
+typedef signed ssize_t;
+#endif
+#ifndef __cplusplus
+typedef int bool;
+#define true 1
+#define false (!true)
+#endif /* __cplusplus */
+
+static int
+compare(void *vlhs, void *vrhs)
+{
+ TransactionNode *lhs = (TransactionNode *)vlhs;
+ TransactionNode *rhs = (TransactionNode *)vrhs;
+ LocalDatabase *db = lhs->get_db();
+
+ if (lhs == rhs)
+ return (0);
+
+ ham_key_t *lhskey = lhs->get_key();
+ ham_key_t *rhskey = rhs->get_key();
+ ham_assert(lhskey && rhskey);
+ return (db->btree_index()->compare_keys(lhskey, rhskey));
+}
+
+rb_proto(static, rbt_, TransactionIndex, TransactionNode)
+rb_gen(static, rbt_, TransactionIndex, TransactionNode, node, compare)
+
+void
+TransactionOperation::initialize(LocalTransaction *txn, TransactionNode *node,
+ uint32_t flags, uint32_t orig_flags, uint64_t lsn,
+ ham_key_t *key, ham_record_t *record)
+{
+ memset(this, 0, sizeof(*this));
+
+ m_txn = txn;
+ m_node = node;
+ m_flags = flags;
+ m_lsn = lsn;
+ m_orig_flags = orig_flags;
+
+ /* copy the key data */
+ if (key) {
+ m_key = *key;
+ if (key->size) {
+ m_key.data = &m_data[0];
+ memcpy(m_key.data, key->data, key->size);
+ }
+ }
+
+ /* copy the record data */
+ if (record) {
+ m_record = *record;
+ if (record->size) {
+ m_record.data = &m_data[key ? key->size : 0];
+ memcpy(m_record.data, record->data, record->size);
+ }
+ }
+}
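+
+/*
+ * After initialize(), the inline |m_data| buffer therefore holds the key
+ * bytes first, immediately followed by the record bytes:
+ *
+ *   m_data[0 .. key->size - 1]                          key data
+ *   m_data[key->size .. key->size + record->size - 1]   record data
+ */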
+
+void
+TransactionOperation::destroy()
+{
+ bool delete_node = false;
+
+ /* remove this op from the node */
+ TransactionNode *node = get_node();
+ if (node->get_oldest_op() == this) {
+ /* if the node is empty: remove the node from the tree */
+ // TODO should this be done in here??
+ if (get_next_in_node() == 0) {
+ node->get_db()->txn_index()->remove(node);
+ delete_node = true;
+ }
+ node->set_oldest_op(get_next_in_node());
+ }
+
+ /* remove this operation from the two linked lists */
+ TransactionOperation *next = get_next_in_node();
+ TransactionOperation *prev = get_previous_in_node();
+ if (next)
+ next->set_previous_in_node(prev);
+ if (prev)
+ prev->set_next_in_node(next);
+
+ next = get_next_in_txn();
+ prev = get_previous_in_txn();
+ if (next)
+ next->set_previous_in_txn(prev);
+ if (prev)
+ prev->set_next_in_txn(next);
+
+ if (delete_node)
+ delete node;
+
+ Memory::release(this);
+}
+
+TransactionNode *
+TransactionNode::get_next_sibling()
+{
+ return (rbt_next(get_db()->txn_index(), this));
+}
+
+TransactionNode *
+TransactionNode::get_previous_sibling()
+{
+ return (rbt_prev(get_db()->txn_index(), this));
+}
+
+TransactionNode::TransactionNode(LocalDatabase *db, ham_key_t *key)
+ : m_db(db), m_oldest_op(0), m_newest_op(0), m_key(key)
+{
+ /* make sure that a node with this key does not yet exist */
+ // TODO re-enable this; currently leads to a stack overflow because
+ // TransactionIndex::get() creates a new TransactionNode
+ // ham_assert(TransactionIndex::get(key, 0) == 0);
+}
+
+TransactionNode::~TransactionNode()
+{
+}
+
+TransactionOperation *
+TransactionNode::append(LocalTransaction *txn, uint32_t orig_flags,
+ uint32_t flags, uint64_t lsn, ham_key_t *key,
+ ham_record_t *record)
+{
+ TransactionOperation *op = TransactionFactory::create_operation(txn,
+ this, flags, orig_flags, lsn,
+ key, record);
+
+ /* store it in the chronological list which is managed by the node */
+ if (!get_newest_op()) {
+ ham_assert(get_oldest_op() == 0);
+ set_newest_op(op);
+ set_oldest_op(op);
+ }
+ else {
+ TransactionOperation *newest = get_newest_op();
+ newest->set_next_in_node(op);
+ op->set_previous_in_node(newest);
+ set_newest_op(op);
+ }
+
+ /* store it in the chronological list which is managed by the transaction */
+ if (!txn->get_newest_op()) {
+ ham_assert(txn->get_oldest_op() == 0);
+ txn->set_newest_op(op);
+ txn->set_oldest_op(op);
+ }
+ else {
+ TransactionOperation *newest = txn->get_newest_op();
+ newest->set_next_in_txn(op);
+ op->set_previous_in_txn(newest);
+ txn->set_newest_op(op);
+ }
+
+ // now that an operation is attached make sure that the node no
+ // longer uses the temporary key pointer
+ m_key = 0;
+
+ return (op);
+}
+
+void
+TransactionIndex::store(TransactionNode *node)
+{
+ rbt_insert(this, node);
+}
+
+void
+TransactionIndex::remove(TransactionNode *node)
+{
+#ifdef HAM_DEBUG
+ bool found = false;
+ TransactionNode *n = rbt_first(this);
+ while (n) {
+ if (n == node) {
+ found = true;
+ break;
+ }
+ n = rbt_next(this, n);
+ }
+ ham_assert(found == true);
+#endif
+
+ rbt_remove(this, node);
+}
+
+LocalTransactionManager::LocalTransactionManager(Environment *env)
+ : TransactionManager(env), m_txn_id(0), m_queued_txn_for_flush(0),
+ m_queued_ops_for_flush(0), m_queued_bytes_for_flush(0),
+ m_txn_threshold(kFlushTxnThreshold),
+ m_ops_threshold(kFlushOperationsThreshold),
+ m_bytes_threshold(kFlushBytesThreshold)
+{
+ if (m_env->get_flags() & HAM_FLUSH_WHEN_COMMITTED) {
+ m_txn_threshold = 0;
+ m_ops_threshold = 0;
+ m_bytes_threshold = 0;
+ }
+}
+
+LocalTransaction::LocalTransaction(LocalEnvironment *env, const char *name,
+ uint32_t flags)
+ : Transaction(env, name, flags), m_log_desc(0), m_oldest_op(0),
+ m_newest_op(0), m_op_counter(0), m_accum_data_size(0)
+{
+ LocalTransactionManager *ltm =
+ (LocalTransactionManager *)env->txn_manager();
+ m_id = ltm->get_incremented_txn_id();
+
+ /* append journal entry */
+ if (env->get_flags() & HAM_ENABLE_RECOVERY
+ && env->get_flags() & HAM_ENABLE_TRANSACTIONS
+ && !(flags & HAM_TXN_TEMPORARY)) {
+ env->journal()->append_txn_begin(this, name,
+ env->next_lsn());
+ }
+}
+
+LocalTransaction::~LocalTransaction()
+{
+ free_operations();
+}
+
+void
+LocalTransaction::commit(uint32_t flags)
+{
+ /* are cursors attached to this txn? if yes, fail */
+ if (get_cursor_refcount()) {
+ ham_trace(("Transaction cannot be committed till all attached "
+ "Cursors are closed"));
+ throw Exception(HAM_CURSOR_STILL_OPEN);
+ }
+
+ /* this transaction is now committed! */
+ m_flags |= kStateCommitted;
+}
+
+void
+LocalTransaction::abort(uint32_t flags)
+{
+ /* are cursors attached to this txn? if yes, fail */
+ if (get_cursor_refcount()) {
+ ham_trace(("Transaction cannot be aborted till all attached "
+ "Cursors are closed"));
+ throw Exception(HAM_CURSOR_STILL_OPEN);
+ }
+
+ /* this transaction is now aborted! */
+ m_flags |= kStateAborted;
+
+ /* immediately release memory of the cached operations */
+ free_operations();
+}
+
+void
+LocalTransaction::free_operations()
+{
+ TransactionOperation *n, *op = get_oldest_op();
+
+ while (op) {
+ n = op->get_next_in_txn();
+ TransactionFactory::destroy_operation(op);
+ op = n;
+ }
+
+ set_oldest_op(0);
+ set_newest_op(0);
+}
+
+TransactionIndex::TransactionIndex(LocalDatabase *db)
+ : m_db(db)
+{
+ rbt_new(this);
+}
+
+TransactionIndex::~TransactionIndex()
+{
+ TransactionNode *node;
+
+ while ((node = rbt_last(this))) {
+ remove(node);
+ delete node;
+ }
+
+ // re-initialize the tree
+ rbt_new(this);
+}
+
+TransactionNode *
+TransactionIndex::get(ham_key_t *key, uint32_t flags)
+{
+ TransactionNode *node = 0;
+ int match = 0;
+
+ /* create a temporary node that we can search for */
+ TransactionNode tmp(m_db, key);
+
+ /* search if node already exists - if yes, return it */
+ if ((flags & HAM_FIND_GEQ_MATCH) == HAM_FIND_GEQ_MATCH) {
+ node = rbt_nsearch(this, &tmp);
+ if (node)
+ match = compare(&tmp, node);
+ }
+ else if ((flags & HAM_FIND_LEQ_MATCH) == HAM_FIND_LEQ_MATCH) {
+ node = rbt_psearch(this, &tmp);
+ if (node)
+ match = compare(&tmp, node);
+ }
+ else if (flags & HAM_FIND_GT_MATCH) {
+ node = rbt_search(this, &tmp);
+ if (node)
+ node = node->get_next_sibling();
+ else
+ node = rbt_nsearch(this, &tmp);
+ match = 1;
+ }
+ else if (flags & HAM_FIND_LT_MATCH) {
+ node = rbt_search(this, &tmp);
+ if (node)
+ node = node->get_previous_sibling();
+ else
+ node = rbt_psearch(this, &tmp);
+ match = -1;
+ }
+ else
+ return (rbt_search(this, &tmp));
+
+ /* tree is empty? */
+ if (!node)
+ return (0);
+
+ /* approx. matching: set the key flag */
+ if (match < 0)
+ ham_key_set_intflags(key, (ham_key_get_intflags(key)
+ & ~BtreeKey::kApproximate) | BtreeKey::kLower);
+ else if (match > 0)
+ ham_key_set_intflags(key, (ham_key_get_intflags(key)
+ & ~BtreeKey::kApproximate) | BtreeKey::kGreater);
+
+ return (node);
+}
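+
+/*
+ * Example (illustrative): with flags == 0 this is an exact lookup and the
+ * function returns NULL if no node with this key exists. With
+ * HAM_FIND_GEQ_MATCH/HAM_FIND_LEQ_MATCH (or GT/LT) the nearest neighbour is
+ * returned instead, and the sign of |match| is used above to tag the key
+ * with BtreeKey::kLower or BtreeKey::kGreater so that callers can detect
+ * the approximate match.
+ */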
+
+TransactionNode *
+TransactionIndex::get_first()
+{
+ return (rbt_first(this));
+}
+
+TransactionNode *
+TransactionIndex::get_last()
+{
+ return (rbt_last(this));
+}
+
+void
+TransactionIndex::enumerate(Context *context,
+ TransactionIndex::Visitor *visitor)
+{
+ TransactionNode *node = rbt_first(this);
+
+ while (node) {
+ visitor->visit(context, node);
+ node = rbt_next(this, node);
+ }
+}
+
+struct KeyCounter : public TransactionIndex::Visitor
+{
+ KeyCounter(LocalDatabase *_db, LocalTransaction *_txn, bool _distinct)
+ : counter(0), distinct(_distinct), txn(_txn), db(_db) {
+ }
+
+ void visit(Context *context, TransactionNode *node) {
+ BtreeIndex *be = db->btree_index();
+ TransactionOperation *op;
+
+ /*
+ * look at each tree_node and walk through each operation
+ * in reverse chronological order (from newest to oldest):
+ * - is this op part of an aborted txn? then skip it
+ * - is this op part of a committed txn? then include it
+ * - is this op part of a txn which is still active? then include it
+ * - if a committed txn has erased the item then there's no need
+ * to continue checking older, committed txns of the same key
+ *
+ * !!
+ * if keys are overwritten or a duplicate key is inserted, then
+ * we have to consolidate the btree keys with the txn-tree keys.
+ */
+ op = node->get_newest_op();
+ while (op) {
+ LocalTransaction *optxn = op->get_txn();
+ if (optxn->is_aborted())
+ ; // nop
+ else if (optxn->is_committed() || txn == optxn) {
+ if (op->get_flags() & TransactionOperation::kIsFlushed)
+ ; // nop
+ // if key was erased then it doesn't exist
+ else if (op->get_flags() & TransactionOperation::kErase)
+ return;
+ else if (op->get_flags() & TransactionOperation::kInsert) {
+ counter++;
+ return;
+ }
+ // key exists - include it
+ else if ((op->get_flags() & TransactionOperation::kInsert)
+ || (op->get_flags() & TransactionOperation::kInsertOverwrite)) {
+ // check if the key already exists in the btree - if yes,
+ // we do not count it (it will be counted later)
+ if (HAM_KEY_NOT_FOUND == be->find(context, 0, node->get_key(), 0, 0, 0, 0))
+ counter++;
+ return;
+ }
+ else if (op->get_flags() & TransactionOperation::kInsertDuplicate) {
+ // check if btree has other duplicates
+ if (0 == be->find(context, 0, node->get_key(), 0, 0, 0, 0)) {
+ // yes, there's another one
+ if (distinct)
+ return;
+ counter++;
+ }
+ else {
+ // check if other key is in this node
+ counter++;
+ if (distinct)
+ return;
+ }
+ }
+ else if (!(op->get_flags() & TransactionOperation::kNop)) {
+ ham_assert(!"shouldn't be here");
+ return;
+ }
+ }
+ else { // txn is still active
+ counter++;
+ }
+
+ op = op->get_previous_in_node();
+ }
+ }
+
+ uint64_t counter;
+ bool distinct;
+ LocalTransaction *txn;
+ LocalDatabase *db;
+};
+
+uint64_t
+TransactionIndex::count(Context *context, LocalTransaction *txn, bool distinct)
+{
+ KeyCounter k(m_db, txn, distinct);
+ enumerate(context, &k);
+ return (k.counter);
+}
+
+void
+LocalTransactionManager::begin(Transaction *txn)
+{
+ append_txn_at_tail(txn);
+}
+
+ham_status_t
+LocalTransactionManager::commit(Transaction *htxn, uint32_t flags)
+{
+ LocalTransaction *txn = dynamic_cast<LocalTransaction *>(htxn);
+ Context context(lenv(), txn, 0);
+
+ try {
+ txn->commit(flags);
+
+ /* append journal entry */
+ if (m_env->get_flags() & HAM_ENABLE_RECOVERY
+ && m_env->get_flags() & HAM_ENABLE_TRANSACTIONS
+ && !(txn->get_flags() & HAM_TXN_TEMPORARY))
+ lenv()->journal()->append_txn_commit(txn,
+ lenv()->next_lsn());
+
+ /* flush committed transactions */
+ m_queued_txn_for_flush++;
+ m_queued_ops_for_flush += txn->get_op_counter();
+ m_queued_bytes_for_flush += txn->get_accum_data_size();
+ maybe_flush_committed_txns(&context);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+ return (0);
+}
+
+ham_status_t
+LocalTransactionManager::abort(Transaction *htxn, uint32_t flags)
+{
+ LocalTransaction *txn = dynamic_cast<LocalTransaction *>(htxn);
+ Context context(lenv(), txn, 0);
+
+ try {
+ txn->abort(flags);
+
+ /* append journal entry */
+ if (m_env->get_flags() & HAM_ENABLE_RECOVERY
+ && m_env->get_flags() & HAM_ENABLE_TRANSACTIONS
+ && !(txn->get_flags() & HAM_TXN_TEMPORARY))
+ lenv()->journal()->append_txn_abort(txn,
+ lenv()->next_lsn());
+
+ /* flush committed transactions; while this one was not committed,
+ * we might have cleared the way now to flush other committed
+ * transactions */
+ m_queued_txn_for_flush++;
+
+ /* no need to increment m_queued_{ops,bytes}_for_flush because this
+ * transaction no longer contains any operations */
+ maybe_flush_committed_txns(&context);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+ return (0);
+}
+
+void
+LocalTransactionManager::maybe_flush_committed_txns(Context *context)
+{
+ if (m_queued_txn_for_flush > m_txn_threshold
+ || m_queued_ops_for_flush > m_ops_threshold
+ || m_queued_bytes_for_flush > m_bytes_threshold)
+ flush_committed_txns_impl(context);
+}
+
+void
+LocalTransactionManager::flush_committed_txns(Context *context /* = 0 */)
+{
+ if (!context) {
+ Context new_context(lenv(), 0, 0);
+ flush_committed_txns_impl(&new_context);
+ }
+ else
+ flush_committed_txns_impl(context);
+}
+
+void
+LocalTransactionManager::flush_committed_txns_impl(Context *context)
+{
+ LocalTransaction *oldest;
+ Journal *journal = lenv()->journal();
+ uint64_t highest_lsn = 0;
+
+ ham_assert(context->changeset.is_empty());
+
+ /* always get the oldest transaction; if it was committed: flush
+ * it; if it was aborted: discard it; otherwise return */
+ while ((oldest = (LocalTransaction *)get_oldest_txn())) {
+ if (oldest->is_committed()) {
+ m_queued_ops_for_flush -= oldest->get_op_counter();
+ ham_assert(m_queued_ops_for_flush >= 0);
+ m_queued_bytes_for_flush -= oldest->get_accum_data_size();
+ ham_assert(m_queued_bytes_for_flush >= 0);
+ uint64_t lsn = flush_txn(context, (LocalTransaction *)oldest);
+ if (lsn > highest_lsn)
+ highest_lsn = lsn;
+
+ /* this transaction was flushed! */
+ if (journal && (oldest->get_flags() & HAM_TXN_TEMPORARY) == 0)
+ journal->transaction_flushed(oldest);
+ }
+ else if (oldest->is_aborted()) {
+ ; /* nop */
+ }
+ else
+ break;
+
+ /* it's possible that Transactions were aborted directly, and not through
+ * the TransactionManager (e.g. in Journal::abort_uncommitted_txns),
+ * so don't rely on m_queued_txn_for_flush; it might be zero */
+ if (m_queued_txn_for_flush > 0)
+ m_queued_txn_for_flush--;
+
+ /* now remove the txn from the linked list */
+ remove_txn_from_head(oldest);
+
+ /* and release the memory */
+ delete oldest;
+ }
+
+ /* now flush the changeset and write the modified pages to disk */
+ if (highest_lsn && m_env->get_flags() & HAM_ENABLE_RECOVERY)
+ context->changeset.flush(highest_lsn);
+ else
+ context->changeset.clear();
+
+ ham_assert(context->changeset.is_empty());
+}
+
+uint64_t
+LocalTransactionManager::flush_txn(Context *context, LocalTransaction *txn)
+{
+ TransactionOperation *op = txn->get_oldest_op();
+ TransactionCursor *cursor = 0;
+ uint64_t highest_lsn = 0;
+
+ while (op) {
+ TransactionNode *node = op->get_node();
+
+ if (op->get_flags() & TransactionOperation::kIsFlushed)
+ goto next_op;
+
+ // perform the actual operation in the btree
+ node->get_db()->flush_txn_operation(context, txn, op);
+
+ /*
+ * this op is about to be flushed!
+ *
+ * as a consequence, all (txn)cursors which are coupled to this op
+ * have to be uncoupled, as their parent (btree) cursor was
+ * already coupled to the btree item instead
+ */
+ op->set_flushed();
+next_op:
+ while ((cursor = op->cursor_list())) {
+ Cursor *pc = cursor->get_parent();
+ ham_assert(pc->get_txn_cursor() == cursor);
+ pc->couple_to_btree(); // TODO merge both calls?
+ if (!pc->is_nil(Cursor::kTxn))
+ pc->set_to_nil(Cursor::kTxn);
+ }
+
+ ham_assert(op->get_lsn() > highest_lsn);
+ highest_lsn = op->get_lsn();
+
+ /* continue with the next operation of this txn */
+ op = op->get_next_in_txn();
+ }
+
+ return (highest_lsn);
+}
+
+} // namespace hamsterdb
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_local.h b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_local.h
new file mode 100644
index 0000000000..cfb563466a
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_local.h
@@ -0,0 +1,566 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_TXN_LOCAL_H
+#define HAM_TXN_LOCAL_H
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1rb/rb.h"
+#include "4txn/txn.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+class TransactionNode;
+class TransactionIndex;
+class TransactionCursor;
+class LocalTransaction;
+class LocalDatabase;
+class LocalEnvironment;
+
+
+//
+// The TransactionOperation class describes a single operation (i.e.
+// insert or erase) in a Transaction.
+//
+class TransactionOperation
+{
+ public:
+ enum {
+ // a NOP operation (empty)
+ kNop = 0x000000u,
+
+ // txn operation is an insert
+ kInsert = 0x010000u,
+
+ // txn operation is an insert w/ overwrite
+ kInsertOverwrite = 0x020000u,
+
+ // txn operation is an insert w/ duplicate
+ kInsertDuplicate = 0x040000u,
+
+ // txn operation erases the key
+ kErase = 0x080000u,
+
+ // txn operation was already flushed
+ kIsFlushed = 0x100000u
+ };
+
+ // Returns the flags
+ uint32_t get_flags() const {
+ return (m_flags);
+ }
+
+ // This Operation was flushed to disk
+ void set_flushed() {
+ m_flags |= kIsFlushed;
+ }
+
+ // Returns the original flags of ham_insert/ham_cursor_insert/ham_erase...
+ uint32_t get_orig_flags() const {
+ return (m_orig_flags);
+ }
+
+ // Returns the referenced duplicate id
+ uint32_t get_referenced_dupe() const {
+ return (m_referenced_dupe);
+ }
+
+ // Sets the referenced duplicate id
+ void set_referenced_dupe(uint32_t id) {
+ m_referenced_dupe = id;
+ }
+
+ // Returns a pointer to the Transaction of this update
+ LocalTransaction *get_txn() {
+ return (m_txn);
+ }
+
+ // Returns a pointer to the parent node of this update
+ TransactionNode *get_node() {
+ return (m_node);
+ }
+
+ // Returns the lsn of this operation
+ uint64_t get_lsn() const {
+ return (m_lsn);
+ }
+
+ // Returns the key of this operation
+ ham_key_t *get_key() {
+ return (&m_key);
+ }
+
+ // Returns the record of this operation
+ ham_record_t *get_record() {
+ return (&m_record);
+ }
+
+ // Returns the list of Cursors coupled to this operation
+ TransactionCursor *cursor_list() {
+ return (m_cursor_list);
+ }
+
+ // Sets the list of Cursors coupled to this operation
+ void set_cursor_list(TransactionCursor *cursors) {
+ m_cursor_list = cursors;
+ }
+
+ // Returns the next TransactionOperation which modifies the
+ // same TransactionNode
+ TransactionOperation *get_next_in_node() {
+ return (m_node_next);
+ }
+
+ // Returns the previous TransactionOperation which modifies the
+ // same TransactionNode
+ TransactionOperation *get_previous_in_node() {
+ return (m_node_prev);
+ }
+
+ // Returns the next TransactionOperation in the same Transaction
+ TransactionOperation *get_next_in_txn() {
+ return (m_txn_next);
+ }
+
+ // Returns the previous TransactionOperation in the same Transaction
+ TransactionOperation *get_previous_in_txn() {
+ return (m_txn_prev);
+ }
+
+ private:
+ friend class TransactionNode;
+ friend struct TransactionFactory;
+
+ // Initialization
+ void initialize(LocalTransaction *txn, TransactionNode *node,
+ uint32_t flags, uint32_t orig_flags, uint64_t lsn,
+ ham_key_t *key, ham_record_t *record);
+
+ // Destroys this operation and releases its memory
+ void destroy();
+
+ // Sets the next TransactionOperation which modifies the
+ // same TransactionNode
+ void set_next_in_node(TransactionOperation *next) {
+ m_node_next = next;
+ }
+
+ // Sets the previous TransactionOperation which modifies the
+ // same TransactionNode
+ void set_previous_in_node(TransactionOperation *prev) {
+ m_node_prev = prev;
+ }
+
+ // Sets the next TransactionOperation in the same Transaction
+ void set_next_in_txn(TransactionOperation *next) {
+ m_txn_next = next;
+ }
+
+ // Sets the previous TransactionOperation in the same Transaction
+ void set_previous_in_txn(TransactionOperation *prev) {
+ m_txn_prev = prev;
+ }
+
+ // the Transaction of this operation
+ LocalTransaction *m_txn;
+
+ // the parent node
+ TransactionNode *m_node;
+
+ // flags and type of this operation; defined in this file
+ uint32_t m_flags;
+
+ // the original flags of this operation, used when calling
+ // ham_cursor_insert, ham_insert, ham_erase etc
+ uint32_t m_orig_flags;
+
+ // the referenced duplicate id (if necessary) - used if this is
+ // e.g. a ham_cursor_erase, ham_cursor_overwrite or ham_cursor_insert
+ // with a DUPLICATE_AFTER/BEFORE flag
+ // this is 1-based (like dupecache-index, which is also 1-based)
+ uint32_t m_referenced_dupe;
+
+ // the log serial number (lsn) of this operation
+ uint64_t m_lsn;
+
+ // a linked list of cursors which are attached to this operation
+ TransactionCursor *m_cursor_list;
+
+ // next in linked list (managed in TransactionNode)
+ TransactionOperation *m_node_next;
+
+ // previous in linked list (managed in TransactionNode)
+ TransactionOperation *m_node_prev;
+
+ // next in linked list (managed in Transaction)
+ TransactionOperation *m_txn_next;
+
+ // previous in linked list (managed in Transaction)
+ TransactionOperation *m_txn_prev;
+
+ // the key which is inserted or overwritten
+ ham_key_t m_key;
+
+ // the record which is inserted or overwritten
+ ham_record_t m_record;
+
+ // Inline storage for the key and record data. This saves us separate memory allocations.
+ uint8_t m_data[1];
+};
+
+
+//
+// A node in the Transaction Index, used as the node structure in rb.h.
+// Manages a group of TransactionOperation objects which all modify the
+// same key.
+//
+// To avoid chicken-egg problems when inserting a new TransactionNode
+// into the TransactionTree, it is possible to assign a temporary key
+// to this node. However, as soon as an operation is attached to this node,
+// the TransactionNode class will use the key structure in this operation.
+//
+// This basically avoids one memory allocation.
+//
+class TransactionNode
+{
+ public:
+ // Constructor;
+ // The default parameters are required for the compilation of rb.h.
+ // |key| is just a temporary pointer which allows creating a
+ // TransactionNode without further memory allocations/copying. The actual
+ // key is then fetched from |m_oldest_op| as soon as this node is fully
+ // initialized.
+ TransactionNode(LocalDatabase *db = 0, ham_key_t *key = 0);
+
+ // Destructor; the node is removed from the tree by its owner
+ // (see TransactionIndex::remove)
+ ~TransactionNode();
+
+ // Returns the database
+ LocalDatabase *get_db() {
+ return (m_db);
+ }
+
+ // Returns the modified key
+ ham_key_t *get_key() {
+ return (m_oldest_op ? m_oldest_op->get_key() : m_key);
+ }
+
+ // Retrieves the next larger sibling of a given node, or NULL if there
+ // is no sibling
+ TransactionNode *get_next_sibling();
+
+ // Retrieves the previous smaller sibling of a given node, or NULL if there
+ // is no sibling
+ TransactionNode *get_previous_sibling();
+
+ // Returns the first (oldest) TransactionOperation in this node
+ TransactionOperation *get_oldest_op() {
+ return (m_oldest_op);
+ };
+
+ // Sets the first (oldest) TransactionOperation in this node
+ void set_oldest_op(TransactionOperation *oldest) {
+ m_oldest_op = oldest;
+ }
+
+ // Returns the last (newest) TransactionOperation in this node
+ TransactionOperation *get_newest_op() {
+ return (m_newest_op);
+ };
+
+ // Sets the last (newest) TransactionOperation in this node
+ void set_newest_op(TransactionOperation *newest) {
+ m_newest_op = newest;
+ }
+
+ // Appends an actual operation to this node
+ TransactionOperation *append(LocalTransaction *txn, uint32_t orig_flags,
+ uint32_t flags, uint64_t lsn, ham_key_t *key,
+ ham_record_t *record);
+
+ // red-black tree stub, required for rb.h
+ rb_node(TransactionNode) node;
+
+ private:
+ friend struct TxnFixture;
+
+ // the database - need this to get the compare function
+ LocalDatabase *m_db;
+
+ // the linked list of operations - head is oldest operation
+ TransactionOperation *m_oldest_op;
+
+ // the linked list of operations - tail is newest operation
+ TransactionOperation *m_newest_op;
+
+ // Pointer to the key data; only used as long as there are no operations
+ // attached. Otherwise we have a chicken-egg problem in rb.h.
+ ham_key_t *m_key;
+};
+
+
+//
+// Each Database has a binary tree which stores the current Transaction
+// operations; this tree is implemented in TransactionIndex
+//
+class TransactionIndex
+{
+ public:
+ // Traverses a TransactionIndex; for each node, a callback is executed
+ struct Visitor {
+ virtual void visit(Context *context, TransactionNode *node) = 0;
+ };
+
+ // Constructor
+ TransactionIndex(LocalDatabase *db);
+
+ // Destructor; frees all nodes and their operations
+ ~TransactionIndex();
+
+ // Stores a new TransactionNode in the index
+ void store(TransactionNode *node);
+
+ // Removes a TransactionNode from the index
+ void remove(TransactionNode *node);
+
+ // Visits every node in the TransactionTree
+ void enumerate(Context *context, Visitor *visitor);
+
+ // Returns the node for a given key; if a node with this
+ // key already exists then the existing node is returned, otherwise NULL.
+ // |flags| can be HAM_FIND_GEQ_MATCH, HAM_FIND_LEQ_MATCH etc
+ TransactionNode *get(ham_key_t *key, uint32_t flags);
+
+ // Returns the first (= "smallest") node of the tree, or NULL if the
+ // tree is empty
+ TransactionNode *get_first();
+
+ // Returns the last (= "greatest") node of the tree, or NULL if the
+ // tree is empty
+ TransactionNode *get_last();
+
+ // Returns the key count of this index
+ uint64_t count(Context *context, LocalTransaction *txn, bool distinct);
+
+ // private: //TODO re-enable this; currently disabled because rb.h needs it
+ // the Database for all operations in this tree
+ LocalDatabase *m_db;
+
+ // stuff for rb.h
+ TransactionNode *rbt_root;
+ TransactionNode rbt_nil;
+};
+
+
+//
+// A local Transaction
+//
+class LocalTransaction : public Transaction
+{
+ public:
+ // Constructor; "begins" the Transaction
+ // supported flags: HAM_TXN_READ_ONLY, HAM_TXN_TEMPORARY
+ LocalTransaction(LocalEnvironment *env, const char *name, uint32_t flags);
+
+ // Destructor; frees all TransactionOperation structures associated
+ // with this Transaction
+ virtual ~LocalTransaction();
+
+ // Commits the Transaction
+ void commit(uint32_t flags = 0);
+
+ // Aborts the Transaction
+ void abort(uint32_t flags = 0);
+
+ // Returns the first (or 'oldest') TransactionOperation of this Transaction
+ TransactionOperation *get_oldest_op() const {
+ return (m_oldest_op);
+ }
+
+ // Sets the first (or 'oldest') TransactionOperation of this Transaction
+ void set_oldest_op(TransactionOperation *op) {
+ m_oldest_op = op;
+ }
+
+ // Returns the last (or 'newest') TransactionOperation of this Transaction
+ TransactionOperation *get_newest_op() const {
+ return (m_newest_op);
+ }
+
+ // Sets the last (or 'newest') TransactionOperation of this Transaction
+ void set_newest_op(TransactionOperation *op) {
+ if (op) {
+ m_op_counter++;
+ m_accum_data_size += op->get_record()
+ ? op->get_record()->size
+ : 0;
+ m_accum_data_size += op->get_node()->get_key()->size;
+ }
+ m_newest_op = op;
+ }
+
+ // Returns the number of operations attached to this Transaction
+ int get_op_counter() const {
+ return (m_op_counter);
+ }
+
+ // Returns the accumulated data size of all operations
+ int get_accum_data_size() const {
+ return (m_accum_data_size);
+ }
+
+ private:
+ friend class Journal;
+ friend struct TxnFixture;
+ friend struct TxnCursorFixture;
+
+ // Frees the internal structures; releases all the memory. This is
+ // called in the destructor, but also when aborting a Transaction
+ // (before it's deleted by the Environment).
+ void free_operations();
+
+ // Returns the index of the journal's log file descriptor
+ int get_log_desc() const {
+ return (m_log_desc);
+ }
+
+ // Sets the index of the journal's log file descriptor
+ void set_log_desc(int desc) {
+ m_log_desc = desc;
+ }
+
+ // index of the log file descriptor for this transaction [0..1]
+ int m_log_desc;
+
+ // the linked list of operations - head is oldest operation
+ TransactionOperation *m_oldest_op;
+
+ // the linked list of operations - tail is newest operation
+ TransactionOperation *m_newest_op;
+
+ // For counting the operations
+ int m_op_counter;
+
+ // The approximate accumulated memory consumed by this Transaction
+ // (sums up key->size and record->size over all operations)
+ int m_accum_data_size;
+};
+
+
+//
+// A TransactionManager for local Transactions
+//
+class LocalTransactionManager : public TransactionManager
+{
+ enum {
+ // flush if this limit is exceeded
+ kFlushTxnThreshold = 64,
+
+ // flush if this limit is exceeded
+ kFlushOperationsThreshold = kFlushTxnThreshold * 20,
+
+ // flush if this limit is exceeded
+ kFlushBytesThreshold = 1024 * 1024 // 1 mb - same as journal buffer
+ };
+
+ public:
+ // Constructor
+ LocalTransactionManager(Environment *env);
+
+ // Begins a new Transaction
+ virtual void begin(Transaction *txn);
+
+ // Commits a Transaction; the derived subclass has to take care of
+ // flushing and/or releasing memory
+ virtual ham_status_t commit(Transaction *txn, uint32_t flags = 0);
+
+ // Aborts a Transaction; the derived subclass has to take care of
+ // flushing and/or releasing memory
+ virtual ham_status_t abort(Transaction *txn, uint32_t flags = 0);
+
+ // Flushes committed (queued) transactions
+ virtual void flush_committed_txns(Context *context = 0);
+
+ // Increments the global transaction ID and returns the new value.
+ uint64_t get_incremented_txn_id() {
+ return (++m_txn_id);
+ }
+
+ // Returns the current transaction ID; only for testing!
+ uint64_t test_get_txn_id() const {
+ return (m_txn_id);
+ }
+
+ // Sets the current transaction ID; used by the Journal to
+ // reset the original txn id during recovery.
+ void set_txn_id(uint64_t id) {
+ m_txn_id = id;
+ }
+
+ private:
+ void flush_committed_txns_impl(Context *context);
+
+ // Flushes a single committed Transaction; returns the lsn of the
+ // last operation in this transaction
+ uint64_t flush_txn(Context *context, LocalTransaction *txn);
+
+ // Casts m_env to a LocalEnvironment
+ LocalEnvironment *lenv() {
+ return ((LocalEnvironment *)m_env);
+ }
+
+ // Flushes committed transactions if there are enough committed
+ // transactions waiting to be flushed, or if other conditions apply
+ void maybe_flush_committed_txns(Context *context);
+
+ // The current transaction ID
+ uint64_t m_txn_id;
+
+ // Number of Transactions waiting to be flushed
+ int m_queued_txn_for_flush;
+
+ // Combined number of Operations in these transactions waiting to be flushed
+ int m_queued_ops_for_flush;
+
+ // Approx. memory consumption of all these operations in the flush queue
+ int m_queued_bytes_for_flush;
+
+ // Threshold for the transaction queue (number of queued transactions)
+ int m_txn_threshold;
+
+ // Threshold for the transaction queue (number of queued operations)
+ int m_ops_threshold;
+
+ // Threshold for the transaction queue (number of queued bytes)
+ int m_bytes_threshold;
+};
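+
+// Note: kFlushTxnThreshold, kFlushOperationsThreshold and
+// kFlushBytesThreshold batch committed transactions before they are flushed
+// to the btree. If the Environment was created with HAM_FLUSH_WHEN_COMMITTED,
+// the constructor (see txn_local.cc) sets all three thresholds to 0, so every
+// commit or abort triggers maybe_flush_committed_txns() immediately.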
+
+} // namespace hamsterdb
+
+#endif /* HAM_TXN_LOCAL_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_remote.cc b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_remote.cc
new file mode 100644
index 0000000000..2d4403b077
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_remote.cc
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef HAM_ENABLE_REMOTE
+
+#include "0root/root.h"
+
+#include <string.h>
+
+// Always verify that a file of level N does not include headers > N!
+#include "2protobuf/protocol.h"
+#include "4txn/txn_remote.h"
+#include "4env/env_remote.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+RemoteTransaction::RemoteTransaction(Environment *env, const char *name,
+ uint32_t flags, uint64_t remote_handle)
+ : Transaction(env, name, flags), m_remote_handle(remote_handle)
+{
+}
+
+void
+RemoteTransaction::commit(uint32_t flags)
+{
+ /* There's nothing else to do for this Transaction, therefore set it
+ * to 'aborted' (although it was committed) */
+ m_flags |= kStateAborted;
+}
+
+void
+RemoteTransaction::abort(uint32_t flags)
+{
+ /* this transaction is now aborted! */
+ m_flags |= kStateAborted;
+}
+
+void
+RemoteTransactionManager::begin(Transaction *txn)
+{
+ append_txn_at_tail(txn);
+}
+
+ham_status_t
+RemoteTransactionManager::commit(Transaction *txn, uint32_t flags)
+{
+ try {
+ txn->commit(flags);
+
+ /* "flush" (remove) committed and aborted transactions */
+ flush_committed_txns();
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+ return (0);
+}
+
+ham_status_t
+RemoteTransactionManager::abort(Transaction *txn, uint32_t flags)
+{
+ try {
+ txn->abort(flags);
+
+ /* "flush" (remove) committed and aborted transactions */
+ flush_committed_txns();
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+ return (0);
+}
+
+void
+RemoteTransactionManager::flush_committed_txns(Context *context /* = 0 */)
+{
+ Transaction *oldest;
+
+ while ((oldest = get_oldest_txn())) {
+ if (oldest->is_committed() || oldest->is_aborted()) {
+ remove_txn_from_head(oldest);
+ delete oldest;
+ }
+ else
+ return;
+ }
+}
+
+} // namespace hamsterdb
+
+#endif // HAM_ENABLE_REMOTE
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_remote.h b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_remote.h
new file mode 100644
index 0000000000..4c7d6f46e5
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/4txn/txn_remote.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @exception_safe: unknown
+ * @thread_safe: unknown
+ */
+
+#ifndef HAM_TXN_REMOTE_H
+#define HAM_TXN_REMOTE_H
+
+#ifdef HAM_ENABLE_REMOTE
+
+#include "0root/root.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "4txn/txn.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+namespace hamsterdb {
+
+struct Context;
+
+//
+// A remote Transaction
+//
+class RemoteTransaction : public Transaction
+{
+ public:
+ // Constructor; "begins" the Transaction
+ // supported flags: HAM_TXN_READ_ONLY, HAM_TXN_TEMPORARY
+ RemoteTransaction(Environment *env, const char *name, uint32_t flags,
+ uint64_t remote_handle);
+
+ // Commits the Transaction
+ virtual void commit(uint32_t flags = 0);
+
+ // Aborts the Transaction
+ virtual void abort(uint32_t flags = 0);
+
+ // Returns the remote Transaction handle
+ uint64_t get_remote_handle() const {
+ return (m_remote_handle);
+ }
+
+ private:
+ // The remote Transaction handle
+ uint64_t m_remote_handle;
+};
+
+
+//
+// A TransactionManager for remote Transactions
+//
+class RemoteTransactionManager : public TransactionManager
+{
+ public:
+ // Constructor
+ RemoteTransactionManager(Environment *env)
+ : TransactionManager(env) {
+ }
+
+ // Begins a new Transaction
+ virtual void begin(Transaction *txn);
+
+ // Commits a Transaction; the derived subclass has to take care of
+ // flushing and/or releasing memory
+ virtual ham_status_t commit(Transaction *txn, uint32_t flags = 0);
+
+ // Aborts a Transaction; the derived subclass has to take care of
+ // flushing and/or releasing memory
+ virtual ham_status_t abort(Transaction *txn, uint32_t flags = 0);
+
+ // Flushes committed (queued) transactions
+ virtual void flush_committed_txns(Context *context = 0);
+};
+
+} // namespace hamsterdb
+
+#endif // HAM_ENABLE_REMOTE
+
+#endif /* HAM_TXN_REMOTE_H */
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/5hamsterdb/hamsterdb.cc b/plugins/Dbx_kv/src/hamsterdb/src/5hamsterdb/hamsterdb.cc
new file mode 100644
index 0000000000..ed366ed374
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/5hamsterdb/hamsterdb.cc
@@ -0,0 +1,1633 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "ham/hamsterdb.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "1base/dynamic_array.h"
+#include "1mem/mem.h"
+#include "2config/db_config.h"
+#include "2config/env_config.h"
+#include "2page/page.h"
+#ifdef HAM_ENABLE_REMOTE
+# include "2protobuf/protocol.h"
+#endif
+#include "2device/device.h"
+#include "3btree/btree_stats.h"
+#include "3blob_manager/blob_manager.h"
+#include "3btree/btree_index.h"
+#include "3btree/btree_cursor.h"
+#include "4cursor/cursor.h"
+#include "4db/db.h"
+#include "4env/env.h"
+#include "4env/env_header.h"
+#include "4env/env_local.h"
+#include "4env/env_remote.h"
+#include "4txn/txn.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+/* return true if the filename is for a local file */
+static bool
+filename_is_local(const char *filename)
+{
+ return (!filename || strstr(filename, "ham://") != filename);
+}
+
+ham_status_t
+ham_txn_begin(ham_txn_t **htxn, ham_env_t *henv, const char *name,
+ void *, uint32_t flags)
+{
+ Transaction **ptxn = (Transaction **)htxn;
+
+ if (!ptxn) {
+ ham_trace(("parameter 'txn' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ *ptxn = 0;
+
+ if (!henv) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Environment *env = (Environment *)henv;
+
+ return (env->txn_begin(ptxn, name, flags));
+}
+
+HAM_EXPORT const char *
+ham_txn_get_name(ham_txn_t *htxn)
+{
+ Transaction *txn = (Transaction *)htxn;
+ if (!txn)
+ return (0);
+
+ const std::string &name = txn->get_env()->txn_get_name(txn);
+ return (name.empty() ? 0 : name.c_str());
+}
+
+ham_status_t
+ham_txn_commit(ham_txn_t *htxn, uint32_t flags)
+{
+ Transaction *txn = (Transaction *)htxn;
+ if (!txn) {
+ ham_trace(("parameter 'txn' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Environment *env = txn->get_env();
+
+ return (env->txn_commit(txn, flags));
+}
+
+ham_status_t
+ham_txn_abort(ham_txn_t *htxn, uint32_t flags)
+{
+ Transaction *txn = (Transaction *)htxn;
+ if (!txn) {
+ ham_trace(("parameter 'txn' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Environment *env = txn->get_env();
+
+ return (env->txn_abort(txn, flags));
+}
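+
+/*
+ * Typical usage of the Transaction API from application code (sketch;
+ * assumes an Environment |env| opened with HAM_ENABLE_TRANSACTIONS and a
+ * Database handle whose insert/find/erase calls accept the |txn| handle):
+ *
+ *   ham_txn_t *txn;
+ *   ham_status_t st = ham_txn_begin(&txn, env, "my-txn", 0, 0);
+ *   if (st != HAM_SUCCESS)
+ *     return (st);
+ *   // ... perform database operations with |txn| ...
+ *   st = ham_txn_commit(txn, 0);     // or ham_txn_abort(txn, 0) on error
+ */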
+
+const char * HAM_CALLCONV
+ham_strerror(ham_status_t result)
+{
+ switch (result) {
+ case HAM_SUCCESS:
+ return ("Success");
+ case HAM_INV_KEY_SIZE:
+ return ("Invalid key size");
+ case HAM_INV_RECORD_SIZE:
+ return ("Invalid record size");
+ case HAM_INV_PAGESIZE:
+ return ("Invalid page size");
+ case HAM_OUT_OF_MEMORY:
+ return ("Out of memory");
+ case HAM_INV_PARAMETER:
+ return ("Invalid parameter");
+ case HAM_INV_FILE_HEADER:
+ return ("Invalid database file header");
+ case HAM_INV_FILE_VERSION:
+ return ("Invalid database file version");
+ case HAM_KEY_NOT_FOUND:
+ return ("Key not found");
+ case HAM_DUPLICATE_KEY:
+ return ("Duplicate key");
+ case HAM_INTEGRITY_VIOLATED:
+ return ("Internal integrity violated");
+ case HAM_INTERNAL_ERROR:
+ return ("Internal error");
+ case HAM_WRITE_PROTECTED:
+ return ("Database opened in read-only mode");
+ case HAM_BLOB_NOT_FOUND:
+ return ("Data blob not found");
+ case HAM_IO_ERROR:
+ return ("System I/O error");
+ case HAM_NOT_IMPLEMENTED:
+ return ("Operation not implemented");
+ case HAM_FILE_NOT_FOUND:
+ return ("File not found");
+ case HAM_WOULD_BLOCK:
+ return ("Operation would block");
+ case HAM_NOT_READY:
+ return ("Object was not initialized correctly");
+ case HAM_CURSOR_STILL_OPEN:
+ return ("Cursor must be closed prior to Transaction abort/commit");
+ case HAM_FILTER_NOT_FOUND:
+ return ("Record filter or file filter not found");
+ case HAM_TXN_CONFLICT:
+ return ("Operation conflicts with another Transaction");
+ case HAM_TXN_STILL_OPEN:
+ return ("Database cannot be closed because it is modified in a "
+ "Transaction");
+ case HAM_CURSOR_IS_NIL:
+ return ("Cursor points to NIL");
+ case HAM_DATABASE_NOT_FOUND:
+ return ("Database not found");
+ case HAM_DATABASE_ALREADY_EXISTS:
+ return ("Database name already exists");
+ case HAM_DATABASE_ALREADY_OPEN:
+ return ("Database already open, or: Database handle "
+ "already initialized");
+ case HAM_ENVIRONMENT_ALREADY_OPEN:
+ return ("Environment already open, or: Environment handle "
+ "already initialized");
+ case HAM_LIMITS_REACHED:
+ return ("Database limits reached");
+ case HAM_ALREADY_INITIALIZED:
+ return ("Object was already initialized");
+ case HAM_NEED_RECOVERY:
+ return ("Database needs recovery");
+ case HAM_LOG_INV_FILE_HEADER:
+ return ("Invalid log file header");
+ case HAM_NETWORK_ERROR:
+ return ("Remote I/O error/Network error");
+ default:
+ return ("Unknown error");
+ }
+}
+
+/**
+ * Prepares a @ref ham_key_t structure for returning key data in.
+ *
+ * This function checks whether the @ref ham_key_t structure has been
+ * properly initialized by the user and resets all internal used elements.
+ *
+ * @return true when the @a key structure has been initialized correctly
+ * before.
+ *
+ * @return false when the @a key structure has @e not been initialized
+ * correctly before.
+ */
+static inline bool
+__prepare_key(ham_key_t *key)
+{
+ if (unlikely(key->size && !key->data)) {
+ ham_trace(("key->size != 0, but key->data is NULL"));
+ return (false);
+ }
+ if (unlikely(key->flags != 0 && key->flags != HAM_KEY_USER_ALLOC)) {
+ ham_trace(("invalid flag in key->flags"));
+ return (false);
+ }
+ key->_flags = 0;
+ return (true);
+}
+
+/**
+ * Prepares a @ref ham_record_t structure for returning record data in.
+ *
+ * This function checks whether the @ref ham_record_t structure has been
+ * properly initialized by the user and resets all internal used elements.
+ *
+ * @return true when the @a record structure has been initialized
+ * correctly before.
+ *
+ * @return false when the @a record structure has @e not been
+ * initialized correctly before.
+ */
+static inline bool
+__prepare_record(ham_record_t *record)
+{
+ if (unlikely(record->size && !record->data)) {
+ ham_trace(("record->size != 0, but record->data is NULL"));
+ return false;
+ }
+ if (unlikely(record->flags & HAM_DIRECT_ACCESS))
+ record->flags &= ~HAM_DIRECT_ACCESS;
+ if (unlikely(record->flags != 0 && record->flags != HAM_RECORD_USER_ALLOC)) {
+ ham_trace(("invalid flag in record->flags"));
+ return (false);
+ }
+ return (true);
+}
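+
+/*
+ * The checks above imply that callers should zero-initialize their key and
+ * record structures before passing them in, e.g. (sketch):
+ *
+ *   ham_key_t key = {0};
+ *   key.data = (void *)"hello";
+ *   key.size = 6;                  // includes the terminating '\0'
+ *
+ *   ham_record_t record = {0};     // let hamsterdb allocate record.data,
+ *                                  // or set HAM_RECORD_USER_ALLOC and
+ *                                  // provide a caller-owned buffer
+ */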
+
+void HAM_CALLCONV
+ham_get_version(uint32_t *major, uint32_t *minor, uint32_t *revision)
+{
+ if (major)
+ *major = HAM_VERSION_MAJ;
+ if (minor)
+ *minor = HAM_VERSION_MIN;
+ if (revision)
+ *revision = HAM_VERSION_REV;
+}
+
+ham_status_t HAM_CALLCONV
+ham_env_create(ham_env_t **henv, const char *filename,
+ uint32_t flags, uint32_t mode, const ham_parameter_t *param)
+{
+ EnvironmentConfiguration config;
+ config.filename = filename ? filename : "";
+ config.file_mode = mode;
+
+ if (!henv) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ *henv = 0;
+
+ /* creating a file in READ_ONLY mode? doesn't make sense */
+ if (flags & HAM_READ_ONLY) {
+ ham_trace(("cannot create a file in read-only mode"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ /* in-memory? recovery is not possible */
+ if ((flags & HAM_IN_MEMORY) && (flags & HAM_ENABLE_RECOVERY)) {
+ ham_trace(("combination of HAM_IN_MEMORY and HAM_ENABLE_RECOVERY "
+ "not allowed"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (flags & HAM_ENABLE_CRC32) {
+ ham_trace(("Crc32 is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ }
+
+ /* HAM_ENABLE_TRANSACTIONS implies HAM_ENABLE_RECOVERY, unless explicitly
+ * disabled */
+ if ((flags & HAM_ENABLE_TRANSACTIONS) && !(flags & HAM_DISABLE_RECOVERY))
+ flags |= HAM_ENABLE_RECOVERY;
+
+ /* flag HAM_AUTO_RECOVERY implies HAM_ENABLE_RECOVERY */
+ if (flags & HAM_AUTO_RECOVERY)
+ flags |= HAM_ENABLE_RECOVERY;
+
+ /* in-memory with Transactions? disable recovery */
+ if (flags & HAM_IN_MEMORY)
+ flags &= ~HAM_ENABLE_RECOVERY;
+
+ if (param) {
+ for (; param->name; param++) {
+ switch (param->name) {
+ case HAM_PARAM_JOURNAL_COMPRESSION:
+ ham_trace(("Journal compression is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ case HAM_PARAM_CACHE_SIZE:
+ if (flags & HAM_IN_MEMORY && param->value != 0) {
+ ham_trace(("combination of HAM_IN_MEMORY and cache size != 0 "
+ "not allowed"));
+ return (HAM_INV_PARAMETER);
+ }
+ /* don't allow cache limits with unlimited cache */
+ if (flags & HAM_CACHE_UNLIMITED && param->value != 0) {
+ ham_trace(("combination of HAM_CACHE_UNLIMITED and cache size != 0 "
+ "not allowed"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (param->value > 0)
+ config.cache_size_bytes = (size_t)param->value;
+ break;
+ case HAM_PARAM_PAGE_SIZE:
+ if (param->value != 1024 && param->value % 2048 != 0) {
+ ham_trace(("invalid page size - must be 1024 or a multiple of 2048"));
+ return (HAM_INV_PAGESIZE);
+ }
+ if (param->value > 0)
+ config.page_size_bytes = (uint32_t)param->value;
+ break;
+ case HAM_PARAM_FILE_SIZE_LIMIT:
+ if (param->value > 0)
+ config.file_size_limit_bytes = (size_t)param->value;
+ break;
+ case HAM_PARAM_JOURNAL_SWITCH_THRESHOLD:
+ config.journal_switch_threshold = (uint32_t)param->value;
+ break;
+ case HAM_PARAM_LOG_DIRECTORY:
+ config.log_filename = (const char *)param->value;
+ break;
+ case HAM_PARAM_NETWORK_TIMEOUT_SEC:
+ config.remote_timeout_sec = (uint32_t)param->value;
+ break;
+ case HAM_PARAM_ENCRYPTION_KEY:
+ ham_trace(("Encryption is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ case HAM_PARAM_POSIX_FADVISE:
+ config.posix_advice = (int)param->value;
+ break;
+ default:
+ ham_trace(("unknown parameter %d", (int)param->name));
+ return (HAM_INV_PARAMETER);
+ }
+ }
+ }
+
+ if (config.filename.empty() && !(flags & HAM_IN_MEMORY)) {
+ ham_trace(("filename is missing"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ config.flags = flags;
+
+ /*
+ * make sure that max_databases actually fit in a header
+ * page!
+ * leave at least 128 bytes for other header data
+ */
+ config.max_databases = config.page_size_bytes
+ - sizeof(PEnvironmentHeader) - 128;
+ config.max_databases /= sizeof(PBtreeHeader);
+
+ ham_status_t st = 0;
+ Environment *env = 0;
+
+ if (filename_is_local(config.filename.c_str())) {
+ env = new LocalEnvironment(config);
+ }
+ else {
+#ifndef HAM_ENABLE_REMOTE
+ return (HAM_NOT_IMPLEMENTED);
+#else // HAM_ENABLE_REMOTE
+ env = new RemoteEnvironment(config);
+#endif
+ }
+
+#ifdef HAM_ENABLE_REMOTE
+ atexit(Protocol::shutdown);
+#endif
+
+ /* and finish the initialization of the Environment */
+ st = env->create();
+
+ /* flush the environment to make sure that the header page is written
+ * to disk TODO required?? */
+ if (st == 0)
+ st = env->flush(0);
+
+ if (st) {
+ env->close(HAM_AUTO_CLEANUP);
+ delete env;
+ return (st);
+ }
+
+ *henv = (ham_env_t *)env;
+ return (0);
+}
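+
+/*
+ * Example (sketch): creating a local Environment with a custom cache and
+ * page size, then creating a Database inside it (see ham_env_create_db
+ * below). The parameter list is terminated by an entry whose |name| is 0,
+ * as expected by the loop above:
+ *
+ *   ham_parameter_t params[] = {
+ *     { HAM_PARAM_CACHE_SIZE, 2 * 1024 * 1024 },
+ *     { HAM_PARAM_PAGE_SIZE,  16 * 1024 },       // multiple of 2048
+ *     { 0, 0 }
+ *   };
+ *   ham_env_t *env;
+ *   ham_db_t *db;
+ *   ham_status_t st = ham_env_create(&env, "test.db",
+ *                   HAM_ENABLE_TRANSACTIONS, 0644, &params[0]);
+ *   if (st == HAM_SUCCESS)
+ *     st = ham_env_create_db(env, &db, 1, 0, 0);  // db_name 1, no flags
+ */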
+
+ham_status_t HAM_CALLCONV
+ham_env_create_db(ham_env_t *henv, ham_db_t **hdb, uint16_t db_name,
+ uint32_t flags, const ham_parameter_t *param)
+{
+ Environment *env = (Environment *)henv;
+ DatabaseConfiguration config;
+
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ *hdb = 0;
+
+ if (!db_name || (db_name >= 0xf000)) {
+ ham_trace(("invalid database name"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ config.db_name = db_name;
+ config.flags = flags;
+
+ return (env->create_db((Database **)hdb, config, param));
+}
+
+ham_status_t HAM_CALLCONV
+ham_env_open_db(ham_env_t *henv, ham_db_t **hdb, uint16_t db_name,
+ uint32_t flags, const ham_parameter_t *param)
+{
+ Environment *env = (Environment *)henv;
+ DatabaseConfiguration config;
+
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ *hdb = 0;
+
+ if (!db_name) {
+ ham_trace(("parameter 'db_name' must not be 0"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (db_name >= 0xf000) {
+ ham_trace(("database name must be lower than 0xf000"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (env->get_flags() & HAM_IN_MEMORY) {
+ ham_trace(("cannot open a Database in an In-Memory Environment"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ config.flags = flags;
+ config.db_name = db_name;
+
+ return (env->open_db((Database **)hdb, config, param));
+}
+
+ham_status_t HAM_CALLCONV
+ham_env_open(ham_env_t **henv, const char *filename, uint32_t flags,
+ const ham_parameter_t *param)
+{
+ EnvironmentConfiguration config;
+ config.filename = filename ? filename : "";
+
+ if (!henv) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ *henv = 0;
+
+ /* cannot open an in-memory-db */
+ if (flags & HAM_IN_MEMORY) {
+ ham_trace(("cannot open an in-memory database"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ /* HAM_ENABLE_DUPLICATE_KEYS has to be specified in ham_env_create_db,
+ * not ham_env_open */
+ if (flags & HAM_ENABLE_DUPLICATE_KEYS) {
+ ham_trace(("invalid flag HAM_ENABLE_DUPLICATE_KEYS (only allowed when "
+ "creating a database"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (flags & HAM_ENABLE_CRC32) {
+ ham_trace(("Crc32 is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ }
+
+ /* HAM_ENABLE_TRANSACTIONS implies HAM_ENABLE_RECOVERY, unless explicitly
+ * disabled */
+ if ((flags & HAM_ENABLE_TRANSACTIONS) && !(flags & HAM_DISABLE_RECOVERY))
+ flags |= HAM_ENABLE_RECOVERY;
+
+ /* flag HAM_AUTO_RECOVERY implies HAM_ENABLE_RECOVERY */
+ if (flags & HAM_AUTO_RECOVERY)
+ flags |= HAM_ENABLE_RECOVERY;
+
+ if (config.filename.empty() && !(flags & HAM_IN_MEMORY)) {
+ ham_trace(("filename is missing"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (param) {
+ for (; param->name; param++) {
+ switch (param->name) {
+ case HAM_PARAM_JOURNAL_COMPRESSION:
+ ham_trace(("Journal compression is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ case HAM_PARAM_CACHE_SIZE:
+ /* don't allow cache limits with unlimited cache */
+ if (flags & HAM_CACHE_UNLIMITED && param->value != 0) {
+ ham_trace(("combination of HAM_CACHE_UNLIMITED and cache size != 0 "
+ "not allowed"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (param->value > 0)
+ config.cache_size_bytes = param->value;
+ break;
+ case HAM_PARAM_FILE_SIZE_LIMIT:
+ if (param->value > 0)
+ config.file_size_limit_bytes = (size_t)param->value;
+ break;
+ case HAM_PARAM_JOURNAL_SWITCH_THRESHOLD:
+ config.journal_switch_threshold = (uint32_t)param->value;
+ break;
+ case HAM_PARAM_LOG_DIRECTORY:
+ config.log_filename = (const char *)param->value;
+ break;
+ case HAM_PARAM_NETWORK_TIMEOUT_SEC:
+ config.remote_timeout_sec = (uint32_t)param->value;
+ break;
+ case HAM_PARAM_ENCRYPTION_KEY:
+ ham_trace(("Encryption is only available in hamsterdb pro"));
+ return (HAM_NOT_IMPLEMENTED);
+ case HAM_PARAM_POSIX_FADVISE:
+ config.posix_advice = (int)param->value;
+ break;
+ default:
+ ham_trace(("unknown parameter %d", (int)param->name));
+ return (HAM_INV_PARAMETER);
+ }
+ }
+ }
+
+ config.flags = flags;
+
+ ham_status_t st = 0;
+ Environment *env = 0;
+
+ if (filename_is_local(config.filename.c_str())) {
+ env = new LocalEnvironment(config);
+ }
+ else {
+#ifndef HAM_ENABLE_REMOTE
+ return (HAM_NOT_IMPLEMENTED);
+#else // HAM_ENABLE_REMOTE
+ env = new RemoteEnvironment(config);
+#endif
+ }
+
+#ifdef HAM_ENABLE_REMOTE
+ atexit(Protocol::shutdown);
+#endif
+
+ /* and finish the initialization of the Environment */
+ st = env->open();
+
+ if (st) {
+ (void)env->close(HAM_AUTO_CLEANUP);
+ delete env;
+ return (st);
+ }
+
+ *henv = (ham_env_t *)env;
+ return (0);
+}
+
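+/*
+ * Usage sketch (added for clarity; not part of the original hamsterdb
+ * sources). A caller opens an existing Environment with an optional,
+ * zero-terminated parameter list and then opens a Database inside it.
+ * File name, database name and cache size below are illustrative only.
+ *
+ *   ham_env_t *env;
+ *   ham_db_t *db;
+ *   ham_parameter_t params[] = {
+ *     { HAM_PARAM_CACHE_SIZE, 2 * 1024 * 1024 },  // 2 MB cache
+ *     { 0, 0 }                                    // terminator (name == 0)
+ *   };
+ *   if (ham_env_open(&env, "data.db", 0, &params[0]) == HAM_SUCCESS) {
+ *     ham_env_open_db(env, &db, 1, 0, 0);
+ *     // ... ham_db_insert / ham_db_find ...
+ *     ham_db_close(db, 0);
+ *     ham_env_close(env, 0);
+ *   }
+ */
+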
+ham_status_t HAM_CALLCONV
+ham_env_rename_db(ham_env_t *henv, uint16_t oldname, uint16_t newname,
+ uint32_t flags)
+{
+ Environment *env = (Environment *)henv;
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (!oldname) {
+ ham_trace(("parameter 'oldname' must not be 0"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!newname) {
+ ham_trace(("parameter 'newname' must not be 0"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (newname >= 0xf000) {
+ ham_trace(("parameter 'newname' must be less than 0xf000"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ /* no need to do anything if oldname==newname */
+ if (oldname == newname)
+ return (0);
+
+ /* rename the database */
+ return (env->rename_db(oldname, newname, flags));
+}
+
+ham_status_t HAM_CALLCONV
+ham_env_erase_db(ham_env_t *henv, uint16_t name, uint32_t flags)
+{
+ Environment *env = (Environment *)henv;
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (!name) {
+ ham_trace(("parameter 'name' must not be 0"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ /* erase the database */
+ return (env->erase_db(name, flags));
+}
+
+ham_status_t HAM_CALLCONV
+ham_env_get_database_names(ham_env_t *henv, uint16_t *names, uint32_t *count)
+{
+ Environment *env = (Environment *)henv;
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (!names) {
+ ham_trace(("parameter 'names' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!count) {
+ ham_trace(("parameter 'count' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ /* get all database names */
+ return (env->get_database_names(names, count));
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_env_get_parameters(ham_env_t *henv, ham_parameter_t *param)
+{
+ Environment *env = (Environment *)henv;
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (!param) {
+ ham_trace(("parameter 'param' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ /* get the parameters */
+ return (env->get_parameters(param));
+}
+
+ham_status_t HAM_CALLCONV
+ham_env_flush(ham_env_t *henv, uint32_t flags)
+{
+ Environment *env = (Environment *)henv;
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (flags && flags != HAM_FLUSH_COMMITTED_TRANSACTIONS) {
+ ham_trace(("parameter 'flags' must be 0 or HAM_FLUSH_COMMITTED_TRANSACTIONS"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ /* flush the Environment */
+ return (env->flush(flags));
+}
+
+ham_status_t HAM_CALLCONV
+ham_env_close(ham_env_t *henv, uint32_t flags)
+{
+ ham_status_t st;
+ Environment *env = (Environment *)henv;
+
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ try {
+ /* close the environment */
+ st = env->close(flags);
+ if (st)
+ return (st);
+
+ delete env;
+ return (0);
+ }
+ catch (Exception &ex) {
+ return (ex.code);
+ }
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_get_parameters(ham_db_t *hdb, ham_parameter_t *param)
+{
+ Database *db = (Database *)hdb;
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (!param) {
+ ham_trace(("parameter 'param' must not be NULL"));
+ return HAM_INV_PARAMETER;
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ /* get the parameters */
+ return (db->set_error(db->get_parameters(param)));
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_get_error(ham_db_t *hdb)
+{
+ Database *db = (Database *)hdb;
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (0);
+ }
+
+ ScopedLock lock;
+ if (db->get_env())
+ lock = ScopedLock(db->get_env()->mutex());
+
+ return (db->get_error());
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_set_compare_func(ham_db_t *hdb, ham_compare_func_t foo)
+{
+ Database *db = (Database *)hdb;
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!foo) {
+ ham_trace(("function pointer must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ LocalDatabase *ldb = dynamic_cast<LocalDatabase *>(db);
+ if (!ldb) {
+ ham_trace(("operation not possible for remote databases"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ ScopedLock lock(ldb->get_env()->mutex());
+
+ /* set the compare functions */
+ return (ldb->set_error(ldb->set_compare_func(foo)));
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_find(ham_db_t *hdb, ham_txn_t *htxn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ Database *db = (Database *)hdb;
+ Transaction *txn = (Transaction *)htxn;
+ Environment *env;
+
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ env = db->get_env();
+
+ ScopedLock lock(env->mutex());
+
+ if (!key) {
+ ham_trace(("parameter 'key' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (!record) {
+ ham_trace(("parameter 'record' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_PREPEND) {
+ ham_trace(("flag HAM_HINT_PREPEND is only allowed in "
+ "ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_APPEND) {
+ ham_trace(("flag HAM_HINT_APPEND is only allowed in "
+ "ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_DIRECT_ACCESS)
+ && !(env->get_flags() & HAM_IN_MEMORY)) {
+ ham_trace(("flag HAM_DIRECT_ACCESS is only allowed in "
+ "In-Memory Databases"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_DIRECT_ACCESS)
+ && (env->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("flag HAM_DIRECT_ACCESS is not allowed in "
+ "combination with Transactions"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL)
+ && (db->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("flag HAM_PARTIAL is not allowed in combination with "
+ "transactions"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ /* record number: make sure that we have a valid key structure */
+ if ((db->get_flags() & HAM_RECORD_NUMBER32) && !key->data) {
+ ham_trace(("key->data must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((db->get_flags() & HAM_RECORD_NUMBER64) && !key->data) {
+ ham_trace(("key->data must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ if (!__prepare_key(key) || !__prepare_record(record))
+ return (db->set_error(HAM_INV_PARAMETER));
+
+ return (db->set_error(db->find(0, txn, key, record, flags)));
+}
+
+HAM_EXPORT int HAM_CALLCONV
+ham_key_get_approximate_match_type(ham_key_t *key)
+{
+ if (key && (ham_key_get_intflags(key) & BtreeKey::kApproximate)) {
+ int rv = (ham_key_get_intflags(key) & BtreeKey::kLower) ? -1 : +1;
+ return (rv);
+ }
+
+ return (0);
+}
+
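+/*
+ * Usage note (added for clarity; not part of the original sources): after a
+ * find call that used one of the approximate-match flags declared in
+ * ham/hamsterdb.h (HAM_FIND_NEAR_MATCH in this sketch), this helper reports
+ * on which side of the requested key the returned key lies.
+ *
+ *   if (ham_db_find(db, 0, &key, &record, HAM_FIND_NEAR_MATCH) == HAM_SUCCESS) {
+ *     int side = ham_key_get_approximate_match_type(&key);
+ *     // side < 0: returned key is smaller than the requested key
+ *     // side > 0: returned key is greater; 0 means an exact match
+ *   }
+ */
+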
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_insert(ham_db_t *hdb, ham_txn_t *htxn, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ Database *db = (Database *)hdb;
+ Transaction *txn = (Transaction *)htxn;
+ Environment *env;
+
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return HAM_INV_PARAMETER;
+ }
+ env = db->get_env();
+
+ ScopedLock lock;
+ if (!(flags & HAM_DONT_LOCK))
+ lock = ScopedLock(env->mutex());
+
+ if (!key) {
+ ham_trace(("parameter 'key' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (!record) {
+ ham_trace(("parameter 'record' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_APPEND) {
+ ham_trace(("flag HAM_HINT_APPEND is only allowed in "
+ "ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_PREPEND) {
+ ham_trace(("flag HAM_HINT_PREPEND is only allowed in "
+ "ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (db->get_flags() & HAM_READ_ONLY) {
+ ham_trace(("cannot insert in a read-only database"));
+ return (db->set_error(HAM_WRITE_PROTECTED));
+ }
+ if ((flags & HAM_OVERWRITE) && (flags & HAM_DUPLICATE)) {
+ ham_trace(("cannot combine HAM_OVERWRITE and HAM_DUPLICATE"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL)
+ && (db->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("flag HAM_PARTIAL is not allowed in combination with "
+ "transactions"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL) && (record->size <= sizeof(uint64_t))) {
+ ham_trace(("flag HAM_PARTIAL is not allowed if record->size "
+ "<= 8"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL)
+ && (record->partial_size + record->partial_offset > record->size)) {
+ ham_trace(("partial offset+size is greater than the total "
+ "record size"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_DUPLICATE)
+ && !(db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS)) {
+ ham_trace(("database does not support duplicate keys "
+ "(see HAM_ENABLE_DUPLICATE_KEYS)"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_DUPLICATE_INSERT_AFTER)
+ || (flags & HAM_DUPLICATE_INSERT_BEFORE)
+ || (flags & HAM_DUPLICATE_INSERT_LAST)
+ || (flags & HAM_DUPLICATE_INSERT_FIRST)) {
+ ham_trace(("function does not support flags HAM_DUPLICATE_INSERT_*; "
+ "see ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ if (!__prepare_key(key) || !__prepare_record(record))
+ return (db->set_error(HAM_INV_PARAMETER));
+
+ /* validate the key structure for record number databases */
+ if ((db->get_flags() & HAM_RECORD_NUMBER32)
+ || (db->get_flags() & HAM_RECORD_NUMBER64)) {
+ if (flags & HAM_OVERWRITE) {
+ if (!key->data) {
+ ham_trace(("key->data must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ }
+ else {
+ if (key->flags & HAM_KEY_USER_ALLOC) {
+ if (!key->data) {
+ ham_trace(("key->data must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ }
+ else {
+ if (key->data || key->size) {
+ ham_trace(("key->size must be 0, key->data must be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ }
+ }
+ }
+
+ return (db->set_error(db->insert(0, txn, key, record, flags)));
+}
+
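+/*
+ * Usage note (added for clarity; not part of the original sources): for
+ * record-number databases (HAM_RECORD_NUMBER32/64) the checks above demand
+ * an empty key unless HAM_KEY_USER_ALLOC is set, because the database
+ * assigns the number itself. A minimal sketch:
+ *
+ *   ham_key_t key = {0};       // data == NULL, size == 0
+ *   ham_record_t record = {0};
+ *   record.data = (void *)"payload";
+ *   record.size = 8;
+ *   ham_db_insert(db, 0, &key, &record, 0);
+ *   // on success the key is filled with the newly assigned record number
+ */
+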
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_erase(ham_db_t *hdb, ham_txn_t *htxn, ham_key_t *key, uint32_t flags)
+{
+ Database *db = (Database *)hdb;
+ Transaction *txn = (Transaction *)htxn;
+ Environment *env;
+
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ env = db->get_env();
+
+ ScopedLock lock;
+ if (!(flags & HAM_DONT_LOCK))
+ lock = ScopedLock(env->mutex());
+
+ if (!key) {
+ ham_trace(("parameter 'key' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_PREPEND) {
+ ham_trace(("flag HAM_HINT_PREPEND is only allowed in "
+ "ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_APPEND) {
+ ham_trace(("flag HAM_HINT_APPEND is only allowed in "
+ "ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (db->get_flags() & HAM_READ_ONLY) {
+ ham_trace(("cannot erase from a read-only database"));
+ return (HAM_WRITE_PROTECTED);
+ }
+
+ if (!__prepare_key(key))
+ return (db->set_error(HAM_INV_PARAMETER));
+
+ return (db->set_error(db->erase(0, txn, key, flags)));
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_check_integrity(ham_db_t *hdb, uint32_t flags)
+{
+ Database *db = (Database *)hdb;
+
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if (flags && flags != HAM_PRINT_GRAPH) {
+ ham_trace(("unknown flag 0x%x", flags));
+ return (HAM_INV_PARAMETER);
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ return (db->set_error(db->check_integrity(flags)));
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_db_close(ham_db_t *hdb, uint32_t flags)
+{
+ Database *db = (Database *)hdb;
+
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ if ((flags & HAM_TXN_AUTO_ABORT) && (flags & HAM_TXN_AUTO_COMMIT)) {
+ ham_trace(("invalid combination of flags: HAM_TXN_AUTO_ABORT + "
+ "HAM_TXN_AUTO_COMMIT"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ Environment *env = db->get_env();
+
+ /* it's ok to close an uninitialized Database */
+ if (!env) {
+ delete db;
+ return (0);
+ }
+
+ return (env->close_db(db, flags));
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_create(ham_cursor_t **hcursor, ham_db_t *hdb, ham_txn_t *htxn,
+ uint32_t flags)
+{
+ Database *db = (Database *)hdb;
+ Transaction *txn = (Transaction *)htxn;
+ Environment *env;
+ Cursor **cursor = 0;
+
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ cursor = (Cursor **)hcursor;
+ env = db->get_env();
+
+ ScopedLock lock;
+ if (!(flags & HAM_DONT_LOCK))
+ lock = ScopedLock(env->mutex());
+
+ return (db->set_error(db->cursor_create(cursor, txn, flags)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_clone(ham_cursor_t *hsrc, ham_cursor_t **hdest)
+{
+ Database *db;
+
+ if (!hsrc) {
+ ham_trace(("parameter 'src' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!hdest) {
+ ham_trace(("parameter 'dest' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *src, **dest;
+ src = (Cursor *)hsrc;
+ dest = (Cursor **)hdest;
+
+ db = src->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ return (db->set_error(db->cursor_clone(dest, src)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_overwrite(ham_cursor_t *hcursor, ham_record_t *record,
+ uint32_t flags)
+{
+ Database *db;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ if (flags) {
+ ham_trace(("function does not support a non-zero flags value; "
+ "see ham_cursor_insert for an alternative"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (!record) {
+ ham_trace(("parameter 'record' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (!__prepare_record(record))
+ return (db->set_error(HAM_INV_PARAMETER));
+ if (db->get_flags() & HAM_READ_ONLY) {
+ ham_trace(("cannot overwrite in a read-only database"));
+ return (db->set_error(HAM_WRITE_PROTECTED));
+ }
+
+ return (db->set_error(db->cursor_overwrite(cursor, record, flags)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_move(ham_cursor_t *hcursor, ham_key_t *key,
+ ham_record_t *record, uint32_t flags)
+{
+ Database *db;
+ Environment *env;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ if ((flags & HAM_ONLY_DUPLICATES) && (flags & HAM_SKIP_DUPLICATES)) {
+ ham_trace(("combination of HAM_ONLY_DUPLICATES and "
+ "HAM_SKIP_DUPLICATES not allowed"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ env = db->get_env();
+
+ if ((flags & HAM_DIRECT_ACCESS)
+ && !(env->get_flags() & HAM_IN_MEMORY)) {
+ ham_trace(("flag HAM_DIRECT_ACCESS is only allowed in "
+ "In-Memory Databases"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_DIRECT_ACCESS)
+ && (env->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("flag HAM_DIRECT_ACCESS is not allowed in "
+ "combination with Transactions"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL)
+ && (db->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("flag HAM_PARTIAL is not allowed in combination with "
+ "transactions"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ if (key && !__prepare_key(key))
+ return (db->set_error(HAM_INV_PARAMETER));
+ if (record && !__prepare_record(record))
+ return (db->set_error(HAM_INV_PARAMETER));
+
+ return (db->set_error(db->cursor_move(cursor, key, record, flags)));
+}
+
+HAM_EXPORT ham_status_t HAM_CALLCONV
+ham_cursor_find(ham_cursor_t *hcursor, ham_key_t *key, ham_record_t *record,
+ uint32_t flags)
+{
+ Database *db;
+ Environment *env;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+ env = db->get_env();
+
+ ScopedLock lock;
+ if (!(flags & HAM_DONT_LOCK))
+ lock = ScopedLock(env->mutex());
+
+ if (!key) {
+ ham_trace(("parameter 'key' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_DIRECT_ACCESS)
+ && !(env->get_flags() & HAM_IN_MEMORY)) {
+ ham_trace(("flag HAM_DIRECT_ACCESS is only allowed in "
+ "In-Memory Databases"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_DIRECT_ACCESS)
+ && (env->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("flag HAM_DIRECT_ACCESS is not allowed in "
+ "combination with Transactions"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_PREPEND) {
+ ham_trace(("flag HAM_HINT_PREPEND is only allowed in "
+ "ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_APPEND) {
+ ham_trace(("flag HAM_HINT_APPEND is only allowed in "
+ "ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL)
+ && (db->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("flag HAM_PARTIAL is not allowed in combination with "
+ "transactions"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ if (key && !__prepare_key(key))
+ return (db->set_error(HAM_INV_PARAMETER));
+ if (record && !__prepare_record(record))
+ return (db->set_error(HAM_INV_PARAMETER));
+
+ return (db->set_error(db->find(cursor, cursor->get_txn(),
+ key, record, flags)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_insert(ham_cursor_t *hcursor, ham_key_t *key, ham_record_t *record,
+ uint32_t flags)
+{
+ Database *db;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ if (!key) {
+ ham_trace(("parameter 'key' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (!record) {
+ ham_trace(("parameter 'record' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_HINT_APPEND) && (flags & HAM_HINT_PREPEND)) {
+ ham_trace(("flags HAM_HINT_APPEND and HAM_HINT_PREPEND "
+ "are mutually exclusive"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (!__prepare_key(key) || !__prepare_record(record))
+ return (db->set_error(HAM_INV_PARAMETER));
+
+ if (db->get_flags() & HAM_READ_ONLY) {
+ ham_trace(("cannot insert to a read-only database"));
+ return (db->set_error(HAM_WRITE_PROTECTED));
+ }
+ if ((flags & HAM_DUPLICATE) && (flags & HAM_OVERWRITE)) {
+ ham_trace(("cannot combine HAM_DUPLICATE and HAM_OVERWRITE"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_DUPLICATE)
+ && !(db->get_flags() & HAM_ENABLE_DUPLICATE_KEYS)) {
+ ham_trace(("database does not support duplicate keys "
+ "(see HAM_ENABLE_DUPLICATE_KEYS)"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL)
+ && (db->get_flags() & HAM_ENABLE_TRANSACTIONS)) {
+ ham_trace(("flag HAM_PARTIAL is not allowed in combination with "
+ "transactions"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL)
+ && (record->partial_size + record->partial_offset > record->size)) {
+ ham_trace(("partial offset+size is greater than the total "
+ "record size"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if ((flags & HAM_PARTIAL) && (record->size <= sizeof(uint64_t))) {
+ ham_trace(("flag HAM_PARTIAL is not allowed if record->size <= 8"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ /*
+ * set flag HAM_DUPLICATE if one of the HAM_DUPLICATE_INSERT_* flags
+ * is set
+ */
+ if (flags & (HAM_DUPLICATE_INSERT_AFTER
+ | HAM_DUPLICATE_INSERT_BEFORE
+ | HAM_DUPLICATE_INSERT_LAST
+ | HAM_DUPLICATE_INSERT_FIRST)) {
+ flags |= HAM_DUPLICATE;
+ }
+
+ /* validate the key structure for record number databases */
+ if ((db->get_flags() & HAM_RECORD_NUMBER32)
+ || (db->get_flags() & HAM_RECORD_NUMBER64)) {
+ if (flags & HAM_OVERWRITE) {
+ if (!key->data) {
+ ham_trace(("key->data must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ }
+ else {
+ if (key->flags & HAM_KEY_USER_ALLOC) {
+ if (!key->data) {
+ ham_trace(("key->data must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ }
+ else {
+ if (key->data || key->size) {
+ ham_trace(("key->size must be 0, key->data must be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ }
+ }
+ }
+
+ return (db->set_error(db->insert(cursor, cursor->get_txn(), key,
+ record, flags)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_erase(ham_cursor_t *hcursor, uint32_t flags)
+{
+ Database *db;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ if (db->get_flags() & HAM_READ_ONLY) {
+ ham_trace(("cannot erase from a read-only database"));
+ return (db->set_error(HAM_WRITE_PROTECTED));
+ }
+ if (flags & HAM_HINT_PREPEND) {
+ ham_trace(("flag HAM_HINT_PREPEND is only allowed in ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+ if (flags & HAM_HINT_APPEND) {
+ ham_trace(("flag HAM_HINT_APPEND is only allowed in ham_cursor_insert"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ return (db->set_error(db->erase(cursor, cursor->get_txn(), 0, flags)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_get_duplicate_count(ham_cursor_t *hcursor, uint32_t *count,
+ uint32_t flags)
+{
+ Database *db;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ if (!count) {
+ ham_trace(("parameter 'count' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ return (db->set_error(db->cursor_get_record_count(cursor, flags, count)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_get_duplicate_position(ham_cursor_t *hcursor, uint32_t *position)
+{
+ Database *db;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ if (!position) {
+ ham_trace(("parameter 'position' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ return (db->set_error(db->cursor_get_duplicate_position(cursor, position)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_get_record_size(ham_cursor_t *hcursor, uint64_t *size)
+{
+ Database *db;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ if (!size) {
+ ham_trace(("parameter 'size' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ return (db->set_error(db->cursor_get_record_size(cursor, size)));
+}
+
+ham_status_t HAM_CALLCONV
+ham_cursor_close(ham_cursor_t *hcursor)
+{
+ Database *db;
+
+ if (!hcursor) {
+ ham_trace(("parameter 'cursor' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Cursor *cursor = (Cursor *)hcursor;
+
+ db = cursor->get_db();
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ return (db->set_error(db->cursor_close(cursor)));
+}
+
+void HAM_CALLCONV
+ham_set_context_data(ham_db_t *hdb, void *data)
+{
+ Database *db = (Database *)hdb;
+
+ if (!db)
+ return;
+
+ ScopedLock lock(db->get_env()->mutex());
+ db->set_context_data(data);
+}
+
+void * HAM_CALLCONV
+ham_get_context_data(ham_db_t *hdb, ham_bool_t dont_lock)
+{
+ Database *db = (Database *)hdb;
+ if (!db)
+ return (0);
+
+ if (dont_lock)
+ return (db->get_context_data());
+
+ ScopedLock lock(db->get_env()->mutex());
+ return (db->get_context_data());
+}
+
+ham_db_t * HAM_CALLCONV
+ham_cursor_get_database(ham_cursor_t *hcursor)
+{
+ if (hcursor) {
+ Cursor *cursor = (Cursor *)hcursor;
+ return ((ham_db_t *)cursor->get_db());
+ }
+ return (0);
+}
+
+ham_env_t * HAM_CALLCONV
+ham_db_get_env(ham_db_t *hdb)
+{
+ Database *db = (Database *)hdb;
+ if (!db)
+ return (0);
+
+ return ((ham_env_t *)db->get_env());
+}
+
+ham_status_t HAM_CALLCONV
+ham_db_get_key_count(ham_db_t *hdb, ham_txn_t *htxn, uint32_t flags,
+ uint64_t *keycount)
+{
+ Database *db = (Database *)hdb;
+ Transaction *txn = (Transaction *)htxn;
+
+ if (!db) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (flags & ~(HAM_SKIP_DUPLICATES)) {
+ ham_trace(("parameter 'flags' contains unsupported flag bits: %08x",
+ flags & (~HAM_SKIP_DUPLICATES)));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!keycount) {
+ ham_trace(("parameter 'keycount' must not be NULL"));
+ return (db->set_error(HAM_INV_PARAMETER));
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+
+ return (db->set_error(db->count(txn, (flags & HAM_SKIP_DUPLICATES) != 0,
+ keycount)));
+}
+
+void HAM_CALLCONV
+ham_set_errhandler(ham_errhandler_fun f)
+{
+ if (f)
+ hamsterdb::Globals::ms_error_handler = f;
+ else
+ hamsterdb::Globals::ms_error_handler = hamsterdb::default_errhandler;
+}
+
+ham_status_t HAM_CALLCONV
+ham_env_get_metrics(ham_env_t *henv, ham_env_metrics_t *metrics)
+{
+ Environment *env = (Environment *)henv;
+ if (!env) {
+ ham_trace(("parameter 'env' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!metrics) {
+ ham_trace(("parameter 'metrics' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ memset(metrics, 0, sizeof(ham_env_metrics_t));
+ metrics->version = HAM_METRICS_VERSION;
+
+ // fill in memory metrics
+ Memory::get_global_metrics(metrics);
+ // ... and everything else
+ return (env->fill_metrics(metrics));
+}
+
+ham_bool_t HAM_CALLCONV
+ham_is_debug()
+{
+#ifdef HAM_DEBUG
+ return (HAM_TRUE);
+#else
+ return (HAM_FALSE);
+#endif
+}
+
+ham_bool_t HAM_CALLCONV
+ham_is_pro()
+{
+ return (HAM_FALSE);
+}
+
+uint32_t HAM_CALLCONV
+ham_is_pro_evaluation()
+{
+ return (0);
+}
diff --git a/plugins/Dbx_kv/src/hamsterdb/src/5hamsterdb/hola.cc b/plugins/Dbx_kv/src/hamsterdb/src/5hamsterdb/hola.cc
new file mode 100644
index 0000000000..a5a56a1814
--- /dev/null
+++ b/plugins/Dbx_kv/src/hamsterdb/src/5hamsterdb/hola.cc
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2005-2015 Christoph Rupp (chris@crupp.de).
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "0root/root.h"
+
+#include "ham/hamsterdb_ola.h"
+
+// Always verify that a file of level N does not include headers > N!
+#include "1base/error.h"
+#include "3btree/btree_visitor.h"
+#include "4db/db.h"
+#include "4db/db_local.h"
+
+#ifndef HAM_ROOT_H
+# error "root.h was not included"
+#endif
+
+using namespace hamsterdb;
+
+ham_status_t HAM_CALLCONV
+hola_count(ham_db_t *hdb, ham_txn_t *htxn, hola_result_t *result)
+{
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!result) {
+ ham_trace(("parameter 'result' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Database *db = (Database *)hdb;
+ Transaction *txn = (Transaction *)htxn;
+
+ result->type = HAM_TYPE_UINT64;
+ result->u.result_u64 = 0;
+
+ ScopedLock lock(db->get_env()->mutex());
+ return (db->set_error(db->count(txn, false, &result->u.result_u64)));
+}
+
+//
+// A ScanVisitor for hola_count_if
+//
+template<typename PodType>
+struct CountIfScanVisitor : public ScanVisitor {
+ CountIfScanVisitor(hola_bool_predicate_t *pred)
+ : m_count(0), m_pred(pred) {
+ }
+
+ // Operates on a single key
+ virtual void operator()(const void *key_data, uint16_t key_size,
+ size_t duplicate_count) {
+ if (m_pred->predicate_func(key_data, key_size, m_pred->context))
+ m_count++;
+ }
+
+ // Operates on an array of keys
+ virtual void operator()(const void *key_array, size_t key_count) {
+ const PodType *p = (const PodType *)key_array;
+ const PodType *end = &p[key_count];
+ for (; p < end; p++) {
+ if (m_pred->predicate_func(p, sizeof(PodType), m_pred->context))
+ m_count++;
+ }
+ }
+
+ // Assigns the result to |result|
+ virtual void assign_result(hola_result_t *result) {
+ memcpy(&result->u.result_u64, &m_count, sizeof(uint64_t));
+ }
+
+ // The counter
+ uint64_t m_count;
+
+ // The user's predicate
+ hola_bool_predicate_t *m_pred;
+};
+
+//
+// A ScanVisitor for hola_count_if on binary keys
+//
+struct CountIfScanVisitorBinary : public ScanVisitor {
+ CountIfScanVisitorBinary(size_t key_size, hola_bool_predicate_t *pred)
+ : m_count(0), m_key_size(key_size), m_pred(pred) {
+ }
+
+ // Operates on a single key
+ virtual void operator()(const void *key_data, uint16_t key_size,
+ size_t duplicate_count) {
+ if (m_pred->predicate_func(key_data, key_size, m_pred->context))
+ m_count++;
+ }
+
+ // Operates on an array of keys
+ virtual void operator()(const void *key_array, size_t key_count) {
+ assert(m_key_size != HAM_KEY_SIZE_UNLIMITED);
+ const uint8_t *p = (const uint8_t *)key_array;
+ const uint8_t *end = &p[key_count * m_key_size];
+ for (; p < end; p += m_key_size) {
+ if (m_pred->predicate_func(p, m_key_size, m_pred->context))
+ m_count++;
+ }
+ }
+
+ // Assigns the result to |result|
+ virtual void assign_result(hola_result_t *result) {
+ memcpy(&result->u.result_u64, &m_count, sizeof(uint64_t));
+ }
+
+ // The counter
+ uint64_t m_count;
+
+ // The key size
+ uint16_t m_key_size;
+
+ // The user's predicate
+ hola_bool_predicate_t *m_pred;
+};
+
+ham_status_t HAM_CALLCONV
+hola_count_if(ham_db_t *hdb, ham_txn_t *txn, hola_bool_predicate_t *pred,
+ hola_result_t *result)
+{
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!pred) {
+ ham_trace(("parameter 'pred' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!result) {
+ ham_trace(("parameter 'result' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ // Remote databases are not yet supported
+ LocalDatabase *db = dynamic_cast<LocalDatabase *>((Database *)hdb);
+ if (!db) {
+ ham_trace(("hola_* functions are not yet supported for remote databases"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ std::auto_ptr<ScanVisitor> visitor;
+ result->u.result_u64 = 0;
+ result->type = HAM_TYPE_UINT64;
+
+ switch (db->config().key_type) {
+ case HAM_TYPE_UINT8:
+ visitor.reset(new CountIfScanVisitor<uint8_t>(pred));
+ break;
+ case HAM_TYPE_UINT16:
+ visitor.reset(new CountIfScanVisitor<uint16_t>(pred));
+ break;
+ case HAM_TYPE_UINT32:
+ visitor.reset(new CountIfScanVisitor<uint32_t>(pred));
+ break;
+ case HAM_TYPE_UINT64:
+ visitor.reset(new CountIfScanVisitor<uint64_t>(pred));
+ break;
+ case HAM_TYPE_REAL32:
+ visitor.reset(new CountIfScanVisitor<float>(pred));
+ break;
+ case HAM_TYPE_REAL64:
+ visitor.reset(new CountIfScanVisitor<double>(pred));
+ break;
+ case HAM_TYPE_BINARY:
+ visitor.reset(new CountIfScanVisitorBinary(db->config().key_size,
+ pred));
+ break;
+ default:
+ ham_assert(!"shouldn't be here");
+ return (HAM_INV_PARAMETER);
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+ ham_status_t st = db->scan((Transaction *)txn, visitor.get(), false);
+ if (st == 0)
+ visitor->assign_result(result);
+ return (db->set_error(st));
+}
+
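+/*
+ * Usage sketch (added for clarity; not part of the original sources): the
+ * predicate is invoked once per key exactly as shown in the visitors above.
+ * The authoritative typedef lives in ham/hamsterdb_ola.h; the signature
+ * below merely mirrors how predicate_func is called here and assumes a
+ * database with HAM_TYPE_UINT32 keys.
+ *
+ *   static ham_bool_t
+ *   is_even(const void *key_data, uint16_t key_size, void *context) {
+ *     return ((*(const uint32_t *)key_data & 1) == 0);
+ *   }
+ *
+ *   hola_bool_predicate_t pred;
+ *   pred.context = 0;
+ *   pred.predicate_func = is_even;
+ *   hola_result_t result;
+ *   hola_count_if(db, 0, &pred, &result);  // count is in result.u.result_u64
+ */
+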
+ham_status_t HAM_CALLCONV
+hola_count_distinct(ham_db_t *hdb, ham_txn_t *htxn, hola_result_t *result)
+{
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!result) {
+ ham_trace(("parameter 'result' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ Database *db = (Database *)hdb;
+ Transaction *txn = (Transaction *)htxn;
+
+ result->type = HAM_TYPE_UINT64;
+ result->u.result_u64 = 0;
+
+ ScopedLock lock(db->get_env()->mutex());
+ return (db->set_error(db->count(txn, true, &result->u.result_u64)));
+}
+
+ham_status_t HAM_CALLCONV
+hola_count_distinct_if(ham_db_t *hdb, ham_txn_t *txn,
+ hola_bool_predicate_t *pred, hola_result_t *result)
+{
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!pred) {
+ ham_trace(("parameter 'pred' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!result) {
+ ham_trace(("parameter 'result' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ // Remote databases are not yet supported
+ LocalDatabase *db = dynamic_cast<LocalDatabase *>((Database *)hdb);
+ if (!db) {
+ ham_trace(("hola_* functions are not yet supported for remote databases"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ std::auto_ptr<ScanVisitor> visitor;
+ result->u.result_u64 = 0;
+ result->type = HAM_TYPE_UINT64;
+
+ switch (db->config().key_type) {
+ case HAM_TYPE_UINT8:
+ visitor.reset(new CountIfScanVisitor<uint8_t>(pred));
+ break;
+ case HAM_TYPE_UINT16:
+ visitor.reset(new CountIfScanVisitor<uint16_t>(pred));
+ break;
+ case HAM_TYPE_UINT32:
+ visitor.reset(new CountIfScanVisitor<uint32_t>(pred));
+ break;
+ case HAM_TYPE_UINT64:
+ visitor.reset(new CountIfScanVisitor<uint64_t>(pred));
+ break;
+ case HAM_TYPE_REAL32:
+ visitor.reset(new CountIfScanVisitor<float>(pred));
+ break;
+ case HAM_TYPE_REAL64:
+ visitor.reset(new CountIfScanVisitor<double>(pred));
+ break;
+ case HAM_TYPE_BINARY:
+ visitor.reset(new CountIfScanVisitorBinary(db->config().key_size,
+ pred));
+ break;
+ default:
+ ham_assert(!"shouldn't be here");
+ return (HAM_INV_PARAMETER);
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+ ham_status_t st = db->scan((Transaction *)txn, visitor.get(), true);
+ if (st == 0)
+ visitor->assign_result(result);
+ return (db->set_error(st));
+}
+
+//
+// A ScanVisitor for hola_average
+//
+template<typename PodType, typename ResultType>
+struct AverageScanVisitor : public ScanVisitor {
+ AverageScanVisitor()
+ : m_sum(0), m_count(0) {
+ }
+
+ // Operates on a single key
+ virtual void operator()(const void *key_data, uint16_t key_size,
+ size_t duplicate_count) {
+ ham_assert(key_size == sizeof(PodType));
+
+ m_sum += *(const PodType *)key_data * duplicate_count;
+ m_count++;
+ }
+
+ // Operates on an array of keys
+ virtual void operator()(const void *key_array, size_t key_count) {
+ const PodType *p = (const PodType *)key_array;
+ const PodType *end = &p[key_count];
+ for (; p < end; p++)
+ m_sum += *p;
+ m_count += key_count;
+ }
+
+ // Assigns the result to |result|
+ virtual void assign_result(hola_result_t *result) {
+ ResultType res = m_count ? m_sum / m_count : 0; // avoid dividing by zero on an empty scan
+ memcpy(&result->u.result_u64, &res, sizeof(uint64_t));
+ }
+
+ // The sum of all keys
+ ResultType m_sum;
+
+ // For counting the keys
+ uint64_t m_count;
+};
+
+ham_status_t HAM_CALLCONV
+hola_average(ham_db_t *hdb, ham_txn_t *txn, hola_result_t *result)
+{
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!result) {
+ ham_trace(("parameter 'result' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ // Remote databases are not yet supported
+ LocalDatabase *db = dynamic_cast<LocalDatabase *>((Database *)hdb);
+ if (!db) {
+ ham_trace(("hola_* functions are not yet supported for remote databases"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ std::auto_ptr<ScanVisitor> visitor;
+ result->u.result_u64 = 0;
+
+ switch (db->config().key_type) {
+ case HAM_TYPE_UINT8:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new AverageScanVisitor<uint8_t, uint64_t>());
+ break;
+ case HAM_TYPE_UINT16:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new AverageScanVisitor<uint16_t, uint64_t>());
+ break;
+ case HAM_TYPE_UINT32:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new AverageScanVisitor<uint32_t, uint64_t>());
+ break;
+ case HAM_TYPE_UINT64:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new AverageScanVisitor<uint64_t, uint64_t>());
+ break;
+ case HAM_TYPE_REAL32:
+ result->type = HAM_TYPE_REAL64;
+ visitor.reset(new AverageScanVisitor<float, double>());
+ break;
+ case HAM_TYPE_REAL64:
+ result->type = HAM_TYPE_REAL64;
+ visitor.reset(new AverageScanVisitor<double, double>());
+ break;
+ default:
+ ham_trace(("hola_average* can only be applied to numerical data"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+ ham_status_t st = db->scan((Transaction *)txn, visitor.get(), false);
+ if (st == 0)
+ visitor->assign_result(result);
+ return (db->set_error(st));
+}
+
+//
+// A ScanVisitor for hola_average_if
+//
+template<typename PodType, typename ResultType>
+struct AverageIfScanVisitor : public ScanVisitor {
+ AverageIfScanVisitor(hola_bool_predicate_t *pred)
+ : m_sum(0), m_count(0), m_pred(pred) {
+ }
+
+ // Operates on a single key
+ virtual void operator()(const void *key_data, uint16_t key_size,
+ size_t duplicate_count) {
+ ham_assert(key_size == sizeof(PodType));
+
+ if (m_pred->predicate_func(key_data, key_size, m_pred->context)) {
+ m_sum += *(const PodType *)key_data * duplicate_count;
+ m_count++;
+ }
+ }
+
+ // Operates on an array of keys
+ virtual void operator()(const void *key_array, size_t key_count) {
+ const PodType *p = (const PodType *)key_array;
+ const PodType *end = &p[key_count];
+ for (; p < end; p++) {
+ if (m_pred->predicate_func(p, sizeof(PodType), m_pred->context)) {
+ m_sum += *p;
+ m_count++;
+ }
+ }
+ }
+
+ // Assigns the result to |result|
+ virtual void assign_result(hola_result_t *result) {
+ ResultType res = m_count ? m_sum / m_count : 0; // avoid dividing by zero if no key matched
+ memcpy(&result->u.result_u64, &res, sizeof(uint64_t));
+ }
+
+ // The sum of all keys
+ ResultType m_sum;
+
+ // For counting the keys
+ uint64_t m_count;
+
+ // The user's predicate function
+ hola_bool_predicate_t *m_pred;
+};
+
+ham_status_t HAM_CALLCONV
+hola_average_if(ham_db_t *hdb, ham_txn_t *txn, hola_bool_predicate_t *pred,
+ hola_result_t *result)
+{
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!pred) {
+ ham_trace(("parameter 'pred' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!result) {
+ ham_trace(("parameter 'result' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ // Remote databases are not yet supported
+ LocalDatabase *db = dynamic_cast<LocalDatabase *>((Database *)hdb);
+ if (!db) {
+ ham_trace(("hola_* functions are not yet supported for remote databases"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ std::auto_ptr<ScanVisitor> visitor;
+ result->u.result_u64 = 0;
+
+ switch (db->config().key_type) {
+ case HAM_TYPE_UINT8:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new AverageIfScanVisitor<uint8_t, uint64_t>(pred));
+ break;
+ case HAM_TYPE_UINT16:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new AverageIfScanVisitor<uint16_t, uint64_t>(pred));
+ break;
+ case HAM_TYPE_UINT32:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new AverageIfScanVisitor<uint32_t, uint64_t>(pred));
+ break;
+ case HAM_TYPE_UINT64:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new AverageIfScanVisitor<uint64_t, uint64_t>(pred));
+ break;
+ case HAM_TYPE_REAL32:
+ result->type = HAM_TYPE_REAL64;
+ visitor.reset(new AverageIfScanVisitor<float, double>(pred));
+ break;
+ case HAM_TYPE_REAL64:
+ result->type = HAM_TYPE_REAL64;
+ visitor.reset(new AverageIfScanVisitor<double, double>(pred));
+ break;
+ default:
+ ham_trace(("hola_average* can only be applied to numerical data"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+ ham_status_t st = db->scan((Transaction *)txn, visitor.get(), false);
+ if (st == 0)
+ visitor->assign_result(result);
+ return (db->set_error(st));
+}
+
+//
+// A ScanVisitor for hola_sum
+//
+template<typename PodType, typename ResultType>
+struct SumScanVisitor : public ScanVisitor {
+ SumScanVisitor()
+ : m_sum(0) {
+ }
+
+ // Operates on a single key
+ virtual void operator()(const void *key_data, uint16_t key_size,
+ size_t duplicate_count) {
+ ham_assert(key_size == sizeof(PodType));
+ m_sum += *(const PodType *)key_data * duplicate_count;
+ }
+
+ // Operates on an array of keys
+ virtual void operator()(const void *key_array, size_t key_count) {
+ const PodType *p = (const PodType *)key_array;
+ const PodType *end = &p[key_count];
+ const int kMax = 8;
+ ResultType sums[kMax] = {0};
+ for (; p + kMax < end; p += kMax) {
+#if defined __GNUC__
+ __builtin_prefetch(((char *)p) + kMax * sizeof(PodType));
+#endif
+ sums[0] += p[0];
+ sums[1] += p[1];
+ sums[2] += p[2];
+ sums[3] += p[3];
+ sums[4] += p[4];
+ sums[5] += p[5];
+ sums[6] += p[6];
+ sums[7] += p[7];
+ }
+ for (; p < end; p++)
+ m_sum += *p;
+ for (int i = 0; i < kMax; i++)
+ m_sum += sums[i];
+ }
+
+ // Assigns the result to |result|
+ virtual void assign_result(hola_result_t *result) {
+ memcpy(&result->u.result_u64, &m_sum, sizeof(uint64_t));
+ }
+
+ // The sum of all keys
+ ResultType m_sum;
+};
+
+ham_status_t HAM_CALLCONV
+hola_sum(ham_db_t *hdb, ham_txn_t *txn, hola_result_t *result)
+{
+ if (!hdb) {
+ ham_trace(("parameter 'hdb' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!result) {
+ ham_trace(("parameter 'result' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ std::auto_ptr<ScanVisitor> visitor;
+ result->u.result_u64 = 0;
+
+ // Remote databases are not yet supported
+ LocalDatabase *db = dynamic_cast<LocalDatabase *>((Database *)hdb);
+ if (!db) {
+ ham_trace(("hola_* functions are not yet supported for remote databases"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ switch (db->config().key_type) {
+ case HAM_TYPE_UINT8:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new SumScanVisitor<uint8_t, uint64_t>());
+ break;
+ case HAM_TYPE_UINT16:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new SumScanVisitor<uint16_t, uint64_t>());
+ break;
+ case HAM_TYPE_UINT32:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new SumScanVisitor<uint32_t, uint64_t>());
+ break;
+ case HAM_TYPE_UINT64:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new SumScanVisitor<uint64_t, uint64_t>());
+ break;
+ case HAM_TYPE_REAL32:
+ result->type = HAM_TYPE_REAL64;
+ visitor.reset(new SumScanVisitor<float, double>());
+ break;
+ case HAM_TYPE_REAL64:
+ result->type = HAM_TYPE_REAL64;
+ visitor.reset(new SumScanVisitor<double, double>());
+ break;
+ default:
+ ham_trace(("hola_sum* can only be applied to numerical data"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+ ham_status_t st = db->scan((Transaction *)txn, visitor.get(), false);
+ if (st == 0)
+ visitor->assign_result(result);
+ return (db->set_error(st));
+}
+
+//
+// A ScanVisitor for hola_sum_if
+//
+template<typename PodType, typename ResultType>
+struct SumIfScanVisitor : public ScanVisitor {
+ SumIfScanVisitor(hola_bool_predicate_t *pred)
+ : m_sum(0), m_pred(pred) {
+ }
+
+ // Operates on a single key
+ virtual void operator()(const void *key_data, uint16_t key_size,
+ size_t duplicate_count) {
+ ham_assert(key_size == sizeof(PodType));
+
+ if (m_pred->predicate_func(key_data, key_size, m_pred->context))
+ m_sum += *(const PodType *)key_data * duplicate_count;
+ }
+
+ // Operates on an array of keys
+ virtual void operator()(const void *key_array, size_t key_count) {
+ const PodType *p = (const PodType *)key_array;
+ const PodType *end = &p[key_count];
+ for (; p < end; p++) {
+ if (m_pred->predicate_func(p, sizeof(PodType), m_pred->context))
+ m_sum += *p;
+ }
+ }
+
+ // Assigns the result to |result|
+ virtual void assign_result(hola_result_t *result) {
+ memcpy(&result->u.result_u64, &m_sum, sizeof(uint64_t));
+ }
+
+ // The sum of all keys
+ ResultType m_sum;
+
+ // The user's predicate function
+ hola_bool_predicate_t *m_pred;
+};
+
+ham_status_t HAM_CALLCONV
+hola_sum_if(ham_db_t *hdb, ham_txn_t *txn, hola_bool_predicate_t *pred,
+ hola_result_t *result)
+{
+ if (!hdb) {
+ ham_trace(("parameter 'db' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!pred) {
+ ham_trace(("parameter 'pred' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+ if (!result) {
+ ham_trace(("parameter 'result' must not be NULL"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ // Remote databases are not yet supported
+ LocalDatabase *db = dynamic_cast<LocalDatabase *>((Database *)hdb);
+ if (!db) {
+ ham_trace(("hola_* functions are not yet supported for remote databases"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ std::auto_ptr<ScanVisitor> visitor;
+ result->u.result_u64 = 0;
+
+ switch (db->config().key_type) {
+ case HAM_TYPE_UINT8:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new SumIfScanVisitor<uint8_t, uint64_t>(pred));
+ break;
+ case HAM_TYPE_UINT16:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new SumIfScanVisitor<uint16_t, uint64_t>(pred));
+ break;
+ case HAM_TYPE_UINT32:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new SumIfScanVisitor<uint32_t, uint64_t>(pred));
+ break;
+ case HAM_TYPE_UINT64:
+ result->type = HAM_TYPE_UINT64;
+ visitor.reset(new SumIfScanVisitor<uint64_t, uint64_t>(pred));
+ break;
+ case HAM_TYPE_REAL32:
+ result->type = HAM_TYPE_REAL64;
+ visitor.reset(new SumIfScanVisitor<float, double>(pred));
+ break;
+ case HAM_TYPE_REAL64:
+ result->type = HAM_TYPE_REAL64;
+ visitor.reset(new SumIfScanVisitor<double, double>(pred));
+ break;
+ default:
+ ham_trace(("hola_sum* can only be applied to numerical data"));
+ return (HAM_INV_PARAMETER);
+ }
+
+ ScopedLock lock(db->get_env()->mutex());
+ ham_status_t st = db->scan((Transaction *)txn, visitor.get(), false);
+ if (st == 0)
+ visitor->assign_result(result);
+ return (db->set_error(st));
+}