diff options
author | George Hazan <george.hazan@gmail.com> | 2015-01-13 17:36:14 +0000 |
---|---|---|
committer | George Hazan <george.hazan@gmail.com> | 2015-01-13 17:36:14 +0000 |
commit | 3ed0cd54cf2db670166af44296694a1c9679ec03 (patch) | |
tree | 15dd89ebc75ecc3ff300e1d3ebdd27fc275583be /plugins/Dbx_mdb/src/lmdb/mdb.c | |
parent | 88722c0734ae6ce9b91ba397b89c4b73ed6f064d (diff) |
- fix for ssize_t;
- various optimizations
git-svn-id: http://svn.miranda-ng.org/main/trunk@11848 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
Diffstat (limited to 'plugins/Dbx_mdb/src/lmdb/mdb.c')
-rw-r--r-- | plugins/Dbx_mdb/src/lmdb/mdb.c | 142 |
1 files changed, 93 insertions, 49 deletions
diff --git a/plugins/Dbx_mdb/src/lmdb/mdb.c b/plugins/Dbx_mdb/src/lmdb/mdb.c
index 996683cd08..10339c8388 100644
--- a/plugins/Dbx_mdb/src/lmdb/mdb.c
+++ b/plugins/Dbx_mdb/src/lmdb/mdb.c
@@ -42,6 +42,12 @@
  * as int64 which is wrong. MSVC doesn't define it at all, so just
  * don't use it.
  */
+#ifdef _WIN64
+typedef signed __int64 ssize_t;
+#else
+typedef signed int ssize_t;
+#endif
+
 #define MDB_PID_T int
 #define MDB_THR_T DWORD
 #include <sys/types.h>
@@ -79,6 +85,14 @@ extern int cacheflush(char *addr, int nbytes, int cache);
 #define CACHEFLUSH(addr, bytes, cache)
 #endif
 
+#if defined(__linux) && !defined(MDB_FDATASYNC_WORKS)
+/** fdatasync is broken on ext3/ext4fs on older kernels, see
+ * description in #mdb_env_open2 comments. You can safely
+ * define MDB_FDATASYNC_WORKS if this code will only be run
+ * on kernels 3.6 and newer.
+ */
+//#define BROKEN_FDATASYNC
+#endif
 
 #include <errno.h>
 #include <limits.h>
@@ -367,7 +381,6 @@ static int mdb_mutex_failed(MDB_env *env, mdb_mutex_t *mutex, int rc);
  */
 #ifndef MDB_FDATASYNC
 # define MDB_FDATASYNC fdatasync
-# define HAVE_FDATASYNC 1
 #endif
 
 #ifndef MDB_MSYNC
@@ -1138,6 +1151,8 @@ struct MDB_env {
 #define MDB_ENV_ACTIVE 0x20000000U
 	/** me_txkey is set */
 #define MDB_ENV_TXKEY 0x10000000U
+	/** fdatasync is unreliable */
+#define MDB_FSYNCONLY 0x08000000U
 	uint32_t me_flags; /**< @ref mdb_env */
 	unsigned int me_psize; /**< DB page size, inited from me_os_psize */
 	unsigned int me_os_psize; /**< OS page size, from #GET_PAGESIZE */
@@ -1154,7 +1169,7 @@ struct MDB_env {
 	MDB_txn *me_txn; /**< current write transaction */
 	MDB_txn *me_txn0; /**< prealloc'd write transaction */
 	size_t me_mapsize; /**< size of the data memory map */
-	size_t me_size; /**< current file size */
+	off_t me_size; /**< current file size */
 	pgno_t me_maxpg; /**< me_mapsize / me_psize */
 	MDB_dbx *me_dbxs; /**< array of static DB info */
 	uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
@@ -1203,7 +1218,7 @@ typedef struct MDB_ntxn {
 #endif
 
 	/** max bytes to write in one call */
-#define MAX_WRITE (0x80000000U >> (sizeof(size_t) == 4))
+#define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4))
 
 	/** Check \b txn and \b dbi arguments to a function */
 #define TXN_DBI_EXIST(txn, dbi) \
@@ -2341,19 +2356,12 @@ fail:
 	return rc;
 }
 
-/* internal env_sync flags: */
-#define FORCE 1 /* as before, force a flush */
-#define FGREW 0x8000 /* file has grown, do a full fsync instead of just
-	fdatasync. We shouldn't have to do this, according to the POSIX spec.
-	But common Linux FSs violate the spec and won't sync required metadata
-	correctly when the file grows. This only makes a difference if the
-	platform actually distinguishes fdatasync from fsync.
-	http://www.openldap.org/lists/openldap-devel/201411/msg00000.html */
-
-static int
-mdb_env_sync0(MDB_env *env, int flag)
+int
+mdb_env_sync(MDB_env *env, int force)
 {
-	int rc = 0, force = flag & FORCE;
+	int rc = 0;
+	if (env->me_flags & MDB_RDONLY)
+		return EACCES;
 	if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) {
 		if (env->me_flags & MDB_WRITEMAP) {
 			int flags = ((env->me_flags & MDB_MAPASYNC) && !force)
@@ -2365,9 +2373,9 @@ mdb_env_sync0(MDB_env *env, int flag)
 				rc = ErrCode();
 #endif
 		} else {
-#ifdef HAVE_FDATASYNC
-			if (flag & FGREW) {
-				if (fsync(env->me_fd)) /* Avoid ext-fs bugs, do full sync */
+#ifdef BROKEN_FDATASYNC
+			if (env->me_flags & MDB_FSYNCONLY) {
+				if (fsync(env->me_fd))
 					rc = ErrCode();
 			} else
 #endif
@@ -2378,12 +2386,6 @@ mdb_env_sync0(MDB_env *env, int flag)
 	return rc;
 }
 
-int
-mdb_env_sync(MDB_env *env, int force)
-{
-	return mdb_env_sync0(env, force != 0);
-}
-
 /** Back up parent txn's cursors, then grab the originals for tracking */
 static int
 mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst)
@@ -2939,7 +2941,7 @@ mdb_freelist_save(MDB_txn *txn)
 	int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1;
 	txnid_t pglast = 0, head_id = 0;
 	pgno_t freecnt = 0, *free_pgs, *mop;
-	size_t head_room = 0, total_room = 0, mop_len, clean_limit;
+	ssize_t head_room = 0, total_room = 0, mop_len, clean_limit;
 
 	mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
 
@@ -2971,7 +2973,7 @@ mdb_freelist_save(MDB_txn *txn)
 		/* Come back here after each Put() in case freelist changed */
 		MDB_val key, data;
 		pgno_t *pgs;
-		size_t j;
+		ssize_t j;
 
 		/* If using records from freeDB which we have not yet
 		 * deleted, delete them and any we reserved for me_pghead.
@@ -3095,7 +3097,7 @@ mdb_freelist_save(MDB_txn *txn)
 		rc = mdb_cursor_first(&mc, &key, &data);
 		for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) {
 			txnid_t id = *(txnid_t *)key.mv_data;
-			size_t len = (size_t)(data.mv_size / sizeof(MDB_ID)) - 1;
+			ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
 			MDB_ID save;
 
 			mdb_tassert(txn, len >= 0 && id <= env->me_pglast);
@@ -3135,7 +3137,7 @@ mdb_page_flush(MDB_txn *txn, int keep)
 	OVERLAPPED ov;
 #else
 	struct iovec iov[MDB_COMMIT_PAGES];
-	size_t wpos = 0, wsize = 0, wres;
+	ssize_t wpos = 0, wsize = 0, wres;
 	size_t next_pos = 1; /* impossible pos, so pos != next_pos */
 	int n = 0;
 #endif
@@ -3458,15 +3460,8 @@ mdb_txn_commit(MDB_txn *txn)
 	mdb_audit(txn);
 #endif
 
-	i = 0;
-#ifdef HAVE_FDATASYNC
-	if (txn->mt_next_pgno * env->me_psize > env->me_size) {
-		i |= FGREW;
-		env->me_size = txn->mt_next_pgno * env->me_psize;
-	}
-#endif
 	if ((rc = mdb_page_flush(txn, 0)) ||
-		(rc = mdb_env_sync0(env, i)) ||
+		(rc = mdb_env_sync(env, 0)) ||
 		(rc = mdb_env_write_meta(txn)))
 		goto fail;
 
@@ -3946,6 +3941,11 @@ mdb_fsize(HANDLE fd, size_t *size)
 	return MDB_SUCCESS;
 }
 
+#ifdef BROKEN_FDATASYNC
+#include <sys/utsname.h>
+#include <sys/vfs.h>
+#endif
+
 /** Further setup required for opening an LMDB environment
  */
 static int ESECT
@@ -3964,6 +3964,54 @@ mdb_env_open2(MDB_env *env)
 	env->me_pidquery = PROCESS_QUERY_INFORMATION;
 #endif /* _WIN32 */
 
+#ifdef BROKEN_FDATASYNC
+	/* ext3/ext4 fdatasync is broken on some older Linux kernels.
+	 * https://lkml.org/lkml/2012/9/3/83
+	 * Kernels after 3.6-rc6 are known good.
+	 * https://lkml.org/lkml/2012/9/10/556
+	 * See if the DB is on ext3/ext4, then check for new enough kernel
+	 * Kernels 2.6.32.60, 2.6.34.15, 3.2.30, and 3.5.4 are also known
+	 * to be patched.
+	 */
+	{
+		struct statfs st;
+		fstatfs(env->me_fd, &st);
+		while (st.f_type == 0xEF53) {
+			struct utsname uts;
+			int i;
+			uname(&uts);
+			if (uts.release[0] < '3') {
+				if (!strncmp(uts.release, "2.6.32.", 7)) {
+					i = atoi(uts.release+7);
+					if (i >= 60)
+						break; /* 2.6.32.60 and newer is OK */
+				} else if (!strncmp(uts.release, "2.6.34.", 7)) {
+					i = atoi(uts.release+7);
+					if (i >= 15)
+						break; /* 2.6.34.15 and newer is OK */
+				}
+			} else if (uts.release[0] == '3') {
+				i = atoi(uts.release+2);
+				if (i > 5)
+					break; /* 3.6 and newer is OK */
+				if (i == 5) {
+					i = atoi(uts.release+4);
+					if (i >= 4)
+						break; /* 3.5.4 and newer is OK */
+				} else if (i == 2) {
+					i = atoi(uts.release+4);
+					if (i >= 30)
+						break; /* 3.2.30 and newer is OK */
+				}
+			} else { /* 4.x and newer is OK */
+				break;
+			}
+			env->me_flags |= MDB_FSYNCONLY;
+			break;
+		}
+	}
+#endif
+
 	if ((i = mdb_env_read_header(env, &meta)) != 0) {
 		if (i != ENOENT)
 			return i;
@@ -4007,10 +4055,6 @@ mdb_env_open2(MDB_env *env)
 		newenv = 0;
 	}
 
-	rc = mdb_fsize(env->me_fd, &env->me_size);
-	if (rc)
-		return rc;
-
 	rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL);
 	if (rc)
 		return rc;
@@ -4844,11 +4888,11 @@ static int
 mdb_cmp_memn(const MDB_val *a, const MDB_val *b)
 {
 	int diff;
-	size_t len_diff;
+	ssize_t len_diff;
 	unsigned int len;
 
 	len = a->mv_size;
-	len_diff = (size_t) a->mv_size - (size_t) b->mv_size;
+	len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size;
 	if (len_diff > 0) {
 		len = b->mv_size;
 		len_diff = 1;
@@ -4863,14 +4907,14 @@ static int
 mdb_cmp_memnr(const MDB_val *a, const MDB_val *b)
 {
 	const unsigned char *p1, *p2, *p1_lim;
-	size_t len_diff;
+	ssize_t len_diff;
 	int diff;
 
 	p1_lim = (const unsigned char *)a->mv_data;
 	p1 = (const unsigned char *)a->mv_data + a->mv_size;
 	p2 = (const unsigned char *)b->mv_data + b->mv_size;
 
-	len_diff = (size_t) a->mv_size - (size_t) b->mv_size;
+	len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size;
 	if (len_diff > 0) {
 		p1_lim += len_diff;
 		len_diff = 1;
@@ -6803,7 +6847,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
 {
 	unsigned int i;
 	size_t node_size = NODESIZE;
-	size_t room;
+	ssize_t room;
 	indx_t ofs;
 	MDB_node *node;
 	MDB_page *mp = mc->mc_pg[mc->mc_top];
@@ -6834,7 +6878,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
 		return MDB_SUCCESS;
 	}
 
-	room = (size_t)SIZELEFT(mp) - (size_t)sizeof(indx_t);
+	room = (ssize_t)SIZELEFT(mp) - (ssize_t)sizeof(indx_t);
 	if (key != NULL)
 		node_size += key->mv_size;
 	if (IS_LEAF(mp)) {
@@ -6849,7 +6893,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
 			DPRINTF(("data size is %"Z"u, node would be %"Z"u, put data on overflow page",
 			    data->mv_size, node_size+data->mv_size));
 			node_size = EVEN(node_size + sizeof(pgno_t));
-			if ((size_t)node_size > room)
+			if ((ssize_t)node_size > room)
 				goto full;
 			if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp)))
 				return rc;
@@ -6861,7 +6905,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
 		}
 	}
 	node_size = EVEN(node_size);
-	if ((size_t)node_size > room)
+	if ((ssize_t)node_size > room)
 		goto full;
 
 update:
@@ -8768,7 +8812,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
 	DWORD len, w2;
 #define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
 #else
-	size_t len;
+	ssize_t len;
 	size_t w2;
 #define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
 #endif
|