summaryrefslogtreecommitdiff
path: root/plugins/Dbx_mdb/src/mdbx/lck-windows.c
blob: 0582b28d784b43aef9c21f1c1ce02e87c7024e61 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
/*
 * Copyright 2015-2017 Leonid Yuriev <leo@yuriev.ru>
 * and other libmdbx authors: please see AUTHORS file.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted only as authorized by the OpenLDAP
 * Public License.
 *
 * A copy of this license is available in the file LICENSE in the
 * top-level directory of the distribution or, alternatively, at
 * <http://www.OpenLDAP.org/license.html>.
 */

#include "./bits.h"

/* PREAMBLE FOR WINDOWS:
 *
 * We are not concerned for performance here.
 * If you are running Windows a performance could NOT be the goal.
 * Otherwise please use Linux.
 *
 * Regards,
 * LY
 */

/*----------------------------------------------------------------------------*/
/* rthc */

static CRITICAL_SECTION rthc_critical_section;

static void NTAPI tls_callback(PVOID module, DWORD reason, PVOID reserved) {
  (void)module;
  (void)reserved;
  switch (reason) {
  case DLL_PROCESS_ATTACH:
    InitializeCriticalSection(&rthc_critical_section);
    break;
  case DLL_PROCESS_DETACH:
    DeleteCriticalSection(&rthc_critical_section);
    break;

  case DLL_THREAD_ATTACH:
    break;
  case DLL_THREAD_DETACH:
    mdbx_rthc_cleanup();
    break;
  }
}

void mdbx_rthc_lock(void) { EnterCriticalSection(&rthc_critical_section); }

void mdbx_rthc_unlock(void) { LeaveCriticalSection(&rthc_critical_section); }

/* *INDENT-OFF* */
/* clang-format off */
#if defined(_MSC_VER)
#  pragma const_seg(push)
#  pragma data_seg(push)

#  ifdef _WIN64
     /* kick a linker to create the TLS directory if not already done */
#    pragma comment(linker, "/INCLUDE:_tls_used")
     /* Force some symbol references. */
#    pragma comment(linker, "/INCLUDE:mdbx_tls_callback")
     /* specific const-segment for WIN64 */
#    pragma const_seg(".CRT$XLB")
     const
#  else
     /* kick a linker to create the TLS directory if not already done */
#    pragma comment(linker, "/INCLUDE:__tls_used")
     /* Force some symbol references. */
#    pragma comment(linker, "/INCLUDE:_mdbx_tls_callback")
     /* specific data-segment for WIN32 */
#    pragma data_seg(".CRT$XLB")
#  endif

   PIMAGE_TLS_CALLBACK mdbx_tls_callback = tls_callback;
#  pragma data_seg(pop)
#  pragma const_seg(pop)

#elif defined(__GNUC__)
#  ifdef _WIN64
     const
#  endif
   PIMAGE_TLS_CALLBACK mdbx_tls_callback __attribute__((section(".CRT$XLB"), used))
     = tls_callback;
#else
#  error FIXME
#endif
/* *INDENT-ON* */
/* clang-format on */

/*----------------------------------------------------------------------------*/

#define LCK_SHARED 0
#define LCK_EXCLUSIVE LOCKFILE_EXCLUSIVE_LOCK
#define LCK_WAITFOR 0
#define LCK_DONTWAIT LOCKFILE_FAIL_IMMEDIATELY

static __inline BOOL flock(mdbx_filehandle_t fd, DWORD flags, uint64_t offset,
                           size_t bytes) {
  OVERLAPPED ov;
  ov.hEvent = 0;
  ov.Offset = (DWORD)offset;
  ov.OffsetHigh = HIGH_DWORD(offset);
  return LockFileEx(fd, flags, 0, (DWORD)bytes, HIGH_DWORD(bytes), &ov);
}

static __inline BOOL funlock(mdbx_filehandle_t fd, uint64_t offset,
                             size_t bytes) {
  return UnlockFile(fd, (DWORD)offset, HIGH_DWORD(offset), (DWORD)bytes,
                    HIGH_DWORD(bytes));
}

/*----------------------------------------------------------------------------*/
/* global `write` lock for write-txt processing,
 * exclusive locking both meta-pages) */

#define LCK_MAXLEN (1u + (size_t)(MAXSSIZE_T))
#define LCK_META_OFFSET 0
#define LCK_META_LEN 0x10000u
#define LCK_BODY_OFFSET LCK_META_LEN
#define LCK_BODY_LEN (LCK_MAXLEN - LCK_BODY_OFFSET + 1u)
#define LCK_META LCK_META_OFFSET, LCK_META_LEN
#define LCK_BODY LCK_BODY_OFFSET, LCK_BODY_LEN
#define LCK_WHOLE 0, LCK_MAXLEN

int mdbx_txn_lock(MDBX_env *env, bool dontwait) {
  if (flock(env->me_fd, dontwait ? (LCK_EXCLUSIVE | LCK_DONTWAIT)
                                 : (LCK_EXCLUSIVE | LCK_WAITFOR),
            LCK_BODY))
    return MDBX_SUCCESS;
  int rc = GetLastError();
  return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY;
}

void mdbx_txn_unlock(MDBX_env *env) {
  if (!funlock(env->me_fd, LCK_BODY))
    mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError());
}

/*----------------------------------------------------------------------------*/
/* global `read` lock for readers registration,
 * exclusive locking `mti_numreaders` (second) cacheline */

#define LCK_LO_OFFSET 0
#define LCK_LO_LEN offsetof(MDBX_lockinfo, mti_numreaders)
#define LCK_UP_OFFSET LCK_LO_LEN
#define LCK_UP_LEN (MDBX_LOCKINFO_WHOLE_SIZE - LCK_UP_OFFSET)
#define LCK_LOWER LCK_LO_OFFSET, LCK_LO_LEN
#define LCK_UPPER LCK_UP_OFFSET, LCK_UP_LEN

int mdbx_rdt_lock(MDBX_env *env) {
  if (env->me_lfd == INVALID_HANDLE_VALUE)
    return MDBX_SUCCESS; /* readonly database in readonly filesystem */

  /* transite from S-? (used) to S-E (locked), e.g. exclusive lock upper-part */
  if (flock(env->me_lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER))
    return MDBX_SUCCESS;
  return GetLastError();
}

void mdbx_rdt_unlock(MDBX_env *env) {
  if (env->me_lfd != INVALID_HANDLE_VALUE) {
    /* transite from S-E (locked) to S-? (used), e.g. unlock upper-part */
    if (!funlock(env->me_lfd, LCK_UPPER))
      mdbx_panic("%s failed: errcode %u", mdbx_func_, GetLastError());
  }
}

/*----------------------------------------------------------------------------*/
/* global `initial` lock for lockfile initialization,
 * exclusive/shared locking first cacheline */

/* FIXME: locking schema/algo descritpion.
 ?-?  = free
 S-?  = used
 E-?  = exclusive-read
 ?-S
 ?-E  = middle
 S-S
 S-E  = locked
 E-S
 E-E  = exclusive-write
*/

int mdbx_lck_init(MDBX_env *env) {
  (void)env;
  return MDBX_SUCCESS;
}

/* Seize state as 'exclusive-write' (E-E and returns MDBX_RESULT_TRUE)
 * or as 'used' (S-? and returns MDBX_RESULT_FALSE), otherwise returns an error
 */
static int internal_seize_lck(HANDLE lfd) {
  int rc;
  assert(lfd != INVALID_HANDLE_VALUE);

  /* 1) now on ?-? (free), get ?-E (middle) */
  mdbx_jitter4testing(false);
  if (!flock(lfd, LCK_EXCLUSIVE | LCK_WAITFOR, LCK_UPPER)) {
    rc = GetLastError() /* 2) something went wrong, give up */;
    mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
               "?-?(free) >> ?-E(middle)", rc);
    return rc;
  }

  /* 3) now on ?-E (middle), try E-E (exclusive-write) */
  mdbx_jitter4testing(false);
  if (flock(lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER))
    return MDBX_RESULT_TRUE /* 4) got E-E (exclusive-write), done */;

  /* 5) still on ?-E (middle) */
  rc = GetLastError();
  mdbx_jitter4testing(false);
  if (rc != ERROR_SHARING_VIOLATION && rc != ERROR_LOCK_VIOLATION) {
    /* 6) something went wrong, give up */
    if (!funlock(lfd, LCK_UPPER))
      mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
                 "?-E(middle) >> ?-?(free)", GetLastError());
    return rc;
  }

  /* 7) still on ?-E (middle), try S-E (locked) */
  mdbx_jitter4testing(false);
  rc = flock(lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER) ? MDBX_RESULT_FALSE
                                                        : GetLastError();

  mdbx_jitter4testing(false);
  if (rc != MDBX_RESULT_FALSE)
    mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
               "?-E(middle) >> S-E(locked)", rc);

  /* 8) now on S-E (locked) or still on ?-E (middle),
  *    transite to S-? (used) or ?-? (free) */
  if (!funlock(lfd, LCK_UPPER))
    mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
               "X-E(locked/middle) >> X-?(used/free)", GetLastError());

  /* 9) now on S-? (used, DONE) or ?-? (free, FAILURE) */
  return rc;
}

int mdbx_lck_seize(MDBX_env *env) {
  int rc;

  assert(env->me_fd != INVALID_HANDLE_VALUE);
  if (env->me_lfd == INVALID_HANDLE_VALUE) {
    /* LY: without-lck mode (e.g. on read-only filesystem) */
    mdbx_jitter4testing(false);
    if (!flock(env->me_fd, LCK_SHARED | LCK_DONTWAIT, LCK_WHOLE)) {
      rc = GetLastError();
      mdbx_error("%s(%s) failed: errcode %u", mdbx_func_, "without-lck", rc);
      return rc;
    }
    return MDBX_RESULT_FALSE;
  }

  rc = internal_seize_lck(env->me_lfd);
  mdbx_jitter4testing(false);
  if (rc == MDBX_RESULT_TRUE && (env->me_flags & MDBX_RDONLY) == 0) {
    /* Check that another process don't operates in without-lck mode.
     * Doing such check by exclusive locking the body-part of db. Should be
     * noted:
     *  - we need an exclusive lock for do so;
     *  - we can't lock meta-pages, otherwise other process could get an error
     *    while opening db in valid (non-conflict) mode. */
    if (!flock(env->me_fd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_BODY)) {
      rc = GetLastError();
      mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
                 "lock-against-without-lck", rc);
      mdbx_jitter4testing(false);
      mdbx_lck_destroy(env);
    } else {
      mdbx_jitter4testing(false);
      if (!funlock(env->me_fd, LCK_BODY))
        mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
                   "unlock-against-without-lck", GetLastError());
    }
  }

  return rc;
}

int mdbx_lck_downgrade(MDBX_env *env, bool complete) {
  /* Transite from exclusive state (E-?) to used (S-?) */
  assert(env->me_fd != INVALID_HANDLE_VALUE);
  assert(env->me_lfd != INVALID_HANDLE_VALUE);

  /* 1) must be at E-E (exclusive-write) */
  if (!complete) {
    /* transite from E-E to E_? (exclusive-read) */
    if (!funlock(env->me_lfd, LCK_UPPER))
      mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
                 "E-E(exclusive-write) >> E-?(exclusive-read)", GetLastError());
    return MDBX_SUCCESS /* 2) now at E-? (exclusive-read), done */;
  }

  /* 3) now at E-E (exclusive-write), transite to ?_E (middle) */
  if (!funlock(env->me_lfd, LCK_LOWER))
    mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
               "E-E(exclusive-write) >> ?-E(middle)", GetLastError());

  /* 4) now at ?-E (middle), transite to S-E (locked) */
  if (!flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER)) {
    int rc = GetLastError() /* 5) something went wrong, give up */;
    mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
               "?-E(middle) >> S-E(locked)", rc);
    return rc;
  }

  /* 6) got S-E (locked), continue transition to S-? (used) */
  if (!funlock(env->me_lfd, LCK_UPPER))
    mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
               "S-E(locked) >> S-?(used)", GetLastError());

  return MDBX_SUCCESS /* 7) now at S-? (used), done */;
}

int mdbx_lck_upgrade(MDBX_env *env) {
  /* Transite from locked state (S-E) to exclusive-write (E-E) */
  assert(env->me_fd != INVALID_HANDLE_VALUE);
  assert(env->me_lfd != INVALID_HANDLE_VALUE);

  /* 1) must be at S-E (locked), transite to ?_E (middle) */
  if (!funlock(env->me_lfd, LCK_LOWER))
    mdbx_panic("%s(%s) failed: errcode %u", mdbx_func_,
               "S-E(locked) >> ?-E(middle)", GetLastError());

  /* 3) now on ?-E (middle), try E-E (exclusive-write) */
  mdbx_jitter4testing(false);
  if (flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER))
    return MDBX_RESULT_TRUE; /* 4) got E-E (exclusive-write), done */

  /* 5) still on ?-E (middle) */
  int rc = GetLastError();
  mdbx_jitter4testing(false);
  if (rc != ERROR_SHARING_VIOLATION && rc != ERROR_LOCK_VIOLATION) {
    /* 6) something went wrong, report but continue */
    mdbx_error("%s(%s) failed: errcode %u", mdbx_func_,
               "?-E(middle) >> E-E(exclusive-write)", rc);
  }

  /* 7) still on ?-E (middle), try restore S-E (locked) */
  mdbx_jitter4testing(false);
  rc = flock(env->me_lfd, LCK_SHARED | LCK_DONTWAIT, LCK_LOWER)
           ? MDBX_RESULT_FALSE
           : GetLastError();

  mdbx_jitter4testing(false);
  if (rc != MDBX_RESULT_FALSE) {
    mdbx_fatal("%s(%s) failed: errcode %u", mdbx_func_,
               "?-E(middle) >> S-E(locked)", rc);
    return rc;
  }

  /* 8) now on S-E (locked) */
  return MDBX_RESULT_FALSE;
}

void mdbx_lck_destroy(MDBX_env *env) {
  int rc;

  if (env->me_lfd != INVALID_HANDLE_VALUE) {
    /* double `unlock` for robustly remove overlapped shared/exclusive locks */
    while (funlock(env->me_lfd, LCK_LOWER))
      ;
    rc = GetLastError();
    assert(rc == ERROR_NOT_LOCKED);
    (void)rc;
    SetLastError(ERROR_SUCCESS);

    while (funlock(env->me_lfd, LCK_UPPER))
      ;
    rc = GetLastError();
    assert(rc == ERROR_NOT_LOCKED);
    (void)rc;
    SetLastError(ERROR_SUCCESS);
  }

  if (env->me_fd != INVALID_HANDLE_VALUE) {
    /* explicitly unlock to avoid latency for other processes (windows kernel
     * releases such locks via deferred queues) */
    while (funlock(env->me_fd, LCK_BODY))
      ;
    rc = GetLastError();
    assert(rc == ERROR_NOT_LOCKED);
    (void)rc;
    SetLastError(ERROR_SUCCESS);

    while (funlock(env->me_fd, LCK_META))
      ;
    rc = GetLastError();
    assert(rc == ERROR_NOT_LOCKED);
    (void)rc;
    SetLastError(ERROR_SUCCESS);

    while (funlock(env->me_fd, LCK_WHOLE))
      ;
    rc = GetLastError();
    assert(rc == ERROR_NOT_LOCKED);
    (void)rc;
    SetLastError(ERROR_SUCCESS);
  }
}

/*----------------------------------------------------------------------------*/
/* reader checking (by pid) */

int mdbx_rpid_set(MDBX_env *env) {
  (void)env;
  return MDBX_SUCCESS;
}

int mdbx_rpid_clear(MDBX_env *env) {
  (void)env;
  return MDBX_SUCCESS;
}

/* Checks reader by pid.
 *
 * Returns:
 *   MDBX_RESULT_TRUE, if pid is live (unable to acquire lock)
 *   MDBX_RESULT_FALSE, if pid is dead (lock acquired)
 *   or otherwise the errcode. */
int mdbx_rpid_check(MDBX_env *env, mdbx_pid_t pid) {
  (void)env;
  HANDLE hProcess = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid);
  int rc;
  if (hProcess) {
    rc = WaitForSingleObject(hProcess, 0);
    CloseHandle(hProcess);
  } else {
    rc = GetLastError();
  }

  switch (rc) {
  case ERROR_INVALID_PARAMETER:
    /* pid seem invalid */
    return MDBX_RESULT_FALSE;
  case WAIT_OBJECT_0:
    /* process just exited */
    return MDBX_RESULT_FALSE;
  case WAIT_TIMEOUT:
    /* pid running */
    return MDBX_RESULT_TRUE;
  default:
    /* failure */
    return rc;
  }
}