Diffstat (limited to 'transaction.go')
-rw-r--r-- | transaction.go | 1506 |
1 file changed, 1506 insertions, 0 deletions
diff --git a/transaction.go b/transaction.go
index 74118d1..1064e8e 100644
--- a/transaction.go
+++ b/transaction.go
@@ -31,3 +31,1509 @@ type transaction struct {
 	// Implicit from slices? TODO: MDB_dbi mt_numdbs;
 	mt_dirty_room int
 }
+
+// ntxn represents a nested transaction.
+type ntxn struct {
+	transaction *transaction /**< the transaction */
+	pageState   pageState    /**< parent transaction's saved freestate */
+}
+
+func (t *transaction) allocPage(num int) *page {
+	/*
+	MDB_env *env = txn->mt_env;
+	MDB_page *ret = env->me_dpages;
+	size_t psize = env->me_psize, sz = psize, off;
+	// For ! #MDB_NOMEMINIT, psize counts how much to init.
+	// For a single page alloc, we init everything after the page header.
+	// For multi-page, we init the final page; if the caller needed that
+	// many pages they will be filling in at least up to the last page.
+	if (num == 1) {
+		if (ret) {
+			VGMEMP_ALLOC(env, ret, sz);
+			VGMEMP_DEFINED(ret, sizeof(ret->mp_next));
+			env->me_dpages = ret->mp_next;
+			return ret;
+		}
+		psize -= off = PAGEHDRSZ;
+	} else {
+		sz *= num;
+		off = sz - psize;
+	}
+	if ((ret = malloc(sz)) != NULL) {
+		VGMEMP_ALLOC(env, ret, sz);
+		if (!(env->me_flags & MDB_NOMEMINIT)) {
+			memset((char *)ret + off, 0, psize);
+			ret->mp_pad = 0;
+		}
+	} else {
+		txn->mt_flags |= MDB_TXN_ERROR;
+	}
+	return ret;
+	*/
+	return nil
+}
+
+// Find oldest txnid still referenced. Expects txn->mt_txnid > 0.
+func (t *transaction) oldest() int {
+	/*
+	int i;
+	txnid_t mr, oldest = txn->mt_txnid - 1;
+	if (txn->mt_env->me_txns) {
+		MDB_reader *r = txn->mt_env->me_txns->mti_readers;
+		for (i = txn->mt_env->me_txns->mti_numreaders; --i >= 0; ) {
+			if (r[i].mr_pid) {
+				mr = r[i].mr_txnid;
+				if (oldest > mr)
+					oldest = mr;
+			}
+		}
+	}
+	return oldest;
+	*/
+	return 0
+}
+
+// Add a page to the txn's dirty list
+func (t *transaction) dirty(p *page) {
+	/*
+	MDB_ID2 mid;
+	int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
+
+	if (txn->mt_env->me_flags & MDB_WRITEMAP) {
+		insert = mdb_mid2l_append;
+	} else {
+		insert = mdb_mid2l_insert;
+	}
+	mid.mid = mp->mp_pgno;
+	mid.mptr = mp;
+	rc = insert(txn->mt_u.dirty_list, &mid);
+	mdb_tassert(txn, rc == 0);
+	txn->mt_dirty_room--;
+	*/
+}
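+
+// A rough Go shape for the mid2l insert above, as a sketch only: the dirty
+// list could become a slice of (pgno, page) pairs kept sorted by pgno. The
+// dirtyPage type and the sort import are assumptions, not part of this port yet.
+//
+//	func insertDirty(list []dirtyPage, d dirtyPage) []dirtyPage {
+//		i := sort.Search(len(list), func(i int) bool { return list[i].pgno >= d.pgno })
+//		list = append(list, dirtyPage{}) // grow by one
+//		copy(list[i+1:], list[i:])       // shift the tail right
+//		list[i] = d
+//		return list
+//	}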
+
+// Pull a page off the txn's spill list, if present.
+// If a page being referenced was spilled to disk in this txn, bring
+// it back and make it dirty/writable again.
+// @param[in] txn the transaction handle.
+// @param[in] mp the page being referenced. It must not be dirty.
+// @param[out] ret the writable page, if any. ret is unchanged if
+// mp wasn't spilled.
+func (t *transaction) unspill(p *page) *page {
+	/*
+	MDB_env *env = txn->mt_env;
+	const MDB_txn *tx2;
+	unsigned x;
+	pgno_t pgno = mp->mp_pgno, pn = pgno << 1;
+
+	for (tx2 = txn; tx2; tx2=tx2->mt_parent) {
+		if (!tx2->mt_spill_pgs)
+			continue;
+		x = mdb_midl_search(tx2->mt_spill_pgs, pn);
+		if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
+			MDB_page *np;
+			int num;
+			if (txn->mt_dirty_room == 0)
+				return MDB_TXN_FULL;
+			if (IS_OVERFLOW(mp))
+				num = mp->mp_pages;
+			else
+				num = 1;
+			if (env->me_flags & MDB_WRITEMAP) {
+				np = mp;
+			} else {
+				np = mdb_page_malloc(txn, num);
+				if (!np)
+					return ENOMEM;
+				if (num > 1)
+					memcpy(np, mp, num * env->me_psize);
+				else
+					mdb_page_copy(np, mp, env->me_psize);
+			}
+			if (tx2 == txn) {
+				// If in current txn, this page is no longer spilled.
+				// If it happens to be the last page, truncate the spill list.
+				// Otherwise mark it as deleted by setting the LSB.
+				if (x == txn->mt_spill_pgs[0])
+					txn->mt_spill_pgs[0]--;
+				else
+					txn->mt_spill_pgs[x] |= 1;
+			} // otherwise, if belonging to a parent txn, the
+			  // page remains spilled until child commits
+
+			mdb_page_dirty(txn, np);
+			np->mp_flags |= P_DIRTY;
+			*ret = np;
+			break;
+		}
+	}
+	return MDB_SUCCESS;
+	*/
+	return nil
+}
+
+// Back up parent txn's cursors, then grab the originals for tracking
+func (t *transaction) shadow(dst *transaction) error {
+	/*
+	MDB_cursor *mc, *bk;
+	MDB_xcursor *mx;
+	size_t size;
+	int i;
+
+	for (i = src->mt_numdbs; --i >= 0; ) {
+		if ((mc = src->mt_cursors[i]) != NULL) {
+			size = sizeof(MDB_cursor);
+			if (mc->mc_xcursor)
+				size += sizeof(MDB_xcursor);
+			for (; mc; mc = bk->mc_next) {
+				bk = malloc(size);
+				if (!bk)
+					return ENOMEM;
+				*bk = *mc;
+				mc->mc_backup = bk;
+				mc->mc_db = &dst->mt_dbs[i];
+				// Kill pointers into src - and dst to reduce abuse: The
+				// user may not use mc until dst ends. Otherwise we'd...
+				mc->mc_txn = NULL;    // ...set this to dst
+				mc->mc_dbflag = NULL; // ...and &dst->mt_dbflags[i]
+				if ((mx = mc->mc_xcursor) != NULL) {
+					*(MDB_xcursor *)(bk+1) = *mx;
+					mx->mx_cursor.mc_txn = NULL; // ...and dst.
+				}
+				mc->mc_next = dst->mt_cursors[i];
+				dst->mt_cursors[i] = mc;
+			}
+		}
+	}
+	return MDB_SUCCESS;
+	*/
+	return nil
+}
+
+// Close this write txn's cursors, give parent txn's cursors back to parent.
+// @param[in] txn the transaction handle.
+// @param[in] merge true to keep changes to parent cursors, false to revert.
+func (t *transaction) closeCursors(merge bool) {
+	/*
+	MDB_cursor **cursors = txn->mt_cursors, *mc, *next, *bk;
+	MDB_xcursor *mx;
+	int i;
+
+	for (i = txn->mt_numdbs; --i >= 0; ) {
+		for (mc = cursors[i]; mc; mc = next) {
+			next = mc->mc_next;
+			if ((bk = mc->mc_backup) != NULL) {
+				if (merge) {
+					// Commit changes to parent txn
+					mc->mc_next = bk->mc_next;
+					mc->mc_backup = bk->mc_backup;
+					mc->mc_txn = bk->mc_txn;
+					mc->mc_db = bk->mc_db;
+					mc->mc_dbflag = bk->mc_dbflag;
+					if ((mx = mc->mc_xcursor) != NULL)
+						mx->mx_cursor.mc_txn = bk->mc_txn;
+				} else {
+					// Abort nested txn
+					*mc = *bk;
+					if ((mx = mc->mc_xcursor) != NULL)
+						*mx = *(MDB_xcursor *)(bk+1);
+				}
+				mc = bk;
+			}
+			// Only malloced cursors are permanently tracked.
+			free(mc);
+		}
+		cursors[i] = NULL;
+	}
+	*/
+}
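+
+// The shadow/closeCursors pair above is a save/restore protocol. A minimal
+// sketch of the idea in Go, assuming cursor grows backup/next/txn fields
+// when it is ported (all names here are placeholders):
+//
+//	bk := *c        // value copy preserves the parent's cursor state
+//	c.backup = &bk  // child keeps a pointer back to the saved copy
+//	c.txn = nil     // poison the txn pointer until the child txn ends
+//	// on abort: *c = *c.backup (revert); on commit: keep c, drop bk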
+
+// Common code for #mdb_txn_begin() and #mdb_txn_renew().
+// @param[in] txn the transaction handle to initialize
+// @return 0 on success, non-zero on failure.
+func (t *transaction) renew() error {
+	/*
+	MDB_env *env = txn->mt_env;
+	MDB_txninfo *ti = env->me_txns;
+	MDB_meta *meta;
+	unsigned int i, nr;
+	uint16_t x;
+	int rc, new_notls = 0;
+
+	// Setup db info
+	txn->mt_numdbs = env->me_numdbs;
+	txn->mt_dbxs = env->me_dbxs; // mostly static anyway
+
+	if (txn->mt_flags & MDB_TXN_RDONLY) {
+		if (!ti) {
+			meta = env->me_metas[ mdb_env_pick_meta(env) ];
+			txn->mt_txnid = meta->mm_txnid;
+			txn->mt_u.reader = NULL;
+		} else {
+			MDB_reader *r = (env->me_flags & MDB_NOTLS) ? txn->mt_u.reader :
+				pthread_getspecific(env->me_txkey);
+			if (r) {
+				if (r->mr_pid != env->me_pid || r->mr_txnid != (txnid_t)-1)
+					return MDB_BAD_RSLOT;
+			} else {
+				MDB_PID_T pid = env->me_pid;
+				pthread_t tid = pthread_self();
+
+				if (!(env->me_flags & MDB_LIVE_READER)) {
+					rc = mdb_reader_pid(env, Pidset, pid);
+					if (rc)
+						return rc;
+					env->me_flags |= MDB_LIVE_READER;
+				}
+
+				LOCK_MUTEX_R(env);
+				nr = ti->mti_numreaders;
+				for (i=0; i<nr; i++)
+					if (ti->mti_readers[i].mr_pid == 0)
+						break;
+				if (i == env->me_maxreaders) {
+					UNLOCK_MUTEX_R(env);
+					return MDB_READERS_FULL;
+				}
+				ti->mti_readers[i].mr_pid = pid;
+				ti->mti_readers[i].mr_tid = tid;
+				if (i == nr)
+					ti->mti_numreaders = ++nr;
+				// Save numreaders for un-mutexed mdb_env_close()
+				env->me_numreaders = nr;
+				UNLOCK_MUTEX_R(env);
+
+				r = &ti->mti_readers[i];
+				new_notls = (env->me_flags & MDB_NOTLS);
+				if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) {
+					r->mr_pid = 0;
+					return rc;
+				}
+			}
+			txn->mt_txnid = r->mr_txnid = ti->mti_txnid;
+			txn->mt_u.reader = r;
+			meta = env->me_metas[txn->mt_txnid & 1];
+		}
+	} else {
+		if (ti) {
+			LOCK_MUTEX_W(env);
+
+			txn->mt_txnid = ti->mti_txnid;
+			meta = env->me_metas[txn->mt_txnid & 1];
+		} else {
+			meta = env->me_metas[ mdb_env_pick_meta(env) ];
+			txn->mt_txnid = meta->mm_txnid;
+		}
+		txn->mt_txnid++;
+#if MDB_DEBUG
+		if (txn->mt_txnid == mdb_debug_start)
+			mdb_debug = 1;
+#endif
+		txn->mt_dirty_room = MDB_IDL_UM_MAX;
+		txn->mt_u.dirty_list = env->me_dirty_list;
+		txn->mt_u.dirty_list[0].mid = 0;
+		txn->mt_free_pgs = env->me_free_pgs;
+		txn->mt_free_pgs[0] = 0;
+		txn->mt_spill_pgs = NULL;
+		env->me_txn = txn;
+	}
+
+	// Copy the DB info and flags
+	memcpy(txn->mt_dbs, meta->mm_dbs, 2 * sizeof(MDB_db));
+
+	// Moved to here to avoid a data race in read TXNs
+	txn->mt_next_pgno = meta->mm_last_pg+1;
+
+	for (i=2; i<txn->mt_numdbs; i++) {
+		x = env->me_dbflags[i];
+		txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS;
+		txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0;
+	}
+	txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID;
+
+	if (env->me_maxpg < txn->mt_next_pgno) {
+		mdb_txn_reset0(txn, "renew0-mapfail");
+		if (new_notls) {
+			txn->mt_u.reader->mr_pid = 0;
+			txn->mt_u.reader = NULL;
+		}
+		return MDB_MAP_RESIZED;
+	}
+
+	return MDB_SUCCESS;
+	*/
+	return nil
+}
+
+func (t *transaction) Renew() error {
+	/*
+	int rc;
+
+	if (!txn || txn->mt_dbxs) // A reset txn has mt_dbxs==NULL
+		return EINVAL;
+
+	if (txn->mt_env->me_flags & MDB_FATAL_ERROR) {
+		DPUTS("environment had fatal error, must shutdown!");
+		return MDB_PANIC;
+	}
+
+	rc = mdb_txn_renew0(txn);
+	if (rc == MDB_SUCCESS) {
+		DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
+			txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
+			(void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root));
+	}
+	return rc;
+	*/
+	return nil
+}
+
+func (t *transaction) DB() DB {
+	return t.db
+}
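+
+// The reader-table scan in renew() reduces to a linear probe for a free
+// slot. A sketch under assumed Go types (readers as a mutex-guarded slice
+// with pid/tid fields); errReadersFull is a placeholder error:
+//
+//	func (e *env) acquireReaderSlot(pid, tid int) (int, error) {
+//		for i := range e.readers {
+//			if e.readers[i].pid == 0 {
+//				e.readers[i].pid, e.readers[i].tid = pid, tid
+//				return i, nil
+//			}
+//		}
+//		return -1, errReadersFull
+//	}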
+
+// Export or close DBI handles opened in this txn.
+func (t *transaction) updateBuckets(keep bool) {
+	/*
+	int i;
+	MDB_dbi n = txn->mt_numdbs;
+	MDB_env *env = txn->mt_env;
+	unsigned char *tdbflags = txn->mt_dbflags;
+
+	for (i = n; --i >= 2;) {
+		if (tdbflags[i] & DB_NEW) {
+			if (keep) {
+				env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
+			} else {
+				char *ptr = env->me_dbxs[i].md_name.mv_data;
+				env->me_dbxs[i].md_name.mv_data = NULL;
+				env->me_dbxs[i].md_name.mv_size = 0;
+				env->me_dbflags[i] = 0;
+				free(ptr);
+			}
+		}
+	}
+	if (keep && env->me_numdbs < n)
+		env->me_numdbs = n;
+	*/
+}
+
+// Common code for #mdb_txn_reset() and #mdb_txn_abort().
+// May be called twice for readonly txns: First reset it, then abort.
+// @param[in] txn the transaction handle to reset
+// @param[in] act why the transaction is being reset
+func (t *transaction) reset(act string) {
+	/*
+	MDB_env *env = txn->mt_env;
+
+	// Close any DBI handles opened in this txn
+	mdb_dbis_update(txn, 0);
+
+	DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
+		act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
+		(void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root));
+
+	if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
+		if (txn->mt_u.reader) {
+			txn->mt_u.reader->mr_txnid = (txnid_t)-1;
+			if (!(env->me_flags & MDB_NOTLS))
+				txn->mt_u.reader = NULL; // txn does not own reader
+		}
+		txn->mt_numdbs = 0; // close nothing if called again
+		txn->mt_dbxs = NULL; // mark txn as reset
+	} else {
+		mdb_cursors_close(txn, 0);
+
+		if (!(env->me_flags & MDB_WRITEMAP)) {
+			mdb_dlist_free(txn);
+		}
+		mdb_midl_free(env->me_pghead);
+
+		if (txn->mt_parent) {
+			txn->mt_parent->mt_child = NULL;
+			env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate;
+			mdb_midl_free(txn->mt_free_pgs);
+			mdb_midl_free(txn->mt_spill_pgs);
+			free(txn->mt_u.dirty_list);
+			return;
+		}
+
+		if (mdb_midl_shrink(&txn->mt_free_pgs))
+			env->me_free_pgs = txn->mt_free_pgs;
+		env->me_pghead = NULL;
+		env->me_pglast = 0;
+
+		env->me_txn = NULL;
+		// The writer mutex was locked in mdb_txn_begin.
+		if (env->me_txns)
+			UNLOCK_MUTEX_W(env);
+	}
+	*/
+}
+
+func (t *transaction) Reset() {
+	/*
+	if (txn == NULL)
+		return;
+
+	// This call is only valid for read-only txns
+	if (!(txn->mt_flags & MDB_TXN_RDONLY))
+		return;
+
+	mdb_txn_reset0(txn, "reset");
+	*/
+}
+
+func (t *transaction) Abort() {
+	/*
+	if (txn == NULL)
+		return;
+
+	if (txn->mt_child)
+		mdb_txn_abort(txn->mt_child);
+
+	mdb_txn_reset0(txn, "abort");
+	// Free reader slot tied to this txn (if MDB_NOTLS && writable FS)
+	if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader)
+		txn->mt_u.reader->mr_pid = 0;
+
+	free(txn);
+	*/
+}
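+
+// Intended lifecycle once these stubs are ported (illustrative only; the
+// db.Transaction constructor shown here is hypothetical):
+//
+//	t, _ := db.Transaction() // read-only txn pins a reader slot
+//	v, _ := t.Get(b, key)
+//	t.Reset()                // drop the snapshot, keep the slot
+//	t.Renew()                // pin a fresh snapshot on the same slot
+//	t.Abort()                // release the slot entirely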
+
+// Save the freelist as of this transaction to the freeDB.
+// This changes the freelist. Keep trying until it stabilizes.
+func (t *transaction) saveFreelist() error {
+	/*
+	// env->me_pghead[] can grow and shrink during this call.
+	// env->me_pglast and txn->mt_free_pgs[] can only grow.
+	// Page numbers cannot disappear from txn->mt_free_pgs[].
+	MDB_cursor mc;
+	MDB_env *env = txn->mt_env;
+	int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1;
+	txnid_t pglast = 0, head_id = 0;
+	pgno_t freecnt = 0, *free_pgs, *mop;
+	ssize_t head_room = 0, total_room = 0, mop_len, clean_limit;
+
+	mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
+
+	if (env->me_pghead) {
+		// Make sure first page of freeDB is touched and on freelist
+		rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST|MDB_PS_MODIFY);
+		if (rc && rc != MDB_NOTFOUND)
+			return rc;
+	}
+
+	// MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP)
+	clean_limit = (env->me_flags & (MDB_NOMEMINIT|MDB_WRITEMAP))
+		? SSIZE_MAX : maxfree_1pg;
+
+	for (;;) {
+		// Come back here after each Put() in case freelist changed
+		MDB_val key, data;
+		pgno_t *pgs;
+		ssize_t j;
+
+		// If using records from freeDB which we have not yet
+		// deleted, delete them and any we reserved for me_pghead.
+		while (pglast < env->me_pglast) {
+			rc = mdb_cursor_first(&mc, &key, NULL);
+			if (rc)
+				return rc;
+			pglast = head_id = *(txnid_t *)key.mv_data;
+			total_room = head_room = 0;
+			mdb_tassert(txn, pglast <= env->me_pglast);
+			rc = mdb_cursor_del(&mc, 0);
+			if (rc)
+				return rc;
+		}
+
+		// Save the IDL of pages freed by this txn, to a single record
+		if (freecnt < txn->mt_free_pgs[0]) {
+			if (!freecnt) {
+				// Make sure last page of freeDB is touched and on freelist
+				rc = mdb_page_search(&mc, NULL, MDB_PS_LAST|MDB_PS_MODIFY);
+				if (rc && rc != MDB_NOTFOUND)
+					return rc;
+			}
+			free_pgs = txn->mt_free_pgs;
+			// Write to last page of freeDB
+			key.mv_size = sizeof(txn->mt_txnid);
+			key.mv_data = &txn->mt_txnid;
+			do {
+				freecnt = free_pgs[0];
+				data.mv_size = MDB_IDL_SIZEOF(free_pgs);
+				rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
+				if (rc)
+					return rc;
+				// Retry if mt_free_pgs[] grew during the Put()
+				free_pgs = txn->mt_free_pgs;
+			} while (freecnt < free_pgs[0]);
+			mdb_midl_sort(free_pgs);
+			memcpy(data.mv_data, free_pgs, data.mv_size);
+#if (MDB_DEBUG) > 1
+			{
+				unsigned int i = free_pgs[0];
+				DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u",
+					txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i));
+				for (; i; i--)
+					DPRINTF(("IDL %"Z"u", free_pgs[i]));
+			}
+#endif
+			continue;
+		}
+
+		mop = env->me_pghead;
+		mop_len = mop ? mop[0] : 0;
+
+		// Reserve records for me_pghead[]. Split it if multi-page,
+		// to avoid searching freeDB for a page range. Use keys in
+		// range [1,me_pglast]: Smaller than txnid of oldest reader.
+		if (total_room >= mop_len) {
+			if (total_room == mop_len || --more < 0)
+				break;
+		} else if (head_room >= maxfree_1pg && head_id > 1) {
+			// Keep current record (overflow page), add a new one
+			head_id--;
+			head_room = 0;
+		}
+		// (Re)write {key = head_id, IDL length = head_room}
+		total_room -= head_room;
+		head_room = mop_len - total_room;
+		if (head_room > maxfree_1pg && head_id > 1) {
+			// Overflow multi-page for part of me_pghead
+			head_room /= head_id; // amortize page sizes
+			head_room += maxfree_1pg - head_room % (maxfree_1pg + 1);
+		} else if (head_room < 0) {
+			// Rare case, not bothering to delete this record
+			head_room = 0;
+		}
+		key.mv_size = sizeof(head_id);
+		key.mv_data = &head_id;
+		data.mv_size = (head_room + 1) * sizeof(pgno_t);
+		rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
+		if (rc)
+			return rc;
+		// IDL is initially empty, zero out at least the length
+		pgs = (pgno_t *)data.mv_data;
+		j = head_room > clean_limit ? head_room : 0;
+		do {
+			pgs[j] = 0;
+		} while (--j >= 0);
+		total_room += head_room;
+	}
+
+	// Fill in the reserved me_pghead records
+	rc = MDB_SUCCESS;
+	if (mop_len) {
+		MDB_val key, data;
+
+		mop += mop_len;
+		rc = mdb_cursor_first(&mc, &key, &data);
+		for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) {
+			unsigned flags = MDB_CURRENT;
+			txnid_t id = *(txnid_t *)key.mv_data;
+			ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
+			MDB_ID save;
+
+			mdb_tassert(txn, len >= 0 && id <= env->me_pglast);
+			key.mv_data = &id;
+			if (len > mop_len) {
+				len = mop_len;
+				data.mv_size = (len + 1) * sizeof(MDB_ID);
+				flags = 0;
+			}
+			data.mv_data = mop -= len;
+			save = mop[0];
+			mop[0] = len;
+			rc = mdb_cursor_put(&mc, &key, &data, flags);
+			mop[0] = save;
+			if (rc || !(mop_len -= len))
+				break;
+		}
+	}
+	return rc;
+	*/
+	return nil
+}
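+
+// The freeDB records written above are keyed by txnid, with an IDL value
+// whose first word is the count, roughly:
+//
+//	key:   txnid (one word)
+//	value: [n, pgno1, pgno2, ..., pgnoN] // count first, then n page numbers
+//
+// Reserving the value with MDB_RESERVE and filling the slot afterwards
+// avoids copying the IDL twice while the freelist is still changing
+// underneath the loop.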
+ DPUTS("short write, filesystem full?"); + } + return rc; + } + n = 0; + } + if (i > pagecount) + break; + wpos = pos; + wsize = 0; + } + DPRINTF(("committing page %"Z"u", pgno)); + next_pos = pos + size; + iov[n].iov_len = size; + iov[n].iov_base = (char *)dp; + wsize += size; + n++; +#endif // _WIN32 + } + + for (i = keep; ++i <= pagecount; ) { + dp = dl[i].mptr; + // This is a page we skipped above + if (!dl[i].mid) { + dl[++j] = dl[i]; + dl[j].mid = dp->mp_pgno; + continue; + } + mdb_dpage_free(env, dp); + } + +done: + i--; + txn->mt_dirty_room += i - j; + dl[0].mid = j; + return MDB_SUCCESS; +} + +int +mdb_txn_commit(MDB_txn *txn) +{ + int rc; + unsigned int i; + MDB_env *env; + + if (txn == NULL || txn->mt_env == NULL) + return EINVAL; + + if (txn->mt_child) { + rc = mdb_txn_commit(txn->mt_child); + txn->mt_child = NULL; + if (rc) + goto fail; + } + + env = txn->mt_env; + + if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) { + mdb_dbis_update(txn, 1); + txn->mt_numdbs = 2; // so txn_abort() doesn't close any new handles + mdb_txn_abort(txn); + return MDB_SUCCESS; + } + + if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) { + DPUTS("error flag is set, can't commit"); + if (txn->mt_parent) + txn->mt_parent->mt_flags |= MDB_TXN_ERROR; + rc = MDB_BAD_TXN; + goto fail; + } + + if (txn->mt_parent) { + MDB_txn *parent = txn->mt_parent; + MDB_ID2L dst, src; + MDB_IDL pspill; + unsigned x, y, len, ps_len; + + // Append our free list to parent's + rc = mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs); + if (rc) + goto fail; + mdb_midl_free(txn->mt_free_pgs); + // Failures after this must either undo the changes + // to the parent or set MDB_TXN_ERROR in the parent. + + parent->mt_next_pgno = txn->mt_next_pgno; + parent->mt_flags = txn->mt_flags; + + // Merge our cursors into parent's and close them + mdb_cursors_close(txn, 1); + + // Update parent's DB table. 
+		memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
+		parent->mt_numdbs = txn->mt_numdbs;
+		parent->mt_dbflags[0] = txn->mt_dbflags[0];
+		parent->mt_dbflags[1] = txn->mt_dbflags[1];
+		for (i=2; i<txn->mt_numdbs; i++) {
+			// preserve parent's DB_NEW status
+			x = parent->mt_dbflags[i] & DB_NEW;
+			parent->mt_dbflags[i] = txn->mt_dbflags[i] | x;
+		}
+
+		dst = parent->mt_u.dirty_list;
+		src = txn->mt_u.dirty_list;
+		// Remove anything in our dirty list from parent's spill list
+		if ((pspill = parent->mt_spill_pgs) && (ps_len = pspill[0])) {
+			x = y = ps_len;
+			pspill[0] = (pgno_t)-1;
+			// Mark our dirty pages as deleted in parent spill list
+			for (i=0, len=src[0].mid; ++i <= len; ) {
+				MDB_ID pn = src[i].mid << 1;
+				while (pn > pspill[x])
+					x--;
+				if (pn == pspill[x]) {
+					pspill[x] = 1;
+					y = --x;
+				}
+			}
+			// Squash deleted pagenums if we deleted any
+			for (x=y; ++x <= ps_len; )
+				if (!(pspill[x] & 1))
+					pspill[++y] = pspill[x];
+			pspill[0] = y;
+		}
+
+		// Find len = length of merging our dirty list with parent's
+		x = dst[0].mid;
+		dst[0].mid = 0; // simplify loops
+		if (parent->mt_parent) {
+			len = x + src[0].mid;
+			y = mdb_mid2l_search(src, dst[x].mid + 1) - 1;
+			for (i = x; y && i; y--) {
+				pgno_t yp = src[y].mid;
+				while (yp < dst[i].mid)
+					i--;
+				if (yp == dst[i].mid) {
+					i--;
+					len--;
+				}
+			}
+		} else { // Simplify the above for single-ancestor case
+			len = MDB_IDL_UM_MAX - txn->mt_dirty_room;
+		}
+		// Merge our dirty list with parent's
+		y = src[0].mid;
+		for (i = len; y; dst[i--] = src[y--]) {
+			pgno_t yp = src[y].mid;
+			while (yp < dst[x].mid)
+				dst[i--] = dst[x--];
+			if (yp == dst[x].mid)
+				free(dst[x--].mptr);
+		}
+		mdb_tassert(txn, i == x);
+		dst[0].mid = len;
+		free(txn->mt_u.dirty_list);
+		parent->mt_dirty_room = txn->mt_dirty_room;
+		if (txn->mt_spill_pgs) {
+			if (parent->mt_spill_pgs) {
+				// TODO: Prevent failure here, so parent does not fail
+				rc = mdb_midl_append_list(&parent->mt_spill_pgs, txn->mt_spill_pgs);
+				if (rc)
+					parent->mt_flags |= MDB_TXN_ERROR;
+				mdb_midl_free(txn->mt_spill_pgs);
+				mdb_midl_sort(parent->mt_spill_pgs);
+			} else {
+				parent->mt_spill_pgs = txn->mt_spill_pgs;
+			}
+		}
+
+		parent->mt_child = NULL;
+		mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead);
+		free(txn);
+		return rc;
+	}
+
+	if (txn != env->me_txn) {
+		DPUTS("attempt to commit unknown transaction");
+		rc = EINVAL;
+		goto fail;
+	}
+
+	mdb_cursors_close(txn, 0);
+
+	if (!txn->mt_u.dirty_list[0].mid &&
+		!(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS)))
+		goto done;
+
+	DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u",
+		txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root));
+
+	// Update DB root pointers
+	if (txn->mt_numdbs > 2) {
+		MDB_cursor mc;
+		MDB_dbi i;
+		MDB_val data;
+		data.mv_size = sizeof(MDB_db);
+
+		mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
+		for (i = 2; i < txn->mt_numdbs; i++) {
+			if (txn->mt_dbflags[i] & DB_DIRTY) {
+				data.mv_data = &txn->mt_dbs[i];
+				rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0);
+				if (rc)
+					goto fail;
+			}
+		}
+	}
+
+	rc = mdb_freelist_save(txn);
+	if (rc)
+		goto fail;
+
+	mdb_midl_free(env->me_pghead);
+	env->me_pghead = NULL;
+	if (mdb_midl_shrink(&txn->mt_free_pgs))
+		env->me_free_pgs = txn->mt_free_pgs;
+
+#if (MDB_DEBUG) > 2
+	mdb_audit(txn);
+#endif
+
+	if ((rc = mdb_page_flush(txn, 0)) ||
+		(rc = mdb_env_sync(env, 0)) ||
+		(rc = mdb_env_write_meta(txn)))
+		goto fail;
+
+done:
+	env->me_pglast = 0;
+	env->me_txn = NULL;
+	mdb_dbis_update(txn, 1);
+
+	if (env->me_txns)
+		UNLOCK_MUTEX_W(env);
+	free(txn);
+
+	return MDB_SUCCESS;
+
+fail:
+	mdb_txn_abort(txn);
+	return rc;
+	*/
+	return nil
+}
+
+// Update the environment info to commit a transaction.
+// @param[in] txn the transaction that's being committed
+// @return 0 on success, non-zero on failure.
+func (t *transaction) writeMeta() error {
+	/*
+	MDB_env *env;
+	MDB_meta meta, metab, *mp;
+	off_t off;
+	int rc, len, toggle;
+	char *ptr;
+	HANDLE mfd;
+#ifdef _WIN32
+	OVERLAPPED ov;
+#else
+	int r2;
+#endif
+
+	toggle = txn->mt_txnid & 1;
+	DPRINTF(("writing meta page %d for root page %"Z"u",
+		toggle, txn->mt_dbs[MAIN_DBI].md_root));
+
+	env = txn->mt_env;
+	mp = env->me_metas[toggle];
+
+	if (env->me_flags & MDB_WRITEMAP) {
+		// Persist any increases of mapsize config
+		if (env->me_mapsize > mp->mm_mapsize)
+			mp->mm_mapsize = env->me_mapsize;
+		mp->mm_dbs[0] = txn->mt_dbs[0];
+		mp->mm_dbs[1] = txn->mt_dbs[1];
+		mp->mm_last_pg = txn->mt_next_pgno - 1;
+		mp->mm_txnid = txn->mt_txnid;
+		if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) {
+			unsigned meta_size = env->me_psize;
+			rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
+			ptr = env->me_map;
+			if (toggle) {
+#ifndef _WIN32 // POSIX msync() requires ptr = start of OS page
+				if (meta_size < env->me_os_psize)
+					meta_size += meta_size;
+				else
+#endif
+					ptr += meta_size;
+			}
+			if (MDB_MSYNC(ptr, meta_size, rc)) {
+				rc = ErrCode();
+				goto fail;
+			}
+		}
+		goto done;
+	}
+	metab.mm_txnid = env->me_metas[toggle]->mm_txnid;
+	metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg;
+
+	ptr = (char *)&meta;
+	if (env->me_mapsize > mp->mm_mapsize) {
+		// Persist any increases of mapsize config
+		meta.mm_mapsize = env->me_mapsize;
+		off = offsetof(MDB_meta, mm_mapsize);
+	} else {
+		off = offsetof(MDB_meta, mm_dbs[0].md_depth);
+	}
+	len = sizeof(MDB_meta) - off;
+
+	ptr += off;
+	meta.mm_dbs[0] = txn->mt_dbs[0];
+	meta.mm_dbs[1] = txn->mt_dbs[1];
+	meta.mm_last_pg = txn->mt_next_pgno - 1;
+	meta.mm_txnid = txn->mt_txnid;
+
+	if (toggle)
+		off += env->me_psize;
+	off += PAGEHDRSZ;
+
+	// Write to the SYNC fd
+	mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ?
+		env->me_fd : env->me_mfd;
+#ifdef _WIN32
+	{
+		memset(&ov, 0, sizeof(ov));
+		ov.Offset = off;
+		if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov))
+			rc = -1;
+	}
+#else
+	rc = pwrite(mfd, ptr, len, off);
+#endif
+	if (rc != len) {
+		rc = rc < 0 ? ErrCode() : EIO;
+		DPUTS("write failed, disk error?");
+		// On a failure, the pagecache still contains the new data.
+		// Write some old data back, to prevent it from being used.
+		// Use the non-SYNC fd; we know it will fail anyway.
+		meta.mm_last_pg = metab.mm_last_pg;
+		meta.mm_txnid = metab.mm_txnid;
+#ifdef _WIN32
+		memset(&ov, 0, sizeof(ov));
+		ov.Offset = off;
+		WriteFile(env->me_fd, ptr, len, NULL, &ov);
+#else
+		r2 = pwrite(env->me_fd, ptr, len, off);
+		(void)r2; // Silence warnings. We don't care about pwrite's return value
+#endif
+fail:
+		env->me_flags |= MDB_FATAL_ERROR;
+		return rc;
+	}
+done:
+	// Memory ordering issues are irrelevant; since the entire writer
+	// is wrapped by wmutex, all of these changes will become visible
+	// after the wmutex is unlocked. Since the DB is multi-version,
+	// readers will get consistent data regardless of how fresh or
+	// how stale their view of these values is.
+	if (env->me_txns)
+		env->me_txns->mti_txnid = txn->mt_txnid;
+
+	return MDB_SUCCESS;
+	*/
+	return nil
+}
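+
+// The toggle above double-buffers the two meta pages: txn N writes meta
+// page N%2, so a torn write can only corrupt the newer meta and recovery
+// falls back to the older one. A sketch with assumed Go names:
+//
+//	toggle := t.id & 1
+//	off := int64(pageHeaderSize)
+//	if toggle == 1 {
+//		off += int64(e.pageSize)
+//	}
+//	_, err := e.file.WriteAt(metaBuf, off) // e.file, metaBuf are placeholders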
+
+// Find the address of the page corresponding to a given page number.
+// @param[in] txn the transaction for this access.
+// @param[in] pgno the page number for the page to retrieve.
+// @param[out] ret address of a pointer where the page's address will be stored.
+// @param[out] lvl dirty_list inheritance level of found page. 1=current txn, 0=mapped page.
+// @return 0 on success, non-zero on failure.
+func (t *transaction) getPage(id int) (*page, int, error) {
+	/*
+	MDB_env *env = txn->mt_env;
+	MDB_page *p = NULL;
+	int level;
+
+	if (!((txn->mt_flags & MDB_TXN_RDONLY) | (env->me_flags & MDB_WRITEMAP))) {
+		MDB_txn *tx2 = txn;
+		level = 1;
+		do {
+			MDB_ID2L dl = tx2->mt_u.dirty_list;
+			unsigned x;
+			// Spilled pages were dirtied in this txn and flushed
+			// because the dirty list got full. Bring this page
+			// back in from the map (but don't unspill it here,
+			// leave that unless page_touch happens again).
+			if (tx2->mt_spill_pgs) {
+				MDB_ID pn = pgno << 1;
+				x = mdb_midl_search(tx2->mt_spill_pgs, pn);
+				if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
+					p = (MDB_page *)(env->me_map + env->me_psize * pgno);
+					goto done;
+				}
+			}
+			if (dl[0].mid) {
+				unsigned x = mdb_mid2l_search(dl, pgno);
+				if (x <= dl[0].mid && dl[x].mid == pgno) {
+					p = dl[x].mptr;
+					goto done;
+				}
+			}
+			level++;
+		} while ((tx2 = tx2->mt_parent) != NULL);
+	}
+
+	if (pgno < txn->mt_next_pgno) {
+		level = 0;
+		p = (MDB_page *)(env->me_map + env->me_psize * pgno);
+	} else {
+		DPRINTF(("page %"Z"u not found", pgno));
+		txn->mt_flags |= MDB_TXN_ERROR;
+		return MDB_PAGE_NOTFOUND;
+	}
+
+done:
+	*ret = p;
+	if (lvl)
+		*lvl = level;
+	return MDB_SUCCESS;
+	*/
+
+	return nil, 0, nil
+}
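+
+// For a clean page, the lookup above is pure pointer arithmetic into the
+// mmap. In Go that becomes a reslice rather than a cast (sketch; e.mmap as
+// a []byte and the pageInBuffer decode helper are assumptions):
+//
+//	off := pgno * e.pageSize
+//	buf := e.mmap[off : off+e.pageSize]
+//	p := pageInBuffer(buf) // decode the page header from the slice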
+
+// Return the data associated with a given node.
+// @param[in] txn The transaction for this operation.
+// @param[in] leaf The node being read.
+// @param[out] data Updated to point to the node's data.
+// @return 0 on success, non-zero on failure.
+func (t *transaction) readNode(leaf *node, data []byte) error {
+	/*
+	MDB_page *omp; // overflow page
+	pgno_t pgno;
+	int rc;
+
+	if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
+		data->mv_size = NODEDSZ(leaf);
+		data->mv_data = NODEDATA(leaf);
+		return MDB_SUCCESS;
+	}
+
+	// Read overflow data.
+	data->mv_size = NODEDSZ(leaf);
+	memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
+	if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) {
+		DPRINTF(("read overflow page %"Z"u failed", pgno));
+		return rc;
+	}
+	data->mv_data = METADATA(omp);
+
+	return MDB_SUCCESS;
+	*/
+	return nil
+}
+
+func (t *transaction) Get(bucket Bucket, key []byte) ([]byte, error) {
+	/*
+	MDB_cursor mc;
+	MDB_xcursor mx;
+	int exact = 0;
+	DKBUF;
+
+	if (key == NULL || data == NULL)
+		return EINVAL;
+
+	DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
+
+	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+		return EINVAL;
+
+	if (txn->mt_flags & MDB_TXN_ERROR)
+		return MDB_BAD_TXN;
+
+	mdb_cursor_init(&mc, txn, dbi, &mx);
+	return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
+	*/
+	return nil, nil
+}
+
+func (t *transaction) Cursor(b Bucket) (Cursor, error) {
+	/*
+	MDB_cursor *mc;
+	size_t size = sizeof(MDB_cursor);
+
+	if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+		return EINVAL;
+
+	if (txn->mt_flags & MDB_TXN_ERROR)
+		return MDB_BAD_TXN;
+
+	// Allow read access to the freelist
+	if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
+		return EINVAL;
+
+	if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT)
+		size += sizeof(MDB_xcursor);
+
+	if ((mc = malloc(size)) != NULL) {
+		mdb_cursor_init(mc, txn, dbi, (MDB_xcursor *)(mc + 1));
+		if (txn->mt_cursors) {
+			mc->mc_next = txn->mt_cursors[dbi];
+			txn->mt_cursors[dbi] = mc;
+			mc->mc_flags |= C_UNTRACK;
+		}
+	} else {
+		return ENOMEM;
+	}
+
+	*ret = mc;
+
+	return MDB_SUCCESS;
+	*/
+	return nil, nil
+}
+
+// RenewCursor rebinds an existing cursor to this transaction.
+func (t *transaction) RenewCursor(c Cursor) error {
+	/*
+	if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs)
+		return EINVAL;
+
+	if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors)
+		return EINVAL;
+
+	mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor);
+	return MDB_SUCCESS;
+	*/
+	return nil
+}
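+
+// Intended read path once these stubs are ported (illustrative; the bucket
+// name and iteration API are placeholders):
+//
+//	b, _ := t.Bucket("widgets", 0)
+//	v, _ := t.Get(b, []byte("foo"))
+//	c, _ := t.Cursor(b)
+//	// iterate with c, then RenewCursor(c) to reuse it in a later txn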
+
+func (t *transaction) Delete(b Bucket, key []byte, data []byte) error {
+	/*
+	MDB_cursor mc;
+	MDB_xcursor mx;
+	MDB_cursor_op op;
+	MDB_val rdata, *xdata;
+	int rc, exact;
+	DKBUF;
+
+	if (key == NULL)
+		return EINVAL;
+
+	DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key)));
+
+	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+		return EINVAL;
+
+	if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
+		return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
+
+	mdb_cursor_init(&mc, txn, dbi, &mx);
+
+	exact = 0;
+	if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
+		// must ignore any data
+		data = NULL;
+	}
+	if (data) {
+		op = MDB_GET_BOTH;
+		rdata = *data;
+		xdata = &rdata;
+	} else {
+		op = MDB_SET;
+		xdata = NULL;
+	}
+	rc = mdb_cursor_set(&mc, key, xdata, op, &exact);
+	if (rc == 0) {
+		// let mdb_page_split know about this cursor if needed:
+		// delete will trigger a rebalance; if it needs to move
+		// a node from one page to another, it will have to
+		// update the parent's separator key(s). If the new sepkey
+		// is larger than the current one, the parent page may
+		// run out of space, triggering a split. We need this
+		// cursor to be consistent until the end of the rebalance.
+		mc.mc_flags |= C_UNTRACK;
+		mc.mc_next = txn->mt_cursors[dbi];
+		txn->mt_cursors[dbi] = &mc;
+		rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA);
+		txn->mt_cursors[dbi] = mc.mc_next;
+	}
+	return rc;
+	*/
+	return nil
+}
+
+func (t *transaction) Put(b Bucket, key []byte, data []byte, flags int) error {
+	/*
+	MDB_cursor mc;
+	MDB_xcursor mx;
+
+	if (key == NULL || data == NULL)
+		return EINVAL;
+
+	if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+		return EINVAL;
+
+	if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
+		return EINVAL;
+
+	mdb_cursor_init(&mc, txn, dbi, &mx);
+	return mdb_cursor_put(&mc, key, data, flags);
+	*/
+	return nil
+}
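+
+// The flag check in Put is a whitelist: any bit outside the allowed set
+// rejects the call before touching the tree. The same idiom in Go
+// (constant names are placeholders for the eventual port):
+//
+//	const putMask = NoOverwrite | NoDupData | Reserve | Append | AppendDup
+//	if flags&^putMask != 0 {
+//		return EINVAL
+//	}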
+
+func (t *transaction) Bucket(name string, flags int) (Bucket, error) {
+	/*
+	MDB_val key, data;
+	MDB_dbi i;
+	MDB_cursor mc;
+	int rc, dbflag, exact;
+	unsigned int unused = 0;
+	size_t len;
+
+	if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
+		mdb_default_cmp(txn, FREE_DBI);
+	}
+
+	if ((flags & VALID_FLAGS) != flags)
+		return EINVAL;
+	if (txn->mt_flags & MDB_TXN_ERROR)
+		return MDB_BAD_TXN;
+
+	// main DB?
+	if (!name) {
+		*dbi = MAIN_DBI;
+		if (flags & PERSISTENT_FLAGS) {
+			uint16_t f2 = flags & PERSISTENT_FLAGS;
+			// make sure flag changes get committed
+			if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) != txn->mt_dbs[MAIN_DBI].md_flags) {
+				txn->mt_dbs[MAIN_DBI].md_flags |= f2;
+				txn->mt_flags |= MDB_TXN_DIRTY;
+			}
+		}
+		mdb_default_cmp(txn, MAIN_DBI);
+		return MDB_SUCCESS;
+	}
+
+	if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) {
+		mdb_default_cmp(txn, MAIN_DBI);
+	}
+
+	// Is the DB already open?
+	len = strlen(name);
+	for (i=2; i<txn->mt_numdbs; i++) {
+		if (!txn->mt_dbxs[i].md_name.mv_size) {
+			// Remember this free slot
+			if (!unused) unused = i;
+			continue;
+		}
+		if (len == txn->mt_dbxs[i].md_name.mv_size &&
+			!strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
+			*dbi = i;
+			return MDB_SUCCESS;
+		}
+	}
+
+	// If no free slot and max hit, fail
+	if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs)
+		return MDB_DBS_FULL;
+
+	// Cannot mix named databases with some mainDB flags
+	if (txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT|MDB_INTEGERKEY))
+		return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND;
+
+	// Find the DB info
+	dbflag = DB_NEW|DB_VALID;
+	exact = 0;
+	key.mv_size = len;
+	key.mv_data = (void *)name;
+	mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
+	rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact);
+	if (rc == MDB_SUCCESS) {
+		// make sure this is actually a DB
+		MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
+		if (!(node->mn_flags & F_SUBDATA))
+			return MDB_INCOMPATIBLE;
+	} else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
+		// Create if requested
+		MDB_db dummy;
+		data.mv_size = sizeof(MDB_db);
+		data.mv_data = &dummy;
+		memset(&dummy, 0, sizeof(dummy));
+		dummy.md_root = P_INVALID;
+		dummy.md_flags = flags & PERSISTENT_FLAGS;
+		rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
+		dbflag |= DB_DIRTY;
+	}
+
+	// OK, got info, add to table
+	if (rc == MDB_SUCCESS) {
+		unsigned int slot = unused ? unused : txn->mt_numdbs;
+		txn->mt_dbxs[slot].md_name.mv_data = strdup(name);
+		txn->mt_dbxs[slot].md_name.mv_size = len;
+		txn->mt_dbxs[slot].md_rel = NULL;
+		txn->mt_dbflags[slot] = dbflag;
+		memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
+		*dbi = slot;
+		mdb_default_cmp(txn, slot);
+		if (!unused) {
+			txn->mt_numdbs++;
+		}
+	}
+
+	return rc;
+	*/
+	return nil, nil
+}
+
+func (t *transaction) Stat(b Bucket) *Stat {
+	/*
+	if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
+		return EINVAL;
+
+	if (txn->mt_dbflags[dbi] & DB_STALE) {
+		MDB_cursor mc;
+		MDB_xcursor mx;
+		// Stale, must read the DB's root. cursor_init does it for us.
+		mdb_cursor_init(&mc, txn, dbi, &mx);
+	}
+	return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg);
+	*/
+	return nil
+}
+
+func (t *transaction) BucketFlags(b Bucket) (int, error) {
+	/*
+	// We could return the flags for the FREE_DBI too but what's the point?
+	if (txn == NULL || dbi < MAIN_DBI || dbi >= txn->mt_numdbs)
+		return EINVAL;
+	*flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS;
+	return MDB_SUCCESS;
+	*/
+	return 0, nil
+}
+
+func (t *transaction) Drop(b Bucket, del bool) error {
+	/*
+	MDB_cursor *mc, *m2;
+	int rc;
+
+	if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID))
+		return EINVAL;
+
+	if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
+		return EACCES;
+
+	rc = mdb_cursor_open(txn, dbi, &mc);
+	if (rc)
+		return rc;
+
+	rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT);
+	// Invalidate the dropped DB's cursors
+	for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)
+		m2->mc_flags &= ~(C_INITIALIZED|C_EOF);
+	if (rc)
+		goto leave;
+
+	// Can't delete the main DB
+	if (del && dbi > MAIN_DBI) {
+		rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL);
+		if (!rc) {
+			txn->mt_dbflags[dbi] = DB_STALE;
+			mdb_dbi_close(txn->mt_env, dbi);
+		}
+	} else {
+		// reset the DB record, mark it dirty
+		txn->mt_dbflags[dbi] |= DB_DIRTY;
+		txn->mt_dbs[dbi].md_depth = 0;
+		txn->mt_dbs[dbi].md_branch_pages = 0;
+		txn->mt_dbs[dbi].md_leaf_pages = 0;
+		txn->mt_dbs[dbi].md_overflow_pages = 0;
+		txn->mt_dbs[dbi].md_entries = 0;
+		txn->mt_dbs[dbi].md_root = P_INVALID;
+
+		txn->mt_flags |= MDB_TXN_DIRTY;
+	}
+leave:
+	mdb_cursor_close(mc);
+	return rc;
+	*/
+	return nil
+}