aboutsummaryrefslogtreecommitdiff
path: root/transaction.go
diff options
context:
space:
mode:
Diffstat (limited to 'transaction.go')
-rw-r--r--transaction.go2424
1 files changed, 1214 insertions, 1210 deletions
diff --git a/transaction.go b/transaction.go
index 1064e8e..de3bd0d 100644
--- a/transaction.go
+++ b/transaction.go
@@ -16,7 +16,7 @@ type Transaction interface {
type transaction struct {
id int
flags int
- db *db
+ db *DB
parent *transaction
child *transaction
nextPageNumber int
@@ -25,7 +25,7 @@ type transaction struct {
dirtyList []int
reader *reader
// TODO: bucketxs []*bucketx
- buckets []*bucket
+ buckets []*Bucket
bucketFlags []int
cursors []*cursor
// Implicit from slices? TODO: MDB_dbi mt_numdbs;
@@ -35,41 +35,40 @@ type transaction struct {
// ntxn represents a nested transaction.
type ntxn struct {
transaction *transaction /**< the transaction */
- pageState pageState /**< parent transaction's saved freestate */
+ pageState pageState /**< parent transaction's saved freestate */
}
-
func (t *transaction) allocPage(num int) *page {
/*
- MDB_env *env = txn->mt_env;
- MDB_page *ret = env->me_dpages;
- size_t psize = env->me_psize, sz = psize, off;
- // For ! #MDB_NOMEMINIT, psize counts how much to init.
- // For a single page alloc, we init everything after the page header.
- // For multi-page, we init the final page; if the caller needed that
- // many pages they will be filling in at least up to the last page.
- if (num == 1) {
- if (ret) {
- VGMEMP_ALLOC(env, ret, sz);
- VGMEMP_DEFINED(ret, sizeof(ret->mp_next));
- env->me_dpages = ret->mp_next;
- return ret;
+ MDB_env *env = txn->mt_env;
+ MDB_page *ret = env->me_dpages;
+ size_t psize = env->me_psize, sz = psize, off;
+ // For ! #MDB_NOMEMINIT, psize counts how much to init.
+ // For a single page alloc, we init everything after the page header.
+ // For multi-page, we init the final page; if the caller needed that
+ // many pages they will be filling in at least up to the last page.
+ if (num == 1) {
+ if (ret) {
+ VGMEMP_ALLOC(env, ret, sz);
+ VGMEMP_DEFINED(ret, sizeof(ret->mp_next));
+ env->me_dpages = ret->mp_next;
+ return ret;
+ }
+ psize -= off = PAGEHDRSZ;
+ } else {
+ sz *= num;
+ off = sz - psize;
}
- psize -= off = PAGEHDRSZ;
- } else {
- sz *= num;
- off = sz - psize;
- }
- if ((ret = malloc(sz)) != NULL) {
- VGMEMP_ALLOC(env, ret, sz);
- if (!(env->me_flags & MDB_NOMEMINIT)) {
- memset((char *)ret + off, 0, psize);
- ret->mp_pad = 0;
+ if ((ret = malloc(sz)) != NULL) {
+ VGMEMP_ALLOC(env, ret, sz);
+ if (!(env->me_flags & MDB_NOMEMINIT)) {
+ memset((char *)ret + off, 0, psize);
+ ret->mp_pad = 0;
+ }
+ } else {
+ txn->mt_flags |= MDB_TXN_ERROR;
}
- } else {
- txn->mt_flags |= MDB_TXN_ERROR;
- }
- return ret;
+ return ret;
*/
return nil
}
@@ -77,19 +76,19 @@ func (t *transaction) allocPage(num int) *page {
// Find oldest txnid still referenced. Expects txn->mt_txnid > 0.
func (t *transaction) oldest() int {
/*
- int i;
- txnid_t mr, oldest = txn->mt_txnid - 1;
- if (txn->mt_env->me_txns) {
- MDB_reader *r = txn->mt_env->me_txns->mti_readers;
- for (i = txn->mt_env->me_txns->mti_numreaders; --i >= 0; ) {
- if (r[i].mr_pid) {
- mr = r[i].mr_txnid;
- if (oldest > mr)
- oldest = mr;
+ int i;
+ txnid_t mr, oldest = txn->mt_txnid - 1;
+ if (txn->mt_env->me_txns) {
+ MDB_reader *r = txn->mt_env->me_txns->mti_readers;
+ for (i = txn->mt_env->me_txns->mti_numreaders; --i >= 0; ) {
+ if (r[i].mr_pid) {
+ mr = r[i].mr_txnid;
+ if (oldest > mr)
+ oldest = mr;
+ }
}
}
- }
- return oldest;
+ return oldest;
*/
return 0
}
@@ -97,19 +96,19 @@ func (t *transaction) oldest() int {
// Add a page to the txn's dirty list
func (t *transaction) dirty(p *page) {
/*
- MDB_ID2 mid;
- int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
-
- if (txn->mt_env->me_flags & MDB_WRITEMAP) {
- insert = mdb_mid2l_append;
- } else {
- insert = mdb_mid2l_insert;
- }
- mid.mid = mp->mp_pgno;
- mid.mptr = mp;
- rc = insert(txn->mt_u.dirty_list, &mid);
- mdb_tassert(txn, rc == 0);
- txn->mt_dirty_room--;
+ MDB_ID2 mid;
+ int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
+
+ if (txn->mt_env->me_flags & MDB_WRITEMAP) {
+ insert = mdb_mid2l_append;
+ } else {
+ insert = mdb_mid2l_insert;
+ }
+ mid.mid = mp->mp_pgno;
+ mid.mptr = mp;
+ rc = insert(txn->mt_u.dirty_list, &mid);
+ mdb_tassert(txn, rc == 0);
+ txn->mt_dirty_room--;
*/
}
@@ -122,53 +121,53 @@ func (t *transaction) dirty(p *page) {
// mp wasn't spilled.
func (t *transaction) unspill(p *page) *page {
/*
- MDB_env *env = txn->mt_env;
- const MDB_txn *tx2;
- unsigned x;
- pgno_t pgno = mp->mp_pgno, pn = pgno << 1;
-
- for (tx2 = txn; tx2; tx2=tx2->mt_parent) {
- if (!tx2->mt_spill_pgs)
- continue;
- x = mdb_midl_search(tx2->mt_spill_pgs, pn);
- if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
- MDB_page *np;
- int num;
- if (txn->mt_dirty_room == 0)
- return MDB_TXN_FULL;
- if (IS_OVERFLOW(mp))
- num = mp->mp_pages;
- else
- num = 1;
- if (env->me_flags & MDB_WRITEMAP) {
- np = mp;
- } else {
- np = mdb_page_malloc(txn, num);
- if (!np)
- return ENOMEM;
- if (num > 1)
- memcpy(np, mp, num * env->me_psize);
+ MDB_env *env = txn->mt_env;
+ const MDB_txn *tx2;
+ unsigned x;
+ pgno_t pgno = mp->mp_pgno, pn = pgno << 1;
+
+ for (tx2 = txn; tx2; tx2=tx2->mt_parent) {
+ if (!tx2->mt_spill_pgs)
+ continue;
+ x = mdb_midl_search(tx2->mt_spill_pgs, pn);
+ if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
+ MDB_page *np;
+ int num;
+ if (txn->mt_dirty_room == 0)
+ return MDB_TXN_FULL;
+ if (IS_OVERFLOW(mp))
+ num = mp->mp_pages;
else
- mdb_page_copy(np, mp, env->me_psize);
+ num = 1;
+ if (env->me_flags & MDB_WRITEMAP) {
+ np = mp;
+ } else {
+ np = mdb_page_malloc(txn, num);
+ if (!np)
+ return ENOMEM;
+ if (num > 1)
+ memcpy(np, mp, num * env->me_psize);
+ else
+ mdb_page_copy(np, mp, env->me_psize);
+ }
+ if (tx2 == txn) {
+ // If in current txn, this page is no longer spilled.
+ // If it happens to be the last page, truncate the spill list.
+ // Otherwise mark it as deleted by setting the LSB.
+ if (x == txn->mt_spill_pgs[0])
+ txn->mt_spill_pgs[0]--;
+ else
+ txn->mt_spill_pgs[x] |= 1;
+ } // otherwise, if belonging to a parent txn, the
+ // page remains spilled until child commits
+
+ mdb_page_dirty(txn, np);
+ np->mp_flags |= P_DIRTY;
+ *ret = np;
+ break;
}
- if (tx2 == txn) {
- // If in current txn, this page is no longer spilled.
- // If it happens to be the last page, truncate the spill list.
- // Otherwise mark it as deleted by setting the LSB.
- if (x == txn->mt_spill_pgs[0])
- txn->mt_spill_pgs[0]--;
- else
- txn->mt_spill_pgs[x] |= 1;
- } // otherwise, if belonging to a parent txn, the
- // page remains spilled until child commits
-
- mdb_page_dirty(txn, np);
- np->mp_flags |= P_DIRTY;
- *ret = np;
- break;
}
- }
- return MDB_SUCCESS;
+ return MDB_SUCCESS;
*/
return nil
}
@@ -176,37 +175,37 @@ func (t *transaction) unspill(p *page) *page {
// Back up parent txn's cursors, then grab the originals for tracking
func (t *transaction) shadow(dst *transaction) error {
/*
- MDB_cursor *mc, *bk;
- MDB_xcursor *mx;
- size_t size;
- int i;
-
- for (i = src->mt_numdbs; --i >= 0; ) {
- if ((mc = src->mt_cursors[i]) != NULL) {
- size = sizeof(MDB_cursor);
- if (mc->mc_xcursor)
- size += sizeof(MDB_xcursor);
- for (; mc; mc = bk->mc_next) {
- bk = malloc(size);
- if (!bk)
- return ENOMEM;
- *bk = *mc;
- mc->mc_backup = bk;
- mc->mc_db = &dst->mt_dbs[i];
- // Kill pointers into src - and dst to reduce abuse: The
- // user may not use mc until dst ends. Otherwise we'd...
- mc->mc_txn = NULL; // ...set this to dst
- mc->mc_dbflag = NULL; // ...and &dst->mt_dbflags[i]
- if ((mx = mc->mc_xcursor) != NULL) {
- *(MDB_xcursor *)(bk+1) = *mx;
- mx->mx_cursor.mc_txn = NULL; // ...and dst.
+ MDB_cursor *mc, *bk;
+ MDB_xcursor *mx;
+ size_t size;
+ int i;
+
+ for (i = src->mt_numdbs; --i >= 0; ) {
+ if ((mc = src->mt_cursors[i]) != NULL) {
+ size = sizeof(MDB_cursor);
+ if (mc->mc_xcursor)
+ size += sizeof(MDB_xcursor);
+ for (; mc; mc = bk->mc_next) {
+ bk = malloc(size);
+ if (!bk)
+ return ENOMEM;
+ *bk = *mc;
+ mc->mc_backup = bk;
+ mc->mc_db = &dst->mt_dbs[i];
+ // Kill pointers into src - and dst to reduce abuse: The
+ // user may not use mc until dst ends. Otherwise we'd...
+ mc->mc_txn = NULL; // ...set this to dst
+ mc->mc_dbflag = NULL; // ...and &dst->mt_dbflags[i]
+ if ((mx = mc->mc_xcursor) != NULL) {
+ *(MDB_xcursor *)(bk+1) = *mx;
+ mx->mx_cursor.mc_txn = NULL; // ...and dst.
+ }
+ mc->mc_next = dst->mt_cursors[i];
+ dst->mt_cursors[i] = mc;
}
- mc->mc_next = dst->mt_cursors[i];
- dst->mt_cursors[i] = mc;
}
}
- }
- return MDB_SUCCESS;
+ return MDB_SUCCESS;
*/
return nil
}
@@ -217,36 +216,36 @@ func (t *transaction) shadow(dst *transaction) error {
// @return 0 on success, non-zero on failure.
func (t *transaction) closeCursors(merge bool) {
/*
- MDB_cursor **cursors = txn->mt_cursors, *mc, *next, *bk;
- MDB_xcursor *mx;
- int i;
-
- for (i = txn->mt_numdbs; --i >= 0; ) {
- for (mc = cursors[i]; mc; mc = next) {
- next = mc->mc_next;
- if ((bk = mc->mc_backup) != NULL) {
- if (merge) {
- // Commit changes to parent txn
- mc->mc_next = bk->mc_next;
- mc->mc_backup = bk->mc_backup;
- mc->mc_txn = bk->mc_txn;
- mc->mc_db = bk->mc_db;
- mc->mc_dbflag = bk->mc_dbflag;
- if ((mx = mc->mc_xcursor) != NULL)
- mx->mx_cursor.mc_txn = bk->mc_txn;
- } else {
- // Abort nested txn
- *mc = *bk;
- if ((mx = mc->mc_xcursor) != NULL)
- *mx = *(MDB_xcursor *)(bk+1);
+ MDB_cursor **cursors = txn->mt_cursors, *mc, *next, *bk;
+ MDB_xcursor *mx;
+ int i;
+
+ for (i = txn->mt_numdbs; --i >= 0; ) {
+ for (mc = cursors[i]; mc; mc = next) {
+ next = mc->mc_next;
+ if ((bk = mc->mc_backup) != NULL) {
+ if (merge) {
+ // Commit changes to parent txn
+ mc->mc_next = bk->mc_next;
+ mc->mc_backup = bk->mc_backup;
+ mc->mc_txn = bk->mc_txn;
+ mc->mc_db = bk->mc_db;
+ mc->mc_dbflag = bk->mc_dbflag;
+ if ((mx = mc->mc_xcursor) != NULL)
+ mx->mx_cursor.mc_txn = bk->mc_txn;
+ } else {
+ // Abort nested txn
+ *mc = *bk;
+ if ((mx = mc->mc_xcursor) != NULL)
+ *mx = *(MDB_xcursor *)(bk+1);
+ }
+ mc = bk;
}
- mc = bk;
+ // Only malloced cursors are permanently tracked.
+ free(mc);
}
- // Only malloced cursors are permanently tracked.
- free(mc);
+ cursors[i] = NULL;
}
- cursors[i] = NULL;
- }
*/
}
@@ -255,168 +254,168 @@ func (t *transaction) closeCursors(merge bool) {
// @return 0 on success, non-zero on failure.
func (t *transaction) renew() error {
/*
- MDB_env *env = txn->mt_env;
- MDB_txninfo *ti = env->me_txns;
- MDB_meta *meta;
- unsigned int i, nr;
- uint16_t x;
- int rc, new_notls = 0;
-
- // Setup db info
- txn->mt_numdbs = env->me_numdbs;
- txn->mt_dbxs = env->me_dbxs; // mostly static anyway
-
- if (txn->mt_flags & MDB_TXN_RDONLY) {
- if (!ti) {
- meta = env->me_metas[ mdb_env_pick_meta(env) ];
- txn->mt_txnid = meta->mm_txnid;
- txn->mt_u.reader = NULL;
- } else {
- MDB_reader *r = (env->me_flags & MDB_NOTLS) ? txn->mt_u.reader :
- pthread_getspecific(env->me_txkey);
- if (r) {
- if (r->mr_pid != env->me_pid || r->mr_txnid != (txnid_t)-1)
- return MDB_BAD_RSLOT;
+ MDB_env *env = txn->mt_env;
+ MDB_txninfo *ti = env->me_txns;
+ MDB_meta *meta;
+ unsigned int i, nr;
+ uint16_t x;
+ int rc, new_notls = 0;
+
+ // Setup db info
+ txn->mt_numdbs = env->me_numdbs;
+ txn->mt_dbxs = env->me_dbxs; // mostly static anyway
+
+ if (txn->mt_flags & MDB_TXN_RDONLY) {
+ if (!ti) {
+ meta = env->me_metas[ mdb_env_pick_meta(env) ];
+ txn->mt_txnid = meta->mm_txnid;
+ txn->mt_u.reader = NULL;
+ } else {
+ MDB_reader *r = (env->me_flags & MDB_NOTLS) ? txn->mt_u.reader :
+ pthread_getspecific(env->me_txkey);
+ if (r) {
+ if (r->mr_pid != env->me_pid || r->mr_txnid != (txnid_t)-1)
+ return MDB_BAD_RSLOT;
+ } else {
+ MDB_PID_T pid = env->me_pid;
+ pthread_t tid = pthread_self();
+
+ if (!(env->me_flags & MDB_LIVE_READER)) {
+ rc = mdb_reader_pid(env, Pidset, pid);
+ if (rc)
+ return rc;
+ env->me_flags |= MDB_LIVE_READER;
+ }
+
+ LOCK_MUTEX_R(env);
+ nr = ti->mti_numreaders;
+ for (i=0; i<nr; i++)
+ if (ti->mti_readers[i].mr_pid == 0)
+ break;
+ if (i == env->me_maxreaders) {
+ UNLOCK_MUTEX_R(env);
+ return MDB_READERS_FULL;
+ }
+ ti->mti_readers[i].mr_pid = pid;
+ ti->mti_readers[i].mr_tid = tid;
+ if (i == nr)
+ ti->mti_numreaders = ++nr;
+ // Save numreaders for un-mutexed mdb_env_close()
+ env->me_numreaders = nr;
+ UNLOCK_MUTEX_R(env);
+
+ r = &ti->mti_readers[i];
+ new_notls = (env->me_flags & MDB_NOTLS);
+ if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) {
+ r->mr_pid = 0;
+ return rc;
+ }
+ }
+ txn->mt_txnid = r->mr_txnid = ti->mti_txnid;
+ txn->mt_u.reader = r;
+ meta = env->me_metas[txn->mt_txnid & 1];
+ }
} else {
- MDB_PID_T pid = env->me_pid;
- pthread_t tid = pthread_self();
+ if (ti) {
+ LOCK_MUTEX_W(env);
- if (!(env->me_flags & MDB_LIVE_READER)) {
- rc = mdb_reader_pid(env, Pidset, pid);
- if (rc)
- return rc;
- env->me_flags |= MDB_LIVE_READER;
+ txn->mt_txnid = ti->mti_txnid;
+ meta = env->me_metas[txn->mt_txnid & 1];
+ } else {
+ meta = env->me_metas[ mdb_env_pick_meta(env) ];
+ txn->mt_txnid = meta->mm_txnid;
}
+ txn->mt_txnid++;
+ #if MDB_DEBUG
+ if (txn->mt_txnid == mdb_debug_start)
+ mdb_debug = 1;
+ #endif
+ txn->mt_dirty_room = MDB_IDL_UM_MAX;
+ txn->mt_u.dirty_list = env->me_dirty_list;
+ txn->mt_u.dirty_list[0].mid = 0;
+ txn->mt_free_pgs = env->me_free_pgs;
+ txn->mt_free_pgs[0] = 0;
+ txn->mt_spill_pgs = NULL;
+ env->me_txn = txn;
+ }
- LOCK_MUTEX_R(env);
- nr = ti->mti_numreaders;
- for (i=0; i<nr; i++)
- if (ti->mti_readers[i].mr_pid == 0)
- break;
- if (i == env->me_maxreaders) {
- UNLOCK_MUTEX_R(env);
- return MDB_READERS_FULL;
- }
- ti->mti_readers[i].mr_pid = pid;
- ti->mti_readers[i].mr_tid = tid;
- if (i == nr)
- ti->mti_numreaders = ++nr;
- // Save numreaders for un-mutexed mdb_env_close()
- env->me_numreaders = nr;
- UNLOCK_MUTEX_R(env);
-
- r = &ti->mti_readers[i];
- new_notls = (env->me_flags & MDB_NOTLS);
- if (!new_notls && (rc=pthread_setspecific(env->me_txkey, r))) {
- r->mr_pid = 0;
- return rc;
- }
+ // Copy the DB info and flags
+ memcpy(txn->mt_dbs, meta->mm_dbs, 2 * sizeof(MDB_db));
+
+ // Moved to here to avoid a data race in read TXNs
+ txn->mt_next_pgno = meta->mm_last_pg+1;
+
+ for (i=2; i<txn->mt_numdbs; i++) {
+ x = env->me_dbflags[i];
+ txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS;
+ txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0;
}
- txn->mt_txnid = r->mr_txnid = ti->mti_txnid;
- txn->mt_u.reader = r;
- meta = env->me_metas[txn->mt_txnid & 1];
- }
- } else {
- if (ti) {
- LOCK_MUTEX_W(env);
+ txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID;
- txn->mt_txnid = ti->mti_txnid;
- meta = env->me_metas[txn->mt_txnid & 1];
- } else {
- meta = env->me_metas[ mdb_env_pick_meta(env) ];
- txn->mt_txnid = meta->mm_txnid;
- }
- txn->mt_txnid++;
-#if MDB_DEBUG
- if (txn->mt_txnid == mdb_debug_start)
- mdb_debug = 1;
-#endif
- txn->mt_dirty_room = MDB_IDL_UM_MAX;
- txn->mt_u.dirty_list = env->me_dirty_list;
- txn->mt_u.dirty_list[0].mid = 0;
- txn->mt_free_pgs = env->me_free_pgs;
- txn->mt_free_pgs[0] = 0;
- txn->mt_spill_pgs = NULL;
- env->me_txn = txn;
- }
-
- // Copy the DB info and flags
- memcpy(txn->mt_dbs, meta->mm_dbs, 2 * sizeof(MDB_db));
-
- // Moved to here to avoid a data race in read TXNs
- txn->mt_next_pgno = meta->mm_last_pg+1;
-
- for (i=2; i<txn->mt_numdbs; i++) {
- x = env->me_dbflags[i];
- txn->mt_dbs[i].md_flags = x & PERSISTENT_FLAGS;
- txn->mt_dbflags[i] = (x & MDB_VALID) ? DB_VALID|DB_STALE : 0;
- }
- txn->mt_dbflags[0] = txn->mt_dbflags[1] = DB_VALID;
-
- if (env->me_maxpg < txn->mt_next_pgno) {
- mdb_txn_reset0(txn, "renew0-mapfail");
- if (new_notls) {
- txn->mt_u.reader->mr_pid = 0;
- txn->mt_u.reader = NULL;
- }
- return MDB_MAP_RESIZED;
- }
+ if (env->me_maxpg < txn->mt_next_pgno) {
+ mdb_txn_reset0(txn, "renew0-mapfail");
+ if (new_notls) {
+ txn->mt_u.reader->mr_pid = 0;
+ txn->mt_u.reader = NULL;
+ }
+ return MDB_MAP_RESIZED;
+ }
- return MDB_SUCCESS;
+ return MDB_SUCCESS;
*/
return nil
}
func (t *transaction) Renew() error {
/*
- int rc;
-
- if (!txn || txn->mt_dbxs) // A reset txn has mt_dbxs==NULL
- return EINVAL;
-
- if (txn->mt_env->me_flags & MDB_FATAL_ERROR) {
- DPUTS("environment had fatal error, must shutdown!");
- return MDB_PANIC;
- }
-
- rc = mdb_txn_renew0(txn);
- if (rc == MDB_SUCCESS) {
- DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
- txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
- (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root));
- }
- return rc;
+ int rc;
+
+ if (!txn || txn->mt_dbxs) // A reset txn has mt_dbxs==NULL
+ return EINVAL;
+
+ if (txn->mt_env->me_flags & MDB_FATAL_ERROR) {
+ DPUTS("environment had fatal error, must shutdown!");
+ return MDB_PANIC;
+ }
+
+ rc = mdb_txn_renew0(txn);
+ if (rc == MDB_SUCCESS) {
+ DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
+ txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
+ (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root));
+ }
+ return rc;
*/
return nil
}
-func (t *transaction) DB() DB {
+func (t *transaction) DB() *DB {
return t.db
}
// Export or close DBI handles opened in this txn.
func (t *transaction) updateBuckets(keep bool) {
/*
- int i;
- MDB_dbi n = txn->mt_numdbs;
- MDB_env *env = txn->mt_env;
- unsigned char *tdbflags = txn->mt_dbflags;
-
- for (i = n; --i >= 2;) {
- if (tdbflags[i] & DB_NEW) {
- if (keep) {
- env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
- } else {
- char *ptr = env->me_dbxs[i].md_name.mv_data;
- env->me_dbxs[i].md_name.mv_data = NULL;
- env->me_dbxs[i].md_name.mv_size = 0;
- env->me_dbflags[i] = 0;
- free(ptr);
+ int i;
+ MDB_dbi n = txn->mt_numdbs;
+ MDB_env *env = txn->mt_env;
+ unsigned char *tdbflags = txn->mt_dbflags;
+
+ for (i = n; --i >= 2;) {
+ if (tdbflags[i] & DB_NEW) {
+ if (keep) {
+ env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
+ } else {
+ char *ptr = env->me_dbxs[i].md_name.mv_data;
+ env->me_dbxs[i].md_name.mv_data = NULL;
+ env->me_dbxs[i].md_name.mv_size = 0;
+ env->me_dbflags[i] = 0;
+ free(ptr);
+ }
}
}
- }
- if (keep && env->me_numdbs < n)
- env->me_numdbs = n;
+ if (keep && env->me_numdbs < n)
+ env->me_numdbs = n;
*/
}
@@ -426,80 +425,80 @@ func (t *transaction) updateBuckets(keep bool) {
// @param[in] act why the transaction is being reset
func (t *transaction) reset(act string) {
/*
- MDB_env *env = txn->mt_env;
+ MDB_env *env = txn->mt_env;
- // Close any DBI handles opened in this txn
- mdb_dbis_update(txn, 0);
+ // Close any DBI handles opened in this txn
+ mdb_dbis_update(txn, 0);
- DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
- act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
- (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root));
+ DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
+ act, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
+ (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root));
- if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
- if (txn->mt_u.reader) {
- txn->mt_u.reader->mr_txnid = (txnid_t)-1;
- if (!(env->me_flags & MDB_NOTLS))
- txn->mt_u.reader = NULL; // txn does not own reader
- }
- txn->mt_numdbs = 0; // close nothing if called again
- txn->mt_dbxs = NULL; // mark txn as reset
- } else {
- mdb_cursors_close(txn, 0);
+ if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
+ if (txn->mt_u.reader) {
+ txn->mt_u.reader->mr_txnid = (txnid_t)-1;
+ if (!(env->me_flags & MDB_NOTLS))
+ txn->mt_u.reader = NULL; // txn does not own reader
+ }
+ txn->mt_numdbs = 0; // close nothing if called again
+ txn->mt_dbxs = NULL; // mark txn as reset
+ } else {
+ mdb_cursors_close(txn, 0);
- if (!(env->me_flags & MDB_WRITEMAP)) {
- mdb_dlist_free(txn);
- }
- mdb_midl_free(env->me_pghead);
-
- if (txn->mt_parent) {
- txn->mt_parent->mt_child = NULL;
- env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate;
- mdb_midl_free(txn->mt_free_pgs);
- mdb_midl_free(txn->mt_spill_pgs);
- free(txn->mt_u.dirty_list);
- return;
- }
+ if (!(env->me_flags & MDB_WRITEMAP)) {
+ mdb_dlist_free(txn);
+ }
+ mdb_midl_free(env->me_pghead);
+
+ if (txn->mt_parent) {
+ txn->mt_parent->mt_child = NULL;
+ env->me_pgstate = ((MDB_ntxn *)txn)->mnt_pgstate;
+ mdb_midl_free(txn->mt_free_pgs);
+ mdb_midl_free(txn->mt_spill_pgs);
+ free(txn->mt_u.dirty_list);
+ return;
+ }
- if (mdb_midl_shrink(&txn->mt_free_pgs))
- env->me_free_pgs = txn->mt_free_pgs;
- env->me_pghead = NULL;
- env->me_pglast = 0;
+ if (mdb_midl_shrink(&txn->mt_free_pgs))
+ env->me_free_pgs = txn->mt_free_pgs;
+ env->me_pghead = NULL;
+ env->me_pglast = 0;
- env->me_txn = NULL;
- // The writer mutex was locked in mdb_txn_begin.
- if (env->me_txns)
- UNLOCK_MUTEX_W(env);
- }
+ env->me_txn = NULL;
+ // The writer mutex was locked in mdb_txn_begin.
+ if (env->me_txns)
+ UNLOCK_MUTEX_W(env);
+ }
*/
}
func (t *transaction) Reset() {
/*
- if (txn == NULL)
- return;
+ if (txn == NULL)
+ return;
- // This call is only valid for read-only txns
- if (!(txn->mt_flags & MDB_TXN_RDONLY))
- return;
+ // This call is only valid for read-only txns
+ if (!(txn->mt_flags & MDB_TXN_RDONLY))
+ return;
- mdb_txn_reset0(txn, "reset");
+ mdb_txn_reset0(txn, "reset");
*/
}
func (t *transaction) Abort() {
/*
- if (txn == NULL)
- return;
+ if (txn == NULL)
+ return;
- if (txn->mt_child)
- mdb_txn_abort(txn->mt_child);
+ if (txn->mt_child)
+ mdb_txn_abort(txn->mt_child);
- mdb_txn_reset0(txn, "abort");
- // Free reader slot tied to this txn (if MDB_NOTLS && writable FS)
- if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader)
- txn->mt_u.reader->mr_pid = 0;
+ mdb_txn_reset0(txn, "abort");
+ // Free reader slot tied to this txn (if MDB_NOTLS && writable FS)
+ if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader)
+ txn->mt_u.reader->mr_pid = 0;
- free(txn);
+ free(txn);
*/
}
@@ -507,154 +506,154 @@ func (t *transaction) Abort() {
// This changes the freelist. Keep trying until it stabilizes.
func (t *transaction) saveFreelist() error {
/*
- // env->me_pghead[] can grow and shrink during this call.
- // env->me_pglast and txn->mt_free_pgs[] can only grow.
- // Page numbers cannot disappear from txn->mt_free_pgs[].
- MDB_cursor mc;
- MDB_env *env = txn->mt_env;
- int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1;
- txnid_t pglast = 0, head_id = 0;
- pgno_t freecnt = 0, *free_pgs, *mop;
- ssize_t head_room = 0, total_room = 0, mop_len, clean_limit;
-
- mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
-
- if (env->me_pghead) {
- // Make sure first page of freeDB is touched and on freelist
- rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST|MDB_PS_MODIFY);
- if (rc && rc != MDB_NOTFOUND)
- return rc;
- }
+ // env->me_pghead[] can grow and shrink during this call.
+ // env->me_pglast and txn->mt_free_pgs[] can only grow.
+ // Page numbers cannot disappear from txn->mt_free_pgs[].
+ MDB_cursor mc;
+ MDB_env *env = txn->mt_env;
+ int rc, maxfree_1pg = env->me_maxfree_1pg, more = 1;
+ txnid_t pglast = 0, head_id = 0;
+ pgno_t freecnt = 0, *free_pgs, *mop;
+ ssize_t head_room = 0, total_room = 0, mop_len, clean_limit;
+
+ mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
+
+ if (env->me_pghead) {
+ // Make sure first page of freeDB is touched and on freelist
+ rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST|MDB_PS_MODIFY);
+ if (rc && rc != MDB_NOTFOUND)
+ return rc;
+ }
- // MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP)
- clean_limit = (env->me_flags & (MDB_NOMEMINIT|MDB_WRITEMAP))
- ? SSIZE_MAX : maxfree_1pg;
+ // MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP)
+ clean_limit = (env->me_flags & (MDB_NOMEMINIT|MDB_WRITEMAP))
+ ? SSIZE_MAX : maxfree_1pg;
- for (;;) {
- // Come back here after each Put() in case freelist changed
- MDB_val key, data;
- pgno_t *pgs;
- ssize_t j;
+ for (;;) {
+ // Come back here after each Put() in case freelist changed
+ MDB_val key, data;
+ pgno_t *pgs;
+ ssize_t j;
- // If using records from freeDB which we have not yet
- // deleted, delete them and any we reserved for me_pghead.
- while (pglast < env->me_pglast) {
- rc = mdb_cursor_first(&mc, &key, NULL);
- if (rc)
- return rc;
- pglast = head_id = *(txnid_t *)key.mv_data;
- total_room = head_room = 0;
- mdb_tassert(txn, pglast <= env->me_pglast);
- rc = mdb_cursor_del(&mc, 0);
- if (rc)
- return rc;
- }
+ // If using records from freeDB which we have not yet
+ // deleted, delete them and any we reserved for me_pghead.
+ while (pglast < env->me_pglast) {
+ rc = mdb_cursor_first(&mc, &key, NULL);
+ if (rc)
+ return rc;
+ pglast = head_id = *(txnid_t *)key.mv_data;
+ total_room = head_room = 0;
+ mdb_tassert(txn, pglast <= env->me_pglast);
+ rc = mdb_cursor_del(&mc, 0);
+ if (rc)
+ return rc;
+ }
- // Save the IDL of pages freed by this txn, to a single record
- if (freecnt < txn->mt_free_pgs[0]) {
- if (!freecnt) {
- // Make sure last page of freeDB is touched and on freelist
- rc = mdb_page_search(&mc, NULL, MDB_PS_LAST|MDB_PS_MODIFY);
- if (rc && rc != MDB_NOTFOUND)
- return rc;
- }
- free_pgs = txn->mt_free_pgs;
- // Write to last page of freeDB
- key.mv_size = sizeof(txn->mt_txnid);
- key.mv_data = &txn->mt_txnid;
- do {
- freecnt = free_pgs[0];
- data.mv_size = MDB_IDL_SIZEOF(free_pgs);
+ // Save the IDL of pages freed by this txn, to a single record
+ if (freecnt < txn->mt_free_pgs[0]) {
+ if (!freecnt) {
+ // Make sure last page of freeDB is touched and on freelist
+ rc = mdb_page_search(&mc, NULL, MDB_PS_LAST|MDB_PS_MODIFY);
+ if (rc && rc != MDB_NOTFOUND)
+ return rc;
+ }
+ free_pgs = txn->mt_free_pgs;
+ // Write to last page of freeDB
+ key.mv_size = sizeof(txn->mt_txnid);
+ key.mv_data = &txn->mt_txnid;
+ do {
+ freecnt = free_pgs[0];
+ data.mv_size = MDB_IDL_SIZEOF(free_pgs);
+ rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
+ if (rc)
+ return rc;
+ // Retry if mt_free_pgs[] grew during the Put()
+ free_pgs = txn->mt_free_pgs;
+ } while (freecnt < free_pgs[0]);
+ mdb_midl_sort(free_pgs);
+ memcpy(data.mv_data, free_pgs, data.mv_size);
+ #if (MDB_DEBUG) > 1
+ {
+ unsigned int i = free_pgs[0];
+ DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u",
+ txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i));
+ for (; i; i--)
+ DPRINTF(("IDL %"Z"u", free_pgs[i]));
+ }
+ #endif
+ continue;
+ }
+
+ mop = env->me_pghead;
+ mop_len = mop ? mop[0] : 0;
+
+ // Reserve records for me_pghead[]. Split it if multi-page,
+ // to avoid searching freeDB for a page range. Use keys in
+ // range [1,me_pglast]: Smaller than txnid of oldest reader.
+ if (total_room >= mop_len) {
+ if (total_room == mop_len || --more < 0)
+ break;
+ } else if (head_room >= maxfree_1pg && head_id > 1) {
+ // Keep current record (overflow page), add a new one
+ head_id--;
+ head_room = 0;
+ }
+ // (Re)write {key = head_id, IDL length = head_room}
+ total_room -= head_room;
+ head_room = mop_len - total_room;
+ if (head_room > maxfree_1pg && head_id > 1) {
+ // Overflow multi-page for part of me_pghead
+ head_room /= head_id; // amortize page sizes
+ head_room += maxfree_1pg - head_room % (maxfree_1pg + 1);
+ } else if (head_room < 0) {
+ // Rare case, not bothering to delete this record
+ head_room = 0;
+ }
+ key.mv_size = sizeof(head_id);
+ key.mv_data = &head_id;
+ data.mv_size = (head_room + 1) * sizeof(pgno_t);
rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
if (rc)
return rc;
- // Retry if mt_free_pgs[] grew during the Put()
- free_pgs = txn->mt_free_pgs;
- } while (freecnt < free_pgs[0]);
- mdb_midl_sort(free_pgs);
- memcpy(data.mv_data, free_pgs, data.mv_size);
-#if (MDB_DEBUG) > 1
- {
- unsigned int i = free_pgs[0];
- DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u",
- txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i));
- for (; i; i--)
- DPRINTF(("IDL %"Z"u", free_pgs[i]));
+ // IDL is initially empty, zero out at least the length
+ pgs = (pgno_t *)data.mv_data;
+ j = head_room > clean_limit ? head_room : 0;
+ do {
+ pgs[j] = 0;
+ } while (--j >= 0);
+ total_room += head_room;
}
-#endif
- continue;
- }
-
- mop = env->me_pghead;
- mop_len = mop ? mop[0] : 0;
- // Reserve records for me_pghead[]. Split it if multi-page,
- // to avoid searching freeDB for a page range. Use keys in
- // range [1,me_pglast]: Smaller than txnid of oldest reader.
- if (total_room >= mop_len) {
- if (total_room == mop_len || --more < 0)
- break;
- } else if (head_room >= maxfree_1pg && head_id > 1) {
- // Keep current record (overflow page), add a new one
- head_id--;
- head_room = 0;
- }
- // (Re)write {key = head_id, IDL length = head_room}
- total_room -= head_room;
- head_room = mop_len - total_room;
- if (head_room > maxfree_1pg && head_id > 1) {
- // Overflow multi-page for part of me_pghead
- head_room /= head_id; // amortize page sizes
- head_room += maxfree_1pg - head_room % (maxfree_1pg + 1);
- } else if (head_room < 0) {
- // Rare case, not bothering to delete this record
- head_room = 0;
- }
- key.mv_size = sizeof(head_id);
- key.mv_data = &head_id;
- data.mv_size = (head_room + 1) * sizeof(pgno_t);
- rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
- if (rc)
- return rc;
- // IDL is initially empty, zero out at least the length
- pgs = (pgno_t *)data.mv_data;
- j = head_room > clean_limit ? head_room : 0;
- do {
- pgs[j] = 0;
- } while (--j >= 0);
- total_room += head_room;
- }
-
- // Fill in the reserved me_pghead records
- rc = MDB_SUCCESS;
- if (mop_len) {
- MDB_val key, data;
-
- mop += mop_len;
- rc = mdb_cursor_first(&mc, &key, &data);
- for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) {
- unsigned flags = MDB_CURRENT;
- txnid_t id = *(txnid_t *)key.mv_data;
- ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
- MDB_ID save;
-
- mdb_tassert(txn, len >= 0 && id <= env->me_pglast);
- key.mv_data = &id;
- if (len > mop_len) {
- len = mop_len;
- data.mv_size = (len + 1) * sizeof(MDB_ID);
- flags = 0;
+ // Fill in the reserved me_pghead records
+ rc = MDB_SUCCESS;
+ if (mop_len) {
+ MDB_val key, data;
+
+ mop += mop_len;
+ rc = mdb_cursor_first(&mc, &key, &data);
+ for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) {
+ unsigned flags = MDB_CURRENT;
+ txnid_t id = *(txnid_t *)key.mv_data;
+ ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
+ MDB_ID save;
+
+ mdb_tassert(txn, len >= 0 && id <= env->me_pglast);
+ key.mv_data = &id;
+ if (len > mop_len) {
+ len = mop_len;
+ data.mv_size = (len + 1) * sizeof(MDB_ID);
+ flags = 0;
+ }
+ data.mv_data = mop -= len;
+ save = mop[0];
+ mop[0] = len;
+ rc = mdb_cursor_put(&mc, &key, &data, flags);
+ mop[0] = save;
+ if (rc || !(mop_len -= len))
+ break;
+ }
}
- data.mv_data = mop -= len;
- save = mop[0];
- mop[0] = len;
- rc = mdb_cursor_put(&mc, &key, &data, flags);
- mop[0] = save;
- if (rc || !(mop_len -= len))
- break;
- }
- }
- return rc;
+ return rc;
*/
return nil
}
@@ -663,342 +662,342 @@ func (t *transaction) saveFreelist() error {
// @param[in] txn the transaction that's being committed
// @param[in] keep number of initial pages in dirty_list to keep dirty.
// @return 0 on success, non-zero on failure.
-func (t *transaction) flush(keep bool) {
+func (t *transaction) flush(keep bool) error {
/*
- MDB_env *env = txn->mt_env;
- MDB_ID2L dl = txn->mt_u.dirty_list;
- unsigned psize = env->me_psize, j;
- int i, pagecount = dl[0].mid, rc;
- size_t size = 0, pos = 0;
- pgno_t pgno = 0;
- MDB_page *dp = NULL;
-#ifdef _WIN32
- OVERLAPPED ov;
-#else
- struct iovec iov[MDB_COMMIT_PAGES];
- ssize_t wpos = 0, wsize = 0, wres;
- size_t next_pos = 1; // impossible pos, so pos != next_pos
- int n = 0;
-#endif
-
- j = i = keep;
-
- if (env->me_flags & MDB_WRITEMAP) {
- // Clear dirty flags
- while (++i <= pagecount) {
- dp = dl[i].mptr;
- // Don't flush this page yet
- if (dp->mp_flags & P_KEEP) {
- dp->mp_flags ^= P_KEEP;
- dl[++j] = dl[i];
- continue;
- }
- dp->mp_flags &= ~P_DIRTY;
- }
- goto done;
- }
-
- // Write the pages
- for (;;) {
- if (++i <= pagecount) {
- dp = dl[i].mptr;
- // Don't flush this page yet
- if (dp->mp_flags & P_KEEP) {
- dp->mp_flags ^= P_KEEP;
- dl[i].mid = 0;
- continue;
- }
- pgno = dl[i].mid;
- // clear dirty flag
- dp->mp_flags &= ~P_DIRTY;
- pos = pgno * psize;
- size = psize;
- if (IS_OVERFLOW(dp)) size *= dp->mp_pages;
- }
-#ifdef _WIN32
- else break;
-
- // Windows actually supports scatter/gather I/O, but only on
- // unbuffered file handles. Since we're relying on the OS page
- // cache for all our data, that's self-defeating. So we just
- // write pages one at a time. We use the ov structure to set
- // the write offset, to at least save the overhead of a Seek
- // system call.
- DPRINTF(("committing page %"Z"u", pgno));
- memset(&ov, 0, sizeof(ov));
- ov.Offset = pos & 0xffffffff;
- ov.OffsetHigh = pos >> 16 >> 16;
- if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) {
- rc = ErrCode();
- DPRINTF(("WriteFile: %d", rc));
- return rc;
- }
-#else
- // Write up to MDB_COMMIT_PAGES dirty pages at a time.
- if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) {
- if (n) {
- // Write previous page(s)
-#ifdef MDB_USE_PWRITEV
- wres = pwritev(env->me_fd, iov, n, wpos);
-#else
- if (n == 1) {
- wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos);
- } else {
- if (lseek(env->me_fd, wpos, SEEK_SET) == -1) {
- rc = ErrCode();
- DPRINTF(("lseek: %s", strerror(rc)));
- return rc;
+ MDB_env *env = txn->mt_env;
+ MDB_ID2L dl = txn->mt_u.dirty_list;
+ unsigned psize = env->me_psize, j;
+ int i, pagecount = dl[0].mid, rc;
+ size_t size = 0, pos = 0;
+ pgno_t pgno = 0;
+ MDB_page *dp = NULL;
+ #ifdef _WIN32
+ OVERLAPPED ov;
+ #else
+ struct iovec iov[MDB_COMMIT_PAGES];
+ ssize_t wpos = 0, wsize = 0, wres;
+ size_t next_pos = 1; // impossible pos, so pos != next_pos
+ int n = 0;
+ #endif
+
+ j = i = keep;
+
+ if (env->me_flags & MDB_WRITEMAP) {
+ // Clear dirty flags
+ while (++i <= pagecount) {
+ dp = dl[i].mptr;
+ // Don't flush this page yet
+ if (dp->mp_flags & P_KEEP) {
+ dp->mp_flags ^= P_KEEP;
+ dl[++j] = dl[i];
+ continue;
}
- wres = writev(env->me_fd, iov, n);
+ dp->mp_flags &= ~P_DIRTY;
}
-#endif
- if (wres != wsize) {
- if (wres < 0) {
- rc = ErrCode();
- DPRINTF(("Write error: %s", strerror(rc)));
- } else {
- rc = EIO; // TODO: Use which error code?
- DPUTS("short write, filesystem full?");
+ goto done;
+ }
+
+ // Write the pages
+ for (;;) {
+ if (++i <= pagecount) {
+ dp = dl[i].mptr;
+ // Don't flush this page yet
+ if (dp->mp_flags & P_KEEP) {
+ dp->mp_flags ^= P_KEEP;
+ dl[i].mid = 0;
+ continue;
}
+ pgno = dl[i].mid;
+ // clear dirty flag
+ dp->mp_flags &= ~P_DIRTY;
+ pos = pgno * psize;
+ size = psize;
+ if (IS_OVERFLOW(dp)) size *= dp->mp_pages;
+ }
+ #ifdef _WIN32
+ else break;
+
+ // Windows actually supports scatter/gather I/O, but only on
+ // unbuffered file handles. Since we're relying on the OS page
+ // cache for all our data, that's self-defeating. So we just
+ // write pages one at a time. We use the ov structure to set
+ // the write offset, to at least save the overhead of a Seek
+ // system call.
+ DPRINTF(("committing page %"Z"u", pgno));
+ memset(&ov, 0, sizeof(ov));
+ ov.Offset = pos & 0xffffffff;
+ ov.OffsetHigh = pos >> 16 >> 16;
+ if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) {
+ rc = ErrCode();
+ DPRINTF(("WriteFile: %d", rc));
return rc;
}
- n = 0;
- }
- if (i > pagecount)
- break;
- wpos = pos;
- wsize = 0;
- }
- DPRINTF(("committing page %"Z"u", pgno));
- next_pos = pos + size;
- iov[n].iov_len = size;
- iov[n].iov_base = (char *)dp;
- wsize += size;
- n++;
-#endif // _WIN32
- }
-
- for (i = keep; ++i <= pagecount; ) {
- dp = dl[i].mptr;
- // This is a page we skipped above
- if (!dl[i].mid) {
- dl[++j] = dl[i];
- dl[j].mid = dp->mp_pgno;
- continue;
- }
- mdb_dpage_free(env, dp);
- }
-
-done:
- i--;
- txn->mt_dirty_room += i - j;
- dl[0].mid = j;
- return MDB_SUCCESS;
-}
-
-int
-mdb_txn_commit(MDB_txn *txn)
-{
- int rc;
- unsigned int i;
- MDB_env *env;
-
- if (txn == NULL || txn->mt_env == NULL)
- return EINVAL;
-
- if (txn->mt_child) {
- rc = mdb_txn_commit(txn->mt_child);
- txn->mt_child = NULL;
- if (rc)
- goto fail;
- }
-
- env = txn->mt_env;
-
- if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
- mdb_dbis_update(txn, 1);
- txn->mt_numdbs = 2; // so txn_abort() doesn't close any new handles
- mdb_txn_abort(txn);
- return MDB_SUCCESS;
- }
-
- if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) {
- DPUTS("error flag is set, can't commit");
- if (txn->mt_parent)
- txn->mt_parent->mt_flags |= MDB_TXN_ERROR;
- rc = MDB_BAD_TXN;
- goto fail;
- }
-
- if (txn->mt_parent) {
- MDB_txn *parent = txn->mt_parent;
- MDB_ID2L dst, src;
- MDB_IDL pspill;
- unsigned x, y, len, ps_len;
-
- // Append our free list to parent's
- rc = mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs);
- if (rc)
- goto fail;
- mdb_midl_free(txn->mt_free_pgs);
- // Failures after this must either undo the changes
- // to the parent or set MDB_TXN_ERROR in the parent.
-
- parent->mt_next_pgno = txn->mt_next_pgno;
- parent->mt_flags = txn->mt_flags;
-
- // Merge our cursors into parent's and close them
- mdb_cursors_close(txn, 1);
-
- // Update parent's DB table.
- memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
- parent->mt_numdbs = txn->mt_numdbs;
- parent->mt_dbflags[0] = txn->mt_dbflags[0];
- parent->mt_dbflags[1] = txn->mt_dbflags[1];
- for (i=2; i<txn->mt_numdbs; i++) {
- // preserve parent's DB_NEW status
- x = parent->mt_dbflags[i] & DB_NEW;
- parent->mt_dbflags[i] = txn->mt_dbflags[i] | x;
- }
-
- dst = parent->mt_u.dirty_list;
- src = txn->mt_u.dirty_list;
- // Remove anything in our dirty list from parent's spill list
- if ((pspill = parent->mt_spill_pgs) && (ps_len = pspill[0])) {
- x = y = ps_len;
- pspill[0] = (pgno_t)-1;
- // Mark our dirty pages as deleted in parent spill list
- for (i=0, len=src[0].mid; ++i <= len; ) {
- MDB_ID pn = src[i].mid << 1;
- while (pn > pspill[x])
- x--;
- if (pn == pspill[x]) {
- pspill[x] = 1;
- y = --x;
+ #else
+ // Write up to MDB_COMMIT_PAGES dirty pages at a time.
+ if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) {
+ if (n) {
+ // Write previous page(s)
+ #ifdef MDB_USE_PWRITEV
+ wres = pwritev(env->me_fd, iov, n, wpos);
+ #else
+ if (n == 1) {
+ wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos);
+ } else {
+ if (lseek(env->me_fd, wpos, SEEK_SET) == -1) {
+ rc = ErrCode();
+ DPRINTF(("lseek: %s", strerror(rc)));
+ return rc;
+ }
+ wres = writev(env->me_fd, iov, n);
+ }
+ #endif
+ if (wres != wsize) {
+ if (wres < 0) {
+ rc = ErrCode();
+ DPRINTF(("Write error: %s", strerror(rc)));
+ } else {
+ rc = EIO; // TODO: Use which error code?
+ DPUTS("short write, filesystem full?");
+ }
+ return rc;
+ }
+ n = 0;
+ }
+ if (i > pagecount)
+ break;
+ wpos = pos;
+ wsize = 0;
}
+ DPRINTF(("committing page %"Z"u", pgno));
+ next_pos = pos + size;
+ iov[n].iov_len = size;
+ iov[n].iov_base = (char *)dp;
+ wsize += size;
+ n++;
+ #endif // _WIN32
}
- // Squash deleted pagenums if we deleted any
- for (x=y; ++x <= ps_len; )
- if (!(pspill[x] & 1))
- pspill[++y] = pspill[x];
- pspill[0] = y;
- }
- // Find len = length of merging our dirty list with parent's
- x = dst[0].mid;
- dst[0].mid = 0; // simplify loops
- if (parent->mt_parent) {
- len = x + src[0].mid;
- y = mdb_mid2l_search(src, dst[x].mid + 1) - 1;
- for (i = x; y && i; y--) {
- pgno_t yp = src[y].mid;
- while (yp < dst[i].mid)
- i--;
- if (yp == dst[i].mid) {
- i--;
- len--;
+ for (i = keep; ++i <= pagecount; ) {
+ dp = dl[i].mptr;
+ // This is a page we skipped above
+ if (!dl[i].mid) {
+ dl[++j] = dl[i];
+ dl[j].mid = dp->mp_pgno;
+ continue;
}
+ mdb_dpage_free(env, dp);
}
- } else { // Simplify the above for single-ancestor case
- len = MDB_IDL_UM_MAX - txn->mt_dirty_room;
- }
- // Merge our dirty list with parent's
- y = src[0].mid;
- for (i = len; y; dst[i--] = src[y--]) {
- pgno_t yp = src[y].mid;
- while (yp < dst[x].mid)
- dst[i--] = dst[x--];
- if (yp == dst[x].mid)
- free(dst[x--].mptr);
- }
- mdb_tassert(txn, i == x);
- dst[0].mid = len;
- free(txn->mt_u.dirty_list);
- parent->mt_dirty_room = txn->mt_dirty_room;
- if (txn->mt_spill_pgs) {
- if (parent->mt_spill_pgs) {
- // TODO: Prevent failure here, so parent does not fail
- rc = mdb_midl_append_list(&parent->mt_spill_pgs, txn->mt_spill_pgs);
- if (rc)
- parent->mt_flags |= MDB_TXN_ERROR;
- mdb_midl_free(txn->mt_spill_pgs);
- mdb_midl_sort(parent->mt_spill_pgs);
- } else {
- parent->mt_spill_pgs = txn->mt_spill_pgs;
- }
+
+ done:
+ i--;
+ txn->mt_dirty_room += i - j;
+ dl[0].mid = j;
+ return MDB_SUCCESS;
}
- parent->mt_child = NULL;
- mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead);
- free(txn);
- return rc;
- }
+ int
+ mdb_txn_commit(MDB_txn *txn)
+ {
+ int rc;
+ unsigned int i;
+ MDB_env *env;
- if (txn != env->me_txn) {
- DPUTS("attempt to commit unknown transaction");
- rc = EINVAL;
- goto fail;
- }
+ if (txn == NULL || txn->mt_env == NULL)
+ return EINVAL;
- mdb_cursors_close(txn, 0);
+ if (txn->mt_child) {
+ rc = mdb_txn_commit(txn->mt_child);
+ txn->mt_child = NULL;
+ if (rc)
+ goto fail;
+ }
- if (!txn->mt_u.dirty_list[0].mid &&
- !(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS)))
- goto done;
+ env = txn->mt_env;
- DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u",
- txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root));
+ if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY)) {
+ mdb_dbis_update(txn, 1);
+ txn->mt_numdbs = 2; // so txn_abort() doesn't close any new handles
+ mdb_txn_abort(txn);
+ return MDB_SUCCESS;
+ }
- // Update DB root pointers
- if (txn->mt_numdbs > 2) {
- MDB_cursor mc;
- MDB_dbi i;
- MDB_val data;
- data.mv_size = sizeof(MDB_db);
+ if (F_ISSET(txn->mt_flags, MDB_TXN_ERROR)) {
+ DPUTS("error flag is set, can't commit");
+ if (txn->mt_parent)
+ txn->mt_parent->mt_flags |= MDB_TXN_ERROR;
+ rc = MDB_BAD_TXN;
+ goto fail;
+ }
- mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
- for (i = 2; i < txn->mt_numdbs; i++) {
- if (txn->mt_dbflags[i] & DB_DIRTY) {
- data.mv_data = &txn->mt_dbs[i];
- rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0);
+ if (txn->mt_parent) {
+ MDB_txn *parent = txn->mt_parent;
+ MDB_ID2L dst, src;
+ MDB_IDL pspill;
+ unsigned x, y, len, ps_len;
+
+ // Append our free list to parent's
+ rc = mdb_midl_append_list(&parent->mt_free_pgs, txn->mt_free_pgs);
if (rc)
goto fail;
+ mdb_midl_free(txn->mt_free_pgs);
+ // Failures after this must either undo the changes
+ // to the parent or set MDB_TXN_ERROR in the parent.
+
+ parent->mt_next_pgno = txn->mt_next_pgno;
+ parent->mt_flags = txn->mt_flags;
+
+ // Merge our cursors into parent's and close them
+ mdb_cursors_close(txn, 1);
+
+ // Update parent's DB table.
+ memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
+ parent->mt_numdbs = txn->mt_numdbs;
+ parent->mt_dbflags[0] = txn->mt_dbflags[0];
+ parent->mt_dbflags[1] = txn->mt_dbflags[1];
+ for (i=2; i<txn->mt_numdbs; i++) {
+ // preserve parent's DB_NEW status
+ x = parent->mt_dbflags[i] & DB_NEW;
+ parent->mt_dbflags[i] = txn->mt_dbflags[i] | x;
+ }
+
+ dst = parent->mt_u.dirty_list;
+ src = txn->mt_u.dirty_list;
+ // Remove anything in our dirty list from parent's spill list
+ if ((pspill = parent->mt_spill_pgs) && (ps_len = pspill[0])) {
+ x = y = ps_len;
+ pspill[0] = (pgno_t)-1;
+ // Mark our dirty pages as deleted in parent spill list
+ for (i=0, len=src[0].mid; ++i <= len; ) {
+ MDB_ID pn = src[i].mid << 1;
+ while (pn > pspill[x])
+ x--;
+ if (pn == pspill[x]) {
+ pspill[x] = 1;
+ y = --x;
+ }
+ }
+ // Squash deleted pagenums if we deleted any
+ for (x=y; ++x <= ps_len; )
+ if (!(pspill[x] & 1))
+ pspill[++y] = pspill[x];
+ pspill[0] = y;
+ }
+
+ // Find len = length of merging our dirty list with parent's
+ x = dst[0].mid;
+ dst[0].mid = 0; // simplify loops
+ if (parent->mt_parent) {
+ len = x + src[0].mid;
+ y = mdb_mid2l_search(src, dst[x].mid + 1) - 1;
+ for (i = x; y && i; y--) {
+ pgno_t yp = src[y].mid;
+ while (yp < dst[i].mid)
+ i--;
+ if (yp == dst[i].mid) {
+ i--;
+ len--;
+ }
+ }
+ } else { // Simplify the above for single-ancestor case
+ len = MDB_IDL_UM_MAX - txn->mt_dirty_room;
+ }
+ // Merge our dirty list with parent's
+ y = src[0].mid;
+ for (i = len; y; dst[i--] = src[y--]) {
+ pgno_t yp = src[y].mid;
+ while (yp < dst[x].mid)
+ dst[i--] = dst[x--];
+ if (yp == dst[x].mid)
+ free(dst[x--].mptr);
+ }
+ mdb_tassert(txn, i == x);
+ dst[0].mid = len;
+ free(txn->mt_u.dirty_list);
+ parent->mt_dirty_room = txn->mt_dirty_room;
+ if (txn->mt_spill_pgs) {
+ if (parent->mt_spill_pgs) {
+ // TODO: Prevent failure here, so parent does not fail
+ rc = mdb_midl_append_list(&parent->mt_spill_pgs, txn->mt_spill_pgs);
+ if (rc)
+ parent->mt_flags |= MDB_TXN_ERROR;
+ mdb_midl_free(txn->mt_spill_pgs);
+ mdb_midl_sort(parent->mt_spill_pgs);
+ } else {
+ parent->mt_spill_pgs = txn->mt_spill_pgs;
+ }
+ }
+
+ parent->mt_child = NULL;
+ mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead);
+ free(txn);
+ return rc;
+ }
+
+ if (txn != env->me_txn) {
+ DPUTS("attempt to commit unknown transaction");
+ rc = EINVAL;
+ goto fail;
+ }
+
+ mdb_cursors_close(txn, 0);
+
+ if (!txn->mt_u.dirty_list[0].mid &&
+ !(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS)))
+ goto done;
+
+ DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u",
+ txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root));
+
+ // Update DB root pointers
+ if (txn->mt_numdbs > 2) {
+ MDB_cursor mc;
+ MDB_dbi i;
+ MDB_val data;
+ data.mv_size = sizeof(MDB_db);
+
+ mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
+ for (i = 2; i < txn->mt_numdbs; i++) {
+ if (txn->mt_dbflags[i] & DB_DIRTY) {
+ data.mv_data = &txn->mt_dbs[i];
+ rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0);
+ if (rc)
+ goto fail;
+ }
+ }
}
- }
- }
- rc = mdb_freelist_save(txn);
- if (rc)
- goto fail;
+ rc = mdb_freelist_save(txn);
+ if (rc)
+ goto fail;
- mdb_midl_free(env->me_pghead);
- env->me_pghead = NULL;
- if (mdb_midl_shrink(&txn->mt_free_pgs))
- env->me_free_pgs = txn->mt_free_pgs;
+ mdb_midl_free(env->me_pghead);
+ env->me_pghead = NULL;
+ if (mdb_midl_shrink(&txn->mt_free_pgs))
+ env->me_free_pgs = txn->mt_free_pgs;
-#if (MDB_DEBUG) > 2
- mdb_audit(txn);
-#endif
+ #if (MDB_DEBUG) > 2
+ mdb_audit(txn);
+ #endif
- if ((rc = mdb_page_flush(txn, 0)) ||
- (rc = mdb_env_sync(env, 0)) ||
- (rc = mdb_env_write_meta(txn)))
- goto fail;
+ if ((rc = mdb_page_flush(txn, 0)) ||
+ (rc = mdb_env_sync(env, 0)) ||
+ (rc = mdb_env_write_meta(txn)))
+ goto fail;
-done:
- env->me_pglast = 0;
- env->me_txn = NULL;
- mdb_dbis_update(txn, 1);
+ done:
+ env->me_pglast = 0;
+ env->me_txn = NULL;
+ mdb_dbis_update(txn, 1);
- if (env->me_txns)
- UNLOCK_MUTEX_W(env);
- free(txn);
+ if (env->me_txns)
+ UNLOCK_MUTEX_W(env);
+ free(txn);
- return MDB_SUCCESS;
+ return MDB_SUCCESS;
-fail:
- mdb_txn_abort(txn);
- return rc;
+ fail:
+ mdb_txn_abort(txn);
+ return rc;
*/
return nil
}
@@ -1008,118 +1007,118 @@ fail:
// @return 0 on success, non-zero on failure.
func (t *transaction) writeMeta() error {
/*
- MDB_env *env;
- MDB_meta meta, metab, *mp;
- off_t off;
- int rc, len, toggle;
- char *ptr;
- HANDLE mfd;
-#ifdef _WIN32
- OVERLAPPED ov;
-#else
- int r2;
-#endif
-
- toggle = txn->mt_txnid & 1;
- DPRINTF(("writing meta page %d for root page %"Z"u",
- toggle, txn->mt_dbs[MAIN_DBI].md_root));
-
- env = txn->mt_env;
- mp = env->me_metas[toggle];
-
- if (env->me_flags & MDB_WRITEMAP) {
- // Persist any increases of mapsize config
- if (env->me_mapsize > mp->mm_mapsize)
- mp->mm_mapsize = env->me_mapsize;
- mp->mm_dbs[0] = txn->mt_dbs[0];
- mp->mm_dbs[1] = txn->mt_dbs[1];
- mp->mm_last_pg = txn->mt_next_pgno - 1;
- mp->mm_txnid = txn->mt_txnid;
- if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) {
- unsigned meta_size = env->me_psize;
- rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
- ptr = env->me_map;
- if (toggle) {
-#ifndef _WIN32 // POSIX msync() requires ptr = start of OS page
- if (meta_size < env->me_os_psize)
- meta_size += meta_size;
- else
-#endif
- ptr += meta_size;
+ MDB_env *env;
+ MDB_meta meta, metab, *mp;
+ off_t off;
+ int rc, len, toggle;
+ char *ptr;
+ HANDLE mfd;
+ #ifdef _WIN32
+ OVERLAPPED ov;
+ #else
+ int r2;
+ #endif
+
+ toggle = txn->mt_txnid & 1;
+ DPRINTF(("writing meta page %d for root page %"Z"u",
+ toggle, txn->mt_dbs[MAIN_DBI].md_root));
+
+ env = txn->mt_env;
+ mp = env->me_metas[toggle];
+
+ if (env->me_flags & MDB_WRITEMAP) {
+ // Persist any increases of mapsize config
+ if (env->me_mapsize > mp->mm_mapsize)
+ mp->mm_mapsize = env->me_mapsize;
+ mp->mm_dbs[0] = txn->mt_dbs[0];
+ mp->mm_dbs[1] = txn->mt_dbs[1];
+ mp->mm_last_pg = txn->mt_next_pgno - 1;
+ mp->mm_txnid = txn->mt_txnid;
+ if (!(env->me_flags & (MDB_NOMETASYNC|MDB_NOSYNC))) {
+ unsigned meta_size = env->me_psize;
+ rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
+ ptr = env->me_map;
+ if (toggle) {
+ #ifndef _WIN32 // POSIX msync() requires ptr = start of OS page
+ if (meta_size < env->me_os_psize)
+ meta_size += meta_size;
+ else
+ #endif
+ ptr += meta_size;
+ }
+ if (MDB_MSYNC(ptr, meta_size, rc)) {
+ rc = ErrCode();
+ goto fail;
+ }
+ }
+ goto done;
}
- if (MDB_MSYNC(ptr, meta_size, rc)) {
- rc = ErrCode();
- goto fail;
+ metab.mm_txnid = env->me_metas[toggle]->mm_txnid;
+ metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg;
+
+ ptr = (char *)&meta;
+ if (env->me_mapsize > mp->mm_mapsize) {
+ // Persist any increases of mapsize config
+ meta.mm_mapsize = env->me_mapsize;
+ off = offsetof(MDB_meta, mm_mapsize);
+ } else {
+ off = offsetof(MDB_meta, mm_dbs[0].md_depth);
}
- }
- goto done;
- }
- metab.mm_txnid = env->me_metas[toggle]->mm_txnid;
- metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg;
-
- ptr = (char *)&meta;
- if (env->me_mapsize > mp->mm_mapsize) {
- // Persist any increases of mapsize config
- meta.mm_mapsize = env->me_mapsize;
- off = offsetof(MDB_meta, mm_mapsize);
- } else {
- off = offsetof(MDB_meta, mm_dbs[0].md_depth);
- }
- len = sizeof(MDB_meta) - off;
-
- ptr += off;
- meta.mm_dbs[0] = txn->mt_dbs[0];
- meta.mm_dbs[1] = txn->mt_dbs[1];
- meta.mm_last_pg = txn->mt_next_pgno - 1;
- meta.mm_txnid = txn->mt_txnid;
-
- if (toggle)
- off += env->me_psize;
- off += PAGEHDRSZ;
-
- // Write to the SYNC fd
- mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ?
- env->me_fd : env->me_mfd;
-#ifdef _WIN32
- {
- memset(&ov, 0, sizeof(ov));
- ov.Offset = off;
- if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov))
- rc = -1;
- }
-#else
- rc = pwrite(mfd, ptr, len, off);
-#endif
- if (rc != len) {
- rc = rc < 0 ? ErrCode() : EIO;
- DPUTS("write failed, disk error?");
- // On a failure, the pagecache still contains the new data.
- // Write some old data back, to prevent it from being used.
- // Use the non-SYNC fd; we know it will fail anyway.
- meta.mm_last_pg = metab.mm_last_pg;
- meta.mm_txnid = metab.mm_txnid;
-#ifdef _WIN32
- memset(&ov, 0, sizeof(ov));
- ov.Offset = off;
- WriteFile(env->me_fd, ptr, len, NULL, &ov);
-#else
- r2 = pwrite(env->me_fd, ptr, len, off);
- (void)r2; // Silence warnings. We don't care about pwrite's return value
-#endif
-fail:
- env->me_flags |= MDB_FATAL_ERROR;
- return rc;
- }
-done:
- // Memory ordering issues are irrelevant; since the entire writer
- // is wrapped by wmutex, all of these changes will become visible
- // after the wmutex is unlocked. Since the DB is multi-version,
- // readers will get consistent data regardless of how fresh or
- // how stale their view of these values is.
- if (env->me_txns)
- env->me_txns->mti_txnid = txn->mt_txnid;
-
- return MDB_SUCCESS;
+ len = sizeof(MDB_meta) - off;
+
+ ptr += off;
+ meta.mm_dbs[0] = txn->mt_dbs[0];
+ meta.mm_dbs[1] = txn->mt_dbs[1];
+ meta.mm_last_pg = txn->mt_next_pgno - 1;
+ meta.mm_txnid = txn->mt_txnid;
+
+ if (toggle)
+ off += env->me_psize;
+ off += PAGEHDRSZ;
+
+ // Write to the SYNC fd
+ mfd = env->me_flags & (MDB_NOSYNC|MDB_NOMETASYNC) ?
+ env->me_fd : env->me_mfd;
+ #ifdef _WIN32
+ {
+ memset(&ov, 0, sizeof(ov));
+ ov.Offset = off;
+ if (!WriteFile(mfd, ptr, len, (DWORD *)&rc, &ov))
+ rc = -1;
+ }
+ #else
+ rc = pwrite(mfd, ptr, len, off);
+ #endif
+ if (rc != len) {
+ rc = rc < 0 ? ErrCode() : EIO;
+ DPUTS("write failed, disk error?");
+ // On a failure, the pagecache still contains the new data.
+ // Write some old data back, to prevent it from being used.
+ // Use the non-SYNC fd; we know it will fail anyway.
+ meta.mm_last_pg = metab.mm_last_pg;
+ meta.mm_txnid = metab.mm_txnid;
+ #ifdef _WIN32
+ memset(&ov, 0, sizeof(ov));
+ ov.Offset = off;
+ WriteFile(env->me_fd, ptr, len, NULL, &ov);
+ #else
+ r2 = pwrite(env->me_fd, ptr, len, off);
+ (void)r2; // Silence warnings. We don't care about pwrite's return value
+ #endif
+ fail:
+ env->me_flags |= MDB_FATAL_ERROR;
+ return rc;
+ }
+ done:
+ // Memory ordering issues are irrelevant; since the entire writer
+ // is wrapped by wmutex, all of these changes will become visible
+ // after the wmutex is unlocked. Since the DB is multi-version,
+ // readers will get consistent data regardless of how fresh or
+ // how stale their view of these values is.
+ if (env->me_txns)
+ env->me_txns->mti_txnid = txn->mt_txnid;
+
+ return MDB_SUCCESS;
*/
return nil
}
@@ -1132,53 +1131,53 @@ done:
// @return 0 on success, non-zero on failure.
func (t *transaction) getPage(id int) (*page, int, error) {
/*
- MDB_env *env = txn->mt_env;
- MDB_page *p = NULL;
- int level;
-
- if (!((txn->mt_flags & MDB_TXN_RDONLY) | (env->me_flags & MDB_WRITEMAP))) {
- MDB_txn *tx2 = txn;
- level = 1;
- do {
- MDB_ID2L dl = tx2->mt_u.dirty_list;
- unsigned x;
- // Spilled pages were dirtied in this txn and flushed
- // because the dirty list got full. Bring this page
- // back in from the map (but don't unspill it here,
- // leave that unless page_touch happens again).
- if (tx2->mt_spill_pgs) {
- MDB_ID pn = pgno << 1;
- x = mdb_midl_search(tx2->mt_spill_pgs, pn);
- if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
- p = (MDB_page *)(env->me_map + env->me_psize * pgno);
- goto done;
- }
+ MDB_env *env = txn->mt_env;
+ MDB_page *p = NULL;
+ int level;
+
+ if (!((txn->mt_flags & MDB_TXN_RDONLY) | (env->me_flags & MDB_WRITEMAP))) {
+ MDB_txn *tx2 = txn;
+ level = 1;
+ do {
+ MDB_ID2L dl = tx2->mt_u.dirty_list;
+ unsigned x;
+ // Spilled pages were dirtied in this txn and flushed
+ // because the dirty list got full. Bring this page
+ // back in from the map (but don't unspill it here,
+ // leave that unless page_touch happens again).
+ if (tx2->mt_spill_pgs) {
+ MDB_ID pn = pgno << 1;
+ x = mdb_midl_search(tx2->mt_spill_pgs, pn);
+ if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
+ p = (MDB_page *)(env->me_map + env->me_psize * pgno);
+ goto done;
+ }
+ }
+ if (dl[0].mid) {
+ unsigned x = mdb_mid2l_search(dl, pgno);
+ if (x <= dl[0].mid && dl[x].mid == pgno) {
+ p = dl[x].mptr;
+ goto done;
+ }
+ }
+ level++;
+ } while ((tx2 = tx2->mt_parent) != NULL);
}
- if (dl[0].mid) {
- unsigned x = mdb_mid2l_search(dl, pgno);
- if (x <= dl[0].mid && dl[x].mid == pgno) {
- p = dl[x].mptr;
- goto done;
- }
+
+ if (pgno < txn->mt_next_pgno) {
+ level = 0;
+ p = (MDB_page *)(env->me_map + env->me_psize * pgno);
+ } else {
+ DPRINTF(("page %"Z"u not found", pgno));
+ txn->mt_flags |= MDB_TXN_ERROR;
+ return MDB_PAGE_NOTFOUND;
}
- level++;
- } while ((tx2 = tx2->mt_parent) != NULL);
- }
-
- if (pgno < txn->mt_next_pgno) {
- level = 0;
- p = (MDB_page *)(env->me_map + env->me_psize * pgno);
- } else {
- DPRINTF(("page %"Z"u not found", pgno));
- txn->mt_flags |= MDB_TXN_ERROR;
- return MDB_PAGE_NOTFOUND;
- }
-
-done:
- *ret = p;
- if (lvl)
- *lvl = level;
- return MDB_SUCCESS;
+
+ done:
+ *ret = p;
+ if (lvl)
+ *lvl = level;
+ return MDB_SUCCESS;
*/
return nil, 0, nil
@@ -1191,349 +1190,354 @@ done:
// @return 0 on success, non-zero on failure.
func (t *transaction) readNode(leaf *node, data []byte) error {
/*
- MDB_page *omp; // overflow page
- pgno_t pgno;
- int rc;
+ MDB_page *omp; // overflow page
+ pgno_t pgno;
+ int rc;
- if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
- data->mv_size = NODEDSZ(leaf);
- data->mv_data = NODEDATA(leaf);
- return MDB_SUCCESS;
- }
+ if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
+ data->mv_size = NODEDSZ(leaf);
+ data->mv_data = NODEDATA(leaf);
+ return MDB_SUCCESS;
+ }
- // Read overflow data.
- data->mv_size = NODEDSZ(leaf);
- memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
- if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) {
- DPRINTF(("read overflow page %"Z"u failed", pgno));
- return rc;
- }
- data->mv_data = METADATA(omp);
+ // Read overflow data.
+ data->mv_size = NODEDSZ(leaf);
+ memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
+ if ((rc = mdb_page_get(txn, pgno, &omp, NULL)) != 0) {
+ DPRINTF(("read overflow page %"Z"u failed", pgno));
+ return rc;
+ }
+ data->mv_data = METADATA(omp);
- return MDB_SUCCESS;
+ return MDB_SUCCESS;
*/
return nil
}
func (t *transaction) Get(bucket Bucket, key []byte) ([]byte, error) {
/*
- MDB_cursor mc;
- MDB_xcursor mx;
- int exact = 0;
- DKBUF;
+ MDB_cursor mc;
+ MDB_xcursor mx;
+ int exact = 0;
+ DKBUF;
- if (key == NULL || data == NULL)
- return EINVAL;
+ if (key == NULL || data == NULL)
+ return EINVAL;
- DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
+ DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
- return EINVAL;
+ if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+ return EINVAL;
- if (txn->mt_flags & MDB_TXN_ERROR)
- return MDB_BAD_TXN;
+ if (txn->mt_flags & MDB_TXN_ERROR)
+ return MDB_BAD_TXN;
- mdb_cursor_init(&mc, txn, dbi, &mx);
- return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
+ mdb_cursor_init(&mc, txn, dbi, &mx);
+ return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
*/
return nil, nil
}
func (t *transaction) Cursor(b Bucket) (Cursor, error) {
/*
- MDB_cursor *mc;
- size_t size = sizeof(MDB_cursor);
+ MDB_cursor *mc;
+ size_t size = sizeof(MDB_cursor);
- if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
- return EINVAL;
+ if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+ return EINVAL;
- if (txn->mt_flags & MDB_TXN_ERROR)
- return MDB_BAD_TXN;
+ if (txn->mt_flags & MDB_TXN_ERROR)
+ return MDB_BAD_TXN;
- // Allow read access to the freelist
- if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
- return EINVAL;
+ // Allow read access to the freelist
+ if (!dbi && !F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
+ return EINVAL;
- if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT)
- size += sizeof(MDB_xcursor);
+ if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT)
+ size += sizeof(MDB_xcursor);
- if ((mc = malloc(size)) != NULL) {
- mdb_cursor_init(mc, txn, dbi, (MDB_xcursor *)(mc + 1));
- if (txn->mt_cursors) {
- mc->mc_next = txn->mt_cursors[dbi];
- txn->mt_cursors[dbi] = mc;
- mc->mc_flags |= C_UNTRACK;
+ if ((mc = malloc(size)) != NULL) {
+ mdb_cursor_init(mc, txn, dbi, (MDB_xcursor *)(mc + 1));
+ if (txn->mt_cursors) {
+ mc->mc_next = txn->mt_cursors[dbi];
+ txn->mt_cursors[dbi] = mc;
+ mc->mc_flags |= C_UNTRACK;
+ }
+ } else {
+ return ENOMEM;
}
- } else {
- return ENOMEM;
- }
- *ret = mc;
+ *ret = mc;
- return MDB_SUCCESS;
+ return MDB_SUCCESS;
*/
- return nil
+ return nil, nil
}
-func (t *transaction) Renew(c Cursor) error {
+func (t *transaction) Renew1(c Cursor) error {
/*
- if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs)
- return EINVAL;
+ if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs)
+ return EINVAL;
- if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors)
- return EINVAL;
+ if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors)
+ return EINVAL;
- mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor);
- return MDB_SUCCESS;
+ mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor);
+ return MDB_SUCCESS;
*/
+ return nil
}
-func (t *transaction) Delete(b *bucket, key []byte, data []byte) error {
+func (t *transaction) Delete(b *Bucket, key []byte, data []byte) error {
/*
- MDB_cursor mc;
- MDB_xcursor mx;
- MDB_cursor_op op;
- MDB_val rdata, *xdata;
- int rc, exact;
- DKBUF;
-
- if (key == NULL)
- return EINVAL;
-
- DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key)));
-
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
- return EINVAL;
-
- if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
- return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
-
- mdb_cursor_init(&mc, txn, dbi, &mx);
-
- exact = 0;
- if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
- // must ignore any data
- data = NULL;
- }
- if (data) {
- op = MDB_GET_BOTH;
- rdata = *data;
- xdata = &rdata;
- } else {
- op = MDB_SET;
- xdata = NULL;
- }
- rc = mdb_cursor_set(&mc, key, xdata, op, &exact);
- if (rc == 0) {
- // let mdb_page_split know about this cursor if needed:
- // delete will trigger a rebalance; if it needs to move
- // a node from one page to another, it will have to
- // update the parent's separator key(s). If the new sepkey
- // is larger than the current one, the parent page may
- // run out of space, triggering a split. We need this
- // cursor to be consistent until the end of the rebalance.
- mc.mc_flags |= C_UNTRACK;
- mc.mc_next = txn->mt_cursors[dbi];
- txn->mt_cursors[dbi] = &mc;
- rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA);
- txn->mt_cursors[dbi] = mc.mc_next;
- }
- return rc;
+ MDB_cursor mc;
+ MDB_xcursor mx;
+ MDB_cursor_op op;
+ MDB_val rdata, *xdata;
+ int rc, exact;
+ DKBUF;
+
+ if (key == NULL)
+ return EINVAL;
+
+ DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key)));
+
+ if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+ return EINVAL;
+
+ if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
+ return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
+
+ mdb_cursor_init(&mc, txn, dbi, &mx);
+
+ exact = 0;
+ if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
+ // must ignore any data
+ data = NULL;
+ }
+ if (data) {
+ op = MDB_GET_BOTH;
+ rdata = *data;
+ xdata = &rdata;
+ } else {
+ op = MDB_SET;
+ xdata = NULL;
+ }
+ rc = mdb_cursor_set(&mc, key, xdata, op, &exact);
+ if (rc == 0) {
+ // let mdb_page_split know about this cursor if needed:
+ // delete will trigger a rebalance; if it needs to move
+ // a node from one page to another, it will have to
+ // update the parent's separator key(s). If the new sepkey
+ // is larger than the current one, the parent page may
+ // run out of space, triggering a split. We need this
+ // cursor to be consistent until the end of the rebalance.
+ mc.mc_flags |= C_UNTRACK;
+ mc.mc_next = txn->mt_cursors[dbi];
+ txn->mt_cursors[dbi] = &mc;
+ rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA);
+ txn->mt_cursors[dbi] = mc.mc_next;
+ }
+ return rc;
*/
return nil
}
func (t *transaction) Put(b Bucket, key []byte, data []byte, flags int) error {
/*
- MDB_cursor mc;
- MDB_xcursor mx;
+ MDB_cursor mc;
+ MDB_xcursor mx;
- if (key == NULL || data == NULL)
- return EINVAL;
+ if (key == NULL || data == NULL)
+ return EINVAL;
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
- return EINVAL;
+ if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
+ return EINVAL;
- if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
- return EINVAL;
+ if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
+ return EINVAL;
- mdb_cursor_init(&mc, txn, dbi, &mx);
- return mdb_cursor_put(&mc, key, data, flags);
+ mdb_cursor_init(&mc, txn, dbi, &mx);
+ return mdb_cursor_put(&mc, key, data, flags);
*/
+ return nil
}
-func (t *transaction) Bucket(name string, flags int) (Bucket, error) {
+func (t *transaction) Bucket(name string, flags int) (*Bucket, error) {
/*
- MDB_val key, data;
- MDB_dbi i;
- MDB_cursor mc;
- int rc, dbflag, exact;
- unsigned int unused = 0;
- size_t len;
-
- if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
- mdb_default_cmp(txn, FREE_DBI);
- }
-
- if ((flags & VALID_FLAGS) != flags)
- return EINVAL;
- if (txn->mt_flags & MDB_TXN_ERROR)
- return MDB_BAD_TXN;
-
- // main DB?
- if (!name) {
- *dbi = MAIN_DBI;
- if (flags & PERSISTENT_FLAGS) {
- uint16_t f2 = flags & PERSISTENT_FLAGS;
- // make sure flag changes get committed
- if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) != txn->mt_dbs[MAIN_DBI].md_flags) {
- txn->mt_dbs[MAIN_DBI].md_flags |= f2;
- txn->mt_flags |= MDB_TXN_DIRTY;
+ MDB_val key, data;
+ MDB_dbi i;
+ MDB_cursor mc;
+ int rc, dbflag, exact;
+ unsigned int unused = 0;
+ size_t len;
+
+ if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
+ mdb_default_cmp(txn, FREE_DBI);
+ }
+
+ if ((flags & VALID_FLAGS) != flags)
+ return EINVAL;
+ if (txn->mt_flags & MDB_TXN_ERROR)
+ return MDB_BAD_TXN;
+
+ // main DB?
+ if (!name) {
+ *dbi = MAIN_DBI;
+ if (flags & PERSISTENT_FLAGS) {
+ uint16_t f2 = flags & PERSISTENT_FLAGS;
+ // make sure flag changes get committed
+ if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) != txn->mt_dbs[MAIN_DBI].md_flags) {
+ txn->mt_dbs[MAIN_DBI].md_flags |= f2;
+ txn->mt_flags |= MDB_TXN_DIRTY;
+ }
}
+ mdb_default_cmp(txn, MAIN_DBI);
+ return MDB_SUCCESS;
}
- mdb_default_cmp(txn, MAIN_DBI);
- return MDB_SUCCESS;
- }
-
- if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) {
- mdb_default_cmp(txn, MAIN_DBI);
- }
-
- // Is the DB already open?
- len = strlen(name);
- for (i=2; i<txn->mt_numdbs; i++) {
- if (!txn->mt_dbxs[i].md_name.mv_size) {
- // Remember this free slot
- if (!unused) unused = i;
- continue;
+
+ if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) {
+ mdb_default_cmp(txn, MAIN_DBI);
}
- if (len == txn->mt_dbxs[i].md_name.mv_size &&
- !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
- *dbi = i;
- return MDB_SUCCESS;
+
+ // Is the DB already open?
+ len = strlen(name);
+ for (i=2; i<txn->mt_numdbs; i++) {
+ if (!txn->mt_dbxs[i].md_name.mv_size) {
+ // Remember this free slot
+ if (!unused) unused = i;
+ continue;
+ }
+ if (len == txn->mt_dbxs[i].md_name.mv_size &&
+ !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
+ *dbi = i;
+ return MDB_SUCCESS;
+ }
}
- }
-
- // If no free slot and max hit, fail
- if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs)
- return MDB_DBS_FULL;
-
- // Cannot mix named databases with some mainDB flags
- if (txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT|MDB_INTEGERKEY))
- return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND;
-
- // Find the DB info
- dbflag = DB_NEW|DB_VALID;
- exact = 0;
- key.mv_size = len;
- key.mv_data = (void *)name;
- mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
- rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact);
- if (rc == MDB_SUCCESS) {
- // make sure this is actually a DB
- MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
- if (!(node->mn_flags & F_SUBDATA))
- return MDB_INCOMPATIBLE;
- } else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
- // Create if requested
- MDB_db dummy;
- data.mv_size = sizeof(MDB_db);
- data.mv_data = &dummy;
- memset(&dummy, 0, sizeof(dummy));
- dummy.md_root = P_INVALID;
- dummy.md_flags = flags & PERSISTENT_FLAGS;
- rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
- dbflag |= DB_DIRTY;
- }
-
- // OK, got info, add to table
- if (rc == MDB_SUCCESS) {
- unsigned int slot = unused ? unused : txn->mt_numdbs;
- txn->mt_dbxs[slot].md_name.mv_data = strdup(name);
- txn->mt_dbxs[slot].md_name.mv_size = len;
- txn->mt_dbxs[slot].md_rel = NULL;
- txn->mt_dbflags[slot] = dbflag;
- memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
- *dbi = slot;
- mdb_default_cmp(txn, slot);
- if (!unused) {
- txn->mt_numdbs++;
+
+ // If no free slot and max hit, fail
+ if (!unused && txn->mt_numdbs >= txn->mt_env->me_maxdbs)
+ return MDB_DBS_FULL;
+
+ // Cannot mix named databases with some mainDB flags
+ if (txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT|MDB_INTEGERKEY))
+ return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND;
+
+ // Find the DB info
+ dbflag = DB_NEW|DB_VALID;
+ exact = 0;
+ key.mv_size = len;
+ key.mv_data = (void *)name;
+ mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
+ rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact);
+ if (rc == MDB_SUCCESS) {
+ // make sure this is actually a DB
+ MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
+ if (!(node->mn_flags & F_SUBDATA))
+ return MDB_INCOMPATIBLE;
+ } else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
+ // Create if requested
+ MDB_db dummy;
+ data.mv_size = sizeof(MDB_db);
+ data.mv_data = &dummy;
+ memset(&dummy, 0, sizeof(dummy));
+ dummy.md_root = P_INVALID;
+ dummy.md_flags = flags & PERSISTENT_FLAGS;
+ rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
+ dbflag |= DB_DIRTY;
}
- }
- return rc;
+ // OK, got info, add to table
+ if (rc == MDB_SUCCESS) {
+ unsigned int slot = unused ? unused : txn->mt_numdbs;
+ txn->mt_dbxs[slot].md_name.mv_data = strdup(name);
+ txn->mt_dbxs[slot].md_name.mv_size = len;
+ txn->mt_dbxs[slot].md_rel = NULL;
+ txn->mt_dbflags[slot] = dbflag;
+ memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
+ *dbi = slot;
+ mdb_default_cmp(txn, slot);
+ if (!unused) {
+ txn->mt_numdbs++;
+ }
+ }
+
+ return rc;
*/
return nil, nil
}
func (t *transaction) Stat(b Bucket) *Stat {
- if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
- return EINVAL;
-
- if (txn->mt_dbflags[dbi] & DB_STALE) {
- MDB_cursor mc;
- MDB_xcursor mx;
- /* Stale, must read the DB's root. cursor_init does it for us. */
- mdb_cursor_init(&mc, txn, dbi, &mx);
- }
- return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg);
+ /*
+ if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
+ return EINVAL;
+
+ if (txn->mt_dbflags[dbi] & DB_STALE) {
+ MDB_cursor mc;
+ MDB_xcursor mx;
+ // Stale, must read the DB's root. cursor_init does it for us.
+ mdb_cursor_init(&mc, txn, dbi, &mx);
+ }
+ return mdb_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg);
+ */
+ return nil
}
func (t *transaction) BucketFlags(b Bucket) (int, error) {
/*
- // We could return the flags for the FREE_DBI too but what's the point?
- if (txn == NULL || dbi < MAIN_DBI || dbi >= txn->mt_numdbs)
- return EINVAL;
- *flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS;
- return MDB_SUCCESS;
+ // We could return the flags for the FREE_DBI too but what's the point?
+ if (txn == NULL || dbi < MAIN_DBI || dbi >= txn->mt_numdbs)
+ return EINVAL;
+ *flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS;
+ return MDB_SUCCESS;
*/
return 0, nil
}
-func (t *transaction) Drop(b Bucket int del) error {
+func (t *transaction) Drop(b *Bucket, del int) error {
/*
- MDB_cursor *mc, *m2;
- int rc;
+ MDB_cursor *mc, *m2;
+ int rc;
- if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID))
- return EINVAL;
+ if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID))
+ return EINVAL;
- if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
- return EACCES;
+ if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
+ return EACCES;
- rc = mdb_cursor_open(txn, dbi, &mc);
- if (rc)
- return rc;
+ rc = mdb_cursor_open(txn, dbi, &mc);
+ if (rc)
+ return rc;
- rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT);
- // Invalidate the dropped DB's cursors
- for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)
- m2->mc_flags &= ~(C_INITIALIZED|C_EOF);
- if (rc)
- goto leave;
-
- // Can't delete the main DB
- if (del && dbi > MAIN_DBI) {
- rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL);
- if (!rc) {
- txn->mt_dbflags[dbi] = DB_STALE;
- mdb_dbi_close(txn->mt_env, dbi);
- }
- } else {
- // reset the DB record, mark it dirty
- txn->mt_dbflags[dbi] |= DB_DIRTY;
- txn->mt_dbs[dbi].md_depth = 0;
- txn->mt_dbs[dbi].md_branch_pages = 0;
- txn->mt_dbs[dbi].md_leaf_pages = 0;
- txn->mt_dbs[dbi].md_overflow_pages = 0;
- txn->mt_dbs[dbi].md_entries = 0;
- txn->mt_dbs[dbi].md_root = P_INVALID;
-
- txn->mt_flags |= MDB_TXN_DIRTY;
- }
-leave:
- mdb_cursor_close(mc);
- return rc;
+ rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT);
+ // Invalidate the dropped DB's cursors
+ for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)
+ m2->mc_flags &= ~(C_INITIALIZED|C_EOF);
+ if (rc)
+ goto leave;
+
+ // Can't delete the main DB
+ if (del && dbi > MAIN_DBI) {
+ rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL);
+ if (!rc) {
+ txn->mt_dbflags[dbi] = DB_STALE;
+ mdb_dbi_close(txn->mt_env, dbi);
+ }
+ } else {
+ // reset the DB record, mark it dirty
+ txn->mt_dbflags[dbi] |= DB_DIRTY;
+ txn->mt_dbs[dbi].md_depth = 0;
+ txn->mt_dbs[dbi].md_branch_pages = 0;
+ txn->mt_dbs[dbi].md_leaf_pages = 0;
+ txn->mt_dbs[dbi].md_overflow_pages = 0;
+ txn->mt_dbs[dbi].md_entries = 0;
+ txn->mt_dbs[dbi].md_root = P_INVALID;
+
+ txn->mt_flags |= MDB_TXN_DIRTY;
+ }
+ leave:
+ mdb_cursor_close(mc);
+ return rc;
*/
return nil
}