[Date Prev][Date Next] [Chronological] [Thread] [Top]

LMDB: successive fillups and drops break the DB after a while



Hello,

I've tried stress-testing LMDB in our use case and I've discovered something I consider a bug.

In our use case we need to keep inserting to the DB until it's full, flush it and repeat. We've designed the insert function to fail if there are not enough headroom pages for safe delete / DB drop. During the tests the DB works correctly for some amount of repeats, but then it suddenly refuses to insert anything. It seems LMDB doesn't clear or reuse all of its pages. I would like to get anyone's opinion on this.

I have attached the source file containing the test. The DB size is set to 20MB, the inserted values have by default 25kB. In the first run the DB accepts 365 inserts, which decreases over time and stabilizes at 280. After 18 repeats the DB does not accept any inserts at all:
ok 1 - pass #1 fillup run (365 inserts)
ok 2 - pass #2 fillup run (300 inserts)
ok 3 - pass #3 fillup run (286 inserts)
ok 4 - pass #4 fillup run (283 inserts)
ok 5 - pass #5 fillup run (281 inserts)
ok 6 - pass #6 fillup run (280 inserts)
ok 7 - pass #7 fillup run (280 inserts)
ok 8 - pass #8 fillup run (280 inserts)
ok 9 - pass #9 fillup run (280 inserts)
ok 10 - pass #10 fillup run (280 inserts)
ok 11 - pass #11 fillup run (280 inserts)
ok 12 - pass #12 fillup run (280 inserts)
ok 13 - pass #13 fillup run (280 inserts)
ok 14 - pass #14 fillup run (280 inserts)
ok 15 - pass #15 fillup run (280 inserts)
ok 16 - pass #16 fillup run (280 inserts)
ok 17 - pass #17 fillup run (280 inserts)
ok 18 - pass #18 fillup run (108 inserts)
not ok 19 - pass #19 fillup run (0 inserts)
not ok 20 - pass #20 fillup run (0 inserts)

The test is run 4 times with different approaches that we thought could have had some impact. These approaches combine opening the DB per insert or per single fillup, and reopening the DB just for the DB drop. In this case these options have no effect and the results are the same, but originally a unit test that this originates from had some differences so I kept them in.

Note that I also tried calling mdb_drop with "1" as the last parameter, but it had no effect on the result.

I am linking against the latest LMDB from sources.

I also tried adding a constant number of pages to the reserve before the insert function fails. This only changes the amount of inserted items but doesn't change the functionality.

If you compile the program you can supply it with an argument to change the default size of the item value size. Lower values break the DB sooner (20kB requires only 12 repeated fillups and drops).

If anyone has any idea what might be the issue, please let me know. I know deleting the DB from filesystem and creating a new one would work, but that's a hack, not a fix.

Thanks.

Dominik

// Test LMDB for filling up, clearing and its consistency

#include <lmdb.h>

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#define E(expr) CHECK((rc = (expr)) == MDB_SUCCESS, #expr)
#define RES(err, expr) ((rc = expr) == (err) || (CHECK(!rc, #expr), 0))
#define CHECK(test, msg) ((test) ? (void)0 : ((void)fprintf(stderr, \
	"%s:%d: %s: %s\n", __FILE__, __LINE__, msg, mdb_strerror(rc)), abort()))

static const size_t g_file_size = 20 * 1024 * 1024;
static size_t g_data_size = 25000;

static int test_n = 0;
#define ok(cond, fmt, ...) \
	do { \
		if (cond) { \
			printf("ok %d - ", ++test_n); \
		} else { \
			printf("not ok %d - ", ++test_n); \
		} \
		printf(fmt, ##__VA_ARGS__); \
		printf("\n"); \
	} while (0)

/* Open DB */
void db_open(MDB_env *env, MDB_dbi *dbi)
{
	int rc;
	MDB_txn *txn;

	E(mdb_txn_begin(env, NULL, 0, &txn));
	E(mdb_dbi_open(txn, NULL, 0, dbi));
	E(mdb_txn_commit(txn));
}

/* Carefully insert to DB, do not overflow to allow deletions. */
int db_insert(MDB_env *env, MDB_dbi dbi, MDB_val *key, MDB_val *val)
{
	int rc;
	MDB_txn *txn;
	E(mdb_txn_begin(env, NULL, 0, &txn));

	/* Gather info to reserve some pages */
	MDB_stat stat;
	MDB_stat stat_free;
	MDB_envinfo info;

	E(mdb_stat(txn, dbi, &stat));
	E(mdb_stat(txn, 0, &stat_free));
	E(mdb_env_info(env, &info));

	/* Count head room pages */
	size_t max_pages = (info.me_mapsize / stat.ms_psize) - info.me_last_pgno - 2;
	/* Add free leaf pages, allow worst-case headroom for branch pages */
	max_pages += stat_free.ms_leaf_pages - stat.ms_branch_pages;
	/* The freelist must be able to hold db tree pages */
	size_t used_pages = stat.ms_branch_pages + stat.ms_overflow_pages;
	if (used_pages + 1 + 128 >= max_pages) {
		mdb_txn_abort(txn);
		return -1;
	}

	E(mdb_put(txn, dbi, key, val, 0));
	E(mdb_txn_commit(txn));

	return 0;
}

static void test_stress_base(size_t data_size,
                             bool reopen_per_insert,
                             bool reopen_to_flush)
{
	int rc, i, k, count;
	MDB_env *env;
	MDB_txn *txn;
	MDB_dbi  dbi;

	char db_filename[] = "/tmp/db.XXXXXX";
	mkdtemp(db_filename);

	/* Prepare DB environment */
	E(mdb_env_create(&env));
	E(mdb_env_open(env, db_filename, 0, 0660));
	E(mdb_env_set_mapsize(env, g_file_size));

	uint8_t *data = malloc(data_size);
	MDB_val key = { sizeof(k), &k };
	MDB_val val = { data_size, data };
	k = 0;

	/* Repeat fillup */
	for (i = 0; i < 20; ++i) {
		rc = 0;
		count = 0;

		if (!reopen_per_insert) db_open(env, &dbi);

		/* Fillup the DB */
		for (; rc == 0 && k < 5000; ++k) {
			if (reopen_per_insert) db_open(env, &dbi);

			rc = db_insert(env, dbi, &key, &val);
			if (rc == 0) ++count;

			if (reopen_per_insert) mdb_dbi_close(env, dbi);
		}

		if (reopen_to_flush) {
			if (!reopen_per_insert) mdb_dbi_close(env, dbi);
			db_open(env, &dbi);
		}
		ok(count > 0, "pass #%d fillup run (%d inserts)", i + 1, count);

		E(mdb_txn_begin(env, NULL, 0, &txn));
		E(mdb_drop(txn, dbi, 1)); // can we use 1 as the last param and then close it?
		E(mdb_txn_commit(txn));

		//if (!reopen_per_insert || reopen_to_flush) mdb_dbi_close(env, dbi);
	}

	/* Delete everything. */
	free(data);
	mdb_env_close(env);
	char cmd[256];
	snprintf(cmd, sizeof(cmd), "rm -rf %s", db_filename);
	system(cmd);
}

int main(int argc, char ** argv)
{
	if (argc > 1) {
		g_data_size = atol(argv[1]);
	}

	printf("stress test: reopen per cycle, reopen for flush\n");
	test_stress_base(g_data_size, false,  true);
	printf("stress test: reopen per cycle\n");
	test_stress_base(g_data_size, false, false);
	printf("stress test: reopen per insert, reopen for flush\n");
	test_stress_base(g_data_size, true,  true);
	printf("stress test: reopen per insert\n");
	test_stress_base(g_data_size, true, false);

	return 0;
}