[Date Prev][Date Next]
[Chronological]
[Thread]
[Top]
Re: LMDB and text encoding
Here is a fixed version of the patch.
On Thu, Jan 29, 2015 at 10:29 AM, Timur Kristóf <timur.kristof@gmail.com> wrote:
>> mdb_dbi_open treats its name parameter as a C string. This means UTF-8 on
>> unixes and ANSI on Windows, which is problematic for cross-platform
>> applications. [...]
>
> Here is a patch that addresses this concern.
> If you like it, I'll move on to the other issue.
From 2e3a1ccaa7c36fb3a3d5db040067b21ed4d75962 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?= <venemo@msn.com>
Date: Thu, 29 Jan 2015 10:15:47 +0100
Subject: [PATCH] added an MDB_val variant of mdb_dbi_open
---
libraries/liblmdb/lmdb.h | 7 +++++++
libraries/liblmdb/mdb.c | 31 ++++++++++++++++++-------------
libraries/liblmdb/mdb_stat.c | 4 ++--
3 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h
index ebfbc5d..535ac8b 100644
--- a/libraries/liblmdb/lmdb.h
+++ b/libraries/liblmdb/lmdb.h
@@ -1099,6 +1099,13 @@ int mdb_txn_renew(MDB_txn *txn);
*/
int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
+ /** @brief Open a database in the environment.
+ *
+ * Same as mdb_dbi_open, but does not treat the name as a zero-terminated C string,
+ * thus enabling you to use the encoding of your own choice for database names.
+ */
+int mdb_dbi_open2(MDB_txn *txn, MDB_val *name, unsigned int flags, MDB_dbi *dbi);
+
/** @brief Retrieve statistics for a database.
*
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index c4dc269..97e3c06 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -9144,15 +9144,22 @@ mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi)
: ((f & MDB_REVERSEDUP) ? mdb_cmp_memnr : mdb_cmp_memn));
}
-int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi)
+int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) { /* C-string entry point: wraps name in an MDB_val and forwards to mdb_dbi_open2() */
+ MDB_val name_as_mdbval; /* pointer + length view of name; the string itself is not copied here */
+ name_as_mdbval.mv_data = (void *)name; /* cast only drops const so the pointer fits MDB_val's void * field */
+ name_as_mdbval.mv_size = name ? strlen(name) : 0; /* length excludes the NUL terminator; a NULL name gives size 0 with mv_data == NULL */
+
+ return mdb_dbi_open2(txn, &name_as_mdbval, flags, dbi); /* mdb_dbi_open2() selects the main DB when mv_data is NULL, so a NULL name still means "main DB" */
+}
+
+int mdb_dbi_open2(MDB_txn *txn, MDB_val *name, unsigned int flags, MDB_dbi *dbi)
{
- MDB_val key, data;
+ MDB_val data;
MDB_dbi i;
MDB_cursor mc;
MDB_db dummy;
int rc, dbflag, exact;
unsigned int unused = 0, seq;
- size_t len;
if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
mdb_default_cmp(txn, FREE_DBI);
@@ -9164,7 +9171,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
return MDB_BAD_TXN;
/* main DB? */
- if (!name) {
+ if (!name || !name->mv_data) {
*dbi = MAIN_DBI;
if (flags & PERSISTENT_FLAGS) {
uint16_t f2 = flags & PERSISTENT_FLAGS;
@@ -9183,15 +9190,14 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
}
/* Is the DB already open? */
- len = strlen(name);
for (i=2; i<txn->mt_numdbs; i++) {
if (!txn->mt_dbxs[i].md_name.mv_size) {
/* Remember this free slot */
if (!unused) unused = i;
continue;
}
- if (len == txn->mt_dbxs[i].md_name.mv_size &&
- !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
+ if (name->mv_size == txn->mt_dbxs[i].md_name.mv_size &&
+ !memcmp(name->mv_data, txn->mt_dbxs[i].md_name.mv_data, name->mv_size)) {
*dbi = i;
return MDB_SUCCESS;
}
@@ -9208,10 +9214,8 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
/* Find the DB info */
dbflag = DB_NEW|DB_VALID;
exact = 0;
- key.mv_size = len;
- key.mv_data = (void *)name;
mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
- rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact);
+ rc = mdb_cursor_set(&mc, name, &data, MDB_SET, &exact);
if (rc == MDB_SUCCESS) {
/* make sure this is actually a DB */
MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
@@ -9224,15 +9228,16 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
memset(&dummy, 0, sizeof(dummy));
dummy.md_root = P_INVALID;
dummy.md_flags = flags & PERSISTENT_FLAGS;
- rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
+ rc = mdb_cursor_put(&mc, name, &data, F_SUBDATA);
dbflag |= DB_DIRTY;
}
/* OK, got info, add to table */
if (rc == MDB_SUCCESS) {
unsigned int slot = unused ? unused : txn->mt_numdbs;
- txn->mt_dbxs[slot].md_name.mv_data = strdup(name);
- txn->mt_dbxs[slot].md_name.mv_size = len;
+ txn->mt_dbxs[slot].md_name.mv_data = malloc(name->mv_size);
+ memcpy(txn->mt_dbxs[slot].md_name.mv_data, name->mv_data, name->mv_size);
+ txn->mt_dbxs[slot].md_name.mv_size = name->mv_size;
txn->mt_dbxs[slot].md_rel = NULL;
txn->mt_dbflags[slot] = dbflag;
/* txn-> and env-> are the same in read txns, use
diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c
index 1e92292..01f5ccb 100644
--- a/libraries/liblmdb/mdb_stat.c
+++ b/libraries/liblmdb/mdb_stat.c
@@ -229,8 +229,8 @@ int main(int argc, char *argv[])
while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) {
char *str;
MDB_dbi db2;
- if (memchr(key.mv_data, '\0', key.mv_size))
- continue;
+ /* We'd need an mdb_is_named_database function here to tell us if it really is a named database. */
+
str = malloc(key.mv_size+1);
memcpy(str, key.mv_data, key.mv_size);
str[key.mv_size] = '\0';
--
2.1.0