[Date Prev][Date Next]
[Chronological]
[Thread]
[Top]
Re: LMDB and text encoding
- To: openldap-devel@openldap.org
- Subject: Re: LMDB and text encoding
- From: Timur Kristóf <timur.kristof@gmail.com>
- Date: Sun, 1 Feb 2015 22:59:58 +0100
- Dkim-signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=mime-version:in-reply-to:references:date:message-id:subject:from:to :content-type; bh=QljG25wo/OffTcMCY7Oq5r4DamZI86E5ohkuA2nevg4=; b=T+8ALMolm3U2eDTDlFgHwMuXC8PK2G/0k3w2Ge+ahP1yolBXDdlSl2jJY05vPqWc94 JP2RPV/e6WY67iJo4D2sjiU5EpVOV4wTDw7YHViVxfPYBeRDkE8QQXs3rjobPZvSCpBJ 20EiwVTHZW6EZLpPetobFbJK/fH71ZeMJ+yA8WzMAPjgg1YP4fd/RW6zYlvlofzgqasP h3RP0ENrbE7Qqr2RfbK5Nz/otl1ljMtIYq1CKvoYP8PwqdR3k+84DhJjYXyqbEj6jVW8 9dSlETAtHrHacAcGoCk6Fa6iOrpUU3ii2Zj0bpZjwKW+iYmmO4eGVFpq6JOkUO13jhgK Yj8A==
- In-reply-to: <CAFF-SiV0Q1xxxQdGKCCFsmR+1Wt1jtSpSNoaGo_2UU_0jzTXLg@mail.gmail.com>
- References: <CAFF-SiUrJKGvG_z5vKgn13KX6oSbWQmLDj0VqGXMsuzJT5JBEg@mail.gmail.com> <CAFF-SiWh0Gv4Lo6SYwNcuZUCC2PcUDGZ24V0fP7hXnrx_e9PTw@mail.gmail.com> <CAFF-SiV0Q1xxxQdGKCCFsmR+1Wt1jtSpSNoaGo_2UU_0jzTXLg@mail.gmail.com>
Hi,
I forgot to add an ENOMEM check. I added it now. I think this patch is
ready for Howard and Hallvard to review. :)
Timur
On Thu, Jan 29, 2015 at 2:42 PM, Timur Kristóf <timur.kristof@gmail.com> wrote:
> Here is a fixed version of the patch.
>
> On Thu, Jan 29, 2015 at 10:29 AM, Timur Kristóf <timur.kristof@gmail.com> wrote:
>>> mdb_dbi_open treats its name parameter as a C string. This means UTF-8 on
>>> unixes and ANSI on Windows, which is problematic for cross-platform
>>> applications. [...]
>>
>> Here is a patch that addresses this concern.
>> If you like it, I'll move on to the other issue.
From 88f60779c5e259cc77d9da36d7a0f74a86ccf0e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?= <venemo@msn.com>
Date: Thu, 29 Jan 2015 10:15:47 +0100
Subject: [PATCH] added an MDB_val variant of mdb_dbi_open
---
libraries/liblmdb/lmdb.h | 7 +++++++
libraries/liblmdb/mdb.c | 38 +++++++++++++++++++++++++-------------
libraries/liblmdb/mdb_stat.c | 4 ++--
3 files changed, 34 insertions(+), 15 deletions(-)
diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h
index ebfbc5d..535ac8b 100644
--- a/libraries/liblmdb/lmdb.h
+++ b/libraries/liblmdb/lmdb.h
@@ -1099,6 +1099,13 @@ int mdb_txn_renew(MDB_txn *txn);
*/
int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi);
+ /** @brief Open a database in the environment.
+ *
+ * Same as mdb_dbi_open, but does not treat the name as a zero-terminated C string,
+ * thus enabling you to use the encoding of your own choice for database names.
+ */
+int mdb_dbi_open2(MDB_txn *txn, MDB_val *name, unsigned int flags, MDB_dbi *dbi);
+
/** @brief Retrieve statistics for a database.
*
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index c4dc269..e8f5daf 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -9144,15 +9144,22 @@ mdb_default_cmp(MDB_txn *txn, MDB_dbi dbi)
: ((f & MDB_REVERSEDUP) ? mdb_cmp_memnr : mdb_cmp_memn));
}
-int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi)
+int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *dbi) {
+ MDB_val name_as_mdbval;
+ name_as_mdbval.mv_data = (void *)name;
+ name_as_mdbval.mv_size = name ? strlen(name) : 0;
+
+ return mdb_dbi_open2(txn, &name_as_mdbval, flags, dbi);
+}
+
+int mdb_dbi_open2(MDB_txn *txn, MDB_val *name, unsigned int flags, MDB_dbi *dbi)
{
- MDB_val key, data;
+ MDB_val data;
MDB_dbi i;
MDB_cursor mc;
MDB_db dummy;
int rc, dbflag, exact;
unsigned int unused = 0, seq;
- size_t len;
if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
mdb_default_cmp(txn, FREE_DBI);
@@ -9164,7 +9171,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
return MDB_BAD_TXN;
/* main DB? */
- if (!name) {
+ if (!name || !name->mv_data) {
*dbi = MAIN_DBI;
if (flags & PERSISTENT_FLAGS) {
uint16_t f2 = flags & PERSISTENT_FLAGS;
@@ -9183,15 +9190,14 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
}
/* Is the DB already open? */
- len = strlen(name);
for (i=2; i<txn->mt_numdbs; i++) {
if (!txn->mt_dbxs[i].md_name.mv_size) {
/* Remember this free slot */
if (!unused) unused = i;
continue;
}
- if (len == txn->mt_dbxs[i].md_name.mv_size &&
- !strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) {
+ if (name->mv_size == txn->mt_dbxs[i].md_name.mv_size &&
+ !memcmp(name->mv_data, txn->mt_dbxs[i].md_name.mv_data, name->mv_size)) {
*dbi = i;
return MDB_SUCCESS;
}
@@ -9208,10 +9214,8 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
/* Find the DB info */
dbflag = DB_NEW|DB_VALID;
exact = 0;
- key.mv_size = len;
- key.mv_data = (void *)name;
mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
- rc = mdb_cursor_set(&mc, &key, &data, MDB_SET, &exact);
+ rc = mdb_cursor_set(&mc, name, &data, MDB_SET, &exact);
if (rc == MDB_SUCCESS) {
/* make sure this is actually a DB */
MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
@@ -9224,15 +9228,23 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
memset(&dummy, 0, sizeof(dummy));
dummy.md_root = P_INVALID;
dummy.md_flags = flags & PERSISTENT_FLAGS;
- rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA);
+ rc = mdb_cursor_put(&mc, name, &data, F_SUBDATA);
dbflag |= DB_DIRTY;
}
/* OK, got info, add to table */
if (rc == MDB_SUCCESS) {
unsigned int slot = unused ? unused : txn->mt_numdbs;
- txn->mt_dbxs[slot].md_name.mv_data = strdup(name);
- txn->mt_dbxs[slot].md_name.mv_size = len;
+ void *copied_name = malloc(name->mv_size);
+
+ if (!copied_name) {
+ /* Could not allocate enough memory for copying the dbi name */
+ return ENOMEM;
+ }
+
+ memcpy(copied_name, name->mv_data, name->mv_size);
+ txn->mt_dbxs[slot].md_name.mv_data = copied_name;
+ txn->mt_dbxs[slot].md_name.mv_size = name->mv_size;
txn->mt_dbxs[slot].md_rel = NULL;
txn->mt_dbflags[slot] = dbflag;
/* txn-> and env-> are the same in read txns, use
diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c
index 1e92292..0f9b1fc 100644
--- a/libraries/liblmdb/mdb_stat.c
+++ b/libraries/liblmdb/mdb_stat.c
@@ -229,8 +229,8 @@ int main(int argc, char *argv[])
while ((rc = mdb_cursor_get(cursor, &key, NULL, MDB_NEXT_NODUP)) == 0) {
char *str;
MDB_dbi db2;
- if (memchr(key.mv_data, '\0', key.mv_size))
- continue;
+ /* We'd need an mdb_is_named_database function here to tell us if it really is a named database. */
+
str = malloc(key.mv_size+1);
memcpy(str, key.mv_data, key.mv_size);
str[key.mv_size] = '\0';
--
2.1.0