I'm not sure what you mean by an "ISO-8859-1 Multibyte String" as ISO-8859-1 is only an 8-bit character set. As far as converting from ISO-8859-1 to UTF8, this is already performed by the ldap_ucs_to_utf8s() function in libldap. If you want the rest considered for inclusion, please submit it via ITS. -- Howard Chu Chief Architect, Symas Corp. Director, Highland Sun http://www.symas.com http://highlandsun.com/hyc Symas: Premier OpenSource Development and Support > -----Original Message----- > From: owner-openldap-devel@OpenLDAP.org > [mailto:owner-openldap-devel@OpenLDAP.org]On Behalf Of Patrick Dreyer, > SY-UCP > The OpenLDAP client supports some kind of string conversions but lacks > supporting conversions between UTF8 <-> ISO-8859-1. > Below you find the code taken from the internet and adapted to the > OpenLDAP interface. > Hope you will add this to the OpenLDAP client. > > Patrick Dreyer > > > > ldap_utf8.h > =========== > > /* > * ISO-8859-1 MultiByte Char / UTF-8 Conversion Routines > */ > > /* UTF-8 string to ISO-8859-1 MultiByte string */ > LDAP_F(int) ldap_x_utf8s_to_iso_8859_1s LDAP_P(( > char *mbstr, LDAP_CONST char *utf8str, size_t count)); > > /* ISO-8859-1 MultiByte string to UTF-8 string */ > LDAP_F(int) ldap_x_iso_8859_1s_to_utf8s LDAP_P(( > char *utf8str, LDAP_CONST char *mbstr, size_t count)); > > > utf8-8-conv.c (top of file) > =========================== > > // Map from the most-significant 6 bits of the first byte to the total > number of bytes in a > // UTF-8 character. 
/*
 * Total number of bytes in a UTF-8 sequence, indexed by the most-significant
 * 6 bits of its first byte.  An entry of 0 marks a continuation byte
 * (10xxxxxx), which is not a valid first byte.
 */
static const unsigned char UTF8_2_ISO_8859_1_len[] =
{
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* erroneous */
	2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6
};

/*
 * Mask selecting the payload bits of the first byte, indexed by the sequence
 * length taken from UTF8_2_ISO_8859_1_len (index 0 = stray continuation byte).
 */
static const unsigned char UTF8_2_ISO_8859_1_mask[] =
	{ 0x3F, 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01 };


/*
 * Convert a UTF-8 string to an ISO-8859-1 string.
 *
 * mbstr    destination buffer; if NULL (or if count is 0) nothing is
 *          written and only the required length is computed
 * utf8str  NUL-terminated UTF-8 input; NULL is treated as an empty string
 * count    size of the destination buffer in bytes
 *
 * No more than 'count' bytes are written.  As with wcstombs(), the output
 * is NUL-terminated only if the terminator still fits into the buffer.
 *
 * Every input sequence produces exactly one output byte.  Characters above
 * U+00FF and malformed input (stray continuation bytes, truncated sequences,
 * overlong encodings) are replaced by '?' (0x3F).
 *
 * Returns the number of bytes of the converted string, excluding the NUL
 * terminator.
 */
int
ldap_x_utf8s_to_iso_8859_1s( char *mbstr, const char *utf8str, size_t count )
{
	const unsigned char *in = (const unsigned char *)utf8str;
	const int store = (mbstr != NULL && count != 0);
	size_t res = 0;

	if (in == NULL) {
		if (store)
			*mbstr = '\0';
		return 0;
	}

	while (*in != '\0') {
		const unsigned char lead = *in++;
		const int len = UTF8_2_ISO_8859_1_len[lead >> 2];
		unsigned long u = lead & UTF8_2_ISO_8859_1_mask[len];
		/*
		 * Continuation bytes to swallow.  After a stray continuation
		 * byte (len == 0) skip up to four more of them so that a broken
		 * sequence collapses into a single replacement character.
		 */
		const int expect = (len == 0) ? 4 : len - 1;
		int got = 0;
		int valid;

		/* stop early on anything that starts a new character (or NUL) */
		while (got < expect && (*in & 0xC0) == 0x80) {
			u = (u << 6) | (unsigned long)(*in & 0x3F);
			++in;
			++got;
		}

		/*
		 * Only plain ASCII and complete, non-overlong two byte
		 * sequences can denote a character in the range 0x00..0xFF.
		 * Rejecting overlong forms also keeps 0xC0 0x80 from smuggling
		 * a NUL into the output.
		 */
		valid = (len == 1) ||
			(len == 2 && got == 1 && u >= 0x80 && u <= 0xFF);

		if (store) {
			/* be sure there is enough space left in the destination */
			if (res >= count)
				return (int)res;
			*mbstr++ = valid ? (char)u : '?';
		}
		++res;
	}

	/* add the terminating NUL if it still fits */
	if (store && res < count)
		*mbstr = '\0';

	return (int)res;
}


/*
 * Convert an ISO-8859-1 string to a UTF-8 string.
 *
 * utf8str  destination buffer; if NULL (or if count is 0) nothing is
 *          written and only the required length is computed
 * mbstr    NUL-terminated ISO-8859-1 input; NULL is treated as an empty
 *          string
 * count    size of the destination buffer in bytes
 *
 * No more than 'count' bytes are written and a two byte character is never
 * split.  The output is NUL-terminated whenever a spare byte is left in the
 * buffer, including when conversion stops because a two byte character does
 * not fit.
 *
 * Returns the number of bytes of the converted string, excluding the NUL
 * terminator.
 */
int
ldap_x_iso_8859_1s_to_utf8s( char *utf8str, const char *mbstr, size_t count )
{
	const unsigned char *in = (const unsigned char *)mbstr;
	const int store = (utf8str != NULL && count != 0);
	size_t res = 0;

	if (in == NULL) {
		if (store)
			*utf8str = '\0';
		return 0;
	}

	for (; *in != '\0'; ++in) {
		if (*in < 0x80) {
			/* 7-bit characters are identical in both encodings */
			if (store) {
				if (res >= count)
					return (int)res;
				*utf8str++ = (char)*in;
			}
			++res;
		} else {
			/* 0x80..0xFF become 110000xx 10xxxxxx */
			if (store) {
				if (count - res < 2) {
					/* terminate if one byte is left over */
					if (res < count)
						*utf8str = '\0';
					return (int)res;
				}
				*utf8str++ = (char)(0xC0 | (*in >> 6));
				*utf8str++ = (char)(0x80 | (*in & 0x3F));
			}
			res += 2;
		}
	}

	/* add the terminating NUL if it still fits */
	if (store && res < count)
		*utf8str = '\0';

	return (int)res;
}
<<attachment: winmail.dat>>