I'm not sure what you mean by an "ISO-8859-1 Multibyte String", as ISO-8859-1
is only an 8-bit character set. As for converting from ISO-8859-1 to UTF-8,
this is already performed by the ldap_ucs_to_utf8s() function in libldap. If
you want the rest considered for inclusion, please submit it via ITS.
-- Howard Chu
Chief Architect, Symas Corp. Director, Highland Sun
http://www.symas.com http://highlandsun.com/hyc
Symas: Premier OpenSource Development and Support
> -----Original Message-----
> From: owner-openldap-devel@OpenLDAP.org
> [mailto:owner-openldap-devel@OpenLDAP.org]On Behalf Of Patrick Dreyer,
> SY-UCP
> The OpenLDAP client supports some kinds of string conversion but lacks
> conversions between UTF-8 <-> ISO-8859-1.
> Below you will find code taken from the internet and adapted to the
> OpenLDAP interface.
> I hope you will add this to the OpenLDAP client.
>
> Patrick Dreyer
>
>
>
> ldap_utf8.h
> ===========
>
> /*
> * ISO-8859-1 MultiByte Char / UTF-8 Conversion Routines
> */
>
> /* UTF-8 string to ISO-8859-1 MultiByte string */
> LDAP_F(int) ldap_x_utf8s_to_iso_8859_1s LDAP_P((
> char *mbstr, LDAP_CONST char *utf8str, size_t count));
>
> /* ISO-8859-1 MultiByte string to UTF-8 string */
> LDAP_F(int) ldap_x_iso_8859_1s_to_utf8s LDAP_P((
> char *utf8str, LDAP_CONST char *mbstr, size_t count));
>
>
> utf8-8-conv.c (top of file)
> ===========================
>
> // Map from the most-significant 6 bits of the first byte to the total
> // number of bytes in a UTF-8 character.
> static char UTF8_2_ISO_8859_1_len[] =
> {
> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* erroneous */
> 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6
> };
>
> static char UTF8_2_ISO_8859_1_mask[] = {0x3F, 0x7F, 0x1F, 0x0F, 0x07,
> 0x03, 0x01};
>
>
> utf8-8-conv.c (end of file)
> ===========================
>
>
> /*-----------------------------------------------------------------------------
> Convert a UTF-8 string to a ISO-8859-1 MultiByte string.
> No more than 'count' bytes will be written to the output buffer.
> Return the size of the converted string in bytes, excl null
> terminator.
> */
> int
> ldap_x_utf8s_to_iso_8859_1s( char *mbstr, const char *utf8str, size_t
> count )
> {
> int res = 0;
>
> while (*utf8str != '\0')
> {
> int len = UTF8_2_ISO_8859_1_len[(*utf8str >> 2) & 0x3F];
> unsigned long u = *utf8str & UTF8_2_ISO_8859_1_mask[len];
>
> // erroneous
> if (len == 0)
> len = 5;
>
> for (++utf8str; --len > 0 && (*utf8str != '\0'); ++utf8str)
> {
> // be sure this is not an unexpected start of a new character
> if ((*utf8str & 0xC0) != 0x80)
> break;
>
> u = (u << 6) | (*utf8str & 0x3F);
> }
>
> if (mbstr != 0 && count != 0)
> {
> // be sure there is enough space left in the destination buffer
> if (res >= count)
> return res;
>
> // add the mapped character to the destination string, or '?' (0x3F; note
> // that this is not SUB, 0x1A) if the character can't be represented in
> // ISO-8859-1
> *mbstr++ = (u <= 0xFF ? (char)u : '?');
> }
> ++res;
> }
>
> // add the terminating null character
> if (mbstr != 0 && count != 0)
> {
> // be sure there is enough space left in the destination buffer
> if (res >= count)
> return res;
> *mbstr = 0;
> }
>
> return res;
> } // ldap_x_utf8s_to_iso_8859_1s
>
>
> /*-----------------------------------------------------------------------------
> Convert a ISO-8859-1 MultiByte string to a UTF-8 string.
> No more than 'count' bytes will be written to the output buffer.
> Return the size of the converted string in bytes, excl null
> terminator.
> */
> int
> ldap_x_iso_8859_1s_to_utf8s(char *utf8str, const char *mbstr, size_t
> count)
> {
> int res = 0;
>
> // loop until we reach the end of the mb string
> for (; *mbstr != '\0'; ++mbstr)
> {
> // the character needs no mapping if the highest bit is not set
> if ((*mbstr & 0x80) == 0)
> {
> if (utf8str != 0 && count != 0)
> {
> // be sure there is enough space left in the destination buffer
> if (res >= count)
> return res;
>
> *utf8str++ = *mbstr;
> }
> ++res;
> }
>
> // otherwise mapping is necessary
> else
> {
> if (utf8str != 0 && count != 0)
> {
> // be sure there is enough space left in the destination buffer
> if (res+1 >= count)
> return res;
>
> *utf8str++ = (0xC0 | (0x03 & (*mbstr >> 6)));
> *utf8str++ = (0x80 | (0x3F & *mbstr));
> }
> res += 2;
> }
> }
>
> // add the terminating null character
> if (utf8str != 0 && count != 0)
> {
> // be sure there is enough space left in the destination buffer
> if (res >= count)
> return res;
> *utf8str = 0;
> }
>
> return res;
> } // ldap_x_iso_8859_1s_to_utf8s
>
<<attachment: winmail.dat>>