[Date Prev][Date Next]
[Chronological]
[Thread]
[Top]
String conversions UTF8 <-> ISO-8859-1
- To: <openldap-devel@OpenLDAP.org>
- Subject: String conversions UTF8 <-> ISO-8859-1
- From: "Patrick Dreyer, SY-UCP" <Patrick.Dreyer@swisscom-ucp.com>
- Date: Mon, 28 Apr 2003 10:14:42 +0200
- Content-class: urn:content-classes:message
- Thread-index: AcMNXjiYpEtXE0k3RpKg7z1Dt2a2lw==
- Thread-topic: String conversions UTF8 <-> ISO-8859-1
The OpenLDAP client supports some kinds of string conversion but lacks
support for conversions between UTF-8 <-> ISO-8859-1.
Below you will find code taken from the internet and adapted to the
OpenLDAP interface.
Hope you will add this to the OpenLDAP client.
Patrick Dreyer
ldap_utf8.h
===========
/*
* ISO-8859-1 MultiByte Char / UTF-8 Conversion Routines
*/
/*
 * UTF-8 string to ISO-8859-1 MultiByte string.
 *
 * Converts the null-terminated UTF-8 string 'utf8str' into 'mbstr',
 * writing no more than 'count' bytes.  When 'mbstr' is null or 'count'
 * is 0 nothing is written and only the length is computed.
 * Returns the size of the converted string in bytes, excluding the
 * null terminator.
 */
LDAP_F(int) ldap_x_utf8s_to_iso_8859_1s LDAP_P((
char *mbstr, LDAP_CONST char *utf8str, size_t count));
/*
 * ISO-8859-1 MultiByte string to UTF-8 string.
 *
 * Converts the null-terminated ISO-8859-1 string 'mbstr' into 'utf8str',
 * writing no more than 'count' bytes.  Bytes with the high bit set
 * expand to two UTF-8 bytes.  When 'utf8str' is null or 'count' is 0
 * nothing is written and only the length is computed.
 * Returns the size of the converted string in bytes, excluding the
 * null terminator.
 */
LDAP_F(int) ldap_x_iso_8859_1s_to_utf8s LDAP_P((
char *utf8str, LDAP_CONST char *mbstr, size_t count));
utf8-8-conv.c (top of file)
===========================
/*
 * Map from the most-significant 6 bits of the first byte of a UTF-8
 * sequence (i.e. byte >> 2) to the total number of bytes in that
 * sequence.  A value of 0 marks a byte that cannot start a sequence
 * (a continuation byte, 0x80..0xBF).
 */
static const char UTF8_2_ISO_8859_1_len[] =
{
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* erroneous */
    2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6
};

/*
 * Mask selecting the payload bits of the first byte, indexed by the
 * sequence length taken from UTF8_2_ISO_8859_1_len.  Index 0 (the
 * erroneous case) keeps the low 6 bits, like a continuation byte.
 */
static const char UTF8_2_ISO_8859_1_mask[] =
{
    0x3F, 0x7F, 0x1F, 0x0F, 0x07, 0x03, 0x01
};
utf8-8-conv.c (end of file)
===========================
/*-----------------------------------------------------------------------------
  Convert a UTF-8 string to an ISO-8859-1 MultiByte string.

  mbstr    destination buffer; pass 0 to only compute the output length
  utf8str  null-terminated UTF-8 input
  count    size of the destination buffer in bytes; 0 also selects the
           length-only mode

  Every UTF-8 sequence in the input produces exactly one output byte.
  Sequences that are malformed (stray continuation byte, truncated
  sequence, overlong encoding) or that encode a character outside
  ISO-8859-1 produce '?'.

  No more than 'count' bytes will be written to the output buffer.  If the
  buffer fills up, conversion stops and the number of bytes written so far
  is returned; the null terminator is only stored when there is room for it.

  Return the size of the converted string in bytes, excl null terminator.
*/
int
ldap_x_utf8s_to_iso_8859_1s( char *mbstr, const char *utf8str, size_t count )
{
    // read the input as unsigned bytes so shifts and masks never see a
    // negative value, whatever the signedness of plain char
    const unsigned char *src = (const unsigned char *)utf8str;
    const int store = (mbstr != 0 && count != 0);
    int res = 0;

    while (*src != '\0')
    {
        // total length announced by the lead byte, 0 for a stray
        // continuation byte
        const int len = UTF8_2_ISO_8859_1_len[*src >> 2];
        unsigned long u = *src & (unsigned long)UTF8_2_ISO_8859_1_mask[len];
        // number of continuation bytes still to consume; after a stray
        // continuation byte skip up to 4 more to resynchronise
        int pending = (len == 0) ? 4 : len - 1;
        int well_formed;

        for (++src; pending > 0 && *src != '\0'; ++src, --pending)
        {
            // be sure this is not an unexpected start of a new character
            if ((*src & 0xC0) != 0x80)
                break;
            u = (u << 6) | (*src & 0x3F);
        }

        // ISO-8859-1 only covers U+0000..U+00FF: those are the one-byte
        // sequences and the two-byte sequences encoding 0x80..0xFF.
        // Anything shorter than announced, overlong (e.g. C0 80, which
        // would smuggle a NUL into the output) or out of range is rejected.
        well_formed = (len != 0 && pending == 0) &&
                      (len == 1 || (len == 2 && u >= 0x80 && u <= 0xFF));

        if (store)
        {
            // be sure there is enough space left in the destination buffer
            if ((size_t)res >= count)
                return res;
            // add the mapped character to the destination string, or '?'
            // if it can't be represented in ISO-8859-1
            *mbstr++ = (well_formed ? (char)u : '?');
        }
        ++res;
    }

    // add the terminating null character if there is space left for it
    if (store && (size_t)res < count)
        *mbstr = '\0';

    return res;
} // ldap_x_utf8s_to_iso_8859_1s
/*-----------------------------------------------------------------------------
  Convert an ISO-8859-1 MultiByte string to a UTF-8 string.

  utf8str  destination buffer; pass 0 to only compute the output length
  mbstr    null-terminated ISO-8859-1 input
  count    size of the destination buffer in bytes; 0 also selects the
           length-only mode

  Bytes below 0x80 are copied unchanged, all other bytes expand to a
  two-byte UTF-8 sequence.  No more than 'count' bytes will be written to
  the output buffer; conversion stops before a character that does not fit
  completely, and the null terminator is only stored when there is room
  left for it.

  Return the size of the converted string in bytes, excl null terminator.
*/
int
ldap_x_iso_8859_1s_to_utf8s(char *utf8str, const char *mbstr, size_t count)
{
    // without a destination buffer the routine only measures
    const int measure_only = (utf8str == 0 || count == 0);
    const char *in = mbstr;
    char *out = utf8str;
    int used = 0;

    while (*in != '\0')
    {
        const char ch = *in++;

        if ((ch & 0x80) != 0)
        {
            // high bit set: emit lead byte 110000xx and trail byte 10xxxxxx
            if (!measure_only)
            {
                // both bytes must fit, otherwise stop here
                if (used + 1 >= count)
                    return used;
                out[0] = (0xC0 | (0x03 & (ch >> 6)));
                out[1] = (0x80 | (0x3F & ch));
                out += 2;
            }
            used += 2;
            continue;
        }

        // plain 7-bit character, copied as is
        if (!measure_only)
        {
            if (used >= count)
                return used;
            *out++ = ch;
        }
        used += 1;
    }

    // terminate the result if the buffer still has a byte to spare
    if (!measure_only && !(used >= count))
        *out = 0;

    return used;
} // ldap_x_iso_8859_1s_to_utf8s