[Date Prev][Date Next]
[Chronological]
[Thread]
[Top]
is_utf8 function for ldapsearch w/ UTF-8 strings
Hey,
I would like to contribute the function below to the public
domain. It could be used by ldapsearch and friends as an alternative
to ldif_is_not_printable to determine whether an attribute value is a
string or should be represented in base64 (currently, non-ASCII strings
are simply base64 encoded).
Mike
/*
 * is_utf8 - test whether a byte buffer is well-formed UTF-8.
 *
 * src: first byte of the buffer; it does not need to be NUL-terminated
 *      (embedded NUL bytes are treated as the ASCII character U+0000).
 * n:   number of bytes to examine.  When n <= 0 nothing is examined.
 *
 * Returns 1 when the n bytes consist solely of complete, well-formed
 * UTF-8 sequences as defined by RFC 3629 (1 to 4 bytes per character,
 * shortest form only, no UTF-16 surrogates, nothing above U+10FFFF),
 * and 0 otherwise.  An empty buffer is reported as UTF-8.
 */
int
is_utf8(const unsigned char *src, int n)
{
	const unsigned char *slim;

	/* Avoid forming src + n for a negative count or a NULL/empty buffer. */
	if (n <= 0) {
		return 1;
	}
	slim = src + n;

	while (src < slim) {
		unsigned long wc;   /* code point being decoded */
		unsigned long min;  /* smallest code point allowed for this length */
		int trail;          /* continuation bytes that must follow */

		if (*src < 0x80) {
			/* plain ASCII */
			src++;
			continue;
		} else if ((*src & 0xE0) == 0xC0) {
			wc = (unsigned long)(*src & 0x1F);
			min = 0x80UL;
			trail = 1;
		} else if ((*src & 0xF0) == 0xE0) {
			wc = (unsigned long)(*src & 0x0F);
			min = 0x800UL;
			trail = 2;
		} else if ((*src & 0xF8) == 0xF0) {
			wc = (unsigned long)(*src & 0x07);
			min = 0x10000UL;
			trail = 3;
		} else {
			/* stray continuation byte or a lead byte RFC 3629 forbids */
			return 0;
		}
		src++;

		/* The whole sequence must fit inside the buffer. */
		if ((slim - src) < trail) {
			return 0;
		}

		while (trail-- > 0) {
			if ((*src & 0xC0) != 0x80) {
				return 0;
			}
			wc = (wc << 6) | (unsigned long)(*src & 0x3F);
			src++;
		}

		/* Overlong (non-shortest) encodings are not valid UTF-8. */
		if (wc < min) {
			return 0;
		}
		/* Unicode ends at U+10FFFF. */
		if (wc > 0x10FFFFUL) {
			return 0;
		}
		/* UTF-16 surrogate halves must never appear in UTF-8. */
		if (wc >= 0xD800UL && wc <= 0xDFFFUL) {
			return 0;
		}
	}

	/* it's UTF-8 */
	return 1;
}