[Date Prev][Date Next]
[Chronological]
[Thread]
[Top]
(ITS#4655) is_utf8 function for ldapsearch w/ UTF-8 strings
Full_Name: Michael B Allen
Version:
OS:
URL: ftp://ftp.openldap.org/incoming/
Submission from: (NULL) (69.142.196.170)
I would like to contribute the function below to the public domain. It could
be used by ldapsearch and friends as an alternative to ldif_is_not_printable to
determine whether an attribute value is a string or should be represented in
base64 (currently, non-ASCII strings are simply base64 encoded).
Sorry for not submitting a patch. I'm set up for stock packages at the moment.
Mike
/*
 * is_utf8 - report whether a byte buffer is well-formed UTF-8.
 *
 * src: first byte of the buffer to inspect (not required to be
 *      NUL-terminated; a 0x00 byte is treated as ordinary ASCII).
 * n:   number of bytes to inspect. A value <= 0 is treated as an empty
 *      buffer, and src is not touched in that case.
 *
 * Returns 1 if every byte in [src, src + n) belongs to a well-formed
 * UTF-8 sequence, 0 otherwise. An empty buffer is considered valid.
 *
 * A sequence is rejected when it:
 *   - starts with a continuation byte (0x80..0xBF) or an invalid lead
 *     byte (0xF8..0xFF),
 *   - is truncated by the end of the buffer,
 *   - contains a trailing byte that is not of the form 10xxxxxx,
 *   - is "overlong" (encodes a value that fits in a shorter form),
 *   - encodes a UTF-16 surrogate (U+D800..U+DFFF), or
 *   - encodes a value above U+10FFFF.
 */
int
is_utf8(const unsigned char *src, int n)
{
	const unsigned char *slim;

	/* Avoid forming src + n for a negative (or NULL/0) request. */
	if (n <= 0) {
		return 1;
	}

	slim = src + n;
	while (src < slim) {
		unsigned char lead = *src;
		unsigned long wc;	/* decoded code point; needs >= 21 bits */
		unsigned long min;	/* smallest value legal for this length */
		int trail;		/* number of continuation bytes expected */

		if (lead < 0x80) {
			/* single-byte ASCII */
			src++;
			continue;
		}

		if ((lead & 0xE0) == 0xC0) {
			trail = 1;
			wc = lead & 0x1Fu;
			min = 0x80;
		} else if ((lead & 0xF0) == 0xE0) {
			trail = 2;
			wc = lead & 0x0Fu;
			min = 0x800;
		} else if ((lead & 0xF8) == 0xF0) {
			trail = 3;
			wc = lead & 0x07u;
			min = 0x10000;
		} else {
			/* stray continuation byte or invalid lead byte */
			return 0;
		}

		/* the whole sequence must fit in the remaining input */
		if ((slim - src) < trail + 1) {
			return 0;
		}
		src++;

		while (trail-- > 0) {
			if ((*src & 0xC0) != 0x80) {
				return 0;
			}
			wc = (wc << 6) | (*src & 0x3Fu);
			src++;
		}

		/* overlong encoding */
		if (wc < min) {
			return 0;
		}
		/* UTF-16 surrogates are not valid scalar values */
		if (wc >= 0xD800 && wc <= 0xDFFF) {
			return 0;
		}
		/* beyond the Unicode code space */
		if (wc > 0x10FFFF) {
			return 0;
		}
	}

	/* it's UTF-8 */
	return 1;
}