[Date Prev][Date Next]
[Chronological]
[Thread]
[Top]
Re: ordered indexing for integers
[Oops, sent this only to Howard at first.]
Howard Chu writes:
> Thanks, fixed now. Seems we should have a test script for this though.
A quick one in python so far. And it found more problems...
------------------------------------------------------------------------
#!/usr/bin/python
import os, random, re, sys
slapadd = '../RE24/servers/slapd/slapadd'
conffile = 'test.conf'
ldiffile = 'test.ldif'
dumpfile = 'test.dump'
debug = False
def testkeys():
randint = random.Random().randint
yield 0
for n in (1, 0x101, 0x10000000, 0x40000000, 0x7fffffff,
0x80000000, 0x80000001, 0x100000000, 0x100000001,
0x200000000, 0x220000000, 0x1000000000000000000):
yield n
yield -n
for i in xrange(0, 4097, 40):
v = 1L << randint(i, i + 50)
v = randint(v/2, v)
yield -v
yield v
def printall(all):
for n, key in all:
print "%s\t%d\t(%s0x%x)" % (key, n, (n < 0 and '-' or ''), abs(n))
f = file(conffile, "w")
print >>f, """include /ldap/src/openldap/db/schema/core.schema
attributetype ( 1.2.3.4.5.6.7.8.9.10 NAME 'testNumber'
EQUALITY integerMatch
ORDERING integerOrderingMatch
SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )
database bdb
directory /ldap/src/openldap/db
suffix o=test
rootdn o=test
rootpw secret
index objectClass,testNumber eq
database monitor"""
f.close()
found = []
find_index = re.compile(r'\nHEADER=END\n (\w+)\n \w+\nDATA=END\n').search
for n in testkeys():
os.system("rm -f __db.00* *.bdb log.0000000001 alock")
f = file(ldiffile, "w")
print >>f, """dn: o=test
o: test
objectClass: organization
objectClass: extensibleObject
testNumber: """ + str(n)
f.close()
if os.system("%s -f %s -l %s" % (slapadd, conffile, ldiffile)):
sys.exit(slapadd + ' failed')
if os.system("db_dump testNumber.bdb >" + dumpfile):
sys.exit('db_dump failed')
key = find_index(file(dumpfile).read()).group(1)
found.append((n, key))
found.sort()
if debug: printall(found)
prev = found.pop(0)
for next in found:
if prev[1] > next[1]:
print "\nBad ordering:"
printall((prev, next))
prev = next
------------------------------------------------------------------------
When you prepend a negative sign byte you should add 0xff (like when
sign-extending), not 0x80.
Got crashes due to a write after 'tmp', incremented size by 1.
Haven't looked at why yet, or whether that's the complete fix.
Also the utils.c code is rather larger than necessary. So I added a few
other things while I was at it:
- add with carry can be done in a one-liner, and doesn't need any shifts.
- can use the same code to prepend positive and negative sign byte
- it makes little sense to check if there is room for the sign byte
and then return wrong result if not. For now I added a failure there,
but either that error check can be removed, or there should be more
error checks.
Index: utils.c
===================================================================
RCS file: /repo/OpenLDAP/pkg/ldap/libraries/liblutil/utils.c,v
retrieving revision 1.33.2.12
diff -u -2 -r1.33.2.12 utils.c
--- utils.c 1 Dec 2007 10:17:31 -0000 1.33.2.12
+++ utils.c 1 Dec 2007 13:55:53 -0000
@@ -664,4 +664,6 @@
* Hex input can be "0x1234" or "'1234'H"
*
+ * Temporarily modifies the input string.
+ *
* Note: High bit of binary form is always the sign bit. If the number
* is supposed to be positive but has the high bit set, a zero byte
@@ -737,5 +739,5 @@
num.len = 0;
if ( pin[0] == '-' ) {
- neg = 1;
+ neg = 0xff;
len--;
pin++;
@@ -744,6 +746,6 @@
#define DECMAX 8 /* 8 digits at a time */
- if ( len > sizeof(tmpbuf)) {
- tmp = ber_memalloc( len );
+ if ( len >= sizeof(tmpbuf)) {
+ tmp = ber_memalloc( len+1 );
} else {
tmp = tmpbuf;
@@ -779,27 +781,16 @@
ptr[i] ^= 0xff;
- /* Add 1, with carry */
- i--;
- j = 1;
- for ( ; i>=0; i-- ) {
- j += ptr[i];
- ptr[i] = j & 0xff;
- j >>= 8;
- if (!j)
- break;
- }
- /* If we overflowed and there's still room,
- * set an explicit sign byte
- */
- if ( !( ptr[0] & 0x80 ) && num.beg ) {
- num.beg--;
- num.len++;
- num.buf[num.beg] = 0x80;
+ /* add 1, with carry - overflow handled below */
+ while ( i-- && ! (ptr[i] = (ptr[i] + 1) & 0xff )) ;
+ }
+ /* Prepend sign byte if wrong sign bit */
+ if (( num.buf[num.beg] ^ neg ) & 0x80 ) {
+ if ( !num.beg ) {
+ rc = -1;
+ goto decfail;
}
- } else if (( num.buf[num.beg] & 0x80 ) && num.beg ) {
- /* positive int with high bit set, prepend 0 */
num.beg--;
num.len++;
- num.buf[num.beg] = 0;
+ num.buf[num.beg] = neg;
}
if ( num.beg )
------------------------------------------------------------------------
Also the index is wrong for huge numbers. At some point the indexing
should just give up and use max/min values, but I suppose at least
cryptograpy-sized numbers should be usefully indexed. I.e. at least
two length bytes.
So here is a suggestion similar to what I wrote before, except using the
utf-8 trick to count initial bits to say how many length-bytes are
needed. That also means only one bit is needed to say 'no length
bytes', so I reduced the indexkey size to exactly index_intlen.
Index: schema_init.c
===================================================================
RCS file: /repo/OpenLDAP/pkg/ldap/servers/slapd/schema_init.c,v
retrieving revision 1.386.2.16
diff -u -2 -r1.386.2.16 schema_init.c
--- schema_init.c 1 Dec 2007 10:45:01 -0000 1.386.2.16
+++ schema_init.c 1 Dec 2007 14:34:55 -0000
@@ -2120,42 +2120,59 @@
)
{
- struct berval iv;
- int neg;
+ /* index format:
+ * one's complement (sign*length) if too large to fit in index,
+ * two's complement value (sign-extended or chopped as needed),
+ * with 1st byte of the above adjusted as follows:
+ * inverse sign in the top <number of length bytes + 1> bits,
+ * the next bit is the sign as delimiter.
+ */
+
+ ber_len_t k;
+ struct berval iv = *tmp;
+ unsigned char neg = 0, signmask = 0x80;
- iv = *tmp;
if ( lutil_str2bin( val, &iv )) {
return LDAP_INVALID_SYNTAX;
}
- neg = iv.bv_val[0] & 0x80;
+ if ( iv.bv_val[0] & 0x80 )
+ neg = 0xff;
- /* Omit leading 0 pad byte */
- if ( !iv.bv_val[0] ) {
+ /* Omit leading sign byte */
+ if ( iv.bv_val[0] == neg ) {
iv.bv_val++;
iv.bv_len--;
}
- /* If too small, sign-extend */
- if ( iv.bv_len < index_intlen ) {
- int j, k, pad;
- key->bv_val[0] = index_intlen;
- k = index_intlen - iv.bv_len + 1;
- if ( neg )
- pad = 0xff;
- else
- pad = 0;
- for ( j=1; j<k; j++)
- key->bv_val[j] = pad;
- for ( j = 0; j<iv.bv_len; j++ )
- key->bv_val[j+k] = iv.bv_val[j];
+ if ( index_intlen > iv.bv_len ) {
+ k = index_intlen - iv.bv_len;
+ memset( key->bv_val, neg, k ); /* sign-extend */
} else {
- key->bv_val[0] = iv.bv_len;
- memcpy( key->bv_val+1, iv.bv_val, index_intlen );
- }
- if ( neg ) {
- key->bv_val[0] = -key->bv_val[0];
+ k = iv.bv_len - index_intlen;
+ if ( k || ((iv.bv_val[0] ^ neg) & 0xc0) ) {
+ unsigned char lenbuf[sizeof(k) + 2];
+ unsigned char *lenp = lenbuf + sizeof(lenbuf);
+ do {
+ *--lenp = k ^ neg;
+ signmask >>= 1;
+ } while ( (k >>= 8) != 0 );
+ if ( (signmask >> 1) <= (*lenp ^ neg) ) {
+ *--lenp = neg;
+ signmask >>= 1;
+ }
+ k = (lenbuf + sizeof(lenbuf)) - lenp;
+ if ( k > 7 ) { /* overflow in length of length */
+ k = index_intlen;
+ memset( key->bv_val, ~neg, k );
+ } else {
+ if ( k > index_intlen )
+ k = index_intlen;
+ memcpy( key->bv_val, lenp, k );
+ }
+ iv.bv_len = index_intlen - k;
+ }
}
- /* convert signed to unsigned */
- key->bv_val[0] ^= 0x80;
+ memcpy( key->bv_val + k, iv.bv_val, iv.bv_len );
+ key->bv_val[0] ^= -signmask; /* invert sign */
return 0;
}
@@ -2187,6 +2204,6 @@
keys = slap_sl_malloc( sizeof( struct berval ) * (i+1), ctx );
for ( i = 0; !BER_BVISNULL( &values[i] ); i++ ) {
- keys[i].bv_len = index_intlen+1;
- keys[i].bv_val = slap_sl_malloc( index_intlen+1, ctx );
+ keys[i].bv_len = index_intlen;
+ keys[i].bv_val = slap_sl_malloc( index_intlen, ctx );
}
keys[i].bv_len = 0;
@@ -2239,6 +2256,6 @@
keys = slap_sl_malloc( sizeof( struct berval ) * 2, ctx );
- keys[0].bv_len = index_intlen + 1;
- keys[0].bv_val = slap_sl_malloc( index_intlen+1, ctx );
+ keys[0].bv_len = index_intlen;
+ keys[0].bv_val = slap_sl_malloc( index_intlen, ctx );
if ( value->bv_len > sizeof( ibuf )) {
--
Regards,
Hallvard