[Date Prev][Date Next] [Chronological] [Thread] [Top]

Re: ordered indexing for integers

To: OpenLDAP-devel@openldap.org
Subject: Re: ordered indexing for integers
From: Hallvard B Furuseth <h.b.furuseth@usit.uio.no>
Date: Sat, 1 Dec 2007 15:37:23 +0100
In-reply-to: <47513544.4000601@symas.com>
References: <4742F5EE.5070100@symas.com> <hbf.20071130al9x@bombur.uio.no> <hbf.20071130be5u@bombur.uio.no> <47504610.9050607@symas.com> <475080B3.8020505@symas.com> <hbf.20071201n2xi@bombur.uio.no> <47513544.4000601@symas.com>

[Oops, sent this only to Howard at first.]

Howard Chu writes:
> Thanks, fixed now. Seems we should have a test script for this though.

A quick one in python so far.  And it found more problems...
------------------------------------------------------------------------
#!/usr/bin/python
import os, random, re, sys

slapadd  = '../RE24/servers/slapd/slapadd'
conffile = 'test.conf'
ldiffile = 'test.ldif'
dumpfile = 'test.dump'
debug    = False

def testkeys():
    randint = random.Random().randint
    yield 0
    for n in (1, 0x101, 0x10000000, 0x40000000, 0x7fffffff,
              0x80000000, 0x80000001, 0x100000000, 0x100000001,
              0x200000000, 0x220000000, 0x1000000000000000000):
        yield n
        yield -n
    for i in xrange(0, 4097, 40):
        v = 1L << randint(i, i + 50)
        v = randint(v/2, v)
        yield -v
        yield v

def printall(all):
    for n, key in all:
        print "%s\t%d\t(%s0x%x)" % (key, n, (n < 0 and '-' or ''), abs(n))

f = file(conffile, "w")
print >>f, """include	/ldap/src/openldap/db/schema/core.schema
attributetype ( 1.2.3.4.5.6.7.8.9.10 NAME 'testNumber'
	EQUALITY integerMatch
	ORDERING integerOrderingMatch
	SYNTAX 1.3.6.1.4.1.1466.115.121.1.27 )

database	bdb
directory	/ldap/src/openldap/db
suffix		o=test
rootdn		o=test
rootpw		secret
index		objectClass,testNumber	eq

database	monitor"""
f.close()

found = []
find_index = re.compile(r'\nHEADER=END\n (\w+)\n \w+\nDATA=END\n').search
for n in testkeys():
    os.system("rm -f __db.00* *.bdb log.0000000001 alock")
    f = file(ldiffile, "w")
    print >>f, """dn: o=test
o: test
objectClass: organization
objectClass: extensibleObject
testNumber: """ + str(n)
    f.close()
    if os.system("%s -f %s -l %s" % (slapadd, conffile, ldiffile)):
        sys.exit(slapadd + ' failed')
    if os.system("db_dump testNumber.bdb >" + dumpfile):
        sys.exit('db_dump failed')
    key = find_index(file(dumpfile).read()).group(1)
    found.append((n, key))
found.sort()
if debug: printall(found)
prev = found.pop(0)
for next in found:
    if prev[1] > next[1]:
        print "\nBad ordering:"
        printall((prev, next))
    prev = next
------------------------------------------------------------------------

When you prepend a negative sign byte you should add 0xff (like when
sign-extending), not 0x80.

Got crashes due to a write after 'tmp', incremented size by 1.
Haven't looked at why yet, or whether that's the complete fix.
Also the utils.c code is rather larger than necessary.  So I added a few
other things while I was at it:

- add with carry can be done in a one-liner, and doesn't need any shifts.
- can use the same code to prepend positive and negative sign byte
- it makes little sense to check if there is room for the sign byte
  and then return wrong result if not.  For now I added a failure there,
  but either that error check can be removed, or there should be more
  error checks.

Index: utils.c
===================================================================
RCS file: /repo/OpenLDAP/pkg/ldap/libraries/liblutil/utils.c,v
retrieving revision 1.33.2.12
diff -u -2 -r1.33.2.12 utils.c
--- utils.c	1 Dec 2007 10:17:31 -0000	1.33.2.12
+++ utils.c	1 Dec 2007 13:55:53 -0000
@@ -664,4 +664,6 @@
  * Hex input can be "0x1234" or "'1234'H"
  *
+ * Temporarily modifies the input string.
+ *
  * Note: High bit of binary form is always the sign bit. If the number
  * is supposed to be positive but has the high bit set, a zero byte
@@ -737,5 +739,5 @@
 		num.len = 0;
 		if ( pin[0] == '-' ) {
-			neg = 1;
+			neg = 0xff;
 			len--;
 			pin++;
@@ -744,6 +746,6 @@
 #define	DECMAX	8	/* 8 digits at a time */
 
-		if ( len > sizeof(tmpbuf)) {
-			tmp = ber_memalloc( len );
+		if ( len >= sizeof(tmpbuf)) {
+			tmp = ber_memalloc( len+1 );
 		} else {
 			tmp = tmpbuf;
@@ -779,27 +781,16 @@
 				ptr[i] ^= 0xff;
 
-			/* Add 1, with carry */
-			i--;
-			j = 1;
-			for ( ; i>=0; i-- ) {
-				j += ptr[i];
-				ptr[i] = j & 0xff;
-				j >>= 8;
-				if (!j)
-					break;
-			}
-			/* If we overflowed and there's still room,
-			 * set an explicit sign byte
-			 */
-			if ( !(  ptr[0] & 0x80 ) && num.beg ) {
-				num.beg--;
-				num.len++;
-				num.buf[num.beg] = 0x80;
+			/* add 1, with carry - overflow handled below */
+			while ( i-- && ! (ptr[i] = (ptr[i] + 1) & 0xff )) ;
+		}
+		/* Prepend sign byte if wrong sign bit */
+		if (( num.buf[num.beg] ^ neg ) & 0x80 ) {
+			if ( !num.beg ) {
+				rc = -1;
+				goto decfail;
 			}
-		} else if (( num.buf[num.beg] & 0x80 ) && num.beg ) {
-			/* positive int with high bit set, prepend 0 */
 			num.beg--;
 			num.len++;
-			num.buf[num.beg] = 0;
+			num.buf[num.beg] = neg;
 		}
 		if ( num.beg )
------------------------------------------------------------------------

Also the index is wrong for huge numbers.  At some point the indexing
should just give up and use max/min values, but I suppose at least
cryptograpy-sized numbers should be usefully indexed.  I.e. at least
two length bytes.

So here is a suggestion similar to what I wrote before, except using the
utf-8 trick to count initial bits to say how many length-bytes are
needed.  That also means only one bit is needed to say 'no length
bytes', so I reduced the indexkey size to exactly index_intlen.

Index: schema_init.c
===================================================================
RCS file: /repo/OpenLDAP/pkg/ldap/servers/slapd/schema_init.c,v
retrieving revision 1.386.2.16
diff -u -2 -r1.386.2.16 schema_init.c
--- schema_init.c	1 Dec 2007 10:45:01 -0000	1.386.2.16
+++ schema_init.c	1 Dec 2007 14:34:55 -0000
@@ -2120,42 +2120,59 @@
 )
 {
-	struct berval iv;
-	int neg;
+	/* index format:
+	 * one's complement (sign*length) if too large to fit in index,
+	 * two's complement value (sign-extended or chopped as needed),
+	 * with 1st byte of the above adjusted as follows:
+	 *  inverse sign in the top <number of length bytes + 1> bits,
+	 *  the next bit is the sign as delimiter.
+	 */
+
+	ber_len_t k;
+	struct berval iv = *tmp;
+	unsigned char neg = 0, signmask = 0x80;
 
-	iv = *tmp;
 	if ( lutil_str2bin( val, &iv )) {
 		return LDAP_INVALID_SYNTAX;
 	}
 
-	neg = iv.bv_val[0] & 0x80;
+	if ( iv.bv_val[0] & 0x80 )
+		neg = 0xff;
 
-	/* Omit leading 0 pad byte */
-	if ( !iv.bv_val[0] ) {
+	/* Omit leading sign byte */
+	if ( iv.bv_val[0] == neg ) {
 		iv.bv_val++;
 		iv.bv_len--;
 	}
 
-	/* If too small, sign-extend */
-	if ( iv.bv_len < index_intlen ) {
-		int j, k, pad;
-		key->bv_val[0] = index_intlen;
-		k = index_intlen - iv.bv_len + 1;
-		if ( neg )
-			pad = 0xff;
-		else
-			pad = 0;
-		for ( j=1; j<k; j++)
-			key->bv_val[j] = pad;
-		for ( j = 0; j<iv.bv_len; j++ )
-			key->bv_val[j+k] = iv.bv_val[j];
+	if ( index_intlen > iv.bv_len ) {
+		k = index_intlen - iv.bv_len;
+		memset( key->bv_val, neg, k );	/* sign-extend */
 	} else {
-		key->bv_val[0] = iv.bv_len;
-		memcpy( key->bv_val+1, iv.bv_val, index_intlen );
-	}
-	if ( neg ) {
-		key->bv_val[0] = -key->bv_val[0];
+		k = iv.bv_len - index_intlen;
+		if ( k || ((iv.bv_val[0] ^ neg) & 0xc0) ) {
+			unsigned char lenbuf[sizeof(k) + 2];
+			unsigned char *lenp = lenbuf + sizeof(lenbuf);
+			do {
+				*--lenp = k ^ neg;
+				signmask >>= 1;
+			} while ( (k >>= 8) != 0 );
+			if ( (signmask >> 1) <= (*lenp ^ neg) ) {
+				*--lenp = neg;
+				signmask >>= 1;
+			}
+			k = (lenbuf + sizeof(lenbuf)) - lenp;
+			if ( k > 7 ) {	/* overflow in length of length */
+				k = index_intlen;
+				memset( key->bv_val, ~neg, k );
+			} else {
+				if ( k > index_intlen )
+					k = index_intlen;
+				memcpy( key->bv_val, lenp, k );
+			}
+			iv.bv_len = index_intlen - k;
+		}
 	}
-	/* convert signed to unsigned */
-	key->bv_val[0] ^= 0x80;
+	memcpy( key->bv_val + k, iv.bv_val, iv.bv_len );
+	key->bv_val[0] ^= -signmask;	/* invert sign */
 	return 0;
 }
@@ -2187,6 +2204,6 @@
 	keys = slap_sl_malloc( sizeof( struct berval ) * (i+1), ctx );
 	for ( i = 0; !BER_BVISNULL( &values[i] ); i++ ) {
-		keys[i].bv_len = index_intlen+1;
-		keys[i].bv_val = slap_sl_malloc( index_intlen+1, ctx );
+		keys[i].bv_len = index_intlen;
+		keys[i].bv_val = slap_sl_malloc( index_intlen, ctx );
 	}
 	keys[i].bv_len = 0;
@@ -2239,6 +2256,6 @@
 	keys = slap_sl_malloc( sizeof( struct berval ) * 2, ctx );
 
-	keys[0].bv_len = index_intlen + 1;
-	keys[0].bv_val = slap_sl_malloc( index_intlen+1, ctx );
+	keys[0].bv_len = index_intlen;
+	keys[0].bv_val = slap_sl_malloc( index_intlen, ctx );
 
 	if ( value->bv_len > sizeof( ibuf )) {

-- 
Regards,
Hallvard

Follow-Ups:
- Re: ordered indexing for integers
  - From: Hallvard B Furuseth <h.b.furuseth@usit.uio.no>
- Re: ordered indexing for integers
  - From: Howard Chu <hyc@symas.com>

References:
- Re: ordered indexing for integers
  - From: Hallvard B Furuseth <h.b.furuseth@usit.uio.no>
- Re: ordered indexing for integers
  - From: Howard Chu <hyc@symas.com>

Prev by Date: Re: ordered indexing for integers
Next by Date: Re: ordered indexing for integers
Index(es):
- Chronological
- Thread