Merge branch 'jk/xdiff-drop-xdl-fast-hash'

Retire the "fast hash" that had disastrous performance issues in some corner cases. * jk/xdiff-drop-xdl-fast-hash: xdiff: drop XDL_FAST_HASH
2016-12-19 14:45:35 -08:00
parent 06cd5a1e01 1f7c926132
commit 731490bf06
3 changed files with 0 additions and 118 deletions
--- a/9
+++ b/9
@ -338,11 +338,6 @@ all::
 #
 # Define NATIVE_CRLF if your platform uses CRLF for line endings.
 #
-# Define XDL_FAST_HASH to use an alternative line-hashing method in
-# the diff algorithm.  It gives a nice speedup if your processor has
-# fast unaligned word loads.  Does NOT work on big-endian systems!
-# Enabled by default on x86_64.
-#
 # Define GIT_USER_AGENT if you want to change how git identifies itself during
 # network interactions.  The default is "git/$(GIT_VERSION)".
 #
@ -1485,10 +1480,6 @@ ifndef NO_MSGFMT_EXTENDED_OPTIONS
 	MSGFMT += --check --statistics
 endif

-ifneq (,$(XDL_FAST_HASH))
-	BASIC_CFLAGS += -DXDL_FAST_HASH
-endif
-
 ifdef GMTIME_UNRELIABLE_ERRORS
 	COMPAT_OBJS += compat/gmtime.o
 	BASIC_CFLAGS += -DGMTIME_UNRELIABLE_ERRORS
--- a/config.mak.uname
+++ b/config.mak.uname
@ -17,9 +17,6 @@ endif
 # because maintaining the nesting to match is a pain.  If
 # we had "elif" things would have been much nicer...

-ifeq ($(uname_M),x86_64)
-	XDL_FAST_HASH = YesPlease
-endif
 ifeq ($(uname_S),OSF1)
 	# Need this for u_short definitions et al
 	BASIC_CFLAGS += -D_OSF_SOURCE
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@ -264,110 +264,6 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
 	return ha;
 }

-#ifdef XDL_FAST_HASH
-
-#define REPEAT_BYTE(x)  ((~0ul / 0xff) * (x))
-
-#define ONEBYTES	REPEAT_BYTE(0x01)
-#define NEWLINEBYTES	REPEAT_BYTE(0x0a)
-#define HIGHBITS	REPEAT_BYTE(0x80)
-
-/* Return the high bit set in the first byte that is a zero */
-static inline unsigned long has_zero(unsigned long a)
-{
-	return ((a - ONEBYTES) & ~a) & HIGHBITS;
-}
-
-static inline long count_masked_bytes(unsigned long mask)
-{
-	if (sizeof(long) == 8) {
-		/*
-		 * Jan Achrenius on G+: microoptimized version of
-		 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
-		 * that works for the bytemasks without having to
-		 * mask them first.
-		 */
-		/*
-		 * return mask * 0x0001020304050608 >> 56;
-		 *
-		 * Doing it like this avoids warnings on 32-bit machines.
-		 */
-		long a = (REPEAT_BYTE(0x01) / 0xff + 1);
-		return mask * a >> (sizeof(long) * 7);
-	} else {
-		/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
-		/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
-		long a = (0x0ff0001 + mask) >> 23;
-		/* Fix the 1 for 00 case */
-		return a & mask;
-	}
-}
-
-unsigned long xdl_hash_record(char const **data, char const *top, long flags)
-{
-	unsigned long hash = 5381;
-	unsigned long a = 0, mask = 0;
-	char const *ptr = *data;
-	char const *end = top - sizeof(unsigned long) + 1;
-
-	if (flags & XDF_WHITESPACE_FLAGS)
-		return xdl_hash_record_with_whitespace(data, top, flags);
-
-	ptr -= sizeof(unsigned long);
-	do {
-		hash += hash << 5;
-		hash ^= a;
-		ptr += sizeof(unsigned long);
-		if (ptr >= end)
-			break;
-		a = *(unsigned long *)ptr;
-		/* Do we have any '\n' bytes in this word? */
-		mask = has_zero(a ^ NEWLINEBYTES);
-	} while (!mask);
-
-	if (ptr >= end) {
-		/*
-		 * There is only a partial word left at the end of the
-		 * buffer. Because we may work with a memory mapping,
-		 * we have to grab the rest byte by byte instead of
-		 * blindly reading it.
-		 *
-		 * To avoid problems with masking in a signed value,
-		 * we use an unsigned char here.
-		 */
-		const char *p;
-		for (p = top - 1; p >= ptr; p--)
-			a = (a << 8) + *((const unsigned char *)p);
-		mask = has_zero(a ^ NEWLINEBYTES);
-		if (!mask)
-			/*
-			 * No '\n' found in the partial word.  Make a
-			 * mask that matches what we read.
-			 */
-			mask = 1UL << (8 * (top - ptr) + 7);
-	}
-
-	/* The mask *below* the first high bit set */
-	mask = (mask - 1) & ~mask;
-	mask >>= 7;
-	hash += hash << 5;
-	hash ^= a & mask;
-
-	/* Advance past the last (possibly partial) word */
-	ptr += count_masked_bytes(mask);
-
-	if (ptr < top) {
-		assert(*ptr == '\n');
-		ptr++;
-	}
-
-	*data = ptr;
-
-	return hash;
-}
-
-#else /* XDL_FAST_HASH */
-
 unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 	unsigned long ha = 5381;
 	char const *ptr = *data;
@ -384,8 +280,6 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 	return ha;
 }

-#endif /* XDL_FAST_HASH */
-
 unsigned int xdl_hashbits(unsigned int size) {
 	unsigned int val = 1, bits = 0;