/* Benchmark various hash functions. */ #include #include #include #include #include #define N 200000000 char *buf; static double tdiff (struct timeval *a, struct timeval *b) { return a->tv_sec - b->tv_sec + (1e-6)*(a->tv_usec - b->tv_usec); } #define measure_bandwidth(str, body) ({ \ int c; \ struct timeval start,end; \ gettimeofday(&start, 0); \ body; \ gettimeofday(&end, 0); \ double diff = tdiff(&end, &start); \ printf("%s=%08x %d bytes in %8.6fs for %8.3fMB/s\n", str, c, N, diff, N*(1e-6)/diff); \ }) int sum32 (int start, void *buf, int bytecount) { int *ibuf = buf; assert(bytecount%4==0); while (bytecount>0) { start+=*ibuf; ibuf++; bytecount-=4; } return start; } static const u_int32_t m = 0x5bd1e995; static const int r = 24; static const u_int32_t seed = 0x3dd3b51a; #define USE_ZERO_CHECKSUM 0 static u_int32_t MurmurHash2 ( const void * key, int len) { if (USE_ZERO_CHECKSUM) return 0; // 'm' and 'r' are mixing constants generated offline. // They're not really 'magic', they just happen to work well. // Initialize the hash to a 'random' value u_int32_t h = seed; // Mix 4 bytes at a time into the hash const unsigned char * data = (const unsigned char *)key; while(len >= 4) { u_int32_t k = *(u_int32_t *)data; k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; data += 4; len -= 4; } // Handle the last few bytes of the input array switch(len) { case 3: h ^= data[2] << 16; case 2: h ^= data[1] << 8; case 1: h ^= data[0]; h *= m; }; // Do a few final mixes of the hash to ensure the last few // bytes are well-incorporated. h ^= h >> 29; h *= m; h ^= h >> 31; return h; } struct murmur { int n_bytes_in_k; // How many bytes in k u_int32_t k; // These are the extra bytes. Bytes are shifted into the low-order bits. u_int32_t h; // The hash so far (up to the most recent 4-byte boundary) }; void murmur_init (struct murmur *mm) { mm->n_bytes_in_k=0; mm->k =0; mm->h = seed; } void murmur_add (struct murmur *mm, const void * key, unsigned int len) { if (USE_ZERO_CHECKSUM) return; assert(mm->n_bytes_in_k<4); const unsigned char *data = key; u_int32_t h = mm->h; { int n_bytes_in_k = mm->n_bytes_in_k; if (n_bytes_in_k>0) { u_int32_t k = mm->k; while (n_bytes_in_k<4 && len>0) { k = (k << 8) | *data; n_bytes_in_k++; data++; len--; } if (n_bytes_in_k==4) { //printf(" oldh=%08x k=%08x", h, k); k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; mm->n_bytes_in_k = 0; mm->k=0; //printf(" h=%08x\n", h); } else { assert(len==0); mm->n_bytes_in_k = n_bytes_in_k; mm->k = k; mm->h = h; return; } } } // We've used up the partial bytes at the beginning of k. assert(mm->n_bytes_in_k==0); while (len >= 4) { u_int32_t k = ntohl(*(u_int32_t *)data); //printf(" oldh=%08x k=%08x", h, k); k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; data += 4; len -= 4; //printf(" h=%08x\n", h); } mm->h=h; //printf("%s:%d h=%08x\n", __FILE__, __LINE__, h); { u_int32_t k=0; switch (len) { case 3: k = *data << 16; data++; case 2: k |= *data << 8; data++; case 1: k |= *data; } mm->k = k; mm->n_bytes_in_k = len; //printf("now extra=%08x (%d bytes) n_bytes=%d\n", mm->k, len, mm->n_bytes_in_k); } } u_int32_t murmur_finish (struct murmur *mm) { if (USE_ZERO_CHECKSUM) return 0; u_int32_t h = mm->h; if (mm->n_bytes_in_k>0) { h ^= mm->k; h *= m; } if (0) { // The real murmur function does this extra mixing at the end. We don't need that for fingerprint. h ^= h >> 29; h *= m; h ^= h >> 31; } return h; } struct sum84 { u_int32_t sum; int i; }; void sum84_init (struct sum84 *s) { s->sum=0; s->i=0; }; void sum84_add (struct sum84 *s, char *buf, int count) { while (s->i%4!=0 && count>0) { char v = *buf; s->sum ^= v << (s->i%4)*8; buf++; count--; s->i++; } while (count>4) { s->sum ^= *(int*)buf; buf+=4; count-=4; } while (count>0) { char v = *buf; s->sum ^= v << (s->i%4)*8; buf++; count--; s->i++; } } int sum84_finish (struct sum84 *s) { return s->sum; } u_int32_t xor8_add (u_int32_t x, char *buf, int count) { while (count>4) { x ^= *(int*)buf; buf+=4; count-=4; } while (count>0) { char v = *buf; x ^= v; buf++; count--; } return x; } u_int32_t xor8_finish (u_int32_t x) { return (x ^ (x>>8) ^ (x>>16) ^ (x>>24))&0xff; } u_int64_t xor8_64_add (u_int64_t x, char *buf, int count) { while (count>8) { x ^= *(u_int64_t*)buf; buf+=8; count-=8; } while (count>0) { char v = *buf; x ^= v; buf++; count--; } return x; } u_int32_t xor8_64_finish (u_int64_t x) { return (x ^ (x>>8) ^ (x>>16) ^ (x>>24) ^ (x>>32) ^ (x>>40) ^ (x>>48) ^ (x>>56))&0xff; } static void measure_bandwidths (void) { measure_bandwidth("crc32 ", c=crc32(0, buf, N)); measure_bandwidth("sum32 ", c=sum32(0, buf, N)); measure_bandwidth("murmur ", c=MurmurHash2(buf, N)); measure_bandwidth("murmurf ", ({ struct murmur mm; murmur_init(&mm); murmur_add(&mm, buf, N); c=murmur_finish(&mm); })); measure_bandwidth("sum84 ", ({ struct sum84 s; sum84_init(&s); sum84_add(&s, buf, N); c=sum84_finish(&s); })); measure_bandwidth("xor32 ", ({ c=0; int j; for(j=0; j