/*
 * NOTE(review): the three header names were missing from the #include
 * directives in this file.  <sys/time.h> is required by gettimeofday()
 * below; <stdio.h> and <stdlib.h> are presumed from typical usage --
 * confirm against the rest of the file.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define BUFSIZE (8*1048576) /* bufsize in bytes: 8 megs of junk */
#define ITERATIONS 300

/* count the bits in a bunch of junk and time it */

/*
 * BITCOUNT(x): number of set bits in a 32-bit value.
 * BX_ reduces every 4-bit nibble to its own bit count; BITCOUNT then folds
 * nibble pairs into bytes and sums the four bytes with "% 255".
 * The argument is expanded several times, so it must be free of side effects.
 */
#define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
#define BX_(x) ((x) - (((x)>>1)&0x77777777) \
                    - (((x)>>2)&0x33333333) \
                    - (((x)>>3)&0x11111111))

/**
 * Return the current wall-clock time from gettimeofday() in microseconds.
 *
 * The arithmetic is carried out in unsigned long so that it wraps instead of
 * overflowing a signed type; differences between two readings stay valid
 * modulo the width of unsigned long.
 */
unsigned long get_usecs(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return (unsigned long)tv.tv_sec * 1000000UL + (unsigned long)tv.tv_usec;
}

/**
 * Count the set bits in n words using pairwise parallel summation:
 * adjacent 1-bit fields are added, then 2-bit, 4-bit, 8-bit and 16-bit fields.
 *
 * @param buf  words to scan (at least n readable elements)
 * @param n    number of words; values <= 0 yield 0 without touching buf
 * @return     total number of set bits
 */
static inline int popcount_fbsd1(const unsigned *buf, int n)
{
    int cnt = 0;

    for (; n > 0; n--) {
        unsigned m = *buf++;

        m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
        m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
        m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
        m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
        m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
        cnt += (int)m;
    }
    return cnt;
}

/**
 * Count the set bits in n words using the subtract/mask reduction followed
 * by a multiply: after the per-byte counts are formed, multiplying by
 * 0x01010101 accumulates the four byte counts into the top byte.
 *
 * @param buf  words to scan (at least n readable elements)
 * @param n    number of words; values <= 0 yield 0 without touching buf
 * @return     total number of set bits
 */
static inline int popcount_fbsd2(const unsigned *buf, int n)
{
    int cnt = 0;

    for (; n > 0; n--) {
        unsigned v = *buf++;

        v -= ((v >> 1) & 0x55555555);
        v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
        v = (v + (v >> 4)) & 0x0F0F0F0F;
        v = (v * 0x01010101) >> 24;
        cnt += (int)v;
    }
    return cnt;
}

/* Bit count of every 16-bit value; filled in by init_lut(). */
unsigned char lut[65536];

/**
 * Count the set bits in n words with two 16-bit table lookups per word.
 * init_lut() must have been called first.
 *
 * @param buf  words to scan (at least n readable elements)
 * @param n    number of words; values <= 0 yield 0 without touching buf
 * @return     total number of set bits
 */
static inline int popcount_lut16(const unsigned *buf, int n)
{
    int cnt = 0;

    for (; n > 0; n--) {
        unsigned w = *buf++;

        cnt += lut[w & 65535];
        cnt += lut[w >> 16];
    }
    return cnt;
}

/**
 * Count the set bits in n words with four 8-bit lookups per word.  Only the
 * first 256 entries of the shared table are used.
 * init_lut() must have been called first.
 *
 * @param buf  words to scan (at least n readable elements)
 * @param n    number of words; values <= 0 yield 0 without touching buf
 * @return     total number of set bits
 */
static inline int popcount_lut8(const unsigned *buf, int n)
{
    int cnt = 0;

    for (; n > 0; n--) {
        unsigned w = *buf++;

        cnt += lut[w & 255];
        cnt += lut[(w >> 8) & 255];
        cnt += lut[(w >> 16) & 255];
        cnt += lut[(w >> 24) & 255];
    }
    return cnt;
}

/** Fill lut[] so that lut[i] holds the number of set bits in i. */
void init_lut(void)
{
    unsigned i;

    for (i = 0; i < 65536; i++) {
        lut[i] = (unsigned char)BITCOUNT(i);
    }
}

/*
 * ROLADC8 / ROLADC32: rotate the local variable `c` left by one bit and add
 * the bit that was rotated out to the local variable `cnt`.  Both macros
 * expect `c` and `cnt` to be in scope at the point of use.
 *
 * On x86 this is a rol/adc pair: rol leaves the rotated-out bit in the carry
 * flag and "adc $0" adds that flag to cnt.  Both instructions live in one asm
 * statement so nothing can disturb the flag in between.  Elsewhere a plain C
 * equivalent keeps the file buildable; its timing says nothing about rol/adc.
 */
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
#define ROLADC8 __asm__("rolb %%al; "\
                        "adcl $0,%1;": "+a"(c), "+r"(cnt) : : "cc")
#define ROLADC32 __asm__("roll %0; "\
                         "adcl $0,%1;": "+r"(c), "+r"(cnt) : : "cc")
#else
#define ROLADC8 do { \
        cnt += (c >> 7) & 1; \
        c = (unsigned char)((c << 1) | (c >> 7)); \
    } while (0)
#define ROLADC32 do { \
        cnt += (int)((c >> 31) & 1u); \
        c = (c << 1) | (c >> 31); \
    } while (0)
#endif

/**
 * Count the set bits in n bytes, one rotate-and-add step per bit.
 *
 * @param buf  bytes to scan (at least n readable elements)
 * @param n    number of bytes; values <= 0 yield 0 without touching buf
 * @return     total number of set bits
 */
static inline int popcount_roladc8(const unsigned char *buf, int n)
{
    int cnt = 0;

    for (; n > 0; n--) {
        unsigned char c = *buf++;

        ROLADC8; ROLADC8; ROLADC8; ROLADC8;
        ROLADC8; ROLADC8; ROLADC8; ROLADC8;
    }
    return cnt;
}

/**
 * Count the set bits in n words, one rotate-and-add step per bit
 * (32 steps per word).
 *
 * @param buf  words to scan (at least n readable elements)
 * @param n    number of words; values <= 0 yield 0 without touching buf
 * @return     total number of set bits
 */
static inline int popcount_roladc32(const unsigned *buf, int n)
{
    int cnt = 0;

    for (; n > 0; n--) {
        unsigned c = *buf++;

        ROLADC32; ROLADC32; ROLADC32; ROLADC32;
        ROLADC32; ROLADC32; ROLADC32; ROLADC32;
        ROLADC32; ROLADC32; ROLADC32; ROLADC32;
        ROLADC32; ROLADC32; ROLADC32; ROLADC32;
        ROLADC32; ROLADC32; ROLADC32; ROLADC32;
        ROLADC32; ROLADC32; ROLADC32; ROLADC32;
        ROLADC32; ROLADC32; ROLADC32; ROLADC32;
        ROLADC32; ROLADC32; ROLADC32; ROLADC32;
    }
    return cnt;
}

/*
 * NOTE(review): the MAKETEST definition below runs past the end of the
 * reviewed excerpt; its visible part is reproduced unchanged.
 */
#define MAKETEST(fn_def, ptr_arg_t, fn_call, diag_string) \
void fn_def(void *buf, int n) { int i,cnt; unsigned long t0=get_usecs(), t1; \
for(i=0;i