int numberOfSetBits(uint32_t i){// Java: use int, and use >>> instead of >>. Or use Integer.bitCount()// C or C++: use uint32_ti = i - ((i >> 1) & 0x55555555); // add pairs of bitsi = (i & 0x33333333) + ((i >> 2) & 0x33333333); // quadsi = (i + (i >> 4)) & 0x0F0F0F0F; // groups of 8return (i * 0x01010101) >> 24; // horizontal sum of bytes}
这个的64位版本可以使用0x01010101010101乘法器在64位整数中执行8 x 8位元素,并使用>>56提取高字节。因此它不需要任何额外的步骤,只需更宽的常量。这就是GCC在x86系统上未启用硬件popcnt指令时用于__builtin_popcountll的内容。如果您可以为此使用内置或内在函数,请这样做以使编译器有机会进行特定于目标的优化。
#include <bitset>#include <limits>#include <type_traits>
template<typename T>//static inline // static if you want to compile with -mpopcnt in one compilation unit but not otherstypename std::enable_if<std::is_integral<T>::value, unsigned >::typepopcount(T x){static_assert(std::numeric_limits<T>::radix == 2, "non-binary type");
// sizeof(x)*CHAR_BITconstexpr int bitwidth = std::numeric_limits<T>::digits + std::numeric_limits<T>::is_signed;// std::bitset constructor was only unsigned long before C++11. Beware if porting to C++03static_assert(bitwidth <= std::numeric_limits<unsigned long long>::digits, "arg too wide for std::bitset() constructor");
typedef typename std::make_unsigned<T>::type UT; // probably not needed, bitset width chops after sign-extension
std::bitset<bitwidth> bs( static_cast<UT>(x) );return bs.count();}
# clang 11bar(unsigned int): # @bar(unsigned int)popcnt eax, edicmove eax, edi # redundant: if popcnt result is 0, return the original 0 instead of the popcnt-generated 0...ret
但是GCC编译得很好:
# gcc 10xor eax, eax # break false dependency on Intel SnB-family before Ice Lake.popcnt eax, ediret
static final int[] BIT_COUNT = { 0, 1, 1, ... 256 values with a bitsize of a byte ... };static int bitCountOfByte( int value ){return BIT_COUNT[ value & 0xFF ];}
static int bitCountOfInt( int value ){return bitCountOfByte( value )+ bitCountOfByte( value >> 8 )+ bitCountOfByte( value >> 16 )+ bitCountOfByte( value >> 24 );}
static unsigned char wordbits[65536] = { bitcounts of ints between 0 and 65535 };static int popcount( unsigned int i ){return( wordbits[i&0xFFFF] + wordbits[i>>16] );}
Integer.highestOneBit(n);Integer.lowestOneBit(n);Integer.numberOfLeadingZeros(n);Integer.numberOfTrailingZeros(n);
//Beginning with the value 1, rotate left 16 timesn = 1;for (int i = 0; i < 16; i++) {n = Integer.rotateLeft(n, 1);System.out.println(n);}
#ifndef _BITCOUNT_H_#define _BITCOUNT_H_
/* Return the Hamming Wieght of val, i.e. the number of 'on' bits. */int bitcount( unsigned int );
/* List of available bitcount algorithms.* onTheFly: Calculate the bitcount on demand.** lookupTalbe: Uses a small lookup table to determine the bitcount. This* method is on average 3 times as fast as onTheFly, but incurs a small* upfront cost to initialize the lookup table on the first call.** strategyCount is just a placeholder.*/enum strategy { onTheFly, lookupTable, strategyCount };
/* String represenations of the algorithm names */extern const char *strategyNames[];
/* Choose which bitcount algorithm to use. */void setStrategy( enum strategy );
#endif
.
#include <limits.h>
#include "bitcount.h"
/* The number of entries needed in the table is equal to the number of unique* values a char can represent which is always UCHAR_MAX + 1*/static unsigned char _bitCountTable[UCHAR_MAX + 1];static unsigned int _lookupTableInitialized = 0;
static int _defaultBitCount( unsigned int val ) {int count;
/* Starting with:* 1100 - 1 == 1011, 1100 & 1011 == 1000* 1000 - 1 == 0111, 1000 & 0111 == 0000*/for ( count = 0; val; ++count )val &= val - 1;
return count;}
/* Looks up each byte of the integer in a lookup table.** The first time the function is called it initializes the lookup table.*/static int _tableBitCount( unsigned int val ) {int bCount = 0;
if ( !_lookupTableInitialized ) {unsigned int i;for ( i = 0; i != UCHAR_MAX + 1; ++i )_bitCountTable[i] =( unsigned char )_defaultBitCount( i );
_lookupTableInitialized = 1;}
for ( ; val; val >>= CHAR_BIT )bCount += _bitCountTable[val & UCHAR_MAX];
return bCount;}
static int ( *_bitcount ) ( unsigned int ) = _defaultBitCount;
const char *strategyNames[] = { "onTheFly", "lookupTable" };
void setStrategy( enum strategy s ) {switch ( s ) {case onTheFly:_bitcount = _defaultBitCount;break;case lookupTable:_bitcount = _tableBitCount;break;case strategyCount:break;}}
/* Just a forwarding function which will call whichever version of the* algorithm has been selected by the client*/int bitcount( unsigned int val ) {return _bitcount( val );}
#ifdef _BITCOUNT_EXE_
#include <stdio.h>#include <stdlib.h>#include <time.h>
/* Use the same sequence of pseudo random numbers to benmark each Hamming* Weight algorithm.*/void benchmark( int reps ) {clock_t start, stop;int i, j;static const int iterations = 1000000;
for ( j = 0; j != strategyCount; ++j ) {setStrategy( j );
srand( 257 );
start = clock( );
for ( i = 0; i != reps * iterations; ++i )bitcount( rand( ) );
stop = clock( );
printf( "\n\t%d psudoe-random integers using %s: %f seconds\n\n",reps * iterations, strategyNames[j],( double )( stop - start ) / CLOCKS_PER_SEC );}}
int main( void ) {int option;
while ( 1 ) {printf( "Menu Options\n""\t1.\tPrint the Hamming Weight of an Integer\n""\t2.\tBenchmark Hamming Weight implementations\n""\t3.\tExit ( or cntl-d )\n\n\t" );
if ( scanf( "%d", &option ) == EOF )break;
switch ( option ) {case 1:printf( "Please enter the integer: " );if ( scanf( "%d", &option ) != EOF )printf( "The Hamming Weight of %d ( 0x%X ) is %d\n\n",option, option, bitcount( option ) );break;case 2:printf( "Please select number of reps ( in millions ): " );if ( scanf( "%d", &option ) != EOF )benchmark( option );break;case 3:goto EXIT;break;default:printf( "Invalid option\n" );}
}
EXIT:printf( "\n" );
return 0;}
#endif
unsigned int v; // count the number of bits set in vunsigned int c; // c accumulates the total bits set in v
// option 1, for at most 14-bit values in v:c = (v * 0x200040008001ULL & 0x111111111111111ULL) % 0xf;
// option 2, for at most 24-bit values in v:c = ((v & 0xfff) * 0x1001001001001ULL & 0x84210842108421ULL) % 0x1f;c += (((v & 0xfff000) >> 12) * 0x1001001001001ULL & 0x84210842108421ULL)% 0x1f;
// option 3, for at most 32-bit values in v:c = ((v & 0xfff) * 0x1001001001001ULL & 0x84210842108421ULL) % 0x1f;c += (((v & 0xfff000) >> 12) * 0x1001001001001ULL & 0x84210842108421ULL) %0x1f;c += ((v >> 24) * 0x1001001001001ULL & 0x84210842108421ULL) % 0x1f;
// recursive template to sum bits in an inttemplate <int BITS>int countBits(int val) {// return the least significant bit plus the result of calling ourselves with// .. the shifted valuereturn (val & 0x1) + countBits<BITS-1>(val >> 1);}
// template specialisation to terminate the recursion when there's only one bit lefttemplate<>int countBits<1>(int val) {return val & 0x1;}
使用将是:
// to count bits in a byte/char (this returns 8)countBits<8>( 255 )
// another byte (this returns 7)countBits<8>( 254 )
// counting bits in a word/short (this returns 1)countBits<16>( 256 )
#!/user/local/bin/perl
$c=0x11BBBBAB;$count=0;$m=0x00000001;for($i=0;$i<32;$i++){$f=$c & $m;if($f == 1){$count++;}$c=$c >> 1;}printf("%d",$count);
ive done it through a perl script. the number taken is $c=0x11BBBBABB=3 1sA=2 1sso in total1+1+3+3+3+2+3+3=19
int countSetBits(int n){n=((n&0xAAAAAAAA)>>1) + (n&0x55555555);n=((n&0xCCCCCCCC)>>2) + (n&0x33333333);n=((n&0xF0F0F0F0)>>4) + (n&0x0F0F0F0F);n=((n&0xFF00FF00)>>8) + (n&0x00FF00FF);return n;}
int main(){int n=10;printf("Number of set bits: %d",countSetBits(n));return 0;}
int popcount(int v) {v = v - ((v >> 1) & 0x55555555); // put count of each 2 bits into those 2 bitsv = (v & 0x33333333) + ((v >> 2) & 0x33333333); // put count of each 4 bits into those 4 bitsreturn ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;}
它之所以有效,是因为您可以通过将其分成两半来计算设置位的总数,计算两半中设置位的数量,然后将它们相加。也称为Divide and Conquer范式。让我们详细介绍一下…
v = v - ((v >> 1) & 0x55555555);
两位中的位数可以是0b00、0b01或0b10。让我们尝试在2位上计算出来。
---------------------------------------------| v | (v >> 1) & 0b0101 | v - x |---------------------------------------------0b00 0b00 0b000b01 0b00 0b010b10 0b01 0b010b11 0b01 0b10
package countSetBitsInAnInteger;
import java.util.Scanner;
public class UsingLoop {
public static void main(String[] args) {Scanner in = new Scanner(System.in);try {System.out.println("Enter a integer number to check for set bits in it");int n = in.nextInt();System.out.println("Using while loop, we get the number of set bits as: " + usingLoop(n));System.out.println("Using Brain Kernighan's Algorithm, we get the number of set bits as: " + usingBrainKernighan(n));System.out.println("Using ");}finally {in.close();}}
private static int usingBrainKernighan(int n) {int count = 0;while(n > 0) {n& = (n-1);count++;}return count;}
/*Analysis:Time complexity = O(lgn)Space complexity = O(1)*/
private static int usingLoop(int n) {int count = 0;for(int i=0; i<32; i++) {if((n&(1 << i)) != 0)count++;}return count;}
/*Analysis:Time Complexity = O(32) // Maybe the complexity is O(lgn)Space Complexity = O(1)*/}
#include <stdio.h>int countBits(int x){int n = 0;if (x) do n++; /* Totally Normal Valid code. */while(x=x&(x-1)); /* Nothing to see here. */return n;}
int main(void) {printf("%d\n", countBits(25));return 0;}
输出:
3
如果你想为了清晰而重写它,它看起来像:
if (x){do{n++;} while(x=x&(x-1));}
但在我看来,这似乎有些过分。
然而,我也意识到这个函数可以变得更短,但也许更神秘,写成:
int countBits(int x){int n = 0;while (x) x=(n++,x&(x-1));return n;}
#include <smmintrin.h>#include <stdint.h>
const __m128i Z = _mm_set1_epi8(0x0);const __m128i F = _mm_set1_epi8(0xF);//Vector with pre-calculated bit count:const __m128i T = _mm_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
uint64_t BitCount(const uint8_t * src, size_t size){__m128i _sum = _mm128_setzero_si128();for (size_t i = 0; i < size; i += 16){//load 16-byte vector__m128i _src = _mm_loadu_si128((__m128i*)(src + i));//get low 4 bit for every byte in vector__m128i lo = _mm_and_si128(_src, F);//sum precalculated value from T_sum = _mm_add_epi64(_sum, _mm_sad_epu8(Z, _mm_shuffle_epi8(T, lo)));//get high 4 bit for every byte in vector__m128i hi = _mm_and_si128(_mm_srli_epi16(_src, 4), F);//sum precalculated value from T_sum = _mm_add_epi64(_sum, _mm_sad_epu8(Z, _mm_shuffle_epi8(T, hi)));}uint64_t sum[2];_mm_storeu_si128((__m128i*)sum, _sum);return sum[0] + sum[1];}
AVX2版本:
#include <immintrin.h>#include <stdint.h>
const __m256i Z = _mm256_set1_epi8(0x0);const __m256i F = _mm256_set1_epi8(0xF);//Vector with pre-calculated bit count:const __m256i T = _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
uint64_t BitCount(const uint8_t * src, size_t size){__m256i _sum = _mm256_setzero_si256();for (size_t i = 0; i < size; i += 32){//load 32-byte vector__m256i _src = _mm256_loadu_si256((__m256i*)(src + i));//get low 4 bit for every byte in vector__m256i lo = _mm256_and_si256(_src, F);//sum precalculated value from T_sum = _mm256_add_epi64(_sum, _mm256_sad_epu8(Z, _mm256_shuffle_epi8(T, lo)));//get high 4 bit for every byte in vector__m256i hi = _mm256_and_si256(_mm256_srli_epi16(_src, 4), F);//sum precalculated value from T_sum = _mm256_add_epi64(_sum, _mm256_sad_epu8(Z, _mm256_shuffle_epi8(T, hi)));}uint64_t sum[4];_mm256_storeu_si256((__m256i*)sum, _sum);return sum[0] + sum[1] + sum[2] + sum[3];}
int countSetBits(unsigned int n) {unsigned int n; // count the number of bits set in nunsigned int c; // c accumulates the total bits set in nfor (c=0;n>0;n=n&(n-1)) c++;return c;}
发表于1988年的C编程语言第二版(Brian W. Kernighan和Dennis M. Ritchie)在练习2-9中提到了这一点。2006年4月19日,Don Knuth向我指出,这种方法“最初由Peter Wegner在CACM 3(1960),322中发表。(也由Derrick Lehmer独立发现,并于1964年发表在Beckenbach编辑的一本书中。)”
private int get_bits_set(int v){int c; // 'c' accumulates the total bits set in 'v'for (c = 0; v>0; c++){v &= v - 1; // Clear the least significant bit set}return c;}
08 bit Builtin 8.2 ns08 bit Parallel 8.2 ns08 bit Kernighan 26.7 ns
16 bit Builtin 7.7 ns16 bit Parallel 7.7 ns16 bit Kernighan 39.7 ns
32 bit Builtin 7.0 ns32 bit Parallel 7.0 ns32 bit Kernighan 47.9 ns
64 bit Builtin 7.5 ns64 bit Parallel 7.5 ns64 bit Kernighan 59.4 ns
128 bit Builtin 7.8 ns128 bit Parallel 13.8 ns128 bit Kernighan 127.6 ns
fun NumberOfSetBits(i: Int): Int {var i = ii -= (i ushr 1 and 0x55555555)i = (i and 0x33333333) + (i ushr 2 and 0x33333333)return (i + (i ushr 4) and 0x0F0F0F0F) * 0x01010101 ushr 24}
fun NumberOfSetBits(i: Int): Int {return i.countOneBits()}
在引擎盖下,它使用Integer.bitCount,如下所示:
@SinceKotlin("1.4")@WasExperimental(ExperimentalStdlibApi::class)@kotlin.internal.InlineOnlypublic actual inline fun Int.countOneBits(): Int = Integer.bitCount(this)
int countSet(unsigned int n){int res=0;while(n!=0){res += (n&1);n >>= 1; // logical right shift, like C unsigned or Java >>>}return res;}
Brian Kerningam的算法
时间复杂度为O(n中没有设置位)
int countSet(unsigned int n){int res=0;while(n != 0){n = (n & (n-1));res++;}return res;}
32位数字的查找表方法-在这个方法中,我们将32位数字分成四个8位数字的块
时间复杂度为O(1)
static unsigned char table[256]; /* the table size is 256,the number of values i&0xFF (8 bits) can have */
void initialize() //holds the number of set bits from 0 to 255{table[0]=0;for(unsigned int i=1;i<256;i++)table[i]=(i&1)+table[i>>1];}
int countSet(unsigned int n){// 0xff is hexadecimal representation of 8 set bits.int res=table[n & 0xff];n=n>>8;res=res+ table[n & 0xff];n=n>>8;res=res+ table[n & 0xff];n=n>>8;res=res+ table[n & 0xff];return res;}
#include <stdint.h>#include <limits>#include <type_traits>
const constexpr uint32_t uint32_max = (std::numeric_limits<uint32_t>::max)();
namespace detail{template <typename T>inline constexpr T _count_bits_0(const T & v){return v - ((v >> 1) & 0x55555555);}
template <typename T>inline constexpr T _count_bits_1(const T & v){return (v & 0x33333333) + ((v >> 2) & 0x33333333);}
template <typename T>inline constexpr T _count_bits_2(const T & v){return (v + (v >> 4)) & 0x0F0F0F0F;}
template <typename T>inline constexpr T _count_bits_3(const T & v){return v + (v >> 8);}
template <typename T>inline constexpr T _count_bits_4(const T & v){return v + (v >> 16);}
template <typename T>inline constexpr T _count_bits_5(const T & v){return v & 0x0000003F;}
template <typename T, bool greater_than_uint32>struct _impl{static inline constexpr T _count_bits_with_shift(const T & v){returndetail::_count_bits_5(detail::_count_bits_4(detail::_count_bits_3(detail::_count_bits_2(detail::_count_bits_1(detail::_count_bits_0(v)))))) + count_bits(v >> 32);}};
template <typename T>struct _impl<T, false>{static inline constexpr T _count_bits_with_shift(const T & v){return 0;}};}
template <typename T>inline constexpr T count_bits(const T & v){static_assert(std::is_integral<T>::value, "type T must be an integer");static_assert(!std::is_signed<T>::value, "type T must be not signed");
return uint32_max >= v ?detail::_count_bits_5(detail::_count_bits_4(detail::_count_bits_3(detail::_count_bits_2(detail::_count_bits_1(detail::_count_bits_0(v)))))) :detail::_impl<T, sizeof(uint32_t) < sizeof(v)>::_count_bits_with_shift(v);}
Google测试库中的附加测试:
#include <stdlib.h>#include <time.h>
namespace {template <typename T>inline uint32_t _test_count_bits(const T & v){uint32_t count = 0;T n = v;while (n > 0) {if (n % 2) {count += 1;}n /= 2;}return count;}}
TEST(FunctionsTest, random_count_bits_uint32_100K){srand(uint_t(time(NULL)));for (uint32_t i = 0; i < 100000; i++) {const uint32_t r = uint32_t(rand()) + (uint32_t(rand()) << 16);ASSERT_EQ(_test_count_bits(r), count_bits(r));}}
TEST(FunctionsTest, random_count_bits_uint64_100K){srand(uint_t(time(NULL)));for (uint32_t i = 0; i < 100000; i++) {const uint64_t r = uint64_t(rand()) + (uint64_t(rand()) << 16) + (uint64_t(rand()) << 32) + (uint64_t(rand()) << 48);ASSERT_EQ(_test_count_bits(r), count_bits(r));}}