URI: 
                   ,    _
                  /|   | |
                _/_\_  >_<
               .-\-/.   |
              /  | | \_ |
              \ \| |\__(/
              /(`---')  |
             / /     \  |
          _.'  \'-'  /  |
          `----'`=-='   '    hjw
          C Thaumaturgy Center
       
       You like magic?
       
       
       Compute modulus division by (1 << s) - 1 in parallel without a
       division operator
       
       // The following is for a word size of 32 bits!
       static const unsigned int M[] =
       {
       0x00000000, 0x55555555, 0x33333333, 0xc71c71c7,
       0x0f0f0f0f, 0xc1f07c1f, 0x3f03f03f, 0xf01fc07f,
       0x00ff00ff, 0x07fc01ff, 0x3ff003ff, 0xffc007ff,
       0xff000fff, 0xfc001fff, 0xf0003fff, 0xc0007fff,
       0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff,
       0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff,
       0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff,
       0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff
       };
       static const unsigned int Q[][6] =
       {
       { 0, 0, 0, 0, 0, 0}, {16, 8, 4, 2, 1, 1}, {16, 8, 4, 2, 2, 2},
       {15, 6, 3, 3, 3, 3}, {16, 8, 4, 4, 4, 4}, {15, 5, 5, 5, 5, 5},
       {12, 6, 6, 6, 6, 6}, {14, 7, 7, 7, 7, 7}, {16, 8, 8, 8, 8, 8},
       { 9, 9, 9, 9, 9, 9}, {10, 10, 10, 10, 10, 10}, {11, 11, 11, 11, 11,
       11},
       {12, 12, 12, 12, 12, 12}, {13, 13, 13, 13, 13, 13}, {14, 14, 14, 14,
       14, 14},
       {15, 15, 15, 15, 15, 15}, {16, 16, 16, 16, 16, 16}, {17, 17, 17, 17,
       17, 17},
       {18, 18, 18, 18, 18, 18}, {19, 19, 19, 19, 19, 19}, {20, 20, 20, 20,
       20, 20},
       {21, 21, 21, 21, 21, 21}, {22, 22, 22, 22, 22, 22}, {23, 23, 23, 23,
       23, 23},
       {24, 24, 24, 24, 24, 24}, {25, 25, 25, 25, 25, 25}, {26, 26, 26, 26,
       26, 26},
       {27, 27, 27, 27, 27, 27}, {28, 28, 28, 28, 28, 28}, {29, 29, 29, 29,
       29, 29},
       {30, 30, 30, 30, 30, 30}, {31, 31, 31, 31, 31, 31}
       };
       static const unsigned int R[][6] =
       {
       {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
       0x00000000},
       {0x0000ffff, 0x000000ff, 0x0000000f, 0x00000003, 0x00000001,
       0x00000001},
       {0x0000ffff, 0x000000ff, 0x0000000f, 0x00000003, 0x00000003,
       0x00000003},
       {0x00007fff, 0x0000003f, 0x00000007, 0x00000007, 0x00000007,
       0x00000007},
       {0x0000ffff, 0x000000ff, 0x0000000f, 0x0000000f, 0x0000000f,
       0x0000000f},
       {0x00007fff, 0x0000001f, 0x0000001f, 0x0000001f, 0x0000001f,
       0x0000001f},
       {0x00000fff, 0x0000003f, 0x0000003f, 0x0000003f, 0x0000003f,
       0x0000003f},
       {0x00003fff, 0x0000007f, 0x0000007f, 0x0000007f, 0x0000007f,
       0x0000007f},
       {0x0000ffff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff,
       0x000000ff},
       {0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff,
       0x000001ff},
       {0x000003ff, 0x000003ff, 0x000003ff, 0x000003ff, 0x000003ff,
       0x000003ff},
       {0x000007ff, 0x000007ff, 0x000007ff, 0x000007ff, 0x000007ff,
       0x000007ff},
       {0x00000fff, 0x00000fff, 0x00000fff, 0x00000fff, 0x00000fff,
       0x00000fff},
       {0x00001fff, 0x00001fff, 0x00001fff, 0x00001fff, 0x00001fff,
       0x00001fff},
       {0x00003fff, 0x00003fff, 0x00003fff, 0x00003fff, 0x00003fff,
       0x00003fff},
       {0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff,
       0x00007fff},
       {0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff,
       0x0000ffff},
       {0x0001ffff, 0x0001ffff, 0x0001ffff, 0x0001ffff, 0x0001ffff,
       0x0001ffff},
       {0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff,
       0x0003ffff},
       {0x0007ffff, 0x0007ffff, 0x0007ffff, 0x0007ffff, 0x0007ffff,
       0x0007ffff},
       {0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff,
       0x000fffff},
       {0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff,
       0x001fffff},
       {0x003fffff, 0x003fffff, 0x003fffff, 0x003fffff, 0x003fffff,
       0x003fffff},
       {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff,
       0x007fffff},
       {0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff,
       0x00ffffff},
       {0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff,
       0x01ffffff},
       {0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff,
       0x03ffffff},
       {0x07ffffff, 0x07ffffff, 0x07ffffff, 0x07ffffff, 0x07ffffff,
       0x07ffffff},
       {0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff,
       0x0fffffff},
       {0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff,
       0x1fffffff},
       {0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff,
       0x3fffffff},
       {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff,
       0x7fffffff}
       };
       unsigned int n; // numerator
       const unsigned int s; // s> 0
       const unsigned int d = (1 << s) - 1; // so d is either 1, 3, 7, 15,
       31,...).
       unsigned int m; // n % d goes here.
       m = (n & M[s]) + ((n>> s) & M[s]);
       for (const unsigned int * q = &Q[s][0], * r = &R[s][0]; m> d; q++,
       r++)
       {
       m = (m>> *q) + (m & *r);
       }
       m = m == d? 0: m; // OR, less portably: m = m & -((signed)(m - d)>>
       s);
       
       This method of finding modulus division by an integer that is one less
       than a power of 2 takes at most O(lg(N)) time, where N is the number
       of bits in the numerator (32 bits, for the code above). The number of
       operations is at most 12 + 9 * ceil(lg(N)). The tables may be removed
       if you know the denominator at compile time; just extract the few
       relevent entries and unroll the loop. It may be easily extended to
       more bits.
       
       It finds the result by summing the values in base (1 << s) in
       parallel. First every other base (1 << s) value is added to the
       previous one. Imagine that the result is written on a piece of paper.
       Cut the paper in half, so that half the values are on each cut piece.
       Align the values and sum them onto a new piece of paper. Repeat by
       cutting this paper in half (which will be a quarter of the size of the
       previous one) and summing, until you cannot cut further. After
       performing lg(N/s/2) cuts, we cut no more; just continue to add the
       values and put the result onto a new piece of paper as before, while
       there are at least two s-bit values.
       
       Devised by Sean Anderson, August 20, 2001. A typo was spotted by Randy
       E. Bryant on May 3, 2005 (after pasting the code, I had later added
       "unsinged" to a variable declaration). As in the previous hack, I
       mistakenly commented that we could alternatively assign m = ((m + 1) &
       d) - 1; at the end, and Don Knuth corrected me on April 19, 2006 and
       suggested m = m & -((signed)(m - d)>> s). On June 18, 2009 Sean Irvine
       proposed a change that used ((n>> s) & M[s]) instead of ((n & ~M[s])>>
       s), which typically requires fewer operations because the M[s]
       constant is already loaded.
       
       
  TEXT Get the full database.
  HTML Got some C magic? Send the spell source to 20h@r-36.net
  TEXT Problems reading C?
   DIR << back to bitreich.org