[GFS2] Faster gfs2_bitfit algorithm
author Bob Peterson <rpeterso@redhat.com>
Mon, 10 Mar 2008 23:17:47 +0000 (18:17 -0500)
committer Steven Whitehouse <swhiteho@redhat.com>
Mon, 31 Mar 2008 09:41:39 +0000 (10:41 +0100)
This version of the gfs2_bitfit algorithm includes the latest
suggestions from Steve Whitehouse.  It is typically eight to
ten times faster than the version we're using today.  If there
is a lot of metadata mixed in (lots of small files) the
algorithm is often 15 times faster, and given the right
conditions, I've seen peaks of 20 times faster.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
fs/gfs2/rgrp.c

index 4291375cecc6e5f4700acbba719a7d1ead1ed6cf..7e8f0b1d6c6ea2d39cb18811a7762c1b8e15ee6c 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/prefetch.h>
 
 #include "gfs2.h"
 #include "incore.h"
 #define BFITNOENT ((u32)~0) /* returned by gfs2_bitfit() when no block in the wanted state is found */
 #define NO_BLOCK ((u64)~0)
 
+#if BITS_PER_LONG == 32
+#define LBITMASK   (0x55555555UL) /* selects the low bit of every 2-bit pair in a 32-bit long */
+#define LBITSKIP55 (0x55555555UL) /* masked word value when every pair has its low bit set */
+#define LBITSKIP00 (0x00000000UL) /* masked word value when every pair has its low bit clear */
+#else
+#define LBITMASK   (0x5555555555555555UL) /* selects the low bit of every 2-bit pair in a 64-bit long */
+#define LBITSKIP55 (0x5555555555555555UL) /* masked word value when every pair has its low bit set */
+#define LBITSKIP00 (0x0000000000000000UL) /* masked word value when every pair has its low bit clear */
+#endif
+
 /*
  * These routines are used by the resource group routines (rgrp.c)
  * to keep track of block allocation.  Each block is represented by two
@@ -138,45 +149,63 @@ static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd,
 static u32 gfs2_bitfit(const u8 *buffer, unsigned int buflen, u32 goal,
                       u8 old_state)
 {
-       const u8 *byte;
-       u32 blk = goal;
-       unsigned int bit, bitlong;
-       const unsigned long *plong;
-#if BITS_PER_LONG == 32
-       const unsigned long plong55 = 0x55555555;
-#else
-       const unsigned long plong55 = 0x5555555555555555;
-#endif
-
-       byte = buffer + (goal / GFS2_NBBY);
-       plong = (const unsigned long *)(buffer + (goal / GFS2_NBBY));
-       bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
-       bitlong = bit;
-
-       while (byte < buffer + buflen) {
-
-               if (bitlong == 0 && old_state == 0 && *plong == plong55) {
-                       plong++;
-                       byte += sizeof(unsigned long);
-                       blk += sizeof(unsigned long) * GFS2_NBBY;
-                       continue;
+       const u8 *byte, *start, *end;
+       int bit, startbit;
+       u32 g1, g2, misaligned;
+       unsigned long *plong;
+       unsigned long lskipval;
+
+       lskipval = (old_state & GFS2_BLKST_USED) ? LBITSKIP00 : LBITSKIP55;
+       g1 = (goal / GFS2_NBBY);
+       start = buffer + g1;
+       byte = start;
+        end = buffer + buflen;
+       g2 = ALIGN(g1, sizeof(unsigned long));
+       plong = (unsigned long *)(buffer + g2);
+       startbit = bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE;
+       misaligned = g2 - g1;
+       if (!misaligned)
+               goto ulong_aligned;
+/* parse the bitmap a byte at a time */
+misaligned:
+       while (byte < end) {
+               if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) {
+                       return goal +
+                               (((byte - start) * GFS2_NBBY) +
+                                ((bit - startbit) >> 1));
                }
-               if (((*byte >> bit) & GFS2_BIT_MASK) == old_state)
-                       return blk;
                bit += GFS2_BIT_SIZE;
-               if (bit >= 8) {
+               if (bit >= GFS2_NBBY * GFS2_BIT_SIZE) {
                        bit = 0;
                        byte++;
+                       misaligned--;
+                       if (!misaligned) {
+                               plong = (unsigned long *)byte;
+                               goto ulong_aligned;
+                       }
                }
-               bitlong += GFS2_BIT_SIZE;
-               if (bitlong >= sizeof(unsigned long) * 8) {
-                       bitlong = 0;
-                       plong++;
-               }
-
-               blk++;
        }
+       return BFITNOENT;
 
+/* parse the bitmap a unsigned long at a time */
+ulong_aligned:
+       /* Stop at "end - 1" or else prefetch can go past the end and segfault.
+          We could "if" it but we'd lose some of the performance gained.
+          This way will only slow down searching the very last 4/8 bytes
+          depending on architecture.  I've experimented with several ways
+          of writing this section such as using an else before the goto
+          but this one seems to be the fastest. */
+       while ((unsigned char *)plong < end - 1) {
+               prefetch(plong + 1);
+               if (((*plong) & LBITMASK) != lskipval)
+                       break;
+               plong++;
+       }
+       if ((unsigned char *)plong < end) {
+               byte = (const u8 *)plong;
+               misaligned += sizeof(unsigned long) - 1;
+               goto misaligned;
+       }
        return BFITNOENT;
 }