Summary:
Wrote an SSE4.2-optimized version of find_first_of (>10x faster in
some cases). For cases where SSE4.2 is not supported, rewrote
find_first_of to use Aho/Hopcroft/Ullman's "sparse, lazy" set (which
is faster than std::find_first_of in most cases).
Note that the overhead of ifunc (especially the lack of inlining)
means that the new implementations can be slightly slower for very
short strings, but the break-even point is around 3-4 characters in
the haystack, which seems reasonable.
Test Plan:
Added tests and benchmarks:
string length 1:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 5.91ns 169.16M
FindSingleCharRange 130.02% 4.55ns 219.95M
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 11.37ns 87.98M
FindFirstOf2NeedlesNoSSE 108.69% 10.46ns 95.63M
FindFirstOf2NeedlesStd 147.04% 7.73ns 129.37M
FindFirstOf2NeedlesMemchr 57.66% 19.71ns 50.73M
FindFirstOf2NeedlesByteSet 83.32% 13.64ns 73.30M
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 10.91ns 91.64M
FindFirstOf4NeedlesNoSSE 88.87% 12.28ns 81.45M
FindFirstOf4NeedlesStd 114.28% 9.55ns 104.73M
FindFirstOf4NeedlesMemchr 34.77% 31.38ns 31.87M
FindFirstOf4NeedlesByteSet 60.00% 18.19ns 54.98M
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 10.91ns 91.64M
FindFirstOf8NeedlesNoSSE 48.00% 22.73ns 43.99M
FindFirstOf8NeedlesStd 54.54% 20.01ns 49.99M
FindFirstOf8NeedlesMemchr 16.27% 67.06ns 14.91M
FindFirstOf8NeedlesByteSet 39.99% 27.28ns 36.65M
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 10.91ns 91.64M
FindFirstOf16NeedlesNoSSE 33.33% 32.74ns 30.54M
FindFirstOf16NeedlesStd 36.36% 30.01ns 33.32M
FindFirstOf16NeedlesMemchr 10.25% 106.42ns 9.40M
FindFirstOf16NeedlesByteSet 24.00% 45.46ns 22.00M
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 18.91ns 52.89M
FindFirstOf32NeedlesNoSSE 21.00% 90.02ns 11.11M
FindFirstOf32NeedlesStd 39.99% 47.28ns 21.15M
FindFirstOf32NeedlesMemchr 8.48% 223.04ns 4.48M
FindFirstOf32NeedlesByteSet 22.35% 84.60ns 11.82M
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 25.92ns 38.58M
FindFirstOf64NeedlesNoSSE 17.45% 148.51ns 6.73M
FindFirstOf64NeedlesStd 33.93% 76.39ns 13.09M
FindFirstOf64NeedlesMemchr 6.07% 426.94ns 2.34M
FindFirstOf64NeedlesByteSet 18.10% 143.22ns 6.98M
----------------------------------------------------------------------------
FindFirstOfRandomBase 23.28ns 42.95M
FindFirstOfRandomNoSSE 88.96% 26.17ns 38.21M
FindFirstOfRandomStd 112.78% 20.64ns 48.44M
FindFirstOfRandomMemchr 35.68% 65.24ns 15.33M
FindFirstOfRandomByteSet 62.62% 37.18ns 26.90M
----------------------------------------------------------------------------
FindFirstOfOffsetRange 12.73ns 78.54M
----------------------------------------------------------------------------
============================================================================
string length 8:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 7.05ns 141.75M
FindSingleCharRange 50.05% 14.10ns 70.95M
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 11.37ns 87.98M
FindFirstOf2NeedlesNoSSE 53.04% 21.43ns 46.67M
FindFirstOf2NeedlesStd 37.87% 30.01ns 33.32M
FindFirstOf2NeedlesMemchr 54.81% 20.74ns 48.22M
FindFirstOf2NeedlesByteSet 33.78% 33.65ns 29.72M
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 10.91ns 91.64M
FindFirstOf4NeedlesNoSSE 25.53% 42.74ns 23.40M
FindFirstOf4NeedlesStd 24.49% 44.56ns 22.44M
FindFirstOf4NeedlesMemchr 33.66% 32.42ns 30.85M
FindFirstOf4NeedlesByteSet 28.57% 38.19ns 26.18M
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 10.91ns 91.64M
FindFirstOf8NeedlesNoSSE 21.05% 51.84ns 19.29M
FindFirstOf8NeedlesStd 13.56% 80.48ns 12.43M
FindFirstOf8NeedlesMemchr 17.32% 62.99ns 15.88M
FindFirstOf8NeedlesByteSet 23.08% 47.28ns 21.15M
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 10.91ns 91.64M
FindFirstOf16NeedlesNoSSE 15.58% 70.02ns 14.28M
FindFirstOf16NeedlesStd 7.23% 150.84ns 6.63M
FindFirstOf16NeedlesMemchr 9.52% 114.63ns 8.72M
FindFirstOf16NeedlesByteSet 16.67% 65.47ns 15.27M
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 18.91ns 52.89M
FindFirstOf32NeedlesNoSSE 18.42% 102.62ns 9.74M
FindFirstOf32NeedlesStd 7.08% 266.97ns 3.75M
FindFirstOf32NeedlesMemchr 8.43% 224.41ns 4.46M
FindFirstOf32NeedlesByteSet 19.29% 98.00ns 10.20M
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 25.92ns 38.58M
FindFirstOf64NeedlesNoSSE 16.13% 160.73ns 6.22M
FindFirstOf64NeedlesStd 4.58% 565.53ns 1.77M
FindFirstOf64NeedlesMemchr 6.05% 428.22ns 2.34M
FindFirstOf64NeedlesByteSet 16.58% 156.33ns 6.40M
----------------------------------------------------------------------------
FindFirstOfRandomBase 23.28ns 42.96M
FindFirstOfRandomNoSSE 44.00% 52.91ns 18.90M
FindFirstOfRandomStd 24.62% 94.56ns 10.58M
FindFirstOfRandomMemchr 30.88% 75.38ns 13.27M
FindFirstOfRandomByteSet 43.33% 53.72ns 18.62M
----------------------------------------------------------------------------
FindFirstOfOffsetRange 12.73ns 78.54M
----------------------------------------------------------------------------
============================================================================
string length 10:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 7.06ns 141.61M
FindSingleCharRange 41.98% 16.82ns 59.44M
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 11.37ns 87.98M
FindFirstOf2NeedlesNoSSE 52.05% 21.84ns 45.79M
FindFirstOf2NeedlesStd 31.25% 36.37ns 27.49M
FindFirstOf2NeedlesMemchr 52.48% 21.66ns 46.17M
FindFirstOf2NeedlesByteSet 29.07% 39.10ns 25.57M
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 10.91ns 91.64M
FindFirstOf4NeedlesNoSSE 28.93% 37.71ns 26.52M
FindFirstOf4NeedlesStd 20.00% 54.57ns 18.33M
FindFirstOf4NeedlesMemchr 30.39% 35.91ns 27.85M
FindFirstOf4NeedlesByteSet 25.00% 43.65ns 22.91M
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 10.91ns 91.64M
FindFirstOf8NeedlesNoSSE 17.02% 64.12ns 15.60M
FindFirstOf8NeedlesStd 11.16% 97.77ns 10.23M
FindFirstOf8NeedlesMemchr 17.52% 62.30ns 16.05M
FindFirstOf8NeedlesByteSet 25.00% 43.65ns 22.91M
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 10.91ns 91.64M
FindFirstOf16NeedlesNoSSE 16.28% 67.02ns 14.92M
FindFirstOf16NeedlesStd 5.98% 182.32ns 5.48M
FindFirstOf16NeedlesMemchr 9.09% 120.06ns 8.33M
FindFirstOf16NeedlesByteSet 17.65% 61.84ns 16.17M
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 19.10ns 52.36M
FindFirstOf32NeedlesNoSSE 17.91% 106.63ns 9.38M
FindFirstOf32NeedlesStd 5.79% 329.70ns 3.03M
FindFirstOf32NeedlesMemchr 7.89% 241.91ns 4.13M
FindFirstOf32NeedlesByteSet 18.92% 100.95ns 9.91M
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 26.15ns 38.24M
FindFirstOf64NeedlesNoSSE 15.84% 165.05ns 6.06M
FindFirstOf64NeedlesStd 3.71% 704.28ns 1.42M
FindFirstOf64NeedlesMemchr 5.49% 476.63ns 2.10M
FindFirstOf64NeedlesByteSet 16.48% 158.68ns 6.30M
----------------------------------------------------------------------------
FindFirstOfRandomBase 22.83ns 43.81M
FindFirstOfRandomNoSSE 43.25% 52.78ns 18.95M
FindFirstOfRandomStd 22.33% 102.23ns 9.78M
FindFirstOfRandomMemchr 31.61% 72.23ns 13.85M
FindFirstOfRandomByteSet 41.64% 54.82ns 18.24M
----------------------------------------------------------------------------
FindFirstOfOffsetRange 12.73ns 78.54M
----------------------------------------------------------------------------
============================================================================
string length 16:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 7.06ns 141.72M
FindSingleCharRange 28.21% 25.01ns 39.98M
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 15.91ns 62.84M
FindFirstOf2NeedlesNoSSE 72.89% 21.84ns 45.80M
FindFirstOf2NeedlesStd 28.68% 55.48ns 18.02M
FindFirstOf2NeedlesMemchr 74.47% 21.37ns 46.79M
FindFirstOf2NeedlesByteSet 23.34% 68.19ns 14.66M
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 15.46ns 64.68M
FindFirstOf4NeedlesNoSSE 40.77% 37.92ns 26.37M
FindFirstOf4NeedlesStd 18.28% 84.59ns 11.82M
FindFirstOf4NeedlesMemchr 42.97% 35.97ns 27.80M
FindFirstOf4NeedlesByteSet 25.76% 60.02ns 16.66M
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 15.46ns 64.68M
FindFirstOf8NeedlesNoSSE 24.03% 64.34ns 15.54M
FindFirstOf8NeedlesStd 9.74% 158.74ns 6.30M
FindFirstOf8NeedlesMemchr 24.55% 62.98ns 15.88M
FindFirstOf8NeedlesByteSet 28.33% 54.57ns 18.33M
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 15.46ns 64.68M
FindFirstOf16NeedlesNoSSE 19.83% 77.98ns 12.82M
FindFirstOf16NeedlesStd 5.56% 277.82ns 3.60M
FindFirstOf16NeedlesMemchr 12.95% 119.35ns 8.38M
FindFirstOf16NeedlesByteSet 21.25% 72.75ns 13.75M
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 32.80ns 30.49M
FindFirstOf32NeedlesNoSSE 27.86% 117.69ns 8.50M
FindFirstOf32NeedlesStd 6.33% 517.97ns 1.93M
FindFirstOf32NeedlesMemchr 13.72% 239.09ns 4.18M
FindFirstOf32NeedlesByteSet 29.06% 112.85ns 8.86M
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 46.83ns 21.35M
FindFirstOf64NeedlesNoSSE 26.68% 175.50ns 5.70M
FindFirstOf64NeedlesStd 4.20% 1.11us 897.48K
FindFirstOf64NeedlesMemchr 10.04% 466.39ns 2.14M
FindFirstOf64NeedlesByteSet 27.47% 170.50ns 5.87M
----------------------------------------------------------------------------
FindFirstOfRandomBase 23.41ns 42.72M
FindFirstOfRandomNoSSE 38.00% 61.61ns 16.23M
FindFirstOfRandomStd 13.91% 168.34ns 5.94M
FindFirstOfRandomMemchr 29.03% 80.64ns 12.40M
FindFirstOfRandomByteSet 33.31% 70.28ns 14.23M
----------------------------------------------------------------------------
FindFirstOfOffsetRange 15.12ns 66.15M
----------------------------------------------------------------------------
============================================================================
string length 32:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 8.23ns 121.52M
FindSingleCharRange 17.57% 46.83ns 21.35M
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 20.46ns 48.88M
FindFirstOf2NeedlesNoSSE 82.29% 24.86ns 40.22M
FindFirstOf2NeedlesStd 17.69% 115.65ns 8.65M
FindFirstOf2NeedlesMemchr 85.17% 24.02ns 41.63M
FindFirstOf2NeedlesByteSet 28.19% 72.58ns 13.78M
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 20.01ns 49.99M
FindFirstOf4NeedlesNoSSE 48.57% 41.19ns 24.28M
FindFirstOf4NeedlesStd 11.52% 173.72ns 5.76M
FindFirstOf4NeedlesMemchr 50.55% 39.58ns 25.27M
FindFirstOf4NeedlesByteSet 26.33% 75.99ns 13.16M
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 20.01ns 49.99M
FindFirstOf8NeedlesNoSSE 26.94% 74.27ns 13.46M
FindFirstOf8NeedlesStd 6.73% 297.31ns 3.36M
FindFirstOf8NeedlesMemchr 27.44% 72.90ns 13.72M
FindFirstOf8NeedlesByteSet 23.91% 83.66ns 11.95M
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 20.01ns 49.99M
FindFirstOf16NeedlesNoSSE 18.37% 108.92ns 9.18M
FindFirstOf16NeedlesStd 3.75% 532.80ns 1.88M
FindFirstOf16NeedlesMemchr 14.53% 137.71ns 7.26M
FindFirstOf16NeedlesByteSet 19.55% 102.32ns 9.77M
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 45.92ns 21.78M
FindFirstOf32NeedlesNoSSE 31.17% 147.32ns 6.79M
FindFirstOf32NeedlesStd 4.50% 1.02us 980.43K
FindFirstOf32NeedlesMemchr 16.13% 284.64ns 3.51M
FindFirstOf32NeedlesByteSet 32.63% 140.73ns 7.11M
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 68.20ns 14.66M
FindFirstOf64NeedlesNoSSE 29.97% 227.55ns 4.39M
FindFirstOf64NeedlesStd 3.08% 2.21us 452.08K
FindFirstOf64NeedlesMemchr 12.51% 545.17ns 1.83M
FindFirstOf64NeedlesByteSet 30.74% 221.86ns 4.51M
----------------------------------------------------------------------------
FindFirstOfRandomBase 29.99ns 33.35M
FindFirstOfRandomNoSSE 45.10% 66.49ns 15.04M
FindFirstOfRandomStd 10.28% 291.67ns 3.43M
FindFirstOfRandomMemchr 34.56% 86.76ns 11.53M
FindFirstOfRandomByteSet 28.64% 104.72ns 9.55M
----------------------------------------------------------------------------
FindFirstOfOffsetRange 19.55ns 51.15M
----------------------------------------------------------------------------
============================================================================
string length 64:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 10.91ns 91.65M
FindSingleCharRange 13.26% 82.29ns 12.15M
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 29.56ns 33.83M
FindFirstOf2NeedlesNoSSE 100.77% 29.33ns 34.09M
FindFirstOf2NeedlesStd 13.59% 217.44ns 4.60M
FindFirstOf2NeedlesMemchr 104.83% 28.19ns 35.47M
FindFirstOf2NeedlesByteSet 22.01% 134.28ns 7.45M
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 29.10ns 34.36M
FindFirstOf4NeedlesNoSSE 56.14% 51.84ns 19.29M
FindFirstOf4NeedlesStd 8.72% 333.84ns 3.00M
FindFirstOf4NeedlesMemchr 58.18% 50.02ns 19.99M
FindFirstOf4NeedlesByteSet 19.73% 147.48ns 6.78M
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 29.10ns 34.36M
FindFirstOf8NeedlesNoSSE 30.48% 95.48ns 10.47M
FindFirstOf8NeedlesStd 5.07% 573.76ns 1.74M
FindFirstOf8NeedlesMemchr 30.92% 94.11ns 10.63M
FindFirstOf8NeedlesByteSet 19.26% 151.13ns 6.62M
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 29.10ns 34.36M
FindFirstOf16NeedlesNoSSE 15.84% 183.68ns 5.44M
FindFirstOf16NeedlesStd 2.79% 1.04us 959.63K
FindFirstOf16NeedlesMemchr 16.04% 181.41ns 5.51M
FindFirstOf16NeedlesByteSet 16.54% 175.95ns 5.68M
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 73.21ns 13.66M
FindFirstOf32NeedlesNoSSE 32.76% 223.49ns 4.47M
FindFirstOf32NeedlesStd 3.62% 2.02us 494.08K
FindFirstOf32NeedlesMemchr 19.49% 375.70ns 2.66M
FindFirstOf32NeedlesByteSet 33.45% 218.87ns 4.57M
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 109.95ns 9.09M
FindFirstOf64NeedlesNoSSE 38.99% 282.01ns 3.55M
FindFirstOf64NeedlesStd 2.49% 4.41us 226.78K
FindFirstOf64NeedlesMemchr 15.21% 723.03ns 1.38M
FindFirstOf64NeedlesByteSet 39.68% 277.13ns 3.61M
----------------------------------------------------------------------------
FindFirstOfRandomBase 40.57ns 24.65M
FindFirstOfRandomNoSSE 47.65% 85.15ns 11.74M
FindFirstOfRandomStd 7.62% 532.10ns 1.88M
FindFirstOfRandomMemchr 39.23% 103.43ns 9.67M
FindFirstOfRandomByteSet 22.95% 176.82ns 5.66M
----------------------------------------------------------------------------
FindFirstOfOffsetRange 28.65ns 34.91M
----------------------------------------------------------------------------
============================================================================
string length 128:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 16.37ns 61.09M
FindSingleCharRange 11.62% 140.85ns 7.10M
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 47.74ns 20.95M
FindFirstOf2NeedlesNoSSE 118.64% 40.24ns 24.85M
FindFirstOf2NeedlesStd 11.33% 421.18ns 2.37M
FindFirstOf2NeedlesMemchr 120.68% 39.56ns 25.28M
FindFirstOf2NeedlesByteSet 21.47% 222.36ns 4.50M
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 47.28ns 21.15M
FindFirstOf4NeedlesNoSSE 63.80% 74.11ns 13.49M
FindFirstOf4NeedlesStd 7.23% 653.94ns 1.53M
FindFirstOf4NeedlesMemchr 65.40% 72.30ns 13.83M
FindFirstOf4NeedlesByteSet 19.96% 236.85ns 4.22M
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 47.28ns 21.15M
FindFirstOf8NeedlesNoSSE 33.87% 139.59ns 7.16M
FindFirstOf8NeedlesStd 4.20% 1.13us 887.82K
FindFirstOf8NeedlesMemchr 34.43% 137.32ns 7.28M
FindFirstOf8NeedlesByteSet 18.98% 249.17ns 4.01M
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 47.28ns 21.15M
FindFirstOf16NeedlesNoSSE 16.83% 281.00ns 3.56M
FindFirstOf16NeedlesStd 2.30% 2.06us 485.36K
FindFirstOf16NeedlesMemchr 16.98% 278.50ns 3.59M
FindFirstOf16NeedlesByteSet 15.75% 300.13ns 3.33M
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 128.45ns 7.79M
FindFirstOf32NeedlesNoSSE 37.09% 346.28ns 2.89M
FindFirstOf32NeedlesStd 3.19% 4.03us 248.02K
FindFirstOf32NeedlesMemchr 23.13% 555.26ns 1.80M
FindFirstOf32NeedlesByteSet 37.74% 340.32ns 2.94M
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 193.23ns 5.18M
FindFirstOf64NeedlesNoSSE 47.76% 404.60ns 2.47M
FindFirstOf64NeedlesStd 2.20% 8.80us 113.61K
FindFirstOf64NeedlesMemchr 17.91% 1.08us 926.70K
FindFirstOf64NeedlesByteSet 48.35% 399.64ns 2.50M
----------------------------------------------------------------------------
FindFirstOfRandomBase 59.66ns 16.76M
FindFirstOfRandomNoSSE 53.67% 111.17ns 9.00M
FindFirstOfRandomStd 6.41% 930.67ns 1.07M
FindFirstOfRandomMemchr 46.01% 129.68ns 7.71M
FindFirstOfRandomByteSet 19.80% 301.38ns 3.32M
----------------------------------------------------------------------------
FindFirstOfOffsetRange 46.83ns 21.35M
----------------------------------------------------------------------------
============================================================================
string length 256:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 27.28ns 36.65M
FindSingleCharRange 10.62% 256.90ns 3.89M
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 61.39ns 16.29M
FindFirstOf2NeedlesNoSSE 99.28% 61.84ns 16.17M
FindFirstOf2NeedlesStd 7.41% 828.62ns 1.21M
FindFirstOf2NeedlesMemchr 100.01% 61.39ns 16.29M
FindFirstOf2NeedlesByteSet 15.36% 399.65ns 2.50M
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 83.65ns 11.95M
FindFirstOf4NeedlesNoSSE 71.03% 117.77ns 8.49M
FindFirstOf4NeedlesStd 6.46% 1.29us 772.77K
FindFirstOf4NeedlesMemchr 72.14% 115.95ns 8.62M
FindFirstOf4NeedlesByteSet 20.66% 404.81ns 2.47M
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 83.66ns 11.95M
FindFirstOf8NeedlesNoSSE 35.38% 236.46ns 4.23M
FindFirstOf8NeedlesStd 3.75% 2.23us 447.99K
FindFirstOf8NeedlesMemchr 35.71% 234.26ns 4.27M
FindFirstOf8NeedlesByteSet 20.13% 415.56ns 2.41M
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 83.66ns 11.95M
FindFirstOf16NeedlesNoSSE 18.04% 463.82ns 2.16M
FindFirstOf16NeedlesStd 2.04% 4.10us 244.06K
FindFirstOf16NeedlesMemchr 18.14% 461.09ns 2.17M
FindFirstOf16NeedlesByteSet 14.81% 564.87ns 1.77M
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 237.14ns 4.22M
FindFirstOf32NeedlesNoSSE 38.92% 609.24ns 1.64M
FindFirstOf32NeedlesStd 2.95% 8.05us 124.26K
FindFirstOf32NeedlesMemchr 25.90% 915.44ns 1.09M
FindFirstOf32NeedlesByteSet 39.21% 604.86ns 1.65M
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 360.78ns 2.77M
FindFirstOf64NeedlesNoSSE 54.03% 667.71ns 1.50M
FindFirstOf64NeedlesStd 2.05% 17.59us 56.86K
FindFirstOf64NeedlesMemchr 20.04% 1.80us 555.45K
FindFirstOf64NeedlesByteSet 54.61% 660.63ns 1.51M
----------------------------------------------------------------------------
FindFirstOfRandomBase 98.24ns 10.18M
FindFirstOfRandomNoSSE 47.37% 207.40ns 4.82M
FindFirstOfRandomStd 5.24% 1.88us 533.28K
FindFirstOfRandomMemchr 39.75% 247.14ns 4.05M
FindFirstOfRandomByteSet 17.69% 555.45ns 1.80M
----------------------------------------------------------------------------
FindFirstOfOffsetRange 62.75ns 15.94M
----------------------------------------------------------------------------
============================================================================
string length 10240:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 613.80ns 1.63M
FindSingleCharRange 6.57% 9.34us 107.12K
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 1.23us 813.01K
FindFirstOf2NeedlesNoSSE 100.01% 1.23us 813.07K
FindFirstOf2NeedlesStd 3.77% 32.61us 30.67K
FindFirstOf2NeedlesMemchr 100.08% 1.23us 813.67K
FindFirstOf2NeedlesByteSet 8.65% 14.21us 70.37K
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 2.94us 340.63K
FindFirstOf4NeedlesNoSSE 119.61% 2.45us 407.44K
FindFirstOf4NeedlesStd 5.73% 51.23us 19.52K
FindFirstOf4NeedlesMemchr 119.77% 2.45us 407.97K
FindFirstOf4NeedlesByteSet 20.66% 14.21us 70.38K
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 2.94us 340.63K
FindFirstOf8NeedlesNoSSE 59.95% 4.90us 204.21K
FindFirstOf8NeedlesStd 3.32% 88.48us 11.30K
FindFirstOf8NeedlesMemchr 59.96% 4.90us 204.25K
FindFirstOf8NeedlesByteSet 20.68% 14.20us 70.43K
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 2.94us 340.63K
FindFirstOf16NeedlesNoSSE 29.98% 9.79us 102.13K
FindFirstOf16NeedlesStd 1.80% 162.97us 6.14K
FindFirstOf16NeedlesMemchr 29.98% 9.79us 102.11K
FindFirstOf16NeedlesByteSet 20.65% 14.22us 70.33K
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 8.77us 114.07K
FindFirstOf32NeedlesNoSSE 44.71% 19.61us 51.00K
FindFirstOf32NeedlesStd 2.73% 321.22us 3.11K
FindFirstOf32NeedlesMemchr 43.44% 20.18us 49.55K
FindFirstOf32NeedlesByteSet 44.67% 19.63us 50.95K
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 13.43us 74.44K
FindFirstOf64NeedlesNoSSE 68.26% 19.68us 50.81K
FindFirstOf64NeedlesStd 1.91% 702.62us 1.42K
FindFirstOf64NeedlesMemchr 33.81% 39.74us 25.17K
FindFirstOf64NeedlesByteSet 68.25% 19.68us 50.81K
----------------------------------------------------------------------------
FindFirstOfRandomBase 3.01us 331.81K
FindFirstOfRandomNoSSE 75.38% 4.00us 250.10K
FindFirstOfRandomStd 6.81% 44.25us 22.60K
FindFirstOfRandomMemchr 76.46% 3.94us 253.71K
FindFirstOfRandomByteSet 15.01% 20.08us 49.81K
----------------------------------------------------------------------------
FindFirstOfOffsetRange 1.23us 811.29K
----------------------------------------------------------------------------
============================================================================
string length 1048576:
============================================================================
folly/test/RangeFindBenchmark.cpp relative time/iter iters/s
============================================================================
FindSingleCharMemchr 85.07us 11.76K
FindSingleCharRange 8.92% 953.48us 1.05K
----------------------------------------------------------------------------
FindFirstOf2NeedlesBase 170.23us 5.87K
FindFirstOf2NeedlesNoSSE 100.01% 170.21us 5.87K
FindFirstOf2NeedlesStd 5.09% 3.34ms 299.18
FindFirstOf2NeedlesMemchr 100.02% 170.20us 5.88K
FindFirstOf2NeedlesByteSet 11.64% 1.46ms 683.69
----------------------------------------------------------------------------
FindFirstOf4NeedlesBase 298.04us 3.36K
FindFirstOf4NeedlesNoSSE 87.48% 340.68us 2.94K
FindFirstOf4NeedlesStd 5.68% 5.25ms 190.41
FindFirstOf4NeedlesMemchr 87.53% 340.51us 2.94K
FindFirstOf4NeedlesByteSet 20.37% 1.46ms 683.55
----------------------------------------------------------------------------
FindFirstOf8NeedlesBase 298.04us 3.36K
FindFirstOf8NeedlesNoSSE 43.75% 681.27us 1.47K
FindFirstOf8NeedlesStd 3.29% 9.07ms 110.24
FindFirstOf8NeedlesMemchr 43.74% 681.36us 1.47K
FindFirstOf8NeedlesByteSet 20.37% 1.46ms 683.55
----------------------------------------------------------------------------
FindFirstOf16NeedlesBase 298.03us 3.36K
FindFirstOf16NeedlesNoSSE 21.83% 1.37ms 732.40
FindFirstOf16NeedlesStd 1.78% 16.72ms 59.81
FindFirstOf16NeedlesMemchr 21.83% 1.37ms 732.49
FindFirstOf16NeedlesByteSet 20.37% 1.46ms 683.60
----------------------------------------------------------------------------
FindFirstOf32NeedlesBase 896.95us 1.11K
FindFirstOf32NeedlesNoSSE 44.21% 2.03ms 492.89
FindFirstOf32NeedlesStd 2.67% 33.53ms 29.82
FindFirstOf32NeedlesMemchr 31.84% 2.82ms 354.97
FindFirstOf32NeedlesByteSet 44.25% 2.03ms 493.31
----------------------------------------------------------------------------
FindFirstOf64NeedlesBase 1.38ms 725.72
FindFirstOf64NeedlesNoSSE 67.96% 2.03ms 493.18
FindFirstOf64NeedlesStd 1.90% 72.34ms 13.82
FindFirstOf64NeedlesMemchr 24.82% 5.55ms 180.11
FindFirstOf64NeedlesByteSet 67.97% 2.03ms 493.30
----------------------------------------------------------------------------
FindFirstOfRandomBase 657.10us 1.52K
FindFirstOfRandomNoSSE 31.60% 2.08ms 480.94
FindFirstOfRandomStd 2.05% 32.07ms 31.18
FindFirstOfRandomMemchr 24.06% 2.73ms 366.13
FindFirstOfRandomByteSet 31.56% 2.08ms 480.22
----------------------------------------------------------------------------
FindFirstOfOffsetRange 170.28us 5.87K
----------------------------------------------------------------------------
============================================================================
Reviewed By: philipp@fb.com
FB internal diff: D638500
/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "folly/Range.h"
+#include "folly/CpuId.h"
+#include "folly/Likely.h"
+
namespace folly {
/**
return os;
}
-} // namespace folly
+namespace detail {
+// Finds the first byte of haystack that equals any byte of needles by
+// running one memchr per needle.
+//
+// Returns the index of that byte, or StringPiece::npos when no needle
+// occurs in haystack.
+size_t qfind_first_byte_of_memchr(const StringPiece& haystack,
+                                  const StringPiece& needles) {
+  const char* const base = haystack.data();
+  const size_t notFound = haystack.size();
+  size_t earliest = notFound;
+  for (char c : needles) {
+    // Only the prefix before the earliest hit so far is searched: a later
+    // occurrence could never improve the answer.
+    const void* hit = memchr(base, c, earliest);
+    if (hit != nullptr) {
+      auto offset = static_cast<const char*>(hit) - base;
+      earliest = std::min<size_t>(earliest, offset);
+    }
+  }
+  return earliest == notFound ? StringPiece::npos : earliest;
+}
+} // namespace detail
+
+namespace {
+// build sse4.2-optimized version even if -msse4.2 is not passed to GCC
+size_t qfind_first_byte_of_needles16(const StringPiece& haystack,
+                                     const StringPiece& needles)
+  __attribute__ ((__target__("sse4.2")));
+
+// Helper for the case where needles.size() <= 16: all needles fit in one
+// SSE register, so each 16-byte block of haystack needs a single pcmpestri.
+//
+// Returns the index of the first byte of haystack that equals any needle,
+// or StringPiece::npos if there is none.
+//
+// Every 16-byte load is guaranteed to stay inside memory owned by the
+// caller or by this function: short needle sets and the final partial
+// haystack block are staged in local buffers instead of being loaded
+// directly, because a direct load would read past the end of the range.
+size_t qfind_first_byte_of_needles16(const StringPiece& haystack,
+                                     const StringPiece& needles) {
+  DCHECK_LE(needles.size(), 16);
+  if (needles.size() <= 2 && haystack.size() >= 256) {
+    // benchmarking shows that memchr beats out SSE for small needle-sets
+    // with large haystacks.
+    // TODO(mcurtiss): could this be because of unaligned SSE loads?
+    return detail::qfind_first_byte_of_memchr(haystack, needles);
+  }
+
+  // Clamp so that a violated precondition in a non-debug build cannot
+  // overflow the staging buffer below.
+  const size_t needleCount = std::min<size_t>(needles.size(), 16);
+  char needleBlock[16] = {};
+  if (needleCount != 0) {
+    memcpy(needleBlock, needles.data(), needleCount);
+  }
+  const auto arr2 = __builtin_ia32_loaddqu(needleBlock);
+  const int needleLen = static_cast<int>(needleCount);
+
+  const size_t haystackSize = haystack.size();
+  size_t i = 0;
+  // Full blocks: 16 readable bytes remain, so load straight from haystack.
+  // The explicit length 16 also avoids narrowing a large size_t to int.
+  for (; haystackSize - i >= 16; i += 16) {
+    const auto arr1 = __builtin_ia32_loaddqu(haystack.data() + i);
+    const int index = __builtin_ia32_pcmpestri128(arr2, needleLen,
+                                                  arr1, 16, 0);
+    if (index < 16) {
+      return i + index;
+    }
+  }
+  // Final partial block (fewer than 16 bytes): copy it into a local buffer
+  // and tell pcmpestri how many of those bytes are valid.
+  if (i < haystackSize) {
+    const size_t tailSize = haystackSize - i;
+    char tailBlock[16] = {};
+    memcpy(tailBlock, haystack.data() + i, tailSize);
+    const auto arr1 = __builtin_ia32_loaddqu(tailBlock);
+    const int index = __builtin_ia32_pcmpestri128(arr2, needleLen,
+                                                  arr1,
+                                                  static_cast<int>(tailSize),
+                                                  0);
+    if (index < 16) {
+      return i + index;
+    }
+  }
+  return StringPiece::npos;
+}
+
+size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
+                                 const StringPiece& needles)
+  __attribute__ ((__target__("sse4.2")));
+
+// SSE4.2 implementation of the byte search.
+//
+// Returns the index of the first byte of haystack that equals any byte of
+// needles, or StringPiece::npos if there is none or if either range is
+// empty. Needle sets of at most 16 bytes are delegated to
+// qfind_first_byte_of_needles16; larger sets are compared against each
+// 16-byte haystack block one 16-byte needle block at a time.
+//
+// No 16-byte load reads past the end of either range: the last partial
+// needle block and the last partial haystack block are staged in local
+// buffers.
+size_t qfind_first_byte_of_sse42(const StringPiece& haystack,
+                                 const StringPiece& needles) {
+  if (UNLIKELY(needles.empty() || haystack.empty())) {
+    return StringPiece::npos;
+  } else if (needles.size() <= 16) {
+    // we can save some unnecessary load instructions by optimizing for
+    // the common case of needles.size() <= 16
+    return qfind_first_byte_of_needles16(haystack, needles);
+  }
+
+  // Split needles into whole 16-byte blocks plus an optional partial tail.
+  // The tail is copied once, up front, so the inner loop stays load-only.
+  const size_t needleTailSize = needles.size() % 16;
+  const size_t needleFullSize = needles.size() - needleTailSize;
+  char needleTail[16] = {};
+  if (needleTailSize != 0) {
+    memcpy(needleTail, needles.data() + needleFullSize, needleTailSize);
+  }
+
+  const size_t haystackSize = haystack.size();
+  char haystackTail[16] = {};  // only used for the final partial block
+  for (size_t i = 0; i < haystackSize; i += 16) {
+    const size_t blockSize = std::min<size_t>(haystackSize - i, 16);
+    const char* block = haystack.data() + i;
+    if (blockSize < 16) {
+      memcpy(haystackTail, block, blockSize);
+      block = haystackTail;
+    }
+    const auto arr1 = __builtin_ia32_loaddqu(block);
+    // blockSize is at most 16, so this conversion cannot truncate.
+    const int blockLen = static_cast<int>(blockSize);
+
+    // b is the smallest match index within this haystack block over all
+    // needle blocks; 16 means "no match in this block".
+    size_t b = 16;
+    for (size_t j = 0; j < needleFullSize; j += 16) {
+      const auto arr2 = __builtin_ia32_loaddqu(needles.data() + j);
+      const int index = __builtin_ia32_pcmpestri128(arr2, 16,
+                                                    arr1, blockLen, 0);
+      b = std::min<size_t>(index, b);
+    }
+    if (needleTailSize != 0) {
+      const auto arr2 = __builtin_ia32_loaddqu(needleTail);
+      const int index = __builtin_ia32_pcmpestri128(
+          arr2, static_cast<int>(needleTailSize), arr1, blockLen, 0);
+      b = std::min<size_t>(index, b);
+    }
+    if (b < 16) {
+      return i + b;
+    }
+  }
+  return StringPiece::npos;
+}
+
+typedef decltype(qfind_first_byte_of_sse42) Type_qfind_first_byte_of;
+
+// Aho, Hopcroft, and Ullman refer to this trick in "The Design and Analysis
+// of Computer Algorithms" (1974), but the best description is here:
+// http://research.swtch.com/sparse
+//
+// A set of byte values. dense_ lists the members in insertion order and
+// sparse_[v] records the slot of dense_ in which v was stored. Membership
+// is decided by checking that the two arrays agree, which is why neither
+// array is initialized by the constructor.
+class FastByteSet {
+ public:
+  // Only the element count is set; the arrays are left as they are.
+  FastByteSet() : size_(0) { }
+
+  // Inserts i; a value that is already present is left untouched.
+  void add(uint8_t i) {
+    if (contains(i)) {
+      return;
+    }
+    dense_[size_] = i;
+    sparse_[i] = size_;
+    ++size_;
+  }
+
+  // True iff i was added: its recorded slot must lie within the used part
+  // of dense_ and that slot must hold i itself.
+  bool contains(uint8_t i) const {
+    DCHECK_LE(size_, 256);
+    const uint8_t slot = sparse_[i];
+    return slot < size_ && dense_[slot] == i;
+  }
+
+ private:
+  // Number of members. uint8_t would overflow once all 256 possible byte
+  // values had been inserted, hence the wider type.
+  uint16_t size_;
+  uint8_t sparse_[256];
+  uint8_t dense_[256];
+};
+} // namespace
+
+namespace detail {
+// Finds the first byte of haystack that equals any byte of needles by
+// first collecting the needles in a FastByteSet and then testing each
+// haystack byte for membership.
+//
+// Returns the index of that byte, or StringPiece::npos if there is none.
+size_t qfind_first_byte_of_byteset(const StringPiece& haystack,
+                                   const StringPiece& needles) {
+  FastByteSet needleSet;
+  for (auto c : needles) {
+    needleSet.add(c);
+  }
+
+  const size_t length = haystack.size();
+  size_t pos = 0;
+  while (pos < length && !needleSet.contains(haystack[pos])) {
+    ++pos;
+  }
+  return pos < length ? pos : StringPiece::npos;
+}
+
+// Byte search that does not use SSE. Picks one of three strategies from
+// the sizes of haystack and needles: the generic qfind_first_of, the
+// FastByteSet-based search, or the memchr-based search.
+//
+// Returns the index of the first byte of haystack that equals any byte of
+// needles, or StringPiece::npos if there is none or either range is empty.
+size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
+                                 const StringPiece& needles) {
+  if (UNLIKELY(needles.empty() || haystack.empty())) {
+    return StringPiece::npos;
+  }
+
+  const size_t haystackSize = haystack.size();
+  const size_t needleCount = needles.size();
+
+  // The thresholds below were empirically determined by benchmarking.
+  // This is not an exact science since it depends on the CPU, the size of
+  // needles, and the size of haystack.
+  const bool tinyHaystack =
+    haystackSize == 1 || (haystackSize < 4 && needleCount <= 16);
+  if (tinyHaystack) {
+    return qfind_first_of(haystack, needles, asciiCaseSensitive);
+  }
+
+  const bool preferByteSet =
+    (needleCount >= 4 && haystackSize <= 10) ||
+    (needleCount >= 16 && haystackSize <= 64) ||
+    needleCount >= 32;
+  if (preferByteSet) {
+    return qfind_first_byte_of_byteset(haystack, needles);
+  }
+
+  return qfind_first_byte_of_memchr(haystack, needles);
+}
+
+// This function is called on startup to resolve folly::qfind_first_byte_of.
+// It returns the SSE4.2 implementation when CpuId reports SSE4.2 support
+// and the non-SSE implementation otherwise.
+extern "C" Type_qfind_first_byte_of* qfind_first_byte_of_ifunc() {
+  if (folly::CpuId().sse42()) {
+    return qfind_first_byte_of_sse42;
+  }
+  return qfind_first_byte_of_nosse;
+}
+
+// Entry point for the byte search. It has no body of its own: the ifunc
+// attribute binds the symbol to the function returned by the resolver
+// qfind_first_byte_of_ifunc.
+size_t qfind_first_byte_of(const StringPiece& haystack,
+ const StringPiece& needles)
+ __attribute__((ifunc("qfind_first_byte_of_ifunc")));
+
+} // namespace detail
+} // namespace folly
/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
return std::string::npos;
}
+namespace detail {
+size_t qfind_first_byte_of(const StringPiece& haystack,
+ const StringPiece& needles);
+} // namespace detail
+
template <class T, class Comp>
size_t qfind_first_of(const Range<T> & haystack,
- const Range<T> & needle,
+ const Range<T> & needles,
Comp eq) {
auto ret = std::find_first_of(haystack.begin(), haystack.end(),
- needle.begin(), needle.end(),
+ needles.begin(), needles.end(),
eq);
return ret == haystack.end() ? std::string::npos : ret - haystack.begin();
}
template <class T>
size_t qfind_first_of(const Range<T>& haystack,
- const Range<T>& needle) {
- return qfind_first_of(haystack, needle, asciiCaseSensitive);
+ const Range<T>& needles) {
+ return qfind_first_of(haystack, needles, asciiCaseSensitive);
+}
+
+// specialization for StringPiece: forwards to detail::qfind_first_byte_of
+template <>
+inline size_t qfind_first_of(const Range<const char*>& haystack,
+ const Range<const char*>& needles) {
+ return detail::qfind_first_byte_of(haystack, needles);
}
+// specialization for ByteRange: converts both ranges to StringPiece and forwards
+template <>
+inline size_t qfind_first_of(const Range<const unsigned char*>& haystack,
+ const Range<const unsigned char*>& needles) {
+ return detail::qfind_first_byte_of(StringPiece(haystack),
+ StringPiece(needles));
+}
} // !namespace folly
FOLLY_ASSUME_FBVECTOR_COMPATIBLE_1(folly::Range);
#include "folly/Foreach.h"
#include <algorithm>
#include <iostream>
+#include <random>
#include <string>
+namespace folly { namespace detail {
+// declarations of functions in Range.cpp, so the benchmarks can call each one directly
+size_t qfind_first_byte_of_memchr(const StringPiece& haystack,
+ const StringPiece& needles);
+
+size_t qfind_first_byte_of_byteset(const StringPiece& haystack,
+ const StringPiece& needles);
+
+size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
+ const StringPiece& needles);
+}}
+
using namespace folly;
using namespace std;
str.append(1, 'b');
}
+std::mt19937 rnd; // default-constructed, i.e. default-seeded
+string ffoTestString; // haystack used by findFirstOfRandom; set by initDelims
+const size_t ffoDelimSize = 128; // number of needle sets initDelims generates
+vector<string> ffoDelim; // the generated needle sets; refilled by initDelims
+
+// Returns a string of len pseudo-random bytes drawn from rnd. Every byte is
+// in [1, 255], so the result never contains a null character. A len of zero
+// or less yields an empty string.
+string generateString(int len) {
+  std::uniform_int_distribution<uint32_t> validChar(1, 255); // no null-char
+  string ret;
+  if (len > 0) {
+    ret.reserve(len);
+  }
+  // Count upwards rather than using `while (len--)`, which does not
+  // terminate normally when len is negative.
+  for (int i = 0; i < len; ++i) {
+    ret.push_back(static_cast<char>(validChar(rnd)));
+  }
+  return ret;
+}
+
+// Rebuilds the inputs used by findFirstOfRandom for a haystack of len bytes:
+// ffoTestString becomes len - 1 null characters followed by 'a', and ffoDelim
+// is refilled with ffoDelimSize needle sets produced by generateString.
+void initDelims(int len) {
+  ffoDelim.clear();
+  ffoDelim.reserve(ffoDelimSize);
+
+  // find_first_of won't finish until last char. A len below 1 is clamped so
+  // that len - 1 cannot wrap around to an enormous string size.
+  string haystack(len > 0 ? len - 1 : 0, '\0');
+  haystack.push_back('a');
+  ffoTestString = haystack;
+
+  for (size_t i = 0; i < ffoDelimSize; ++i) {
+    // most delimiter sets are pretty small, but occasionally there could
+    // be a big one.
+    auto n = rnd() % 8 + 1;
+    if (n == 8) {
+      n = 32;
+    }
+    auto delims = generateString(n);
+    if (rnd() % 2) {
+      // ~half of tests will find a hit
+      delims[rnd() % delims.size()] = 'a'; // yes, this could mean 'a' is a duplicate
+    }
+    ffoDelim.push_back(delims);
+  }
+}
+
} // anonymous namespace
BENCHMARK(FindSingleCharMemchr, n) {
BENCHMARK_DRAW_LINE();
-BENCHMARK(FindFirstOfRange, n) {
+// it's useful to compare our custom implementations vs. the standard library
+inline size_t qfind_first_byte_of_std(const StringPiece& haystack,
+ const StringPiece& needles) {
+ return qfind_first_of(haystack, needles, asciiCaseSensitive); // the comparator overload wraps std::find_first_of
+}
+
+template <class Func>
+void findFirstOfRange(StringPiece needles, Func func, size_t n) {
StringPiece haystack(str);
- folly::StringPiece needles("bc");
- DCHECK_EQ(haystack.size() - 1, haystack.find_first_of(needles)); // it works!
FOR_EACH_RANGE (i, 0, n) {
- doNotOptimizeAway(haystack.find_first_of(needles));
+ doNotOptimizeAway(func(haystack, needles));
char x = haystack[0];
doNotOptimizeAway(&x);
}
}
+const string delims2 = "bc"; // needle set with 2 bytes
+
+BENCHMARK(FindFirstOf2NeedlesBase, n) { // baseline: the ifunc-dispatched entry point
+ findFirstOfRange(delims2, detail::qfind_first_byte_of, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf2NeedlesNoSSE, n) { // the non-SSE dispatcher
+ findFirstOfRange(delims2, detail::qfind_first_byte_of_nosse, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf2NeedlesStd, n) { // the std::find_first_of wrapper
+ findFirstOfRange(delims2, qfind_first_byte_of_std, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf2NeedlesMemchr, n) { // the _memchr implementation
+ findFirstOfRange(delims2, detail::qfind_first_byte_of_memchr, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf2NeedlesByteSet, n) { // the FastByteSet scan
+ findFirstOfRange(delims2, detail::qfind_first_byte_of_byteset, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
+const string delims4 = "bcde"; // needle set with 4 bytes
+
+BENCHMARK(FindFirstOf4NeedlesBase, n) { // baseline: the ifunc-dispatched entry point
+ findFirstOfRange(delims4, detail::qfind_first_byte_of, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf4NeedlesNoSSE, n) { // the non-SSE dispatcher
+ findFirstOfRange(delims4, detail::qfind_first_byte_of_nosse, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf4NeedlesStd, n) { // the std::find_first_of wrapper
+ findFirstOfRange(delims4, qfind_first_byte_of_std, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf4NeedlesMemchr, n) { // the _memchr implementation
+ findFirstOfRange(delims4, detail::qfind_first_byte_of_memchr, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf4NeedlesByteSet, n) { // the FastByteSet scan
+ findFirstOfRange(delims4, detail::qfind_first_byte_of_byteset, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
+const string delims8 = "0123456b"; // needle set with 8 bytes; 'b' comes last
+
+BENCHMARK(FindFirstOf8NeedlesBase, n) { // baseline: the ifunc-dispatched entry point
+ findFirstOfRange(delims8, detail::qfind_first_byte_of, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf8NeedlesNoSSE, n) { // the non-SSE dispatcher
+ findFirstOfRange(delims8, detail::qfind_first_byte_of_nosse, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf8NeedlesStd, n) { // the std::find_first_of wrapper
+ findFirstOfRange(delims8, qfind_first_byte_of_std, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf8NeedlesMemchr, n) { // the _memchr implementation
+ findFirstOfRange(delims8, detail::qfind_first_byte_of_memchr, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf8NeedlesByteSet, n) { // the FastByteSet scan
+ findFirstOfRange(delims8, detail::qfind_first_byte_of_byteset, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
+const string delims16 = "0123456789bcdefg"; // needle set with 16 bytes
+
+BENCHMARK(FindFirstOf16NeedlesBase, n) { // baseline: the ifunc-dispatched entry point
+ findFirstOfRange(delims16, detail::qfind_first_byte_of, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf16NeedlesNoSSE, n) { // the non-SSE dispatcher
+ findFirstOfRange(delims16, detail::qfind_first_byte_of_nosse, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf16NeedlesStd, n) { // the std::find_first_of wrapper
+ findFirstOfRange(delims16, qfind_first_byte_of_std, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf16NeedlesMemchr, n) { // the _memchr implementation
+ findFirstOfRange(delims16, detail::qfind_first_byte_of_memchr, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf16NeedlesByteSet, n) { // the FastByteSet scan
+ findFirstOfRange(delims16, detail::qfind_first_byte_of_byteset, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
+// Exactly 32 distinct needle bytes, so the FindFirstOf32Needles* benchmark
+// names describe the input they actually measure.
+const string delims32 = "!bcdefghijklmnopqrstuvwxyz_01234";
+
+BENCHMARK(FindFirstOf32NeedlesBase, n) { // baseline: the ifunc-dispatched entry point
+ findFirstOfRange(delims32, detail::qfind_first_byte_of, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf32NeedlesNoSSE, n) { // the non-SSE dispatcher
+ findFirstOfRange(delims32, detail::qfind_first_byte_of_nosse, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf32NeedlesStd, n) { // the std::find_first_of wrapper
+ findFirstOfRange(delims32, qfind_first_byte_of_std, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf32NeedlesMemchr, n) { // the _memchr implementation
+ findFirstOfRange(delims32, detail::qfind_first_byte_of_memchr, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf32NeedlesByteSet, n) { // the FastByteSet scan
+ findFirstOfRange(delims32, detail::qfind_first_byte_of_byteset, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
+// Exactly 64 distinct needle bytes (27 + 37), so the FindFirstOf64Needles*
+// benchmark names describe the input they actually measure.
+const string delims64 = "!bcdefghijklmnopqrstuvwxyz_"
+                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ-0123456789";
+
+BENCHMARK(FindFirstOf64NeedlesBase, n) { // baseline: the ifunc-dispatched entry point
+ findFirstOfRange(delims64, detail::qfind_first_byte_of, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf64NeedlesNoSSE, n) { // the non-SSE dispatcher
+ findFirstOfRange(delims64, detail::qfind_first_byte_of_nosse, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf64NeedlesStd, n) { // the std::find_first_of wrapper
+ findFirstOfRange(delims64, qfind_first_byte_of_std, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf64NeedlesMemchr, n) { // the _memchr implementation
+ findFirstOfRange(delims64, detail::qfind_first_byte_of_memchr, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOf64NeedlesByteSet, n) { // the FastByteSet scan
+ findFirstOfRange(delims64, detail::qfind_first_byte_of_byteset, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
+// Calls func(ffoTestString, needles) iters times, cycling through the needle
+// sets stored in ffoDelim, and passes each result to doNotOptimizeAway.
+template <class Func>
+void findFirstOfRandom(Func func, size_t iters) {
+  // The index has the same type as iters: no signed/unsigned comparison and
+  // no overflow for iteration counts larger than INT_MAX.
+  for (size_t i = 0; i < iters; ++i) {
+    const auto test = i % ffoDelim.size();
+    auto p = func(ffoTestString, ffoDelim[test]);
+    doNotOptimizeAway(p);
+  }
+}
+
+BENCHMARK(FindFirstOfRandomBase, n) { // baseline: the ifunc-dispatched entry point
+ findFirstOfRandom(detail::qfind_first_byte_of, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOfRandomNoSSE, n) { // the non-SSE dispatcher
+ findFirstOfRandom(detail::qfind_first_byte_of_nosse, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOfRandomStd, n) { // the std::find_first_of wrapper
+ findFirstOfRandom(qfind_first_byte_of_std, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOfRandomMemchr, n) { // the _memchr implementation
+ findFirstOfRandom(detail::qfind_first_byte_of_memchr, n);
+}
+
+BENCHMARK_RELATIVE(FindFirstOfRandomByteSet, n) { // the FastByteSet scan
+ findFirstOfRandom(detail::qfind_first_byte_of_byteset, n);
+}
+
+BENCHMARK_DRAW_LINE();
+
BENCHMARK(FindFirstOfOffsetRange, n) {
StringPiece haystack(str);
folly::StringPiece needles("bc");
}
}
+BENCHMARK_DRAW_LINE();
+
int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
- for (int len : {1, 10, 256, 10*1024, 10*1024*1024}) {
+ for (int len : {1, 8, 10, 16, 32, 64, 128, 256, 10*1024, 1024*1024}) {
initStr(len);
+ initDelims(len);
runBenchmarks();
}
return 0;
/*
- * Copyright 2012 Facebook, Inc.
+ * Copyright 2013 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
// @author Kristina Holst (kholst@fb.com)
// @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
+#include <limits>
+#include <string>
#include <boost/range/concepts.hpp>
#include <gtest/gtest.h>
#include "folly/Range.h"
+namespace folly { namespace detail {
+// declarations of functions in Range.cpp, so the tests can call each one directly
+size_t qfind_first_byte_of_memchr(const StringPiece& haystack,
+ const StringPiece& needles);
+
+size_t qfind_first_byte_of_byteset(const StringPiece& haystack,
+ const StringPiece& needles);
+
+size_t qfind_first_byte_of_nosse(const StringPiece& haystack,
+ const StringPiece& needles);
+}}
+
using namespace folly;
using namespace std;
static_cast<const void*>(b.begin()));
EXPECT_EQ(static_cast<const void*>(a.end()),
static_cast<const void*>(b.end()));
+
+ // and convert back again
+ StringPiece c(b);
+ EXPECT_EQ(a.begin(), c.begin());
+ EXPECT_EQ(a.end(), c.end());
+}
+
+template <typename NeedleFinder>
+class NeedleFinderTest : public ::testing::Test { // fixture parameterized on the implementation under test
+ public:
+ static size_t find_first_byte_of(StringPiece haystack, StringPiece needles) {
+ return NeedleFinder::find_first_byte_of(haystack, needles); // forwards to the NeedleFinder policy type
+ }
+};
+
+struct SseNeedleFinder {
+ static size_t find_first_byte_of(StringPiece haystack, StringPiece needles) {
+ // This will only use the SSE version if it is supported on this CPU
+ // (selected using ifunc); otherwise the call reaches the non-SSE fallback.
+ return detail::qfind_first_byte_of(haystack, needles);
+ }
+};
+
+struct NoSseNeedleFinder { // policy: always calls the non-SSE dispatcher
+ static size_t find_first_byte_of(StringPiece haystack, StringPiece needles) {
+ return detail::qfind_first_byte_of_nosse(haystack, needles);
+ }
+};
+
+struct MemchrNeedleFinder { // policy: always calls the _memchr implementation
+ static size_t find_first_byte_of(StringPiece haystack, StringPiece needles) {
+ return detail::qfind_first_byte_of_memchr(haystack, needles);
+ }
+};
+
+struct ByteSetNeedleFinder { // policy: always calls the byte-set implementation
+ static size_t find_first_byte_of(StringPiece haystack, StringPiece needles) {
+ return detail::qfind_first_byte_of_byteset(haystack, needles);
+ }
+};
+
+typedef ::testing::Types<SseNeedleFinder, NoSseNeedleFinder, MemchrNeedleFinder,
+ ByteSetNeedleFinder> NeedleFinders;
+TYPED_TEST_CASE(NeedleFinderTest, NeedleFinders); // registers the finder types for the NeedleFinderTest typed tests
+
+TYPED_TEST(NeedleFinderTest, Null) {
+ { // null characters in the string: the 'b' at index 5 follows five nulls
+ string s(10, char(0));
+ s[5] = 'b';
+ string delims("abc");
+ EXPECT_EQ(5, this->find_first_byte_of(s, delims));
+ }
+ { // null characters in delim: the needles 'c' and 'b' sit between nulls
+ string s("abc");
+ string delims(10, char(0));
+ delims[3] = 'c';
+ delims[7] = 'b';
+ EXPECT_EQ(1, this->find_first_byte_of(s, delims));
+ }
+ { // range not terminated by null character: the haystack is just "fgh"
+ string buf = "abcdefghijklmnopqrstuvwxyz";
+ StringPiece s(buf.data() + 5, 3);
+ StringPiece delims("z");
+ EXPECT_EQ(string::npos, this->find_first_byte_of(s, delims));
+ }
+}
+
+TYPED_TEST(NeedleFinderTest, DelimDuplicates) {
+ string delims(1000, 'b'); // a single needle value repeated 1000 times
+ EXPECT_EQ(1, this->find_first_byte_of("abc", delims)); // 'b' is at index 1
+ EXPECT_EQ(string::npos, this->find_first_byte_of("ac", delims)); // no 'b' in the haystack
+}
+
+TYPED_TEST(NeedleFinderTest, Empty) {
+ string a = "abc";
+ string b = "";
+ EXPECT_EQ(string::npos, this->find_first_byte_of(a, b)); // empty needles
+ EXPECT_EQ(string::npos, this->find_first_byte_of(b, a)); // empty haystack
+ EXPECT_EQ(string::npos, this->find_first_byte_of(b, b)); // both empty
+}
+
+// Uses every suffix of s as the haystack and every suffix of s as the needle
+// set, so both buffers start at each offset 0..17 from the string's data.
+TYPED_TEST(NeedleFinderTest, Unaligned) {
+  // works correctly even if input buffers are not 16-byte aligned
+  string s = "0123456789ABCDEFGH";
+  // size_t indices avoid comparing a signed int against s.size() and keep
+  // the expected value in the same type as the function's result.
+  for (size_t i = 0; i < s.size(); ++i) {
+    StringPiece a(s.c_str() + i);
+    for (size_t j = 0; j < s.size(); ++j) {
+      StringPiece b(s.c_str() + j);
+      // All bytes of s are distinct: if a starts at or after b, a's first
+      // byte is in b; otherwise the first shared byte is s[j].
+      const size_t expected = (i > j) ? 0 : j - i;
+      EXPECT_EQ(expected, this->find_first_byte_of(a, b));
+    }
+  }
+}
+
+// for some algorithms (specifically those that create a set of needles),
+// we check for the edge-case of _all_ possible needles being sought.
+TYPED_TEST(NeedleFinderTest, Needles256) {
+ string needles;
+ const auto minValue = std::numeric_limits<StringPiece::value_type>::min();
+ const auto maxValue = std::numeric_limits<StringPiece::value_type>::max();
+ // make the size ~big to avoid any edge-case branches for tiny haystacks
+ const int haystackSize = 50;
+ for (int i = minValue; i <= maxValue; i++) { // <= so that maxValue itself is added; i is an int counter
+ needles.push_back(i);
+ }
+ EXPECT_EQ(StringPiece::npos, this->find_first_byte_of("", needles)); // empty haystack: nothing to find
+ for (int i = minValue; i <= maxValue; i++) {
+ EXPECT_EQ(0, this->find_first_byte_of(string(haystackSize, i), needles)); // every value is a needle, so index 0 matches
+ }
+
+ needles.append("these are redundant characters"); // values that are already in needles
+ EXPECT_EQ(StringPiece::npos, this->find_first_byte_of("", needles));
+ for (int i = minValue; i <= maxValue; i++) {
+ EXPECT_EQ(0, this->find_first_byte_of(string(haystackSize, i), needles));
+ }
+}
+
+TYPED_TEST(NeedleFinderTest, Base) { // sweeps padding lengths 0..31 for haystack (i) and needles (j)
+ for (int i = 0; i < 32; ++i) {
+ for (int j = 0; j < 32; ++j) {
+ string s = string(i, 'X') + "abca" + string(i, 'X'); // the first 'a' is at index i
+ string delims = string(j, 'Y') + "a" + string(j, 'Y'); // 'a' is the only needle that occurs in s
+ EXPECT_EQ(i, this->find_first_byte_of(s, delims));
+ }
+ }
}