From 62588622d40f6c6f5508496db69e06593b615049 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 22 Feb 2012 00:56:08 +0000 Subject: [PATCH] Add a Briggs and Torczon sparse set implementation. For objects that can be identified by small unsigned keys, SparseSet provides constant time clear() and fast deterministic iteration. Insert, erase, and find operations are typically faster than hash tables. SparseSet is useful for keeping information about physical registers, virtual registers, or numbered basic blocks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151110 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/ProgrammersManual.html | 19 +++ include/llvm/ADT/SparseSet.h | 259 ++++++++++++++++++++++++++++++++ unittests/ADT/SparseSetTest.cpp | 186 +++++++++++++++++++++++ unittests/CMakeLists.txt | 1 + 4 files changed, 465 insertions(+) create mode 100644 include/llvm/ADT/SparseSet.h create mode 100644 unittests/ADT/SparseSetTest.cpp diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html index 0b1a875da1a..f4c03222b20 100644 --- a/docs/ProgrammersManual.html +++ b/docs/ProgrammersManual.html @@ -81,6 +81,7 @@ option
  • "llvm/ADT/SmallSet.h"
  • "llvm/ADT/SmallPtrSet.h"
  • "llvm/ADT/DenseSet.h"
  • +
  • "llvm/ADT/SparseSet.h"
  • "llvm/ADT/FoldingSet.h"
  • <set>
  • "llvm/ADT/SetVector.h"
  • @@ -1488,6 +1489,24 @@ href="#dss_densemap">DenseMap has. + +

    + "llvm/ADT/SparseSet.h" +

    + +
    + +

    SparseSet holds a small number of objects identified by unsigned keys of +moderate size. It uses a lot of memory, but provides operations that are +almost as fast as a vector. Typical keys are physical registers, virtual +registers, or numbered basic blocks.

    + +

    SparseSet is useful for algorithms that need very fast clear/find/insert/erase +and fast iteration over small sets. It is not intended for building composite +data structures.

    + +
    +

    "llvm/ADT/FoldingSet.h" diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h new file mode 100644 index 00000000000..eba5cc0df53 --- /dev/null +++ b/include/llvm/ADT/SparseSet.h @@ -0,0 +1,259 @@ +//===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SparseSet class derived from the version described in +// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters +// on Programming Languages and Systems, Volume 2 Issue 1-4, March–Dec. 1993. +// +// A sparse set holds a small number of objects identified by integer keys from +// a moderately sized universe. The sparse set uses more memory than other +// containers in order to provide faster operations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_SPARSESET_H +#define LLVM_ADT_SPARSESET_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +/// SparseSetFunctor - Objects in a SparseSet are identified by small integer +/// keys. A functor object is used to compute the key of an object. The +/// functor's operator() must return an unsigned smaller than the universe. +/// +/// The default functor implementation forwards to a getSparseSetKey() method +/// on the object. It is intended for sparse sets holding ad-hoc structs. +/// +template +struct SparseSetFunctor { + unsigned operator()(const ValueT &Val) { + return Val.getSparseSetKey(); + } +}; + +/// SparseSetFunctor - Provide a trivial identity functor for +/// SparseSet. +/// +template<> struct SparseSetFunctor { + unsigned operator()(unsigned Val) { return Val; } +}; + +/// SparseSet - Fast set implementation for objects that can be identified by +/// small unsigned keys. +/// +/// SparseSet allocates memory proportional to the size of the key universe, so +/// it is not recommended for building composite data structures. It is useful +/// for algorithms that require a single set with fast operations. +/// +/// Compared to DenseSet and DenseMap, SparseSet provides constant-time fast +/// clear() and iteration as fast as a vector. The find(), insert(), and +/// erase() operations are all constant time, and typically faster than a hash +/// table. The iteration order doesn't depend on numerical key values, it only +/// depends on the order of insert() and erase() operations. When no elements +/// have been erased, the iteration order is the insertion order. +/// +/// Compared to BitVector, SparseSet uses 8x-40x more memory, but +/// offers constant-time clear() and size() operations as well as fast +/// iteration independent on the size of the universe. +/// +/// SparseSet contains a dense vector holding all the objects and a sparse +/// array holding indexes into the dense vector. Most of the memory is used by +/// the sparse array which is the size of the key universe. The SparseT +/// template parameter provides a space/speed tradeoff for sets holding many +/// elements. +/// +/// When SparseT is uint32_t, find() only touches 2 cache lines, but the sparse +/// array uses 4 x Universe bytes. +/// +/// When SparseT is uint8_t (the default), find() touches up to 2+[N/256] cache +/// lines, but the sparse array is 4x smaller. N is the number of elements in +/// the set. +/// +/// For sets that may grow to thousands of elements, SparseT should be set to +/// uint16_t or uint32_t. +/// +/// @param ValueT The type of objects in the set. +/// @param SparseT An unsigned integer type. See above. +/// @param KeyFunctorT A functor that computes the unsigned key of a ValueT. +/// +template > +class SparseSet { + typedef SmallVector DenseT; + DenseT Dense; + SparseT *Sparse; + unsigned Universe; + KeyFunctorT KeyOf; + + // Disable copy construction and assignment. + // This data structure is not meant to be used that way. + SparseSet(const SparseSet&); // DO NOT IMPLEMENT. + SparseSet &operator=(const SparseSet&); // DO NOT IMPLEMENT. + +public: + typedef ValueT value_type; + typedef ValueT &reference; + typedef const ValueT &const_reference; + typedef ValueT *pointer; + typedef const ValueT *const_pointer; + + SparseSet() : Sparse(0), Universe(0) {} + ~SparseSet() { free(Sparse); } + + /// setUniverse - Set the universe size which determines the largest key the + /// set can hold. The universe must be sized before any elements can be + /// added. + /// + /// @param U Universe size. All object keys must be less than U. + /// + void setUniverse(unsigned U) { + // It's not hard to resize the universe on a non-empty set, but it doesn't + // seem like a likely use case, so we can add that code when we need it. + assert(empty() && "Can only resize universe on an empty map"); + // Hysteresis prevents needless reallocations. + if (U >= Universe/4 && U <= Universe) + return; + free(Sparse); + // The Sparse array doesn't actually need to be initialized, so malloc + // would be enough here, but that will cause tools like valgrind to + // complain about branching on uninitialized data. + Sparse = reinterpret_cast(calloc(U, sizeof(SparseT))); + Universe = U; + } + + // Import trivial vector stuff from DenseT. + typedef typename DenseT::iterator iterator; + typedef typename DenseT::const_iterator const_iterator; + + const_iterator begin() const { return Dense.begin(); } + const_iterator end() const { return Dense.end(); } + iterator begin() { return Dense.begin(); } + iterator end() { return Dense.end(); } + + /// empty - Returns true if the set is empty. + /// + /// This is not the same as BitVector::empty(). + /// + bool empty() const { return Dense.empty(); } + + /// size - Returns the number of elements in the set. + /// + /// This is not the same as BitVector::size() which returns the size of the + /// universe. + /// + unsigned size() const { return Dense.size(); } + + /// clear - Clears the set. This is a very fast constant time operation. + /// + void clear() { + // Sparse does not need to be cleared, see find(). + Dense.clear(); + } + + /// find - Find an element by its key. + /// + /// @param Key A valid key to find. + /// @returns An iterator to the element identified by key, or end(). + /// + iterator find(unsigned Key) { + assert(Key < Universe && "Key out of range"); + assert(std::numeric_limits::is_integer && + !std::numeric_limits::is_signed && + "SparseT must be an unsigned integer type"); + const unsigned Stride = std::numeric_limits::max() + 1u; + for (unsigned i = Sparse[Key], e = size(); i < e; i += Stride) { + const unsigned FoundKey = KeyOf(Dense[i]); + assert(FoundKey < Universe && "Invalid key in set. Did object mutate?"); + if (Key == FoundKey) + return begin() + i; + // Stride is 0 when SparseT >= unsigned. We don't need to loop. + if (!Stride) + break; + } + return end(); + } + + const_iterator find(unsigned Key) const { + return const_cast(this)->find(Key); + } + + /// count - Returns true if this set contains an element identified by Key. + /// + bool count(unsigned Key) const { + return find(Key) != end(); + } + + /// insert - Attempts to insert a new element. + /// + /// If Val is successfully inserted, return (I, true), where I is an iterator + /// pointing to the newly inserted element. + /// + /// If the set already contains an element with the same key as Val, return + /// (I, false), where I is an iterator pointing to the existing element. + /// + /// Insertion invalidates all iterators. + /// + std::pair insert(const ValueT &Val) { + unsigned Key = KeyOf(Val); + iterator I = find(Key); + if (I != end()) + return std::make_pair(I, false); + Sparse[Key] = size(); + Dense.push_back(Val); + return std::make_pair(end() - 1, true); + } + + /// erase - Erases an existing element identified by a valid iterator. + /// + /// This invalidates all iterators, but erase() returns an iterator pointing + /// to the next element. This makes it possible to erase selected elements + /// while iterating over the set: + /// + /// for (SparseSet::iterator I = Set.begin(); I != Set.end();) + /// if (test(*I)) + /// I = Set.erase(I); + /// else + /// ++I; + /// + /// Note that end() changes when elements are erased, unlike std::list. + /// + iterator erase(iterator I) { + assert(I - begin() < size() && "Invalid iterator"); + if (I != end() - 1) { + *I = Dense.back(); + unsigned BackKey = KeyOf(Dense.back()); + assert(BackKey < Universe && "Invalid key in set. Did object mutate?"); + Sparse[BackKey] = I - begin(); + } + // This depends on SmallVector::pop_back() not invalidating iterators. + // std::vector::pop_back() doesn't give that guarantee. + Dense.pop_back(); + return I; + } + + /// erase - Erases an element identified by Key, if it exists. + /// + /// @param Key The key identifying the element to erase. + /// @returns True when an element was erased, false if no element was found. + /// + bool erase(unsigned Key) { + iterator I = find(Key); + if (I == end()) + return false; + erase(I); + return true; + } + +}; + +} // end namespace llvm + +#endif diff --git a/unittests/ADT/SparseSetTest.cpp b/unittests/ADT/SparseSetTest.cpp new file mode 100644 index 00000000000..981d52e3a26 --- /dev/null +++ b/unittests/ADT/SparseSetTest.cpp @@ -0,0 +1,186 @@ +//===------ ADT/SparseSetTest.cpp - SparseSet unit tests - -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SparseSet.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +typedef SparseSet USet; + +// Empty set tests +TEST(SparseSetTest, EmptySet) { + USet Set; + EXPECT_TRUE(Set.empty()); + EXPECT_TRUE(Set.begin() == Set.end()); + EXPECT_EQ(0u, Set.size()); + + Set.setUniverse(10); + + // Lookups on empty set. + EXPECT_TRUE(Set.find(0) == Set.end()); + EXPECT_TRUE(Set.find(9) == Set.end()); + + // Same thing on a const reference. + const USet &CSet = Set; + EXPECT_TRUE(CSet.empty()); + EXPECT_TRUE(CSet.begin() == CSet.end()); + EXPECT_EQ(0u, CSet.size()); + EXPECT_TRUE(CSet.find(0) == CSet.end()); + USet::const_iterator I = CSet.find(5); + EXPECT_TRUE(I == CSet.end()); +} + +// Single entry set tests +TEST(SparseSetTest, SingleEntrySet) { + USet Set; + Set.setUniverse(10); + std::pair IP = Set.insert(5); + EXPECT_TRUE(IP.second); + EXPECT_TRUE(IP.first == Set.begin()); + + EXPECT_FALSE(Set.empty()); + EXPECT_FALSE(Set.begin() == Set.end()); + EXPECT_TRUE(Set.begin() + 1 == Set.end()); + EXPECT_EQ(1u, Set.size()); + + EXPECT_TRUE(Set.find(0) == Set.end()); + EXPECT_TRUE(Set.find(9) == Set.end()); + + EXPECT_FALSE(Set.count(0)); + EXPECT_TRUE(Set.count(5)); + + // Redundant insert. + IP = Set.insert(5); + EXPECT_FALSE(IP.second); + EXPECT_TRUE(IP.first == Set.begin()); + + // Erase non-existant element. + EXPECT_FALSE(Set.erase(1)); + EXPECT_EQ(1u, Set.size()); + EXPECT_EQ(5u, *Set.begin()); + + // Erase iterator. + USet::iterator I = Set.find(5); + EXPECT_TRUE(I == Set.begin()); + I = Set.erase(I); + EXPECT_TRUE(I == Set.end()); + EXPECT_TRUE(Set.empty()); +} + +// Multiple entry set tests +TEST(SparseSetTest, MultipleEntrySet) { + USet Set; + Set.setUniverse(10); + + Set.insert(5); + Set.insert(3); + Set.insert(2); + Set.insert(1); + Set.insert(4); + EXPECT_EQ(5u, Set.size()); + + // Without deletions, iteration order == insertion order. + USet::const_iterator I = Set.begin(); + EXPECT_EQ(5u, *I); + ++I; + EXPECT_EQ(3u, *I); + ++I; + EXPECT_EQ(2u, *I); + ++I; + EXPECT_EQ(1u, *I); + ++I; + EXPECT_EQ(4u, *I); + ++I; + EXPECT_TRUE(I == Set.end()); + + // Redundant insert. + std::pair IP = Set.insert(3); + EXPECT_FALSE(IP.second); + EXPECT_TRUE(IP.first == Set.begin() + 1); + + // Erase last element by key. + EXPECT_TRUE(Set.erase(4)); + EXPECT_EQ(4u, Set.size()); + EXPECT_FALSE(Set.count(4)); + EXPECT_FALSE(Set.erase(4)); + EXPECT_EQ(4u, Set.size()); + EXPECT_FALSE(Set.count(4)); + + // Erase first element by key. + EXPECT_TRUE(Set.count(5)); + EXPECT_TRUE(Set.find(5) == Set.begin()); + EXPECT_TRUE(Set.erase(5)); + EXPECT_EQ(3u, Set.size()); + EXPECT_FALSE(Set.count(5)); + EXPECT_FALSE(Set.erase(5)); + EXPECT_EQ(3u, Set.size()); + EXPECT_FALSE(Set.count(5)); + + Set.insert(6); + Set.insert(7); + EXPECT_EQ(5u, Set.size()); + + // Erase last element by iterator. + I = Set.erase(Set.end() - 1); + EXPECT_TRUE(I == Set.end()); + EXPECT_EQ(4u, Set.size()); + + // Erase second element by iterator. + I = Set.erase(Set.begin() + 1); + EXPECT_TRUE(I == Set.begin() + 1); + + // Clear and resize the universe. + Set.clear(); + EXPECT_FALSE(Set.count(5)); + Set.setUniverse(1000); + + // Add more than 256 elements. + for (unsigned i = 100; i != 800; ++i) + Set.insert(i); + + for (unsigned i = 0; i != 10; ++i) + Set.erase(i); + + for (unsigned i = 100; i != 800; ++i) + EXPECT_TRUE(Set.count(i)); + + EXPECT_FALSE(Set.count(99)); + EXPECT_FALSE(Set.count(800)); + EXPECT_EQ(700u, Set.size()); +} + +struct Alt { + unsigned Value; + explicit Alt(unsigned x) : Value(x) {} + unsigned getSparseSetKey() const { return Value - 1000; } +}; + +TEST(SparseSetTest, AltStructSet) { + typedef SparseSet ASet; + ASet Set; + Set.setUniverse(10); + Set.insert(Alt(1005)); + + ASet::iterator I = Set.find(5); + ASSERT_TRUE(I == Set.begin()); + EXPECT_EQ(1005u, I->Value); + + Set.insert(Alt(1006)); + Set.insert(Alt(1006)); + I = Set.erase(Set.begin()); + ASSERT_TRUE(I == Set.begin()); + EXPECT_EQ(1006u, I->Value); + + EXPECT_FALSE(Set.erase(5)); + EXPECT_TRUE(Set.erase(6)); +} +} // namespace diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 8420cd4e101..c6a904ee45d 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -71,6 +71,7 @@ add_llvm_unittest(ADT ADT/SmallStringTest.cpp ADT/SmallVectorTest.cpp ADT/SparseBitVectorTest.cpp + ADT/SparseSetTest.cpp ADT/StringMapTest.cpp ADT/StringRefTest.cpp ADT/TripleTest.cpp -- 2.34.1