X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=include%2Fllvm%2FADT%2FTrie.h;h=70f3b4154d39986d794894b634df0a1d5fc8f979;hb=83b5752747ea14696b0e51904722c38771f22eb7;hp=30a9f56e1b165c8866a811d416a5bf9a45aabbfb;hpb=64735ccb2b8dd773c1311725cb39951d6088c165;p=oota-llvm.git diff --git a/include/llvm/ADT/Trie.h b/include/llvm/ADT/Trie.h index 30a9f56e1b1..70f3b4154d3 100644 --- a/include/llvm/ADT/Trie.h +++ b/include/llvm/ADT/Trie.h @@ -2,8 +2,8 @@ // // The LLVM Compiler Infrastructure // -// This file was developed by Anton Korobeynikov and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // @@ -15,56 +15,85 @@ #ifndef LLVM_ADT_TRIE_H #define LLVM_ADT_TRIE_H -#include +#include "llvm/ADT/GraphTraits.h" +#include "llvm/Support/DOTGraphTraits.h" + #include namespace llvm { // FIXME: // - Labels are usually small, maybe it's better to use SmallString -// - Something efficient for child storage // - Should we use char* during construction? -// - GraphTraits interface -// - Eliminate Edge class, which is ok for debugging, but not for end code +// - Should we templatize Empty with traits-like interface? template class Trie { - class Edge; - class Node; - - class Edge { - std::string Label; - Node *Parent, *Child; + friend class GraphTraits >; + friend class DOTGraphTraits >; +public: + class Node { + friend class Trie; public: - typedef enum { + typedef std::vector NodeVectorType; + typedef typename NodeVectorType::iterator iterator; + typedef typename NodeVectorType::const_iterator const_iterator; + + private: + enum QueryResult { Same = -3, StringIsPrefix = -2, LabelIsPrefix = -1, DontMatch = 0, HaveCommonPart - } QueryResult; + }; - inline explicit Edge(std::string label = "", - Node* parent = NULL, Node* child = NULL): - Label(label), Parent(parent), Child(child) { } + struct NodeCmp { + bool operator() (Node* N1, Node* N2) { + return (N1->Label[0] < N2->Label[0]); + } + bool operator() (Node* N, char Id) { + return (N->Label[0] < Id); + } + }; - inline void setParent(Node* parent) { Parent = parent; } - inline Node* getParent() const { return Parent; } - inline void setChild(Node* child) { Child = child; } - inline Node* getChild() const { return Child; } - inline void setLabel(const std::string& label) { Label = label; } - inline const std::string& getLabel() const { return Label; } + std::string Label; + Payload Data; + NodeVectorType Children; + + // Do not implement + Node(const Node&); + Node& operator=(const Node&); + + inline void addEdge(Node* N) { + if (Children.empty()) + Children.push_back(N); + else { + iterator I = std::lower_bound(Children.begin(), Children.end(), + N, NodeCmp()); + // FIXME: no dups are allowed + Children.insert(I, N); + } + } - QueryResult query(const std::string& string) const { + inline void setEdge(Node* N) { + char Id = N->Label[0]; + iterator I = std::lower_bound(Children.begin(), Children.end(), + Id, NodeCmp()); + assert(I != Children.end() && "Node does not exists!"); + *I = N; + } + + QueryResult query(const std::string& s) const { unsigned i, l; - unsigned l1 = string.length(); + unsigned l1 = s.length(); unsigned l2 = Label.length(); // Find the length of common part l = std::min(l1, l2); i = 0; - while ((i < l) && (string[i] == Label[i])) + while ((i < l) && (s[i] == Label[i])) ++i; if (i == l) { // One is prefix of another, find who is who @@ -74,71 +103,86 @@ class Trie { return StringIsPrefix; else return LabelIsPrefix; - } else // String and Label just have common part, return its length + } else // s and Label have common (possible empty) part, return its length return (QueryResult)i; } - }; - - class Node { - friend class Trie; - std::map Edges; - Payload Data; public: - inline explicit Node(const Payload& data):Data(data) { } - inline Node(const Node& n) { - Data = n.Data; - Edges = n.Edges; + inline explicit Node(const Payload& data, const std::string& label = ""): + Label(label), Data(data) { } + + inline const Payload& data() const { return Data; } + inline void setData(const Payload& data) { Data = data; } + + inline const std::string& label() const { return Label; } + +#if 0 + inline void dump() { + std::cerr << "Node: " << this << "\n" + << "Label: " << Label << "\n" + << "Children:\n"; + + for (iterator I = Children.begin(), E = Children.end(); I != E; ++I) + std::cerr << (*I)->Label << "\n"; } - inline Node& operator=(const Node& n) { - if (&n != this) { - Data = n.Data; - Edges = n.Edges; - } +#endif - return *this; + inline Node* getEdge(char Id) { + Node* fNode = NULL; + iterator I = std::lower_bound(Children.begin(), Children.end(), + Id, NodeCmp()); + if (I != Children.end() && (*I)->Label[0] == Id) + fNode = *I; + + return fNode; } - inline bool isLeaf() const { return Edges.empty(); } + inline iterator begin() { return Children.begin(); } + inline const_iterator begin() const { return Children.begin(); } + inline iterator end () { return Children.end(); } + inline const_iterator end () const { return Children.end(); } - inline const Payload& getData() const { return Data; } - inline void setData(const Payload& data) { Data = data; } + inline size_t size () const { return Children.size(); } + inline bool empty() const { return Children.empty(); } + inline const Node* &front() const { return Children.front(); } + inline Node* &front() { return Children.front(); } + inline const Node* &back() const { return Children.back(); } + inline Node* &back() { return Children.back(); } - inline Edge* addEdge(const std::string& Label) { - if (!Edges.insert(std::make_pair(Label[0], - Edge(Label, this))).second) { - assert(0 && "Edge already exists!"); - return NULL; - } else - return &Edges[Label[0]]; - } }; +private: std::vector Nodes; Payload Empty; - inline Node* addNode(const Payload& data) { - Node* N = new Node(data); + inline Node* addNode(const Payload& data, const std::string label = "") { + Node* N = new Node(data, label); Nodes.push_back(N); return N; } - inline Node* splitEdge(Edge& cEdge, size_t index) { - const std::string& l = cEdge.getLabel(); - assert(index < l.length() && "Trying to split too far!"); + inline Node* splitEdge(Node* N, char Id, size_t index) { + Node* eNode = N->getEdge(Id); + assert(eNode && "Node doesn't exist"); + const std::string &l = eNode->Label; + assert(index > 0 && index < l.length() && "Trying to split too far!"); std::string l1 = l.substr(0, index); std::string l2 = l.substr(index); - Node* nNode = addNode(Empty); - Edge* nEdge = nNode->addEdge(l2); - nEdge->setChild(cEdge.getChild()); - cEdge.setChild(nNode); - cEdge.setLabel(l1); + Node* nNode = addNode(Empty, l1); + N->setEdge(nNode); + + eNode->Label = l2; + nNode->addEdge(eNode); return nNode; } + // Do not implement + Trie(const Trie&); + Trie& operator=(const Trie&); + public: inline explicit Trie(const Payload& empty):Empty(empty) { addNode(Empty); @@ -150,74 +194,142 @@ public: inline Node* getRoot() const { return Nodes[0]; } - bool addString(const std::string& s, const Payload& data) { - Node* cNode = getRoot(); - Edge* nEdge = NULL; - std::string s1(s); - - while (nEdge == NULL) { - if (cNode->Edges.count(s1[0])) { - Edge& cEdge = cNode->Edges[s1[0]]; - typename Edge::QueryResult r = cEdge.query(s1); - - switch (r) { - case Edge::Same: - case Edge::StringIsPrefix: - case Edge::DontMatch: - assert(0 && "Impossible!"); - return false; - case Edge::LabelIsPrefix: - s1 = s1.substr(cEdge.getLabel().length()); - cNode = cEdge.getChild(); - break; - default: - nEdge = splitEdge(cEdge, r)->addEdge(s1.substr(r)); - } - } else - nEdge = cNode->addEdge(s1); - } + bool addString(const std::string& s, const Payload& data); + const Payload& lookup(const std::string& s) const; - Node* tNode = addNode(data); - nEdge->setChild(tNode); +}; - return true; +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template +bool Trie::addString(const std::string& s, const Payload& data) { + Node* cNode = getRoot(); + Node* tNode = NULL; + std::string s1(s); + + while (tNode == NULL) { + char Id = s1[0]; + if (Node* nNode = cNode->getEdge(Id)) { + typename Node::QueryResult r = nNode->query(s1); + + switch (r) { + case Node::Same: + case Node::StringIsPrefix: + // Currently we don't allow to have two strings in the trie one + // being a prefix of another. This should be fixed. + assert(0 && "FIXME!"); + return false; + case Node::DontMatch: + assert(0 && "Impossible!"); + return false; + case Node::LabelIsPrefix: + s1 = s1.substr(nNode->label().length()); + cNode = nNode; + break; + default: + nNode = splitEdge(cNode, Id, r); + tNode = addNode(data, s1.substr(r)); + nNode->addEdge(tNode); + } + } else { + tNode = addNode(data, s1); + cNode->addEdge(tNode); + } } - const Payload& lookup(const std::string& s) const { - Node* cNode = getRoot(); - Node* tNode = NULL; - std::string s1(s); - - while (tNode == NULL) { - if (cNode->Edges.count(s1[0])) { - Edge& cEdge = cNode->Edges[s1[0]]; - typename Edge::QueryResult r = cEdge.query(s1); - - switch (r) { - case Edge::Same: - tNode = cEdge.getChild(); - break; - case Edge::StringIsPrefix: - return Empty; - case Edge::DontMatch: - assert(0 && "Impossible!"); - return Empty; - case Edge::LabelIsPrefix: - s1 = s1.substr(cEdge.getLabel().length()); - cNode = cEdge.getChild(); - break; - default: - return Empty; - } - } else + return true; +} + +template +const Payload& Trie::lookup(const std::string& s) const { + Node* cNode = getRoot(); + Node* tNode = NULL; + std::string s1(s); + + while (tNode == NULL) { + char Id = s1[0]; + if (Node* nNode = cNode->getEdge(Id)) { + typename Node::QueryResult r = nNode->query(s1); + + switch (r) { + case Node::Same: + tNode = nNode; + break; + case Node::StringIsPrefix: return Empty; - } + case Node::DontMatch: + assert(0 && "Impossible!"); + return Empty; + case Node::LabelIsPrefix: + s1 = s1.substr(nNode->label().length()); + cNode = nNode; + break; + default: + return Empty; + } + } else + return Empty; + } + + return tNode->data(); +} + +template +struct GraphTraits > { + typedef Trie TrieType; + typedef typename TrieType::Node NodeType; + typedef typename NodeType::iterator ChildIteratorType; + + static inline NodeType *getEntryNode(const TrieType& T) { + return T.getRoot(); + } + + static inline ChildIteratorType child_begin(NodeType *N) { + return N->begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } + + typedef typename std::vector::const_iterator nodes_iterator; - return tNode->getData(); + static inline nodes_iterator nodes_begin(const TrieType& G) { + return G.Nodes.begin(); + } + static inline nodes_iterator nodes_end(const TrieType& G) { + return G.Nodes.end(); } }; -} +template +struct DOTGraphTraits > : public DefaultDOTGraphTraits { + typedef typename Trie::Node NodeType; + typedef typename GraphTraits >::ChildIteratorType EdgeIter; + + static std::string getGraphName(const Trie& T) { + return "Trie"; + } + + static std::string getNodeLabel(NodeType* Node, const Trie& T) { + if (T.getRoot() == Node) + return ""; + else + return Node->label(); + } + + static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) { + NodeType* N = *I; + return N->label().substr(0, 1); + } + + static std::string getNodeAttributes(const NodeType* Node, + const Trie& T) { + if (Node->data() != T.Empty) + return "color=blue"; + + return ""; + } + +}; + +} // end of llvm namespace #endif // LLVM_ADT_TRIE_H