//
// The LLVM Compiler Infrastructure
//
-// This file was developed by Anton Korobeynikov and is distributed under
-// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
#ifndef LLVM_ADT_TRIE_H
#define LLVM_ADT_TRIE_H
-#include <map>
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/Support/DOTGraphTraits.h"
+
+#include <cassert>
#include <vector>
namespace llvm {
// FIXME:
// - Labels are usually small, maybe it's better to use SmallString
-// - Something efficient for child storage
// - Should we use char* during construction?
-// - GraphTraits interface
-// - Eliminate Edge class, which is ok for debugging, but not for end code
+// - Should we templatize Empty with traits-like interface?
template<class Payload>
class Trie {
- class Edge;
- class Node;
-
- class Edge {
- std::string Label;
- Node *Parent, *Child;
+ friend class GraphTraits<Trie<Payload> >;
+ friend class DOTGraphTraits<Trie<Payload> >;
+public:
+ class Node {
+ friend class Trie;
public:
- typedef enum {
+ typedef std::vector<Node*> NodeVectorType;
+ typedef typename NodeVectorType::iterator iterator;
+ typedef typename NodeVectorType::const_iterator const_iterator;
+
+ private:
+ enum QueryResult {
Same = -3,
StringIsPrefix = -2,
LabelIsPrefix = -1,
DontMatch = 0,
HaveCommonPart
- } QueryResult;
+ };
- inline explicit Edge(std::string label = "",
- Node* parent = NULL, Node* child = NULL):
- Label(label), Parent(parent), Child(child) { }
+ struct NodeCmp {
+ bool operator() (Node* N1, Node* N2) {
+ return (N1->Label[0] < N2->Label[0]);
+ }
+ bool operator() (Node* N, char Id) {
+ return (N->Label[0] < Id);
+ }
+ };
- inline void setParent(Node* parent) { Parent = parent; }
- inline Node* getParent() const { return Parent; }
- inline void setChild(Node* child) { Child = child; }
- inline Node* getChild() const { return Child; }
- inline void setLabel(const std::string& label) { Label = label; }
- inline const std::string& getLabel() const { return Label; }
+ std::string Label;
+ Payload Data;
+ NodeVectorType Children;
+
+ // Do not implement
+ Node(const Node&);
+ Node& operator=(const Node&);
+
+ inline void addEdge(Node* N) {
+ if (Children.empty())
+ Children.push_back(N);
+ else {
+ iterator I = std::lower_bound(Children.begin(), Children.end(),
+ N, NodeCmp());
+ // FIXME: no dups are allowed
+ Children.insert(I, N);
+ }
+ }
- QueryResult query(const std::string& string) const {
+ inline void setEdge(Node* N) {
+ char Id = N->Label[0];
+ iterator I = std::lower_bound(Children.begin(), Children.end(),
+ Id, NodeCmp());
+ assert(I != Children.end() && "Node does not exists!");
+ *I = N;
+ }
+
+ QueryResult query(const std::string& s) const {
unsigned i, l;
- unsigned l1 = string.length();
+ unsigned l1 = s.length();
unsigned l2 = Label.length();
// Find the length of common part
l = std::min(l1, l2);
i = 0;
- while ((i < l) && (string[i] == Label[i]))
+ while ((i < l) && (s[i] == Label[i]))
++i;
if (i == l) { // One is prefix of another, find who is who
return StringIsPrefix;
else
return LabelIsPrefix;
- } else // String and Label just have common part, return its length
+ } else // s and Label have common (possible empty) part, return its length
return (QueryResult)i;
}
- };
-
- class Node {
- friend class Trie;
- std::map<char, Edge> Edges;
- Payload Data;
public:
- inline explicit Node(const Payload& data):Data(data) { }
- inline Node(const Node& n) {
- Data = n.Data;
- Edges = n.Edges;
+ inline explicit Node(const Payload& data, const std::string& label = ""):
+ Label(label), Data(data) { }
+
+ inline const Payload& data() const { return Data; }
+ inline void setData(const Payload& data) { Data = data; }
+
+ inline const std::string& label() const { return Label; }
+
+#if 0
+ inline void dump() {
+ llvm::cerr << "Node: " << this << "\n"
+ << "Label: " << Label << "\n"
+ << "Children:\n";
+
+ for (iterator I = Children.begin(), E = Children.end(); I != E; ++I)
+ llvm::cerr << (*I)->Label << "\n";
}
- inline Node& operator=(const Node& n) {
- if (&n != this) {
- Data = n.Data;
- Edges = n.Edges;
- }
+#endif
- return *this;
+ inline Node* getEdge(char Id) {
+ Node* fNode = NULL;
+ iterator I = std::lower_bound(Children.begin(), Children.end(),
+ Id, NodeCmp());
+ if (I != Children.end() && (*I)->Label[0] == Id)
+ fNode = *I;
+
+ return fNode;
}
- inline bool isLeaf() const { return Edges.empty(); }
+ inline iterator begin() { return Children.begin(); }
+ inline const_iterator begin() const { return Children.begin(); }
+ inline iterator end () { return Children.end(); }
+ inline const_iterator end () const { return Children.end(); }
- inline const Payload& getData() const { return Data; }
- inline void setData(const Payload& data) { Data = data; }
+ inline size_t size () const { return Children.size(); }
+ inline bool empty() const { return Children.empty(); }
+ inline const Node* &front() const { return Children.front(); }
+ inline Node* &front() { return Children.front(); }
+ inline const Node* &back() const { return Children.back(); }
+ inline Node* &back() { return Children.back(); }
- inline Edge* addEdge(const std::string& Label) {
- if (!Edges.insert(std::make_pair(Label[0],
- Edge(Label, this))).second) {
- assert(0 && "Edge already exists!");
- return NULL;
- } else
- return &Edges[Label[0]];
- }
};
+private:
std::vector<Node*> Nodes;
Payload Empty;
- inline Node* addNode(const Payload& data) {
- Node* N = new Node(data);
+ inline Node* addNode(const Payload& data, const std::string label = "") {
+ Node* N = new Node(data, label);
Nodes.push_back(N);
return N;
}
- inline Node* splitEdge(Edge& cEdge, size_t index) {
- const std::string& l = cEdge.getLabel();
- assert(index < l.length() && "Trying to split too far!");
+ inline Node* splitEdge(Node* N, char Id, size_t index) {
+ Node* eNode = N->getEdge(Id);
+ assert(eNode && "Node doesn't exist");
+ const std::string &l = eNode->Label;
+ assert(index > 0 && index < l.length() && "Trying to split too far!");
std::string l1 = l.substr(0, index);
std::string l2 = l.substr(index);
- Node* nNode = addNode(Empty);
- Edge* nEdge = nNode->addEdge(l2);
- nEdge->setChild(cEdge.getChild());
- cEdge.setChild(nNode);
- cEdge.setLabel(l1);
+ Node* nNode = addNode(Empty, l1);
+ N->setEdge(nNode);
+
+ eNode->Label = l2;
+ nNode->addEdge(eNode);
return nNode;
}
+ // Do not implement
+ Trie(const Trie&);
+ Trie& operator=(const Trie&);
+
public:
inline explicit Trie(const Payload& empty):Empty(empty) {
addNode(Empty);
inline Node* getRoot() const { return Nodes[0]; }
- bool addString(const std::string& s, const Payload& data) {
- Node* cNode = getRoot();
- Edge* nEdge = NULL;
- std::string s1(s);
-
- while (nEdge == NULL) {
- if (cNode->Edges.count(s1[0])) {
- Edge& cEdge = cNode->Edges[s1[0]];
- typename Edge::QueryResult r = cEdge.query(s1);
-
- switch (r) {
- case Edge::Same:
- case Edge::StringIsPrefix:
- case Edge::DontMatch:
- assert(0 && "Impossible!");
- return false;
- case Edge::LabelIsPrefix:
- s1 = s1.substr(cEdge.getLabel().length());
- cNode = cEdge.getChild();
- break;
- default:
- nEdge = splitEdge(cEdge, r)->addEdge(s1.substr(r));
- }
- } else
- nEdge = cNode->addEdge(s1);
- }
+ bool addString(const std::string& s, const Payload& data);
+ const Payload& lookup(const std::string& s) const;
- Node* tNode = addNode(data);
- nEdge->setChild(tNode);
+};
- return true;
+// Define this out-of-line to dissuade the C++ compiler from inlining it.
+template<class Payload>
+bool Trie<Payload>::addString(const std::string& s, const Payload& data) {
+ Node* cNode = getRoot();
+ Node* tNode = NULL;
+ std::string s1(s);
+
+ while (tNode == NULL) {
+ char Id = s1[0];
+ if (Node* nNode = cNode->getEdge(Id)) {
+ typename Node::QueryResult r = nNode->query(s1);
+
+ switch (r) {
+ case Node::Same:
+ case Node::StringIsPrefix:
+ // Currently we don't allow to have two strings in the trie one
+ // being a prefix of another. This should be fixed.
+ assert(0 && "FIXME!");
+ return false;
+ case Node::DontMatch:
+ assert(0 && "Impossible!");
+ return false;
+ case Node::LabelIsPrefix:
+ s1 = s1.substr(nNode->label().length());
+ cNode = nNode;
+ break;
+ default:
+ nNode = splitEdge(cNode, Id, r);
+ tNode = addNode(data, s1.substr(r));
+ nNode->addEdge(tNode);
+ }
+ } else {
+ tNode = addNode(data, s1);
+ cNode->addEdge(tNode);
+ }
}
- const Payload& lookup(const std::string& s) const {
- Node* cNode = getRoot();
- Node* tNode = NULL;
- std::string s1(s);
-
- while (tNode == NULL) {
- if (cNode->Edges.count(s1[0])) {
- Edge& cEdge = cNode->Edges[s1[0]];
- typename Edge::QueryResult r = cEdge.query(s1);
-
- switch (r) {
- case Edge::Same:
- tNode = cEdge.getChild();
- break;
- case Edge::StringIsPrefix:
- return Empty;
- case Edge::DontMatch:
- assert(0 && "Impossible!");
- return Empty;
- case Edge::LabelIsPrefix:
- s1 = s1.substr(cEdge.getLabel().length());
- cNode = cEdge.getChild();
- break;
- default:
- return Empty;
- }
- } else
+ return true;
+}
+
+template<class Payload>
+const Payload& Trie<Payload>::lookup(const std::string& s) const {
+ Node* cNode = getRoot();
+ Node* tNode = NULL;
+ std::string s1(s);
+
+ while (tNode == NULL) {
+ char Id = s1[0];
+ if (Node* nNode = cNode->getEdge(Id)) {
+ typename Node::QueryResult r = nNode->query(s1);
+
+ switch (r) {
+ case Node::Same:
+ tNode = nNode;
+ break;
+ case Node::StringIsPrefix:
return Empty;
- }
+ case Node::DontMatch:
+ assert(0 && "Impossible!");
+ return Empty;
+ case Node::LabelIsPrefix:
+ s1 = s1.substr(nNode->label().length());
+ cNode = nNode;
+ break;
+ default:
+ return Empty;
+ }
+ } else
+ return Empty;
+ }
+
+ return tNode->data();
+}
+
+template<class Payload>
+struct GraphTraits<Trie<Payload> > {
+ typedef Trie<Payload> TrieType;
+ typedef typename TrieType::Node NodeType;
+ typedef typename NodeType::iterator ChildIteratorType;
+
+ static inline NodeType *getEntryNode(const TrieType& T) {
+ return T.getRoot();
+ }
+
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
+
+ typedef typename std::vector<NodeType*>::const_iterator nodes_iterator;
- return tNode->getData();
+ static inline nodes_iterator nodes_begin(const TrieType& G) {
+ return G.Nodes.begin();
+ }
+ static inline nodes_iterator nodes_end(const TrieType& G) {
+ return G.Nodes.end();
}
};
-}
+template<class Payload>
+struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits {
+ typedef typename Trie<Payload>::Node NodeType;
+ typedef typename GraphTraits<Trie<Payload> >::ChildIteratorType EdgeIter;
+
+ static std::string getGraphName(const Trie<Payload>& T) {
+ return "Trie";
+ }
+
+ static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T) {
+ if (T.getRoot() == Node)
+ return "<Root>";
+ else
+ return Node->label();
+ }
+
+ static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) {
+ NodeType* N = *I;
+ return N->label().substr(0, 1);
+ }
+
+ static std::string getNodeAttributes(const NodeType* Node,
+ const Trie<Payload>& T) {
+ if (Node->data() != T.Empty)
+ return "color=blue";
+
+ return "";
+ }
+
+};
+
+} // end of llvm namespace
#endif // LLVM_ADT_TRIE_H