1 //===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class defines a generic trie structure. The trie structure
11 // is immutable after creation, but the payload contained within it is not.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_ADT_TRIE_H
16 #define LLVM_ADT_TRIE_H
18 #include "llvm/ADT/GraphTraits.h"
19 #include "llvm/Support/DOTGraphTraits.h"
27 // - Labels are usually small, maybe it's better to use SmallString
28 // - Should we use char* during construction?
29 // - Should we templatize Empty with traits-like interface?
31 template<class Payload>
33 friend class GraphTraits<Trie<Payload> >;
34 friend class DOTGraphTraits<Trie<Payload> >;
40 typedef std::vector<Node*> NodeVectorType;
41 typedef typename NodeVectorType::iterator iterator;
42 typedef typename NodeVectorType::const_iterator const_iterator;
54 bool operator() (Node* N1, Node* N2) {
55 return (N1->Label[0] < N2->Label[0]);
57 bool operator() (Node* N, char Id) {
58 return (N->Label[0] < Id);
64 NodeVectorType Children;
68 Node& operator=(const Node&);
70 inline void addEdge(Node* N) {
72 Children.push_back(N);
74 iterator I = std::lower_bound(Children.begin(), Children.end(),
76 // FIXME: no dups are allowed
77 Children.insert(I, N);
81 inline void setEdge(Node* N) {
82 char Id = N->Label[0];
83 iterator I = std::lower_bound(Children.begin(), Children.end(),
85 assert(I != Children.end() && "Node does not exists!");
89 QueryResult query(const std::string& s) const {
91 unsigned l1 = s.length();
92 unsigned l2 = Label.length();
94 // Find the length of common part
97 while ((i < l) && (s[i] == Label[i]))
100 if (i == l) { // One is prefix of another, find who is who
104 return StringIsPrefix;
106 return LabelIsPrefix;
107 } else // s and Label have common (possible empty) part, return its length
108 return (QueryResult)i;
112 inline explicit Node(const Payload& data, const std::string& label = ""):
113 Label(label), Data(data) { }
115 inline const Payload& data() const { return Data; }
116 inline void setData(const Payload& data) { Data = data; }
118 inline const std::string& label() const { return Label; }
122 llvm::cerr << "Node: " << this << "\n"
123 << "Label: " << Label << "\n"
126 for (iterator I = Children.begin(), E = Children.end(); I != E; ++I)
127 llvm::cerr << (*I)->Label << "\n";
131 inline Node* getEdge(char Id) {
133 iterator I = std::lower_bound(Children.begin(), Children.end(),
135 if (I != Children.end() && (*I)->Label[0] == Id)
141 inline iterator begin() { return Children.begin(); }
142 inline const_iterator begin() const { return Children.begin(); }
143 inline iterator end () { return Children.end(); }
144 inline const_iterator end () const { return Children.end(); }
146 inline size_t size () const { return Children.size(); }
147 inline bool empty() const { return Children.empty(); }
148 inline const Node* &front() const { return Children.front(); }
149 inline Node* &front() { return Children.front(); }
150 inline const Node* &back() const { return Children.back(); }
151 inline Node* &back() { return Children.back(); }
156 std::vector<Node*> Nodes;
159 inline Node* addNode(const Payload& data, const std::string label = "") {
160 Node* N = new Node(data, label);
165 inline Node* splitEdge(Node* N, char Id, size_t index) {
166 Node* eNode = N->getEdge(Id);
167 assert(eNode && "Node doesn't exist");
169 const std::string &l = eNode->Label;
170 assert(index > 0 && index < l.length() && "Trying to split too far!");
171 std::string l1 = l.substr(0, index);
172 std::string l2 = l.substr(index);
174 Node* nNode = addNode(Empty, l1);
178 nNode->addEdge(eNode);
185 Trie& operator=(const Trie&);
188 inline explicit Trie(const Payload& empty):Empty(empty) {
192 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
196 inline Node* getRoot() const { return Nodes[0]; }
198 bool addString(const std::string& s, const Payload& data);
199 const Payload& lookup(const std::string& s) const;
203 // Define this out-of-line to dissuade the C++ compiler from inlining it.
204 template<class Payload>
205 bool Trie<Payload>::addString(const std::string& s, const Payload& data) {
206 Node* cNode = getRoot();
210 while (tNode == NULL) {
212 if (Node* nNode = cNode->getEdge(Id)) {
213 typename Node::QueryResult r = nNode->query(s1);
217 case Node::StringIsPrefix:
218 // Currently we don't allow to have two strings in the trie one
219 // being a prefix of another. This should be fixed.
220 assert(0 && "FIXME!");
222 case Node::DontMatch:
223 assert(0 && "Impossible!");
225 case Node::LabelIsPrefix:
226 s1 = s1.substr(nNode->label().length());
230 nNode = splitEdge(cNode, Id, r);
231 tNode = addNode(data, s1.substr(r));
232 nNode->addEdge(tNode);
235 tNode = addNode(data, s1);
236 cNode->addEdge(tNode);
243 template<class Payload>
244 const Payload& Trie<Payload>::lookup(const std::string& s) const {
245 Node* cNode = getRoot();
249 while (tNode == NULL) {
251 if (Node* nNode = cNode->getEdge(Id)) {
252 typename Node::QueryResult r = nNode->query(s1);
258 case Node::StringIsPrefix:
260 case Node::DontMatch:
261 assert(0 && "Impossible!");
263 case Node::LabelIsPrefix:
264 s1 = s1.substr(nNode->label().length());
274 return tNode->data();
277 template<class Payload>
278 struct GraphTraits<Trie<Payload> > {
279 typedef Trie<Payload> TrieType;
280 typedef typename TrieType::Node NodeType;
281 typedef typename NodeType::iterator ChildIteratorType;
283 static inline NodeType *getEntryNode(const TrieType& T) {
287 static inline ChildIteratorType child_begin(NodeType *N) {
290 static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
292 typedef typename std::vector<NodeType*>::const_iterator nodes_iterator;
294 static inline nodes_iterator nodes_begin(const TrieType& G) {
295 return G.Nodes.begin();
297 static inline nodes_iterator nodes_end(const TrieType& G) {
298 return G.Nodes.end();
303 template<class Payload>
304 struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits {
305 typedef typename Trie<Payload>::Node NodeType;
306 typedef typename GraphTraits<Trie<Payload> >::ChildIteratorType EdgeIter;
308 static std::string getGraphName(const Trie<Payload>& T) {
312 static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T) {
313 if (T.getRoot() == Node)
316 return Node->label();
319 static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) {
321 return N->label().substr(0, 1);
324 static std::string getNodeAttributes(const NodeType* Node,
325 const Trie<Payload>& T) {
326 if (Node->data() != T.Empty)
334 } // end of llvm namespace
336 #endif // LLVM_ADT_TRIE_H