1 //===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class defines a generic trie structure. The trie structure
11 // is immutable after creation, but the payload contained within it is not.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_ADT_TRIE_H
16 #define LLVM_ADT_TRIE_H
18 #include "llvm/ADT/GraphTraits.h"
19 #include "llvm/Support/DOTGraphTraits.h"
26 // - Labels are usually small, maybe it's better to use SmallString
27 // - Should we use char* during construction?
28 // - Should we templatize Empty with traits-like interface?
30 template<class Payload>
32 friend class GraphTraits<Trie<Payload> >;
33 friend class DOTGraphTraits<Trie<Payload> >;
39 typedef std::vector<Node*> NodeVectorType;
40 typedef typename NodeVectorType::iterator iterator;
41 typedef typename NodeVectorType::const_iterator const_iterator;
53 bool operator() (Node* N1, Node* N2) {
54 return (N1->Label[0] < N2->Label[0]);
56 bool operator() (Node* N, char Id) {
57 return (N->Label[0] < Id);
63 NodeVectorType Children;
67 Node& operator=(const Node&);
69 inline void addEdge(Node* N) {
71 Children.push_back(N);
73 iterator I = std::lower_bound(Children.begin(), Children.end(),
75 // FIXME: no dups are allowed
76 Children.insert(I, N);
80 inline void setEdge(Node* N) {
81 char Id = N->Label[0];
82 iterator I = std::lower_bound(Children.begin(), Children.end(),
84 assert(I != Children.end() && "Node does not exists!");
88 QueryResult query(const std::string& s) const {
90 unsigned l1 = s.length();
91 unsigned l2 = Label.length();
93 // Find the length of common part
96 while ((i < l) && (s[i] == Label[i]))
99 if (i == l) { // One is prefix of another, find who is who
103 return StringIsPrefix;
105 return LabelIsPrefix;
106 } else // s and Label have common (possible empty) part, return its length
107 return (QueryResult)i;
111 inline explicit Node(const Payload& data, const std::string& label = ""):
112 Label(label), Data(data) { }
114 inline const Payload& data() const { return Data; }
115 inline void setData(const Payload& data) { Data = data; }
117 inline const std::string& label() const { return Label; }
121 llvm::cerr << "Node: " << this << "\n"
122 << "Label: " << Label << "\n"
125 for (iterator I = Children.begin(), E = Children.end(); I != E; ++I)
126 llvm::cerr << (*I)->Label << "\n";
130 inline Node* getEdge(char Id) {
132 iterator I = std::lower_bound(Children.begin(), Children.end(),
134 if (I != Children.end() && (*I)->Label[0] == Id)
140 inline iterator begin() { return Children.begin(); }
141 inline const_iterator begin() const { return Children.begin(); }
142 inline iterator end () { return Children.end(); }
143 inline const_iterator end () const { return Children.end(); }
145 inline size_t size () const { return Children.size(); }
146 inline bool empty() const { return Children.empty(); }
147 inline const Node* &front() const { return Children.front(); }
148 inline Node* &front() { return Children.front(); }
149 inline const Node* &back() const { return Children.back(); }
150 inline Node* &back() { return Children.back(); }
155 std::vector<Node*> Nodes;
158 inline Node* addNode(const Payload& data, const std::string label = "") {
159 Node* N = new Node(data, label);
164 inline Node* splitEdge(Node* N, char Id, size_t index) {
165 Node* eNode = N->getEdge(Id);
166 assert(eNode && "Node doesn't exist");
168 const std::string &l = eNode->Label;
169 assert(index > 0 && index < l.length() && "Trying to split too far!");
170 std::string l1 = l.substr(0, index);
171 std::string l2 = l.substr(index);
173 Node* nNode = addNode(Empty, l1);
177 nNode->addEdge(eNode);
184 Trie& operator=(const Trie&);
187 inline explicit Trie(const Payload& empty):Empty(empty) {
191 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
195 inline Node* getRoot() const { return Nodes[0]; }
197 bool addString(const std::string& s, const Payload& data);
198 const Payload& lookup(const std::string& s) const;
202 // Define this out-of-line to dissuade the C++ compiler from inlining it.
203 template<class Payload>
204 bool Trie<Payload>::addString(const std::string& s, const Payload& data) {
205 Node* cNode = getRoot();
209 while (tNode == NULL) {
211 if (Node* nNode = cNode->getEdge(Id)) {
212 typename Node::QueryResult r = nNode->query(s1);
216 case Node::StringIsPrefix:
217 // Currently we don't allow to have two strings in the trie one
218 // being a prefix of another. This should be fixed.
219 assert(0 && "FIXME!");
221 case Node::DontMatch:
222 assert(0 && "Impossible!");
224 case Node::LabelIsPrefix:
225 s1 = s1.substr(nNode->label().length());
229 nNode = splitEdge(cNode, Id, r);
230 tNode = addNode(data, s1.substr(r));
231 nNode->addEdge(tNode);
234 tNode = addNode(data, s1);
235 cNode->addEdge(tNode);
242 template<class Payload>
243 const Payload& Trie<Payload>::lookup(const std::string& s) const {
244 Node* cNode = getRoot();
248 while (tNode == NULL) {
250 if (Node* nNode = cNode->getEdge(Id)) {
251 typename Node::QueryResult r = nNode->query(s1);
257 case Node::StringIsPrefix:
259 case Node::DontMatch:
260 assert(0 && "Impossible!");
262 case Node::LabelIsPrefix:
263 s1 = s1.substr(nNode->label().length());
273 return tNode->data();
276 template<class Payload>
277 struct GraphTraits<Trie<Payload> > {
278 typedef Trie<Payload> TrieType;
279 typedef typename TrieType::Node NodeType;
280 typedef typename NodeType::iterator ChildIteratorType;
282 static inline NodeType *getEntryNode(const TrieType& T) {
286 static inline ChildIteratorType child_begin(NodeType *N) {
289 static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
291 typedef typename std::vector<NodeType*>::const_iterator nodes_iterator;
293 static inline nodes_iterator nodes_begin(const TrieType& G) {
294 return G.Nodes.begin();
296 static inline nodes_iterator nodes_end(const TrieType& G) {
297 return G.Nodes.end();
302 template<class Payload>
303 struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits {
304 typedef typename Trie<Payload>::Node NodeType;
305 typedef typename GraphTraits<Trie<Payload> >::ChildIteratorType EdgeIter;
307 static std::string getGraphName(const Trie<Payload>& T) {
311 static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T,
313 if (T.getRoot() == Node)
316 return Node->label();
319 static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) {
321 return N->label().substr(0, 1);
324 static std::string getNodeAttributes(const NodeType* Node,
325 const Trie<Payload>& T) {
326 if (Node->data() != T.Empty)
334 } // end of llvm namespace
336 #endif // LLVM_ADT_TRIE_H