1 //===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by Anton Korobeynikov and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class defines a generic trie structure. The trie structure
11 // is immutable after creation, but the payload contained within it is not.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_ADT_TRIE_H
16 #define LLVM_ADT_TRIE_H
18 #include "llvm/ADT/GraphTraits.h"
19 #include "llvm/Support/DOTGraphTraits.h"
26 // - Labels are usually small; maybe it's better to use SmallString
27 // - Should we use char* during construction?
28 // - Should we templatize Empty with traits-like interface?
30 template<class Payload>
32 friend class GraphTraits<Trie<Payload> >;
33 friend class DOTGraphTraits<Trie<Payload> >;
37 friend class GraphTraits<Trie<Payload> >;
// Vector of Node pointers and its mutable iterator type. NodeVectorIter is the
// iterator type used below when searching and iterating Children.
46 typedef std::vector<Node*> NodeVector;
47 typedef typename std::vector<Node*>::iterator NodeVectorIter;
// Orders two nodes by the first character of their labels.
// Both labels are indexed at position 0 without a length check.
50 bool operator() (Node* N1, Node* N2) {
51 return (N1->Label[0] < N2->Label[0]);
// Compares the first character of a node's label against a bare character,
// returning true when the node's character is smaller than Id.
53 bool operator() (Node* N, char Id) {
54 return (N->Label[0] < Id);
// Copy assignment is only declared here; no definition appears in this
// excerpt (presumably intentional, to make Node non-assignable -- confirm).
64 Node& operator=(const Node&);
// Adds N to this node's Children. Two insertion paths are visible: a plain
// push_back, and an insert at the position returned by std::lower_bound.
// NOTE(review): the condition selecting between the two paths and the
// remaining std::lower_bound arguments are not visible in this excerpt.
66 inline void addEdge(Node* N) {
68 Children.push_back(N);
70 NodeVectorIter I = std::lower_bound(Children.begin(), Children.end(),
72 // FIXME: no dups are allowed
73 Children.insert(I, N);
// Takes the first character of N's label as Id, runs std::lower_bound over
// Children, and asserts that the search did not reach the end of Children.
// NOTE(review): the remaining std::lower_bound arguments and the statement
// that presumably stores N at the found position are not visible in this
// excerpt -- confirm.
77 inline void setEdge(Node* N) {
78 char Id = N->Label[0];
79 NodeVectorIter I = std::lower_bound(Children.begin(), Children.end(),
81 assert(I != Children.end() && "Node does not exists!");
// Classifies how `s` relates to this node's Label. The two strings are walked
// in step while their characters agree. If the walk reaches the bound `l`, one
// string is a prefix of the other; the visible returns for that case are
// StringIsPrefix and LabelIsPrefix. Otherwise the number of matching leading
// characters is returned, cast to QueryResult.
// NOTE(review): the declarations of `i` and `l` and the conditions choosing
// between the prefix results are not visible in this excerpt (`l` is
// presumably the shorter of l1 and l2 -- confirm).
85 QueryResult query(const std::string& s) const {
87 unsigned l1 = s.length();
88 unsigned l2 = Label.length();
90 // Find the length of common part
93 while ((i < l) && (s[i] == Label[i]))
96 if (i == l) { // One is prefix of another, find who is who
100 return StringIsPrefix;
102 return LabelIsPrefix;
103 } else // s and Label have common (possibly empty) part, return its length
104 return (QueryResult)i;
// Constructs a node holding a copy of `data` as its payload and `label` as its
// edge label; the label defaults to the empty string. The body is empty.
108 inline explicit Node(const Payload& data, const std::string& label = ""):
109 Label(label), Data(data) { }
// Returns a const reference to the payload stored in this node.
111 inline const Payload& data() const { return Data; }
// Overwrites this node's payload with a copy of `data`.
112 inline void setData(const Payload& data) { Data = data; }
// Returns a const reference to this node's label string.
114 inline const std::string& label() const { return Label; }
118 std::cerr << "Node: " << this << "\n"
119 << "Label: " << Label << "\n"
122 for (NodeVectorIter I = Children.begin(), E = Children.end(); I != E; ++I)
123 std::cerr << (*I)->Label << "\n";
// Looks for a child whose label begins with Id. std::lower_bound is run over
// Children, and the hit is accepted only when the iterator is not end() and
// the first character of that child's label equals Id.
// NOTE(review): the remaining std::lower_bound arguments and both return
// statements are not visible in this excerpt; callers in this file test the
// result for null, so a miss presumably yields a null pointer -- confirm.
127 inline Node* getEdge(char Id) {
129 NodeVectorIter I = std::lower_bound(Children.begin(), Children.end(),
131 if (I != Children.end() && (*I)->Label[0] == Id)
// Flat list of node pointers for this trie. getRoot() returns element 0, and
// the GraphTraits specialization iterates this vector as the node set.
139 std::vector<Node*> Nodes;
// Allocates a new Node with `new`, holding `data` and `label`. Note that
// `label` is taken by value here, unlike the Node constructor's const reference.
// NOTE(review): the rest of the body is not visible in this excerpt; it
// presumably records the node in Nodes and returns it -- confirm.
142 inline Node* addNode(const Payload& data, const std::string label = "") {
143 Node* N = new Node(data, label);
// Splits the edge of N that starts with character Id at position `index` of
// the edge's label. The existing child (eNode) must exist, and `index` must be
// strictly inside its label; both are checked with assert. A new node carrying
// the Empty payload and the first `index` characters of the label is created,
// and the existing child is attached beneath it.
// NOTE(review): the lines between the creation of nNode and the final addEdge
// are not visible in this excerpt, nor is the return; they presumably relabel
// eNode with l2, re-link N to nNode, and return nNode -- confirm.
148 inline Node* splitEdge(Node* N, char Id, size_t index) {
149 Node* eNode = N->getEdge(Id);
150 assert(eNode && "Node doesn't exist");
152 const std::string &l = eNode->Label;
153 assert(index > 0 && index < l.length() && "Trying to split too far!");
// l1 is the part kept on the new intermediate node, l2 the remaining tail.
154 std::string l1 = l.substr(0, index);
155 std::string l2 = l.substr(index);
157 Node* nNode = addNode(Empty, l1);
161 nNode->addEdge(eNode);
// Copy assignment is only declared here; no definition appears in this
// excerpt (presumably intentional, to make Trie non-assignable -- confirm).
168 Trie& operator=(const Trie&);
// Constructs a trie, storing a copy of `empty` as the Empty payload (the value
// splitEdge gives to the intermediate nodes it creates).
// NOTE(review): the constructor body is not visible in this excerpt. getRoot()
// reads Nodes[0], so the body presumably creates the root node -- confirm.
171 inline explicit Trie(const Payload& empty):Empty(empty) {
175 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
// Returns the root node, which is the first element of Nodes. No bounds check
// is performed, so Nodes must be non-empty when this is called.
179 inline Node* getRoot() const { return Nodes[0]; }
// Inserts string `s` with payload `data`; defined out-of-line below the class.
181 bool addString(const std::string& s, const Payload& data);
// Returns a const reference to the payload found for string `s`; defined
// out-of-line below the class.
182 const Payload& lookup(const std::string& s) const;
186 // Define this out-of-line to dissuade the C++ compiler from inlining it.
// Inserts `s` with payload `data`. Starting at the root, the loop runs until a
// target node (tNode) has been created:
//  - If the current node has an edge for Id, that edge's label is queried
//    against the remaining string s1:
//      StringIsPrefix -> asserts (unsupported, see the FIXME below);
//      DontMatch      -> asserts with "Impossible!";
//      LabelIsPrefix  -> the edge label is stripped from the front of s1;
//      remaining visible branch (presumably the default case) -> the edge is
//        split at r, and a new node holding the rest of s1 is attached under
//        the node created by the split.
//  - Otherwise (presumably the else branch) a new node labelled s1 is added
//    under the current node.
// NOTE(review): the declarations of s1, tNode and Id, the switch header, the
// breaks, the step that advances cNode, and the return statement are not
// visible in this excerpt.
187 template<class Payload>
188 bool Trie<Payload>::addString(const std::string& s, const Payload& data) {
189 Node* cNode = getRoot();
193 while (tNode == NULL) {
195 if (Node* nNode = cNode->getEdge(Id)) {
196 typename Node::QueryResult r = nNode->query(s1);
200 case Node::StringIsPrefix:
201 // Currently we do not allow two strings in the trie where one is a
202 // prefix of the other. This should be fixed.
203 assert(0 && "FIXME!");
205 case Node::DontMatch:
206 assert(0 && "Impossible!");
208 case Node::LabelIsPrefix:
209 s1 = s1.substr(nNode->label().length());
213 nNode = splitEdge(cNode, Id, r);
214 tNode = addNode(data, s1.substr(r));
215 nNode->addEdge(tNode);
218 tNode = addNode(data, s1);
219 cNode->addEdge(tNode);
// Looks up `s` and returns the payload of the node reached (tNode->data()).
// Starting at the root, the loop runs until tNode is set. When the current
// node has an edge for Id, the edge's label is queried against the remaining
// string s1; for LabelIsPrefix the label is stripped from the front of s1, and
// DontMatch asserts with "Impossible!".
// NOTE(review): the declarations of s1, tNode and Id, the switch header, the
// handling of StringIsPrefix and of the other query results, the step that
// advances cNode, and the no-edge path are not visible in this excerpt, so the
// result for a string that is not in the trie cannot be stated from here.
226 template<class Payload>
227 const Payload& Trie<Payload>::lookup(const std::string& s) const {
228 Node* cNode = getRoot();
232 while (tNode == NULL) {
234 if (Node* nNode = cNode->getEdge(Id)) {
235 typename Node::QueryResult r = nNode->query(s1);
241 case Node::StringIsPrefix:
243 case Node::DontMatch:
244 assert(0 && "Impossible!");
246 case Node::LabelIsPrefix:
247 s1 = s1.substr(nNode->label().length());
257 return tNode->data();
// GraphTraits specialization that exposes a Trie as a graph: graph nodes are
// Trie nodes, a node's successors are its Children, and the graph's node set
// is the Trie's Nodes vector.
260 template<class Payload>
261 struct GraphTraits<Trie<Payload> > {
262 typedef typename Trie<Payload>::Node NodeType;
263 typedef typename std::vector<NodeType*>::iterator ChildIteratorType;
// Entry node of the graph for trie T.
// NOTE(review): the body is not visible in this excerpt (presumably
// T.getRoot() -- confirm).
265 static inline NodeType *getEntryNode(const Trie<Payload>& T) {
// Iterator to the first child of N.
269 static inline ChildIteratorType child_begin(NodeType *N) {
270 return N->Children.begin();
// Past-the-end iterator over the children of N.
272 static inline ChildIteratorType child_end(NodeType *N) {
273 return N->Children.end();
// Iteration over all nodes uses const iterators into the Trie's Nodes vector.
276 typedef typename std::vector<NodeType*>::const_iterator nodes_iterator;
278 static inline nodes_iterator nodes_begin(const Trie<Payload>& G) {
279 return G.Nodes.begin();
281 static inline nodes_iterator nodes_end(const Trie<Payload>& G) {
282 return G.Nodes.end();
// DOTGraphTraits specialization for Trie, derived from DefaultDOTGraphTraits;
// it supplies the graph name, node labels, edge source labels and node
// attributes.
287 template<class Payload>
288 struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits {
289 typedef typename Trie<Payload>::Node NodeType;
290 typedef typename GraphTraits<Trie<Payload> >::ChildIteratorType EdgeIter;
// Name of the graph. NOTE(review): the body is not visible in this excerpt.
292 static std::string getGraphName(const Trie<Payload>& T) {
// Label for a node: the root is special-cased (its result is not visible in
// this excerpt); every other node is labelled with its own label string.
296 static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T) {
297 if (T.getRoot() == Node)
300 return Node->label();
// Label for the source end of an edge: the first character of N's label.
// NOTE(review): the declaration of N is not visible in this excerpt
// (presumably the child that I refers to -- confirm).
303 static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) {
305 return N->label().substr(0, 1);
// Attributes for a node; nodes whose payload differs from the trie's Empty
// payload are treated specially (the attribute string itself is not visible
// in this excerpt).
308 static std::string getNodeAttributes(const NodeType* Node,
309 const Trie<Payload>& T) {
310 if (Node->data() != T.Empty)
318 } // end of llvm namespace
320 #endif // LLVM_ADT_TRIE_H