1 //===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This class defines a generic trie structure. The trie structure
11 // is immutable after creation, but the payload contained within it is not.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_ADT_TRIE_H
16 #define LLVM_ADT_TRIE_H
18 #include "llvm/ADT/GraphTraits.h"
19 #include "llvm/Support/DOTGraphTraits.h"
27 // - Labels are usually small, maybe it's better to use SmallString
28 // - Should we use char* during construction?
29 // - Should we templatize Empty with traits-like interface?
31 template<class Payload>
33 friend class GraphTraits<Trie<Payload> >;
34 friend class DOTGraphTraits<Trie<Payload> >;
40 typedef std::vector<Node*> NodeVectorType;
41 typedef typename NodeVectorType::iterator iterator;
42 typedef typename NodeVectorType::const_iterator const_iterator;
54 bool operator() (Node* N1, Node* N2) {
55 return (N1->Label[0] < N2->Label[0]);
57 bool operator() (Node* N, char Id) {
58 return (N->Label[0] < Id);
64 NodeVectorType Children;
68 Node& operator=(const Node&);
70 inline void addEdge(Node* N) {
72 Children.push_back(N);
74 iterator I = std::lower_bound(Children.begin(), Children.end(),
76 // FIXME: no dups are allowed
77 Children.insert(I, N);
81 inline void setEdge(Node* N) {
82 char Id = N->Label[0];
83 iterator I = std::lower_bound(Children.begin(), Children.end(),
85 assert(I != Children.end() && "Node does not exists!");
89 QueryResult query(const std::string& s) const {
91 unsigned l1 = s.length();
92 unsigned l2 = Label.length();
94 // Find the length of common part
97 while ((i < l) && (s[i] == Label[i]))
100 if (i == l) { // One is prefix of another, find who is who
104 return StringIsPrefix;
106 return LabelIsPrefix;
107 } else // s and Label have common (possible empty) part, return its length
108 return (QueryResult)i;
112 inline explicit Node(const Payload& data, const std::string& label = ""):
113 Label(label), Data(data) { }
115 inline const Payload& data() const { return Data; }
116 inline void setData(const Payload& data) { Data = data; }
118 inline const std::string& label() const { return Label; }
122 llvm::cerr << "Node: " << this << "\n"
123 << "Label: " << Label << "\n"
126 for (iterator I = Children.begin(), E = Children.end(); I != E; ++I)
127 llvm::cerr << (*I)->Label << "\n";
131 inline Node* getEdge(char Id) {
133 iterator I = std::lower_bound(Children.begin(), Children.end(),
135 if (I != Children.end() && (*I)->Label[0] == Id)
141 inline iterator begin() { return Children.begin(); }
142 inline const_iterator begin() const { return Children.begin(); }
143 inline iterator end () { return Children.end(); }
144 inline const_iterator end () const { return Children.end(); }
146 inline size_t size () const { return Children.size(); }
147 inline bool empty() const { return Children.empty(); }
148 inline const Node* &front() const { return Children.front(); }
149 inline Node* &front() { return Children.front(); }
150 inline const Node* &back() const { return Children.back(); }
151 inline Node* &back() { return Children.back(); }
156 std::vector<Node*> Nodes;
159 inline Node* addNode(const Payload& data, const std::string label = "") {
160 Node* N = new Node(data, label);
165 inline Node* splitEdge(Node* N, char Id, size_t index) {
166 Node* eNode = N->getEdge(Id);
167 assert(eNode && "Node doesn't exist");
169 const std::string &l = eNode->Label;
170 assert(index > 0 && index < l.length() && "Trying to split too far!");
171 std::string l1 = l.substr(0, index);
172 std::string l2 = l.substr(index);
174 Node* nNode = addNode(Empty, l1);
178 nNode->addEdge(eNode);
185 Trie& operator=(const Trie&);
188 inline explicit Trie(const Payload& empty):Empty(empty) {
192 for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
196 inline Node* getRoot() const { return Nodes[0]; }
198 bool addString(const std::string& s, const Payload& data);
199 const Payload& lookup(const std::string& s) const;
203 // Define this out-of-line to dissuade the C++ compiler from inlining it.
204 template<class Payload>
205 bool Trie<Payload>::addString(const std::string& s, const Payload& data) {
206 Node* cNode = getRoot();
210 while (tNode == NULL) {
212 if (Node* nNode = cNode->getEdge(Id)) {
213 typename Node::QueryResult r = nNode->query(s1);
217 case Node::StringIsPrefix:
218 // Currently we don't allow to have two strings in the trie one
219 // being a prefix of another. This should be fixed.
220 assert(0 && "FIXME!");
222 case Node::DontMatch:
223 llvm_unreachable("Impossible!");
224 case Node::LabelIsPrefix:
225 s1 = s1.substr(nNode->label().length());
229 nNode = splitEdge(cNode, Id, r);
230 tNode = addNode(data, s1.substr(r));
231 nNode->addEdge(tNode);
234 tNode = addNode(data, s1);
235 cNode->addEdge(tNode);
242 template<class Payload>
243 const Payload& Trie<Payload>::lookup(const std::string& s) const {
244 Node* cNode = getRoot();
248 while (tNode == NULL) {
250 if (Node* nNode = cNode->getEdge(Id)) {
251 typename Node::QueryResult r = nNode->query(s1);
257 case Node::StringIsPrefix:
259 case Node::DontMatch:
260 llvm_unreachable("Impossible!");
261 case Node::LabelIsPrefix:
262 s1 = s1.substr(nNode->label().length());
272 return tNode->data();
275 template<class Payload>
276 struct GraphTraits<Trie<Payload> > {
277 typedef Trie<Payload> TrieType;
278 typedef typename TrieType::Node NodeType;
279 typedef typename NodeType::iterator ChildIteratorType;
281 static inline NodeType *getEntryNode(const TrieType& T) {
285 static inline ChildIteratorType child_begin(NodeType *N) {
288 static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
290 typedef typename std::vector<NodeType*>::const_iterator nodes_iterator;
292 static inline nodes_iterator nodes_begin(const TrieType& G) {
293 return G.Nodes.begin();
295 static inline nodes_iterator nodes_end(const TrieType& G) {
296 return G.Nodes.end();
301 template<class Payload>
302 struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits {
303 typedef typename Trie<Payload>::Node NodeType;
304 typedef typename GraphTraits<Trie<Payload> >::ChildIteratorType EdgeIter;
306 static std::string getGraphName(const Trie<Payload>& T) {
310 static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T) {
311 if (T.getRoot() == Node)
314 return Node->label();
317 static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) {
319 return N->label().substr(0, 1);
322 static std::string getNodeAttributes(const NodeType* Node,
323 const Trie<Payload>& T) {
324 if (Node->data() != T.Empty)
332 } // end of llvm namespace
334 #endif // LLVM_ADT_TRIE_H