//===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Anton Korobeynikov and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class defines a generic trie structure. The trie structure
// is immutable after creation, but the payload contained within it is not.
//
//===----------------------------------------------------------------------===//
15 #ifndef LLVM_ADT_TRIE_H
16 #define LLVM_ADT_TRIE_H
// FIXME:
// - Labels are usually small; it may be better to use SmallString.
// - Use something more efficient for child storage.
// - Should we use char* during construction?
// - Should we templatize Empty with a traits-like interface?
// - Provide a GraphTraits interface.
/// Trie - A trie keyed by std::string whose edges carry multi-character
/// labels. Every stored string maps to a Payload; looking up a string that was
/// never added yields the "empty" payload handed to the constructor.
///
/// Known limitation (kept from the original implementation): addString()
/// rejects a string that is already present or that is a prefix of a label it
/// runs into while descending; see addString() for details.
template<class Payload>
class Trie {
  /// Node - One trie node. Label is the text on the edge leading into the
  /// node, Data is the payload stored at the node, and Children is indexed by
  /// the first character of each child's label.
  class Node {
    friend class Trie;

    /// Result of comparing a query string against this node's label. The
    /// negative values and DontMatch are symbolic; any positive value is the
    /// length of the common prefix shared by the string and the label.
    typedef enum {
      Same           = -3,  ///< String and label are identical.
      StringIsPrefix = -2,  ///< String is a proper prefix of the label.
      LabelIsPrefix  = -1,  ///< Label is a proper prefix of the string.
      DontMatch      = 0,   ///< No common prefix at all.
      HaveCommonPart,       ///< Common prefix of length 1 (and upwards).
      /// Never returned by name. It only widens the enumeration's value range
      /// so that every common-prefix length up to INT_MAX is a valid
      /// QueryResult value; without it, converting a larger length to the
      /// enum is undefined.
      MaxCommonPart  = 0x7fffffff
    } QueryResult;

    std::string Label;
    Payload Data;
    std::map<char, Node*> Children;

  public:
    /// Create a node holding \p data that is reached through edge \p label.
    inline explicit Node(const Payload& data, const std::string& label = ""):
        Label(label), Data(data) { }

    /// Member-wise copy. Child pointers are copied as-is (shallow); the nodes
    /// they point to are not duplicated here.
    inline Node(const Node& n):
        Label(n.Label), Data(n.Data), Children(n.Children) { }

    /// Member-wise assignment; child pointers are copied as-is (shallow).
    inline Node& operator=(const Node& n) {
      if (&n != this) {
        Data = n.Data;
        Children = n.Children;
        Label = n.Label;
      }

      return *this;
    }

    /// \returns true if this node has no children.
    inline bool isLeaf() const { return Children.empty(); }

    inline const Payload& getData() const { return Data; }
    inline void setData(const Payload& data) { Data = data; }

    inline void setLabel(const std::string& label) { Label = label; }
    inline const std::string& getLabel() const { return Label; }

    /// Register \p N as a child, keyed by the first character of its label
    /// (an empty label is keyed by '\0').
    /// \returns false if a child with that key already exists, in which case
    /// nothing is changed.
    inline bool addEdge(Node* N) {
      const std::string& Label = N->getLabel();
      // Label is a const reference, so Label[0] is well defined (it yields
      // '\0') even when the label is empty.
      return Children.insert(std::make_pair(Label[0], N)).second;
    }

    /// Compare \p s against this node's label.
    /// \returns Same, StringIsPrefix or LabelIsPrefix when one string is a
    /// prefix of the other; otherwise the length of their common prefix
    /// (DontMatch, i.e. 0, when even the first characters differ).
    QueryResult query(const std::string& s) const {
      const size_t l1 = s.length();
      const size_t l2 = Label.length();
      const size_t l = (l1 < l2) ? l1 : l2;

      // Find the length of the common part.
      size_t i = 0;
      while ((i < l) && (s[i] == Label[i]))
        ++i;

      if (i == l) { // One is a prefix of the other; find out which.
        if (l1 == l2)
          return Same;
        return (i == l1) ? StringIsPrefix : LabelIsPrefix;
      }

      // s and Label share a (possibly empty) common part: return its length.
      assert(i <= static_cast<size_t>(MaxCommonPart) &&
             "Common part is too long!");
      return static_cast<QueryResult>(static_cast<int>(i));
    }
  };

  /// Every node of the trie, owned by this object. Nodes[0] is the root.
  std::vector<Node*> Nodes;
  /// Payload stored in the root and in intermediate nodes, and returned by
  /// lookup() for strings that are not present.
  Payload Empty;

  /// Key under which a (remaining) string is looked up among the children of
  /// a node: its first character, or '\0' for the empty string.
  static inline char edgeKey(const std::string& s) {
    return s.empty() ? '\0' : s[0];
  }

  /// Allocate a node, record it in Nodes so the destructor frees it, and
  /// return it.
  inline Node* addNode(const Payload& data, const std::string label = "") {
    Node* N = new Node(data, label);
    Nodes.push_back(N);
    return N;
  }

  /// Split the edge leaving \p N under key \p Id after \p index characters.
  /// A new intermediate node carrying the Empty payload takes over the first
  /// \p index characters of the label; the old child keeps the remainder and
  /// becomes a child of the new node.
  /// \returns the new intermediate node.
  inline Node* splitEdge(Node* N, char Id, size_t index) {
    typename std::map<char, Node*>::iterator CI = N->Children.find(Id);
    assert(CI != N->Children.end() && "Node doesn't exist");

    Node* eNode = CI->second;

    // Work on a copy: eNode's label is overwritten below.
    const std::string l = eNode->Label;
    assert(index > 0 && index < l.length() && "Trying to split too far!");

    Node* nNode = addNode(Empty, l.substr(0, index));

    // Relabel before addEdge(): the child is keyed by its label's first char.
    eNode->Label = l.substr(index);
    nNode->addEdge(eNode);

    CI->second = nNode;

    return nNode;
  }

  /// Delete every node owned by this trie.
  inline void destroyNodes() {
    for (size_t i = 0, e = Nodes.size(); i != e; ++i)
      delete Nodes[i];
    Nodes.clear();
  }

  /// Append deep copies of all nodes of \p Other (in the same order, so the
  /// root stays at index 0) and re-point the copied child links at the copies.
  /// Expects Nodes to be empty on entry.
  void copyNodesFrom(const Trie& Other) {
    std::map<const Node*, Node*> Remap;
    const size_t e = Other.Nodes.size();

    for (size_t i = 0; i != e; ++i) {
      const Node* Old = Other.Nodes[i];
      Remap[Old] = addNode(Old->Data, Old->Label);
    }

    for (size_t i = 0; i != e; ++i) {
      const Node* Old = Other.Nodes[i];
      Node* New = Nodes[i];
      for (typename std::map<char, Node*>::const_iterator
             CI = Old->Children.begin(), CE = Old->Children.end();
           CI != CE; ++CI)
        New->Children[CI->first] = Remap[CI->second];
    }
  }

public:
  /// Create a trie containing only the root. \p empty is the payload that
  /// lookup() returns for strings that are not present.
  inline explicit Trie(const Payload& empty):Empty(empty) {
    addNode(Empty);
  }

  /// Deep copy. The trie owns its nodes through raw pointers, so a
  /// member-wise copy would make two tries delete the same nodes.
  inline Trie(const Trie& Other):Empty(Other.Empty) {
    copyNodesFrom(Other);
  }

  /// Deep-copy assignment: build the copy first, then swap it in so the old
  /// nodes are released by the temporary's destructor.
  inline Trie& operator=(const Trie& Other) {
    if (this != &Other) {
      Trie Tmp(Other);
      Nodes.swap(Tmp.Nodes);
      Empty = Other.Empty;
    }
    return *this;
  }

  inline ~Trie() {
    destroyNodes();
  }

  /// \returns the root node (always present).
  inline Node* getRoot() const { return Nodes[0]; }

  /// Insert \p s with payload \p data.
  ///
  /// \returns true on success. When \p s compares equal to, or is a proper
  /// prefix of, a label met while descending, the insertion is not supported:
  /// this asserts in builds with assertions enabled and returns false
  /// otherwise.
  bool addString(const std::string& s, const Payload& data) {
    Node* cNode = getRoot();
    Node* tNode = NULL;
    std::string s1(s);   // Part of s that is still unmatched.

    while (tNode == NULL) {
      const char Id = edgeKey(s1);
      typename std::map<char, Node*>::iterator CI = cNode->Children.find(Id);

      if (CI != cNode->Children.end()) {
        Node* nNode = CI->second;
        typename Node::QueryResult r = nNode->query(s1);

        switch (r) {
        case Node::Same:
        case Node::StringIsPrefix:
          // Currently we don't allow to have two strings in the trie one
          // being a prefix of another. This should be fixed.
          assert(0 && "FIXME!");
          return false;
        case Node::DontMatch:
          // The child was selected by its first character, so at least one
          // character must match.
          assert(0 && "Impossible!");
          return false;
        case Node::LabelIsPrefix:
          // Consume the label and keep descending.
          s1 = s1.substr(nNode->getLabel().length());
          cNode = nNode;
          break;
        default: {
          // r is the length of the common part: split the edge there and hang
          // the remainder of the string off the new intermediate node.
          const size_t Common = static_cast<size_t>(r);
          nNode = splitEdge(cNode, Id, Common);
          tNode = addNode(data, s1.substr(Common));
          nNode->addEdge(tNode);
        }
        }
      } else {
        // No edge starts with this character: attach the rest as a new leaf.
        tNode = addNode(data, s1);
        cNode->addEdge(tNode);
      }
    }

    return true;
  }

  /// Look up \p s.
  /// \returns the payload of the node whose path spells exactly \p s, or the
  /// Empty payload when there is no such node. An intermediate node created
  /// by an edge split also holds the Empty payload.
  const Payload& lookup(const std::string& s) const {
    Node* cNode = getRoot();
    Node* tNode = NULL;
    std::string s1(s);   // Part of s that is still unmatched.

    while (tNode == NULL) {
      typename std::map<char, Node*>::const_iterator CI =
        cNode->Children.find(edgeKey(s1));
      if (CI == cNode->Children.end())
        return Empty;

      Node* nNode = CI->second;
      typename Node::QueryResult r = nNode->query(s1);

      switch (r) {
      case Node::Same:
        tNode = nNode;
        break;
      case Node::StringIsPrefix:
        return Empty;
      case Node::DontMatch:
        assert(0 && "Impossible!");
        return Empty;
      case Node::LabelIsPrefix:
        // Consume the label and keep descending.
        s1 = s1.substr(nNode->getLabel().length());
        cNode = nNode;
        break;
      default:
        // Diverges from the label part-way through: not present.
        return Empty;
      }
    }

    return tNode->getData();
  }
};
217 #endif // LLVM_ADT_TRIE_H