//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_YAML_PARSER_H
-#define LLVM_SUPPORT_YAML_PARSER_H
+#ifndef LLVM_SUPPORT_YAMLPARSER_H
+#define LLVM_SUPPORT_YAMLPARSER_H
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SMLoc.h"
-
#include <limits>
+#include <map>
#include <utility>
namespace llvm {
-class MemoryBuffer;
+class MemoryBufferRef;
class SourceMgr;
-class raw_ostream;
class Twine;
+class raw_ostream;
namespace yaml {
class Scanner;
struct Token;
-/// @brief Dump all the tokens in this stream to OS.
-/// @returns true if there was an error, false otherwise.
+/// \brief Dump all the tokens in this stream to OS.
+/// \returns true if there was an error, false otherwise.
bool dumpTokens(StringRef Input, raw_ostream &);
-/// @brief Scans all tokens in input without outputting anything. This is used
+/// \brief Scans all tokens in input without outputting anything. This is used
/// for benchmarking the tokenizer.
-/// @returns true if there was an error, false otherwise.
+/// \returns true if there was an error, false otherwise.
bool scanTokens(StringRef Input);
-/// @brief Escape \a Input for a double quoted scalar.
+/// \brief Escape \a Input for a double quoted scalar.
std::string escape(StringRef Input);
-/// @brief This class represents a YAML stream potentially containing multiple
+/// \brief This class represents a YAML stream potentially containing multiple
/// documents.
class Stream {
public:
- Stream(StringRef Input, SourceMgr &);
+ /// \brief This keeps a reference to the string referenced by \p Input.
+ Stream(StringRef Input, SourceMgr &, bool ShowColors = true);
+
+ Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true);
+ ~Stream();
document_iterator begin();
document_iterator end();
void printError(Node *N, const Twine &Msg);
private:
- OwningPtr<Scanner> scanner;
- OwningPtr<Document> CurrentDoc;
+ std::unique_ptr<Scanner> scanner;
+ std::unique_ptr<Document> CurrentDoc;
friend class Document;
-
- /// @brief Validate a %YAML x.x directive.
- void handleYAMLDirective(const Token &);
};
-/// @brief Abstract base class for all Nodes.
+/// \brief Abstract base class for all Nodes.
class Node {
+ virtual void anchor();
+
public:
enum NodeKind {
NK_Null,
NK_Scalar,
+ NK_BlockScalar,
NK_KeyValue,
NK_Mapping,
NK_Sequence,
NK_Alias
};
- Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor);
- virtual ~Node();
+ Node(unsigned int Type, std::unique_ptr<Document> &, StringRef Anchor,
+ StringRef Tag);
- /// @brief Get the value of the anchor attached to this node. If it does not
+ /// \brief Get the value of the anchor attached to this node. If it does not
/// have one, getAnchor().size() will be 0.
StringRef getAnchor() const { return Anchor; }
+ /// \brief Get the tag as it was written in the document. This does not
+ /// perform tag resolution.
+ StringRef getRawTag() const { return Tag; }
+
+ /// \brief Get the verbatim tag for a given Node. This performs tag resolution
+ /// and substitution.
+ std::string getVerbatimTag() const;
+
SMRange getSourceRange() const { return SourceRange; }
void setSourceRange(SMRange SR) { SourceRange = SR; }
void setError(const Twine &Message, Token &Location) const;
bool failed() const;
- virtual void skip() {};
+ virtual void skip() {}
unsigned int getType() const { return TypeID; }
- static inline bool classof(const Node *) { return true; }
- void *operator new ( size_t Size
- , BumpPtrAllocator &Alloc
- , size_t Alignment = 16) throw() {
+ void *operator new(size_t Size, BumpPtrAllocator &Alloc,
+ size_t Alignment = 16) LLVM_NOEXCEPT {
return Alloc.Allocate(Size, Alignment);
}
- void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() {
- Alloc.Deallocate(Ptr);
+ void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
+ size_t Size) LLVM_NOEXCEPT {
+ Alloc.Deallocate(Ptr, Size);
}
protected:
- OwningPtr<Document> &Doc;
+ std::unique_ptr<Document> &Doc;
SMRange SourceRange;
+ void operator delete(void *) LLVM_NOEXCEPT = delete;
+
+ ~Node() = default;
+
private:
unsigned int TypeID;
StringRef Anchor;
+ /// \brief The tag as typed in the document.
+ StringRef Tag;
};
-/// @brief A null value.
+/// \brief A null value.
///
/// Example:
/// !!null null
-class NullNode : public Node {
+class NullNode final : public Node {
+ void anchor() override;
+
public:
- NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {}
+ NullNode(std::unique_ptr<Document> &D)
+ : Node(NK_Null, D, StringRef(), StringRef()) {}
- static inline bool classof(const NullNode *) { return true; }
- static inline bool classof(const Node *N) {
- return N->getType() == NK_Null;
- }
+ static inline bool classof(const Node *N) { return N->getType() == NK_Null; }
};
-/// @brief A scalar node is an opaque datum that can be presented as a
+/// \brief A scalar node is an opaque datum that can be presented as a
/// series of zero or more Unicode scalar values.
///
/// Example:
/// Adena
-class ScalarNode : public Node {
+class ScalarNode final : public Node {
+ void anchor() override;
+
public:
- ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val)
- : Node(NK_Scalar, D, Anchor)
- , Value(Val) {
+ ScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
+ StringRef Val)
+ : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
SMLoc Start = SMLoc::getFromPointer(Val.begin());
- SMLoc End = SMLoc::getFromPointer(Val.end() - 1);
+ SMLoc End = SMLoc::getFromPointer(Val.end());
SourceRange = SMRange(Start, End);
}
// utf8).
StringRef getRawValue() const { return Value; }
- /// @brief Gets the value of this node as a StringRef.
+ /// \brief Gets the value of this node as a StringRef.
///
- /// @param Storage is used to store the content of the returned StringRef iff
+ /// \param Storage is used to store the content of the returned StringRef iff
/// it requires any modification from how it appeared in the source.
/// This happens with escaped characters and multi-line literals.
StringRef getValue(SmallVectorImpl<char> &Storage) const;
- static inline bool classof(const ScalarNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_Scalar;
}
private:
StringRef Value;
- StringRef unescapeDoubleQuoted( StringRef UnquotedValue
- , StringRef::size_type Start
- , SmallVectorImpl<char> &Storage) const;
+ StringRef unescapeDoubleQuoted(StringRef UnquotedValue,
+ StringRef::size_type Start,
+ SmallVectorImpl<char> &Storage) const;
};
-static bool getAs(const ScalarNode *SN, bool &Result) {
- SmallString<4> Storage;
- StringRef Value = SN->getValue(Storage);
- if (Value == "true")
- Result = true;
- else if (Value == "false")
- Result = false;
- else
- return false;
- return true;
-}
+/// \brief A block scalar node is an opaque datum that can be presented as a
+/// series of zero or more Unicode scalar values.
+///
+/// Example:
+/// |
+/// Hello
+/// World
+class BlockScalarNode final : public Node {
+ void anchor() override;
-template<class T>
-typename enable_if_c<std::numeric_limits<T>::is_integer, bool>::type
-getAs(const ScalarNode *SN, T &Result) {
- SmallString<4> Storage;
- return !SN->getValue(Storage).getAsInteger(0, Result);
-}
+public:
+ BlockScalarNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
+ StringRef Value, StringRef RawVal)
+ : Node(NK_BlockScalar, D, Anchor, Tag), Value(Value) {
+ SMLoc Start = SMLoc::getFromPointer(RawVal.begin());
+ SMLoc End = SMLoc::getFromPointer(RawVal.end());
+ SourceRange = SMRange(Start, End);
+ }
+
+ /// \brief Gets the value of this node as a StringRef.
+ StringRef getValue() const { return Value; }
+
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_BlockScalar;
+ }
+
+private:
+ StringRef Value;
+};
-/// @brief A key and value pair. While not technically a Node under the YAML
+/// \brief A key and value pair. While not technically a Node under the YAML
/// representation graph, it is easier to treat them this way.
///
/// TODO: Consider making this not a child of Node.
///
/// Example:
/// Section: .text
-class KeyValueNode : public Node {
+class KeyValueNode final : public Node {
+ void anchor() override;
+
public:
- KeyValueNode(OwningPtr<Document> &D)
- : Node(NK_KeyValue, D, StringRef())
- , Key(0)
- , Value(0)
- {}
+ KeyValueNode(std::unique_ptr<Document> &D)
+ : Node(NK_KeyValue, D, StringRef(), StringRef()), Key(nullptr),
+ Value(nullptr) {}
- /// @brief Parse and return the key.
+ /// \brief Parse and return the key.
///
/// This may be called multiple times.
///
- /// @returns The key, or nullptr if failed() == true.
+ /// \returns The key, or nullptr if failed() == true.
Node *getKey();
- /// @brief Parse and return the value.
+ /// \brief Parse and return the value.
///
/// This may be called multiple times.
///
- /// @returns The value, or nullptr if failed() == true.
+ /// \returns The value, or nullptr if failed() == true.
Node *getValue();
- virtual void skip() {
- getKey()->skip();
- getValue()->skip();
+  /// \brief Skip over the key and then the value of this pair.
+  ///
+  /// getKey() and getValue() are documented to return nullptr when parsing
+  /// has failed, so both results are checked before being dereferenced. The
+  /// value is only visited when a key was obtained.
+  void skip() override {
+    if (Node *Key = getKey()) {
+      Key->skip();
+      if (Node *Val = getValue())
+        Val->skip();
+    }
 }
- static inline bool classof(const KeyValueNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_KeyValue;
}
Node *Value;
};
-/// @brief This is an iterator abstraction over YAML collections shared by both
+/// \brief This is an iterator abstraction over YAML collections shared by both
/// sequences and maps.
///
/// BaseT must have a ValueT* member named CurrentEntry and a member function
/// increment() which must set CurrentEntry to 0 to create an end iterator.
template <class BaseT, class ValueT>
class basic_collection_iterator
- : public std::iterator<std::forward_iterator_tag, ValueT> {
+ : public std::iterator<std::input_iterator_tag, ValueT> {
public:
- basic_collection_iterator() : Base(0) {}
+ basic_collection_iterator() : Base(nullptr) {}
basic_collection_iterator(BaseT *B) : Base(B) {}
- ValueT *operator ->() const {
+ ValueT *operator->() const {
assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
return Base->CurrentEntry;
}
- ValueT &operator *() const {
+ ValueT &operator*() const {
assert(Base && Base->CurrentEntry &&
"Attempted to dereference end iterator!");
return *Base->CurrentEntry;
}
- operator ValueT*() const {
+ operator ValueT *() const {
assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
return Base->CurrentEntry;
}
- bool operator !=(const basic_collection_iterator &Other) const {
- if(Base != Other.Base)
- return true;
- return (Base && Other.Base) && Base->CurrentEntry
- != Other.Base->CurrentEntry;
+ /// Note on EqualityComparable:
+ ///
+ /// The iterator is not re-entrant; it is meant to be used for parsing YAML
+ /// on demand. Once iteration has started, it can point to only one entry at
+ /// a time, hence Base.CurrentEntry and Other.Base.CurrentEntry are equal
+ /// iff Base and Other.Base are equal.
+ bool operator==(const basic_collection_iterator &Other) const {
+ if (Base && (Base == Other.Base)) {
+ assert((Base->CurrentEntry == Other.Base->CurrentEntry)
+ && "Equal Bases expected to point to equal Entries");
+ }
+
+ return Base == Other.Base;
+ }
+
+ bool operator!=(const basic_collection_iterator &Other) const {
+ return !(Base == Other.Base);
}
basic_collection_iterator &operator++() {
assert(Base && "Attempted to advance iterator past end!");
Base->increment();
// Create an end iterator.
- if (Base->CurrentEntry == 0)
- Base = 0;
+ if (!Base->CurrentEntry)
+ Base = nullptr;
return *this;
}
return ret;
}
-template <class CollectionType>
-void skip(CollectionType &C) {
+template <class CollectionType> void skip(CollectionType &C) {
// TODO: support skipping from the middle of a parsed collection ;/
assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
if (C.IsAtBeginning)
- for (typename CollectionType::iterator i = begin(C), e = C.end();
- i != e; ++i)
+ for (typename CollectionType::iterator i = begin(C), e = C.end(); i != e;
+ ++i)
i->skip();
}
-/// @brief Represents a YAML map created from either a block map for a flow map.
+/// \brief Represents a YAML map created from either a block map or a flow map.
///
/// This parses the YAML stream as increment() is called.
///
/// Example:
/// Name: _main
/// Scope: Global
-class MappingNode : public Node {
+class MappingNode final : public Node {
+ void anchor() override;
+
public:
enum MappingType {
MT_Block,
MT_Flow,
- MT_Inline //< An inline mapping node is used for "[key: value]".
+ MT_Inline ///< An inline mapping node is used for "[key: value]".
};
- MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT)
- : Node(NK_Mapping, D, Anchor)
- , Type(MT)
- , IsAtBeginning(true)
- , IsAtEnd(false)
- , CurrentEntry(0)
- {}
+ MappingNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
+ MappingType MT)
+ : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true),
+ IsAtEnd(false), CurrentEntry(nullptr) {}
friend class basic_collection_iterator<MappingNode, KeyValueNode>;
typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
template <class T> friend typename T::iterator yaml::begin(T &);
template <class T> friend void yaml::skip(T &);
- iterator begin() {
- return yaml::begin(*this);
- }
+ iterator begin() { return yaml::begin(*this); }
iterator end() { return iterator(); }
- virtual void skip() {
- yaml::skip(*this);
- }
+ void skip() override { yaml::skip(*this); }
- static inline bool classof(const MappingNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_Mapping;
}
void increment();
};
-/// @brief Represents a YAML sequence created from either a block sequence for a
+/// \brief Represents a YAML sequence created from either a block sequence or a
/// flow sequence.
///
/// This parses the YAML stream as increment() is called.
/// Example:
/// - Hello
/// - World
-class SequenceNode : public Node {
+class SequenceNode final : public Node {
+ void anchor() override;
+
public:
enum SequenceType {
ST_Block,
ST_Indentless
};
- SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST)
- : Node(NK_Sequence, D, Anchor)
- , SeqType(ST)
- , IsAtBeginning(true)
- , IsAtEnd(false)
- , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
- , CurrentEntry(0)
- {}
+ SequenceNode(std::unique_ptr<Document> &D, StringRef Anchor, StringRef Tag,
+ SequenceType ST)
+ : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true),
+ IsAtEnd(false),
+ WasPreviousTokenFlowEntry(true), // Start with an imaginary ','.
+ CurrentEntry(nullptr) {}
friend class basic_collection_iterator<SequenceNode, Node>;
typedef basic_collection_iterator<SequenceNode, Node> iterator;
void increment();
- iterator begin() {
- return yaml::begin(*this);
- }
+ iterator begin() { return yaml::begin(*this); }
iterator end() { return iterator(); }
- virtual void skip() {
- yaml::skip(*this);
- }
+ void skip() override { yaml::skip(*this); }
- static inline bool classof(const SequenceNode *) { return true; }
static inline bool classof(const Node *N) {
return N->getType() == NK_Sequence;
}
Node *CurrentEntry;
};
-/// @brief Represents an alias to a Node with an anchor.
+/// \brief Represents an alias to a Node with an anchor.
///
/// Example:
/// *AnchorName
-class AliasNode : public Node {
+class AliasNode final : public Node {
+ void anchor() override;
+
public:
- AliasNode(OwningPtr<Document> &D, StringRef Val)
- : Node(NK_Alias, D, StringRef()), Name(Val) {}
+ AliasNode(std::unique_ptr<Document> &D, StringRef Val)
+ : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
StringRef getName() const { return Name; }
Node *getTarget();
- static inline bool classof(const ScalarNode *) { return true; }
- static inline bool classof(const Node *N) {
- return N->getType() == NK_Alias;
- }
+ static inline bool classof(const Node *N) { return N->getType() == NK_Alias; }
private:
StringRef Name;
};
-/// @brief A YAML Stream is a sequence of Documents. A document contains a root
+/// \brief A YAML Stream is a sequence of Documents. A document contains a root
/// node.
class Document {
public:
- /// @brief Root for parsing a node. Returns a single node.
+ /// \brief Root for parsing a node. Returns a single node.
Node *parseBlockNode();
Document(Stream &ParentStream);
- /// @brief Finish parsing the current document and return true if there are
+ /// \brief Finish parsing the current document and return true if there are
/// more. Return false otherwise.
bool skip();
- /// @brief Parse and return the root level node.
+ /// \brief Parse and return the root level node.
Node *getRoot() {
if (Root)
return Root;
return Root = parseBlockNode();
}
+ const std::map<StringRef, StringRef> &getTagMap() const { return TagMap; }
+
private:
friend class Node;
friend class document_iterator;
- /// @brief Stream to read tokens from.
+ /// \brief Stream to read tokens from.
Stream &stream;
- /// @brief Used to allocate nodes to. All are destroyed without calling their
+ /// \brief Used to allocate nodes to. All are destroyed without calling their
/// destructor when the document is destroyed.
BumpPtrAllocator NodeAllocator;
- /// @brief The root node. Used to support skipping a partially parsed
+ /// \brief The root node. Used to support skipping a partially parsed
/// document.
Node *Root;
+ /// \brief Maps tag prefixes to their expansion.
+ std::map<StringRef, StringRef> TagMap;
+
Token &peekNext();
Token getNext();
void setError(const Twine &Message, Token &Location) const;
bool failed() const;
- void handleTagDirective(const Token &Tag) {
- // TODO: Track tags.
- }
-
- /// @brief Parse %BLAH directives and return true if any were encountered.
+ /// \brief Parse %BLAH directives and return true if any were encountered.
bool parseDirectives();
- /// @brief Consume the next token and error if it is not \a TK.
+ /// \brief Parse %YAML
+ void parseYAMLDirective();
+
+ /// \brief Parse %TAG
+ void parseTAGDirective();
+
+ /// \brief Consume the next token and error if it is not \a TK.
bool expectToken(int TK);
};
-/// @brief Iterator abstraction for Documents over a Stream.
+/// \brief Iterator abstraction for Documents over a Stream.
class document_iterator {
public:
- document_iterator() : Doc(NullDoc) {}
- document_iterator(OwningPtr<Document> &D) : Doc(D) {}
+ document_iterator() : Doc(nullptr) {}
+ document_iterator(std::unique_ptr<Document> &D) : Doc(&D) {}
- bool operator !=(const document_iterator &Other) {
- return Doc != Other.Doc;
+  /// \brief Compare two document iterators for equality.
+  ///
+  /// If either iterator is at the end (isAtEnd()), the two are equal only
+  /// when both are at the end. Otherwise they are equal when they refer to
+  /// the same std::unique_ptr<Document> slot.
+  ///
+  /// Declared const so that const iterators can be compared; it only reads
+  /// state.
+  bool operator==(const document_iterator &Other) const {
+    if (isAtEnd() || Other.isAtEnd())
+      return isAtEnd() && Other.isAtEnd();
+
+    return Doc == Other.Doc;
 }
+  /// \brief Negation of operator==.
+  bool operator!=(const document_iterator &Other) const {
+    return !(*this == Other);
+  }
- document_iterator operator ++() {
- if (!Doc->skip()) {
- Doc.reset(0);
+ document_iterator operator++() {
+ assert(Doc && "incrementing iterator past the end.");
+ if (!(*Doc)->skip()) {
+ Doc->reset(nullptr);
} else {
- Stream &S = Doc->stream;
- Doc.reset(new Document(S));
+ Stream &S = (*Doc)->stream;
+ Doc->reset(new Document(S));
}
return *this;
}
- Document &operator *() {
- return *Doc;
- }
+ Document &operator*() { return *Doc->get(); }
- OwningPtr<Document> &operator ->() {
- return Doc;
- }
+ std::unique_ptr<Document> &operator->() { return *Doc; }
private:
- static OwningPtr<Document> NullDoc;
- OwningPtr<Document> &Doc;
+ bool isAtEnd() const { return !Doc || !*Doc; }
+
+ std::unique_ptr<Document> *Doc;
};
-}
-}
+} // End namespace yaml.
+
+} // End namespace llvm.
#endif