X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FYAMLParser.cpp;h=b8b50dde5d68d3abb5c66029f7944bbcbf00ed8e;hb=d3198ce4bcc3ec64068d6508f2f08be97e6be83e;hp=29d938b3874bfcb2613db91ba1a448708b7bbce8;hpb=24b709529f30bf8cb4769eefbfedb4179349fd28;p=oota-llvm.git diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 29d938b3874..b8b50dde5d6 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -12,27 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/YAMLParser.h" - -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace yaml; enum UnicodeEncodingForm { - UEF_UTF32_LE, //< UTF-32 Little Endian - UEF_UTF32_BE, //< UTF-32 Big Endian - UEF_UTF16_LE, //< UTF-16 Little Endian - UEF_UTF16_BE, //< UTF-16 Big Endian - UEF_UTF8, //< UTF-8 or ascii. - UEF_Unknown //< Not a valid Unicode encoding. + UEF_UTF32_LE, ///< UTF-32 Little Endian + UEF_UTF32_BE, ///< UTF-32 Big Endian + UEF_UTF16_LE, ///< UTF-16 Little Endian + UEF_UTF16_BE, ///< UTF-16 Big Endian + UEF_UTF8, ///< UTF-8 or ascii. + UEF_Unknown ///< Not a valid Unicode encoding. }; /// EncodingInfo - Holds the encoding type and length of the byte order mark if @@ -97,6 +96,15 @@ static EncodingInfo getUnicodeEncoding(StringRef Input) { namespace llvm { namespace yaml { +/// Pin the vtables to this file. +void Node::anchor() {} +void NullNode::anchor() {} +void ScalarNode::anchor() {} +void KeyValueNode::anchor() {} +void MappingNode::anchor() {} +void SequenceNode::anchor() {} +void AliasNode::anchor() {} + /// Token - A single YAML token. struct Token : ilist_node { enum TokenKind { @@ -251,7 +259,8 @@ namespace yaml { /// @brief Scans YAML tokens from a MemoryBuffer. class Scanner { public: - Scanner(const StringRef Input, SourceMgr &SM); + Scanner(StringRef Input, SourceMgr &SM); + Scanner(MemoryBufferRef Buffer, SourceMgr &SM_); /// @brief Parse the next token and return it without popping it. Token &peekNext(); @@ -260,7 +269,7 @@ public: Token getNext(); void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, - ArrayRef Ranges = ArrayRef()) { + ArrayRef Ranges = None) { SM.PrintMessage(Loc, Kind, Message, Ranges); } @@ -285,6 +294,8 @@ public: } private: + void init(MemoryBufferRef Buffer); + StringRef currentInput() { return StringRef(Current, End - Current); } @@ -369,9 +380,6 @@ private: /// sequence of ns-uri-char. StringRef scan_ns_uri_char(); - /// @brief Scan ns-plain-one-line[133] starting at \a Cur. - StringRef scan_ns_plain_one_line(); - /// @brief Consume a minimal well-formed code unit subsequence starting at /// \a Cur. Return false if it is not the same Unicode scalar value as /// \a Expected. This updates \a Column. @@ -463,7 +471,7 @@ private: SourceMgr &SM; /// @brief The original input. - MemoryBuffer *InputBuffer; + MemoryBufferRef InputBuffer; /// @brief The current position of the scanner. StringRef::iterator Current; @@ -489,9 +497,6 @@ private: /// @brief Can the next token be the start of a simple key? bool IsSimpleKeyAllowed; - /// @brief Is the next token required to start a simple key? - bool IsSimpleKeyRequired; - /// @brief True if an error has occurred. bool Failed; @@ -696,20 +701,28 @@ std::string yaml::escape(StringRef Input) { return EscapedInput; } -Scanner::Scanner(StringRef Input, SourceMgr &sm) - : SM(sm) - , Indent(-1) - , Column(0) - , Line(0) - , FlowLevel(0) - , IsStartOfStream(true) - , IsSimpleKeyAllowed(true) - , IsSimpleKeyRequired(false) - , Failed(false) { - InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); - SM.AddNewSourceBuffer(InputBuffer, SMLoc()); - Current = InputBuffer->getBufferStart(); - End = InputBuffer->getBufferEnd(); +Scanner::Scanner(StringRef Input, SourceMgr &sm) : SM(sm) { + init(MemoryBufferRef(Input, "YAML")); +} + +Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_) : SM(SM_) { + init(Buffer); +} + +void Scanner::init(MemoryBufferRef Buffer) { + InputBuffer = Buffer; + Current = InputBuffer.getBufferStart(); + End = InputBuffer.getBufferEnd(); + Indent = -1; + Column = 0; + Line = 0; + FlowLevel = 0; + IsStartOfStream = true; + IsSimpleKeyAllowed = true; + Failed = false; + std::unique_ptr InputBufferOwner = + MemoryBuffer::getMemBuffer(Buffer); + SM.AddNewSourceBuffer(std::move(InputBufferOwner), SMLoc()); } Token &Scanner::peekNext() { @@ -853,42 +866,6 @@ StringRef Scanner::scan_ns_uri_char() { return StringRef(Start, Current - Start); } -StringRef Scanner::scan_ns_plain_one_line() { - StringRef::iterator start = Current; - // The first character must already be verified. - ++Current; - while (true) { - if (Current == End) { - break; - } else if (*Current == ':') { - // Check if the next character is a ns-char. - if (Current + 1 == End) - break; - StringRef::iterator i = skip_ns_char(Current + 1); - if (Current + 1 != i) { - Current = i; - Column += 2; // Consume both the ':' and ns-char. - } else - break; - } else if (*Current == '#') { - // Check if the previous character was a ns-char. - // The & 0x80 check is to check for the trailing byte of a utf-8 - if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) { - ++Current; - ++Column; - } else - break; - } else { - StringRef::iterator i = skip_nb_char(Current); - if (i == Current) - break; - Current = i; - ++Column; - } - } - return StringRef(start, Current - start); -} - bool Scanner::consume(uint32_t Expected) { if (Expected >= 0x80) report_fatal_error("Not dealing with this yet"); @@ -907,6 +884,7 @@ bool Scanner::consume(uint32_t Expected) { void Scanner::skip(uint32_t Distance) { Current += Distance; Column += Distance; + assert(Current <= End && "Skipped past the end"); } bool Scanner::isBlankOrBreak(StringRef::iterator Position) { @@ -1058,14 +1036,22 @@ bool Scanner::scanDirective() { Current = skip_while(&Scanner::skip_ns_char, Current); StringRef Name(NameStart, Current - NameStart); Current = skip_while(&Scanner::skip_s_white, Current); - + + Token T; if (Name == "YAML") { Current = skip_while(&Scanner::skip_ns_char, Current); - Token T; T.Kind = Token::TK_VersionDirective; T.Range = StringRef(Start, Current - Start); TokenQueue.push_back(T); return true; + } else if(Name == "TAG") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Current = skip_while(&Scanner::skip_s_white, Current); + Current = skip_while(&Scanner::skip_ns_char, Current); + T.Kind = Token::TK_TagDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; } return false; } @@ -1243,6 +1229,12 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { } } } + + if (Current == End) { + setError("Expected quote at end of scalar", Current); + return false; + } + skip(1); // Skip ending quote. Token T; T.Kind = Token::TK_Scalar; @@ -1526,8 +1518,10 @@ bool Scanner::fetchMoreTokens() { } Stream::Stream(StringRef Input, SourceMgr &SM) - : scanner(new Scanner(Input, SM)) - , CurrentDoc(0) {} + : scanner(new Scanner(Input, SM)), CurrentDoc() {} + +Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM) + : scanner(new Scanner(InputBuffer, SM)), CurrentDoc() {} Stream::~Stream() {} @@ -1542,10 +1536,6 @@ void Stream::printError(Node *N, const Twine &Msg) { , Ranges); } -void Stream::handleYAMLDirective(const Token &t) { - // TODO: Ensure version is 1.x. -} - document_iterator Stream::begin() { if (CurrentDoc) report_fatal_error("Can only iterate over the stream once"); @@ -1566,14 +1556,57 @@ void Stream::skip() { i->skip(); } -Node::Node(unsigned int Type, OwningPtr &D, StringRef A) - : Doc(D) - , TypeID(Type) - , Anchor(A) { +Node::Node(unsigned int Type, std::unique_ptr &D, StringRef A, + StringRef T) + : Doc(D), TypeID(Type), Anchor(A), Tag(T) { SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); SourceRange = SMRange(Start, Start); } +std::string Node::getVerbatimTag() const { + StringRef Raw = getRawTag(); + if (!Raw.empty() && Raw != "!") { + std::string Ret; + if (Raw.find_last_of('!') == 0) { + Ret = Doc->getTagMap().find("!")->second; + Ret += Raw.substr(1); + return Ret; + } else if (Raw.startswith("!!")) { + Ret = Doc->getTagMap().find("!!")->second; + Ret += Raw.substr(2); + return Ret; + } else { + StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); + std::map::const_iterator It = + Doc->getTagMap().find(TagHandle); + if (It != Doc->getTagMap().end()) + Ret = It->second; + else { + Token T; + T.Kind = Token::TK_Tag; + T.Range = TagHandle; + setError(Twine("Unknown tag handle ") + TagHandle, T); + } + Ret += Raw.substr(Raw.find_last_of('!') + 1); + return Ret; + } + } + + switch (getType()) { + case NK_Null: + return "tag:yaml.org,2002:null"; + case NK_Scalar: + // TODO: Tag resolution. + return "tag:yaml.org,2002:str"; + case NK_Mapping: + return "tag:yaml.org,2002:map"; + case NK_Sequence: + return "tag:yaml.org,2002:seq"; + } + + return ""; +} + Token &Node::peekNext() { return Doc->peekNext(); } @@ -1839,14 +1872,14 @@ Node *KeyValueNode::getValue() { void MappingNode::increment() { if (failed()) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } if (CurrentEntry) { CurrentEntry->skip(); if (Type == MT_Inline) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } } @@ -1859,13 +1892,13 @@ void MappingNode::increment() { case Token::TK_BlockEnd: getNext(); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError("Unexpected token. Expected Key or Block End", T); case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else { switch (T.Kind) { @@ -1878,14 +1911,14 @@ void MappingNode::increment() { case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError( "Unexpected token. Expected Key, Flow Entry, or Flow " "Mapping End." , T); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } } @@ -1893,7 +1926,7 @@ void MappingNode::increment() { void SequenceNode::increment() { if (failed()) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } if (CurrentEntry) @@ -1904,37 +1937,37 @@ void SequenceNode::increment() { case Token::TK_BlockEntry: getNext(); CurrentEntry = parseBlockNode(); - if (CurrentEntry == 0) { // An error occurred. + if (!CurrentEntry) { // An error occurred. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } break; case Token::TK_BlockEnd: getNext(); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError( "Unexpected token. Expected Block Entry or Block End." , T); case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else if (SeqType == ST_Indentless) { switch (T.Kind) { case Token::TK_BlockEntry: getNext(); CurrentEntry = parseBlockNode(); - if (CurrentEntry == 0) { // An error occurred. + if (!CurrentEntry) { // An error occurred. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } break; default: case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else if (SeqType == ST_Flow) { switch (T.Kind) { @@ -1948,7 +1981,7 @@ void SequenceNode::increment() { case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; case Token::TK_StreamEnd: case Token::TK_DocumentEnd: @@ -1956,13 +1989,13 @@ void SequenceNode::increment() { setError("Could not find closing ]!", T); // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: if (!WasPreviousTokenFlowEntry) { setError("Expected , between entries!", T); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; } // Otherwise it must be a flow entry. @@ -1976,7 +2009,11 @@ void SequenceNode::increment() { } } -Document::Document(Stream &S) : stream(S), Root(0) { +Document::Document(Stream &S) : stream(S), Root(nullptr) { + // Tag maps starts with two default mappings. + TagMap["!"] = "!"; + TagMap["!!"] = "tag:yaml.org,2002:"; + if (parseDirectives()) expectToken(Token::TK_DocumentStart); Token &T = peekNext(); @@ -2020,6 +2057,7 @@ Node *Document::parseBlockNode() { Token T = peekNext(); // Handle properties. Token AnchorInfo; + Token TagInfo; parse_property: switch (T.Kind) { case Token::TK_Alias: @@ -2028,13 +2066,17 @@ parse_property: case Token::TK_Anchor: if (AnchorInfo.Kind == Token::TK_Anchor) { setError("Already encountered an anchor for this node!", T); - return 0; + return nullptr; } AnchorInfo = getNext(); // Consume TK_Anchor. T = peekNext(); goto parse_property; case Token::TK_Tag: - getNext(); // Skip TK_Tag. + if (TagInfo.Kind == Token::TK_Tag) { + setError("Already encountered a tag for this node!", T); + return nullptr; + } + TagInfo = getNext(); // Consume TK_Tag. T = peekNext(); goto parse_property; default: @@ -2048,42 +2090,49 @@ parse_property: // Don't eat the TK_BlockEntry, SequenceNode needs it. return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Indentless); case Token::TK_BlockSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Block); case Token::TK_BlockMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Block); case Token::TK_FlowSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Flow); case Token::TK_FlowMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Flow); case Token::TK_Scalar: getNext(); return new (NodeAllocator) ScalarNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , T.Range); case Token::TK_Key: // Don't eat the TK_Key, KeyValueNode expects it. return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Inline); case Token::TK_DocumentStart: case Token::TK_DocumentEnd: @@ -2093,10 +2142,10 @@ parse_property: // !!null null. return new (NodeAllocator) NullNode(stream.CurrentDoc); case Token::TK_Error: - return 0; + return nullptr; } llvm_unreachable("Control flow shouldn't reach here."); - return 0; + return nullptr; } bool Document::parseDirectives() { @@ -2104,10 +2153,10 @@ bool Document::parseDirectives() { while (true) { Token T = peekNext(); if (T.Kind == Token::TK_TagDirective) { - handleTagDirective(getNext()); + parseTAGDirective(); isDirective = true; } else if (T.Kind == Token::TK_VersionDirective) { - stream.handleYAMLDirective(getNext()); + parseYAMLDirective(); isDirective = true; } else break; @@ -2115,6 +2164,21 @@ bool Document::parseDirectives() { return isDirective; } +void Document::parseYAMLDirective() { + getNext(); // Eat %YAML +} + +void Document::parseTAGDirective() { + Token Tag = getNext(); // %TAG + StringRef T = Tag.Range; + // Strip %TAG + T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); + std::size_t HandleEnd = T.find_first_of(" \t"); + StringRef TagHandle = T.substr(0, HandleEnd); + StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); + TagMap[TagHandle] = TagPrefix; +} + bool Document::expectToken(int TK) { Token T = getNext(); if (T.Kind != TK) {