X-Git-Url: http://demsky.eecs.uci.edu/git/?a=blobdiff_plain;f=lib%2FSupport%2FYAMLParser.cpp;h=3be02ee9fb98639b4ddf24e626b408d2fd7220c0;hb=c00ae934322b82493ef3fe429c3373028e7fe97d;hp=330519f3019d31897ec8e12ed971a199c8e3a827;hpb=a95b4ebf3c17f0069b8cea3841d66395eaf6d4e1;p=oota-llvm.git diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 330519f3019..3be02ee9fb9 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -12,27 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/YAMLParser.h" - -#include "llvm/ADT/ilist.h" -#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace yaml; enum UnicodeEncodingForm { - UEF_UTF32_LE, //< UTF-32 Little Endian - UEF_UTF32_BE, //< UTF-32 Big Endian - UEF_UTF16_LE, //< UTF-16 Little Endian - UEF_UTF16_BE, //< UTF-16 Big Endian - UEF_UTF8, //< UTF-8 or ascii. - UEF_Unknown //< Not a valid Unicode encoding. + UEF_UTF32_LE, ///< UTF-32 Little Endian + UEF_UTF32_BE, ///< UTF-32 Big Endian + UEF_UTF16_LE, ///< UTF-16 Little Endian + UEF_UTF16_BE, ///< UTF-16 Big Endian + UEF_UTF8, ///< UTF-8 or ascii. + UEF_Unknown ///< Not a valid Unicode encoding. }; /// EncodingInfo - Holds the encoding type and length of the byte order mark if @@ -97,6 +96,15 @@ static EncodingInfo getUnicodeEncoding(StringRef Input) { namespace llvm { namespace yaml { +/// Pin the vtables to this file. +void Node::anchor() {} +void NullNode::anchor() {} +void ScalarNode::anchor() {} +void KeyValueNode::anchor() {} +void MappingNode::anchor() {} +void SequenceNode::anchor() {} +void AliasNode::anchor() {} + /// Token - A single YAML token. struct Token : ilist_node { enum TokenKind { @@ -252,6 +260,7 @@ namespace yaml { class Scanner { public: Scanner(const StringRef Input, SourceMgr &SM); + Scanner(MemoryBuffer *Buffer, SourceMgr &SM_); /// @brief Parse the next token and return it without popping it. Token &peekNext(); @@ -260,7 +269,7 @@ public: Token getNext(); void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, - ArrayRef Ranges = ArrayRef()) { + ArrayRef Ranges = None) { SM.PrintMessage(Loc, Kind, Message, Ranges); } @@ -369,9 +378,6 @@ private: /// sequence of ns-uri-char. StringRef scan_ns_uri_char(); - /// @brief Scan ns-plain-one-line[133] starting at \a Cur. - StringRef scan_ns_plain_one_line(); - /// @brief Consume a minimal well-formed code unit subsequence starting at /// \a Cur. Return false if it is not the same Unicode scalar value as /// \a Expected. This updates \a Column. @@ -489,9 +495,6 @@ private: /// @brief Can the next token be the start of a simple key? bool IsSimpleKeyAllowed; - /// @brief Is the next token required to start a simple key? - bool IsSimpleKeyRequired; - /// @brief True if an error has occurred. bool Failed; @@ -658,7 +661,7 @@ std::string yaml::escape(StringRef Input) { EscapedInput += "\\r"; else if (*i == 0x1B) EscapedInput += "\\e"; - else if (*i >= 0 && *i < 0x20) { // Control characters not handled above. + else if ((unsigned char)*i < 0x20) { // Control characters not handled above. std::string HexStr = utohexstr(*i); EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr; } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence. @@ -704,7 +707,6 @@ Scanner::Scanner(StringRef Input, SourceMgr &sm) , FlowLevel(0) , IsStartOfStream(true) , IsSimpleKeyAllowed(true) - , IsSimpleKeyRequired(false) , Failed(false) { InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML"); SM.AddNewSourceBuffer(InputBuffer, SMLoc()); @@ -712,6 +714,21 @@ Scanner::Scanner(StringRef Input, SourceMgr &sm) End = InputBuffer->getBufferEnd(); } +Scanner::Scanner(MemoryBuffer *Buffer, SourceMgr &SM_) + : SM(SM_) + , InputBuffer(Buffer) + , Current(InputBuffer->getBufferStart()) + , End(InputBuffer->getBufferEnd()) + , Indent(-1) + , Column(0) + , Line(0) + , FlowLevel(0) + , IsStartOfStream(true) + , IsSimpleKeyAllowed(true) + , Failed(false) { + SM.AddNewSourceBuffer(InputBuffer, SMLoc()); +} + Token &Scanner::peekNext() { // If the current token is a possible simple key, keep parsing until we // can confirm. @@ -755,6 +772,8 @@ Token Scanner::getNext() { } StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { + if (Position == End) + return Position; // Check 7 bit c-printable - b-char. if ( *Position == 0x09 || (*Position >= 0x20 && *Position <= 0x7E)) @@ -778,6 +797,8 @@ StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) { } StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) { + if (Position == End) + return Position; if (*Position == 0x0D) { if (Position + 1 != End && *(Position + 1) == 0x0A) return Position + 2; @@ -849,42 +870,6 @@ StringRef Scanner::scan_ns_uri_char() { return StringRef(Start, Current - Start); } -StringRef Scanner::scan_ns_plain_one_line() { - StringRef::iterator start = Current; - // The first character must already be verified. - ++Current; - while (true) { - if (Current == End) { - break; - } else if (*Current == ':') { - // Check if the next character is a ns-char. - if (Current + 1 == End) - break; - StringRef::iterator i = skip_ns_char(Current + 1); - if (Current + 1 != i) { - Current = i; - Column += 2; // Consume both the ':' and ns-char. - } else - break; - } else if (*Current == '#') { - // Check if the previous character was a ns-char. - // The & 0x80 check is to check for the trailing byte of a utf-8 - if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) { - ++Current; - ++Column; - } else - break; - } else { - StringRef::iterator i = skip_nb_char(Current); - if (i == Current) - break; - Current = i; - ++Column; - } - } - return StringRef(start, Current - start); -} - bool Scanner::consume(uint32_t Expected) { if (Expected >= 0x80) report_fatal_error("Not dealing with this yet"); @@ -903,6 +888,7 @@ bool Scanner::consume(uint32_t Expected) { void Scanner::skip(uint32_t Distance) { Current += Distance; Column += Distance; + assert(Current <= End && "Skipped past the end"); } bool Scanner::isBlankOrBreak(StringRef::iterator Position) { @@ -1054,14 +1040,22 @@ bool Scanner::scanDirective() { Current = skip_while(&Scanner::skip_ns_char, Current); StringRef Name(NameStart, Current - NameStart); Current = skip_while(&Scanner::skip_s_white, Current); - + + Token T; if (Name == "YAML") { Current = skip_while(&Scanner::skip_ns_char, Current); - Token T; T.Kind = Token::TK_VersionDirective; T.Range = StringRef(Start, Current - Start); TokenQueue.push_back(T); return true; + } else if(Name == "TAG") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Current = skip_while(&Scanner::skip_s_white, Current); + Current = skip_while(&Scanner::skip_ns_char, Current); + T.Kind = Token::TK_TagDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; } return false; } @@ -1211,7 +1205,9 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { ++Current; // Repeat until the previous character was not a '\' or was an escaped // backslash. - } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current)); + } while ( Current != End + && *(Current - 1) == '\\' + && wasEscaped(Start + 1, Current)); } else { skip(1); while (true) { @@ -1237,6 +1233,12 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { } } } + + if (Current == End) { + setError("Expected quote at end of scalar", Current); + return false; + } + skip(1); // Skip ending quote. Token T; T.Kind = Token::TK_Scalar; @@ -1520,8 +1522,10 @@ bool Scanner::fetchMoreTokens() { } Stream::Stream(StringRef Input, SourceMgr &SM) - : scanner(new Scanner(Input, SM)) - , CurrentDoc(0) {} + : scanner(new Scanner(Input, SM)), CurrentDoc() {} + +Stream::Stream(MemoryBuffer *InputBuffer, SourceMgr &SM) + : scanner(new Scanner(InputBuffer, SM)), CurrentDoc() {} Stream::~Stream() {} @@ -1536,10 +1540,6 @@ void Stream::printError(Node *N, const Twine &Msg) { , Ranges); } -void Stream::handleYAMLDirective(const Token &t) { - // TODO: Ensure version is 1.x. -} - document_iterator Stream::begin() { if (CurrentDoc) report_fatal_error("Can only iterate over the stream once"); @@ -1560,14 +1560,57 @@ void Stream::skip() { i->skip(); } -Node::Node(unsigned int Type, OwningPtr &D, StringRef A) - : Doc(D) - , TypeID(Type) - , Anchor(A) { +Node::Node(unsigned int Type, std::unique_ptr &D, StringRef A, + StringRef T) + : Doc(D), TypeID(Type), Anchor(A), Tag(T) { SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); SourceRange = SMRange(Start, Start); } +std::string Node::getVerbatimTag() const { + StringRef Raw = getRawTag(); + if (!Raw.empty() && Raw != "!") { + std::string Ret; + if (Raw.find_last_of('!') == 0) { + Ret = Doc->getTagMap().find("!")->second; + Ret += Raw.substr(1); + return std::move(Ret); + } else if (Raw.startswith("!!")) { + Ret = Doc->getTagMap().find("!!")->second; + Ret += Raw.substr(2); + return std::move(Ret); + } else { + StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); + std::map::const_iterator It = + Doc->getTagMap().find(TagHandle); + if (It != Doc->getTagMap().end()) + Ret = It->second; + else { + Token T; + T.Kind = Token::TK_Tag; + T.Range = TagHandle; + setError(Twine("Unknown tag handle ") + TagHandle, T); + } + Ret += Raw.substr(Raw.find_last_of('!') + 1); + return std::move(Ret); + } + } + + switch (getType()) { + case NK_Null: + return "tag:yaml.org,2002:null"; + case NK_Scalar: + // TODO: Tag resolution. + return "tag:yaml.org,2002:str"; + case NK_Mapping: + return "tag:yaml.org,2002:map"; + case NK_Sequence: + return "tag:yaml.org,2002:seq"; + } + + return ""; +} + Token &Node::peekNext() { return Doc->peekNext(); } @@ -1624,9 +1667,7 @@ StringRef ScalarNode::getValue(SmallVectorImpl &Storage) const { return UnquotedValue; } // Plain or block. - size_t trimtrail = Value.rfind(' '); - return Value.drop_back( - trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail); + return Value.rtrim(" "); } StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue @@ -1733,7 +1774,9 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue // TODO: Report error. break; unsigned int UnicodeScalarValue; - UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue); + if (UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; encodeUTF8(UnicodeScalarValue, Storage); UnquotedValue = UnquotedValue.substr(2); break; @@ -1743,7 +1786,9 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue // TODO: Report error. break; unsigned int UnicodeScalarValue; - UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue); + if (UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; encodeUTF8(UnicodeScalarValue, Storage); UnquotedValue = UnquotedValue.substr(4); break; @@ -1753,7 +1798,9 @@ StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue // TODO: Report error. break; unsigned int UnicodeScalarValue; - UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue); + if (UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue)) + // TODO: Report error. + UnicodeScalarValue = 0xFFFD; encodeUTF8(UnicodeScalarValue, Storage); UnquotedValue = UnquotedValue.substr(8); break; @@ -1829,14 +1876,14 @@ Node *KeyValueNode::getValue() { void MappingNode::increment() { if (failed()) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } if (CurrentEntry) { CurrentEntry->skip(); if (Type == MT_Inline) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } } @@ -1849,13 +1896,13 @@ void MappingNode::increment() { case Token::TK_BlockEnd: getNext(); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError("Unexpected token. Expected Key or Block End", T); case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else { switch (T.Kind) { @@ -1868,14 +1915,14 @@ void MappingNode::increment() { case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError( "Unexpected token. Expected Key, Flow Entry, or Flow " "Mapping End." , T); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } } @@ -1883,7 +1930,7 @@ void MappingNode::increment() { void SequenceNode::increment() { if (failed()) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } if (CurrentEntry) @@ -1894,37 +1941,37 @@ void SequenceNode::increment() { case Token::TK_BlockEntry: getNext(); CurrentEntry = parseBlockNode(); - if (CurrentEntry == 0) { // An error occurred. + if (!CurrentEntry) { // An error occurred. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } break; case Token::TK_BlockEnd: getNext(); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError( "Unexpected token. Expected Block Entry or Block End." , T); case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else if (SeqType == ST_Indentless) { switch (T.Kind) { case Token::TK_BlockEntry: getNext(); CurrentEntry = parseBlockNode(); - if (CurrentEntry == 0) { // An error occurred. + if (!CurrentEntry) { // An error occurred. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } break; default: case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else if (SeqType == ST_Flow) { switch (T.Kind) { @@ -1938,7 +1985,7 @@ void SequenceNode::increment() { case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; case Token::TK_StreamEnd: case Token::TK_DocumentEnd: @@ -1946,13 +1993,13 @@ void SequenceNode::increment() { setError("Could not find closing ]!", T); // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: if (!WasPreviousTokenFlowEntry) { setError("Expected , between entries!", T); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; } // Otherwise it must be a flow entry. @@ -1966,7 +2013,11 @@ void SequenceNode::increment() { } } -Document::Document(Stream &S) : stream(S), Root(0) { +Document::Document(Stream &S) : stream(S), Root(nullptr) { + // Tag maps starts with two default mappings. + TagMap["!"] = "!"; + TagMap["!!"] = "tag:yaml.org,2002:"; + if (parseDirectives()) expectToken(Token::TK_DocumentStart); Token &T = peekNext(); @@ -2010,6 +2061,7 @@ Node *Document::parseBlockNode() { Token T = peekNext(); // Handle properties. Token AnchorInfo; + Token TagInfo; parse_property: switch (T.Kind) { case Token::TK_Alias: @@ -2018,13 +2070,17 @@ parse_property: case Token::TK_Anchor: if (AnchorInfo.Kind == Token::TK_Anchor) { setError("Already encountered an anchor for this node!", T); - return 0; + return nullptr; } AnchorInfo = getNext(); // Consume TK_Anchor. T = peekNext(); goto parse_property; case Token::TK_Tag: - getNext(); // Skip TK_Tag. + if (TagInfo.Kind == Token::TK_Tag) { + setError("Already encountered a tag for this node!", T); + return nullptr; + } + TagInfo = getNext(); // Consume TK_Tag. T = peekNext(); goto parse_property; default: @@ -2038,42 +2094,49 @@ parse_property: // Don't eat the TK_BlockEntry, SequenceNode needs it. return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Indentless); case Token::TK_BlockSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Block); case Token::TK_BlockMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Block); case Token::TK_FlowSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Flow); case Token::TK_FlowMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Flow); case Token::TK_Scalar: getNext(); return new (NodeAllocator) ScalarNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , T.Range); case Token::TK_Key: // Don't eat the TK_Key, KeyValueNode expects it. return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Inline); case Token::TK_DocumentStart: case Token::TK_DocumentEnd: @@ -2083,10 +2146,10 @@ parse_property: // !!null null. return new (NodeAllocator) NullNode(stream.CurrentDoc); case Token::TK_Error: - return 0; + return nullptr; } llvm_unreachable("Control flow shouldn't reach here."); - return 0; + return nullptr; } bool Document::parseDirectives() { @@ -2094,10 +2157,10 @@ bool Document::parseDirectives() { while (true) { Token T = peekNext(); if (T.Kind == Token::TK_TagDirective) { - handleTagDirective(getNext()); + parseTAGDirective(); isDirective = true; } else if (T.Kind == Token::TK_VersionDirective) { - stream.handleYAMLDirective(getNext()); + parseYAMLDirective(); isDirective = true; } else break; @@ -2105,6 +2168,21 @@ bool Document::parseDirectives() { return isDirective; } +void Document::parseYAMLDirective() { + getNext(); // Eat %YAML +} + +void Document::parseTAGDirective() { + Token Tag = getNext(); // %TAG + StringRef T = Tag.Range; + // Strip %TAG + T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); + std::size_t HandleEnd = T.find_first_of(" \t"); + StringRef TagHandle = T.substr(0, HandleEnd); + StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); + TagMap[TagHandle] = TagPrefix; +} + bool Document::expectToken(int TK) { Token T = getNext(); if (T.Kind != TK) { @@ -2113,5 +2191,3 @@ bool Document::expectToken(int TK) { } return true; } - -OwningPtr document_iterator::NullDoc;