From 44a4cfb63d87dc0ba778982a1796673ca1513e90 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Fri, 18 Oct 2013 22:38:04 +0000 Subject: [PATCH] [Support][YAML] Add support for accessing tags and tag handle substitution. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193004 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Support/YAMLParser.h | 73 ++++++++++++--------- lib/Support/YAMLParser.cpp | 102 ++++++++++++++++++++++++++---- test/YAMLParser/spec-02-24.data | 7 +- test/YAMLParser/spec-07-04.data | 4 +- test/YAMLParser/yaml.data | 8 ++- utils/yaml-bench/YAMLBench.cpp | 22 +++++-- 6 files changed, 169 insertions(+), 47 deletions(-) diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h index 338bb4b6f2b..0e780bab121 100644 --- a/include/llvm/Support/YAMLParser.h +++ b/include/llvm/Support/YAMLParser.h @@ -43,6 +43,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/SMLoc.h" + +#include #include #include @@ -99,9 +101,6 @@ private: OwningPtr CurrentDoc; friend class Document; - - /// @brief Validate a %YAML x.x directive. - void handleYAMLDirective(const Token &); }; /// @brief Abstract base class for all Nodes. @@ -116,12 +115,21 @@ public: NK_Alias }; - Node(unsigned int Type, OwningPtr&, StringRef Anchor); + Node(unsigned int Type, OwningPtr &, StringRef Anchor, + StringRef Tag); /// @brief Get the value of the anchor attached to this node. If it does not /// have one, getAnchor().size() will be 0. StringRef getAnchor() const { return Anchor; } + /// \brief Get the tag as it was written in the document. This does not + /// perform tag resolution. + StringRef getRawTag() const { return Tag; } + + /// \brief Get the verbatim tag for a given Node. This performs tag resolution + /// and substitution. 
+ std::string getVerbatimTag() const; + SMRange getSourceRange() const { return SourceRange; } void setSourceRange(SMRange SR) { SourceRange = SR; } @@ -158,6 +166,8 @@ protected: private: unsigned int TypeID; StringRef Anchor; + /// \brief The tag as typed in the document. + StringRef Tag; }; /// @brief A null value. @@ -166,7 +176,8 @@ private: /// !!null null class NullNode : public Node { public: - NullNode(OwningPtr &D) : Node(NK_Null, D, StringRef()) {} + NullNode(OwningPtr &D) + : Node(NK_Null, D, StringRef(), StringRef()) {} static inline bool classof(const Node *N) { return N->getType() == NK_Null; @@ -180,9 +191,9 @@ public: /// Adena class ScalarNode : public Node { public: - ScalarNode(OwningPtr &D, StringRef Anchor, StringRef Val) - : Node(NK_Scalar, D, Anchor) - , Value(Val) { + ScalarNode(OwningPtr &D, StringRef Anchor, StringRef Tag, + StringRef Val) + : Node(NK_Scalar, D, Anchor, Tag), Value(Val) { SMLoc Start = SMLoc::getFromPointer(Val.begin()); SMLoc End = SMLoc::getFromPointer(Val.end()); SourceRange = SMRange(Start, End); @@ -222,7 +233,7 @@ private: class KeyValueNode : public Node { public: KeyValueNode(OwningPtr &D) - : Node(NK_KeyValue, D, StringRef()) + : Node(NK_KeyValue, D, StringRef(), StringRef()) , Key(0) , Value(0) {} @@ -338,13 +349,10 @@ public: MT_Inline ///< An inline mapping node is used for "[key: value]". 
}; - MappingNode(OwningPtr &D, StringRef Anchor, MappingType MT) - : Node(NK_Mapping, D, Anchor) - , Type(MT) - , IsAtBeginning(true) - , IsAtEnd(false) - , CurrentEntry(0) - {} + MappingNode(OwningPtr &D, StringRef Anchor, StringRef Tag, + MappingType MT) + : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true), + IsAtEnd(false), CurrentEntry(0) {} friend class basic_collection_iterator; typedef basic_collection_iterator iterator; @@ -397,14 +405,12 @@ public: ST_Indentless }; - SequenceNode(OwningPtr &D, StringRef Anchor, SequenceType ST) - : Node(NK_Sequence, D, Anchor) - , SeqType(ST) - , IsAtBeginning(true) - , IsAtEnd(false) - , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','. - , CurrentEntry(0) - {} + SequenceNode(OwningPtr &D, StringRef Anchor, StringRef Tag, + SequenceType ST) + : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true), + IsAtEnd(false), + WasPreviousTokenFlowEntry(true), // Start with an imaginary ','. + CurrentEntry(0) {} friend class basic_collection_iterator; typedef basic_collection_iterator iterator; @@ -442,7 +448,7 @@ private: class AliasNode : public Node { public: AliasNode(OwningPtr &D, StringRef Val) - : Node(NK_Alias, D, StringRef()), Name(Val) {} + : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {} StringRef getName() const { return Name; } Node *getTarget(); @@ -475,6 +481,10 @@ public: return Root = parseBlockNode(); } + const std::map &getTagMap() const { + return TagMap; + } + private: friend class Node; friend class document_iterator; @@ -490,18 +500,23 @@ private: /// document. Node *Root; + /// \brief Maps tag prefixes to their expansion. + std::map TagMap; + Token &peekNext(); Token getNext(); void setError(const Twine &Message, Token &Location) const; bool failed() const; - void handleTagDirective(const Token &Tag) { - // TODO: Track tags. - } - /// @brief Parse %BLAH directives and return true if any were encountered. 
bool parseDirectives(); + /// \brief Parse %YAML + void parseYAMLDirective(); + + /// \brief Parse %TAG + void parseTAGDirective(); + /// @brief Consume the next token and error if it is not \a TK. bool expectToken(int TK); }; diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 213f5e1568d..79df616e202 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -1070,14 +1070,22 @@ bool Scanner::scanDirective() { Current = skip_while(&Scanner::skip_ns_char, Current); StringRef Name(NameStart, Current - NameStart); Current = skip_while(&Scanner::skip_s_white, Current); - + + Token T; if (Name == "YAML") { Current = skip_while(&Scanner::skip_ns_char, Current); - Token T; T.Kind = Token::TK_VersionDirective; T.Range = StringRef(Start, Current - Start); TokenQueue.push_back(T); return true; + } else if(Name == "TAG") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Current = skip_while(&Scanner::skip_s_white, Current); + Current = skip_while(&Scanner::skip_ns_char, Current); + T.Kind = Token::TK_TagDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; } return false; } @@ -1564,10 +1572,6 @@ void Stream::printError(Node *N, const Twine &Msg) { , Ranges); } -void Stream::handleYAMLDirective(const Token &t) { - // TODO: Ensure version is 1.x. 
-} - document_iterator Stream::begin() { if (CurrentDoc) report_fatal_error("Can only iterate over the stream once"); @@ -1588,14 +1592,59 @@ void Stream::skip() { i->skip(); } -Node::Node(unsigned int Type, OwningPtr &D, StringRef A) +Node::Node(unsigned int Type, OwningPtr &D, StringRef A, StringRef T) : Doc(D) , TypeID(Type) - , Anchor(A) { + , Anchor(A) + , Tag(T) { SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); SourceRange = SMRange(Start, Start); } +std::string Node::getVerbatimTag() const { + StringRef Raw = getRawTag(); + if (!Raw.empty() && Raw != "!") { + std::string Ret; + if (Raw.find_last_of('!') == 0) { + Ret = Doc->getTagMap().find("!")->second; + Ret += Raw.substr(1); + return std::move(Ret); + } else if (Raw.startswith("!!")) { + Ret = Doc->getTagMap().find("!!")->second; + Ret += Raw.substr(2); + return std::move(Ret); + } else { + StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); + std::map::const_iterator It = + Doc->getTagMap().find(TagHandle); + if (It != Doc->getTagMap().end()) + Ret = It->second; + else { + Token T; + T.Kind = Token::TK_Tag; + T.Range = TagHandle; + setError(Twine("Unknown tag handle ") + TagHandle, T); + } + Ret += Raw.substr(Raw.find_last_of('!') + 1); + return std::move(Ret); + } + } + + switch (getType()) { + case NK_Null: + return "tag:yaml.org,2002:null"; + case NK_Scalar: + // TODO: Tag resolution. + return "tag:yaml.org,2002:str"; + case NK_Mapping: + return "tag:yaml.org,2002:map"; + case NK_Sequence: + return "tag:yaml.org,2002:seq"; + } + + return ""; +} + Token &Node::peekNext() { return Doc->peekNext(); } @@ -1999,6 +2048,10 @@ void SequenceNode::increment() { } Document::Document(Stream &S) : stream(S), Root(0) { + // The tag map starts with two default mappings. 
+ TagMap["!"] = "!"; + TagMap["!!"] = "tag:yaml.org,2002:"; + if (parseDirectives()) expectToken(Token::TK_DocumentStart); Token &T = peekNext(); @@ -2042,6 +2095,7 @@ Node *Document::parseBlockNode() { Token T = peekNext(); // Handle properties. Token AnchorInfo; + Token TagInfo; parse_property: switch (T.Kind) { case Token::TK_Alias: @@ -2056,7 +2110,11 @@ parse_property: T = peekNext(); goto parse_property; case Token::TK_Tag: - getNext(); // Skip TK_Tag. + if (TagInfo.Kind == Token::TK_Tag) { + setError("Already encountered a tag for this node!", T); + return 0; + } + TagInfo = getNext(); // Consume TK_Tag. T = peekNext(); goto parse_property; default: @@ -2070,42 +2128,49 @@ parse_property: // Don't eat the TK_BlockEntry, SequenceNode needs it. return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Indentless); case Token::TK_BlockSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Block); case Token::TK_BlockMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Block); case Token::TK_FlowSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Flow); case Token::TK_FlowMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Flow); case Token::TK_Scalar: getNext(); return new (NodeAllocator) ScalarNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , T.Range); case Token::TK_Key: // Don't eat the TK_Key, KeyValueNode expects it. 
return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Inline); case Token::TK_DocumentStart: case Token::TK_DocumentEnd: @@ -2126,10 +2191,10 @@ bool Document::parseDirectives() { while (true) { Token T = peekNext(); if (T.Kind == Token::TK_TagDirective) { - handleTagDirective(getNext()); + parseTAGDirective(); isDirective = true; } else if (T.Kind == Token::TK_VersionDirective) { - stream.handleYAMLDirective(getNext()); + parseYAMLDirective(); isDirective = true; } else break; @@ -2137,6 +2202,21 @@ bool Document::parseDirectives() { return isDirective; } +void Document::parseYAMLDirective() { + getNext(); // Eat %YAML +} + +void Document::parseTAGDirective() { + Token Tag = getNext(); // %TAG + StringRef T = Tag.Range; + // Strip %TAG + T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); + std::size_t HandleEnd = T.find_first_of(" \t"); + StringRef TagHandle = T.substr(0, HandleEnd); + StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); + TagMap[TagHandle] = TagPrefix; +} + bool Document::expectToken(int TK) { Token T = getNext(); if (T.Kind != TK) { diff --git a/test/YAMLParser/spec-02-24.data b/test/YAMLParser/spec-02-24.data index 01ca7f5d122..56b25cbbd80 100644 --- a/test/YAMLParser/spec-02-24.data +++ b/test/YAMLParser/spec-02-24.data @@ -1,4 +1,4 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s %TAG ! tag:clarkevans.com,2002: --- !shape @@ -14,3 +14,8 @@ start: *ORIGIN color: 0xFFEEBB text: Pretty vector drawing. + +#CHECK: ! +#CHECK: ! +#CHECK: ! +#CHECK: ! diff --git a/test/YAMLParser/spec-07-04.data b/test/YAMLParser/spec-07-04.data index beba7d06ecf..2c8b2ec6959 100644 --- a/test/YAMLParser/spec-07-04.data +++ b/test/YAMLParser/spec-07-04.data @@ -1,5 +1,7 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s %TAG !yaml! 
tag:yaml.org,2002: --- !yaml!str "foo" + +#CHECK: !!str "foo" diff --git a/test/YAMLParser/yaml.data b/test/YAMLParser/yaml.data index 3ce5e4b73e2..4f9b294f456 100644 --- a/test/YAMLParser/yaml.data +++ b/test/YAMLParser/yaml.data @@ -1,5 +1,11 @@ -# RUN: yaml-bench -canonical %s +# RUN: yaml-bench -canonical %s | FileCheck %s - !!yaml '!' - !!yaml '&' - !!yaml '*' + +# CHECK: !!seq [ +# CHECK: !!yaml "!", +# CHECK: !!yaml "&", +# CHECK: !!yaml "*", +# CHECK: ] diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp index eef4a725a1a..f20a4ccc819 100644 --- a/utils/yaml-bench/YAMLBench.cpp +++ b/utils/yaml-bench/YAMLBench.cpp @@ -63,6 +63,20 @@ static raw_ostream &operator <<(raw_ostream &os, const indent &in) { return os; } +/// \brief Pretty print a tag by replacing tag:yaml.org,2002: with !!. +static std::string prettyTag(yaml::Node *N) { + std::string Tag = N->getVerbatimTag(); + if (StringRef(Tag).startswith("tag:yaml.org,2002:")) { + std::string Ret = "!!"; + Ret += StringRef(Tag).substr(18); + return std::move(Ret); + } + std::string Ret = "!<"; + Ret += Tag; + Ret += ">"; + return Ret; +} + static void dumpNode( yaml::Node *n , unsigned Indent = 0 , bool SuppressFirstIndent = false) { @@ -76,9 +90,9 @@ static void dumpNode( yaml::Node *n if (yaml::ScalarNode *sn = dyn_cast(n)) { SmallString<32> Storage; StringRef Val = sn->getValue(Storage); - outs() << "!!str \"" << yaml::escape(Val) << "\""; + outs() << prettyTag(n) << " \"" << yaml::escape(Val) << "\""; } else if (yaml::SequenceNode *sn = dyn_cast(n)) { - outs() << "!!seq [\n"; + outs() << prettyTag(n) << " [\n"; ++Indent; for (yaml::SequenceNode::iterator i = sn->begin(), e = sn->end(); i != e; ++i) { @@ -88,7 +102,7 @@ static void dumpNode( yaml::Node *n --Indent; outs() << indent(Indent) << "]"; } else if (yaml::MappingNode *mn = dyn_cast(n)) { - outs() << "!!map {\n"; + outs() << prettyTag(n) << " {\n"; ++Indent; for (yaml::MappingNode::iterator i = mn->begin(), e = mn->end(); i 
!= e; ++i) { @@ -104,7 +118,7 @@ static void dumpNode( yaml::Node *n } else if (yaml::AliasNode *an = dyn_cast(n)){ outs() << "*" << an->getName(); } else if (dyn_cast(n)) { - outs() << "!!null null"; + outs() << prettyTag(n) << " null"; } } -- 2.34.1