-static void ParseCStringVector(std::vector<char *> &output,
- const char *input) {
- // Characters which will be treated as token separators:
- static const char *const delims = " \v\f\t\r\n";
-
- std::string work(input);
- // Skip past any delims at head of input string.
- size_t pos = work.find_first_not_of(delims);
- // If the string consists entirely of delims, then exit early.
- if (pos == std::string::npos) return;
- // Otherwise, jump forward to beginning of first word.
- work = work.substr(pos);
- // Find position of first delimiter.
- pos = work.find_first_of(delims);
-
- while (!work.empty() && pos != std::string::npos) {
- // Everything from 0 to POS is the next word to copy.
- output.push_back(strdup(work.substr(0,pos).c_str()));
- // Is there another word in the string?
- size_t nextpos = work.find_first_not_of(delims, pos + 1);
- if (nextpos != std::string::npos) {
- // Yes? Then remove delims from beginning ...
- work = work.substr(work.find_first_not_of(delims, pos + 1));
- // and find the end of the word.
- pos = work.find_first_of(delims);
- } else {
- // No? (Remainder of string is delims.) End the loop.
- work = "";
- pos = std::string::npos;
+/// * If an even number of backslashes is followed by a double quote, one
+/// backslash is output for every pair of backslashes, and the last double
+/// quote remains unconsumed. The double quote will later be interpreted as
+/// the start or end of a quoted string in the main loop outside of this
+/// function.
+///
+/// * If an odd number of backslashes is followed by a double quote, one
+/// backslash is output for every pair of backslashes, and a double quote is
+/// output for the last pair of backslash-double quote. The double quote is
+/// consumed in this case.
+///
+/// * Otherwise, backslashes are interpreted literally.
+static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) {
+  const size_t End = Src.size();
+
+  // The caller guarantees Src[I] is a backslash, so step over it without
+  // re-testing and then swallow the remainder of the run.
+  const size_t RunBegin = I;
+  ++I;
+  while (I != End && Src[I] == '\\')
+    ++I;
+  const size_t RunLength = I - RunBegin;
+
+  // A run that is not immediately followed by a double quote is copied through
+  // verbatim. Return the index of the last backslash so that the caller's own
+  // increment lands on the character after the run.
+  if (I == End || Src[I] != '"') {
+    Token.append(RunLength, '\\');
+    return I - 1;
+  }
+
+  // In front of a double quote every pair of backslashes collapses to one.
+  Token.append(RunLength / 2, '\\');
+
+  // A leftover unpaired backslash escapes the quote: emit the quote and report
+  // it as consumed.
+  if (RunLength % 2 != 0) {
+    Token.push_back('"');
+    return I;
+  }
+
+  // Even run: the quote stays unconsumed for the caller to interpret.
+  return I - 1;
+}
+
+/// Split \p Src into tokens and append them to \p NewArgv.
+///
+/// Tokens are separated by whitespace. A double quote switches between quoted
+/// and unquoted reading without ending the token, so whitespace inside quotes
+/// is kept as part of the token. Runs of backslashes are handed to
+/// parseBackslash(). Each finished token is copied through \p Saver before its
+/// pointer is stored.
+///
+/// \param Src      Text to tokenize.
+/// \param Saver    Used to obtain the C strings that are pushed to \p NewArgv.
+/// \param NewArgv  Receives one pointer per token, in input order.
+/// \param MarkEOLs When true, a nullptr entry is pushed for every '\n' found
+///                 outside quotes, plus one more after the last token.
+void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
+                                    SmallVectorImpl<const char *> &NewArgv,
+                                    bool MarkEOLs) {
+  SmallString<128> Token;
+
+  // This is a small state machine to consume characters until it reaches the
+  // end of the source string.
+  enum { INIT, UNQUOTED, QUOTED } State = INIT;
+  for (size_t I = 0, E = Src.size(); I != E; ++I) {
+    // INIT state indicates that the current input index is at the start of
+    // the string or between tokens.
+    if (State == INIT) {
+      if (isWhitespace(Src[I])) {
+        // Mark the end of lines in response files
+        if (MarkEOLs && Src[I] == '\n')
+          NewArgv.push_back(nullptr);
+        continue;
+      }
+      if (Src[I] == '"') {
+        State = QUOTED;
+        continue;
+      }
+      if (Src[I] == '\\') {
+        // parseBackslash returns the index of the last character it consumed;
+        // the loop increment then moves past it.
+        I = parseBackslash(Src, I, Token);
+        State = UNQUOTED;
+        continue;
+      }
+      Token.push_back(Src[I]);
+      State = UNQUOTED;
+      continue;
+    }
+
+    // UNQUOTED state means that it's reading a token not quoted by double
+    // quotes.
+    if (State == UNQUOTED) {
+      // Whitespace means the end of the token.
+      if (isWhitespace(Src[I])) {
+        NewArgv.push_back(Saver.SaveString(Token.c_str()));
+        Token.clear();
+        State = INIT;
+        // Mark the end of lines in response files
+        if (MarkEOLs && Src[I] == '\n')
+          NewArgv.push_back(nullptr);
+        continue;
+      }
+      if (Src[I] == '"') {
+        State = QUOTED;
+        continue;
+      }
+      if (Src[I] == '\\') {
+        I = parseBackslash(Src, I, Token);
+        continue;
+      }
+      Token.push_back(Src[I]);
+      continue;
+    }
+
+    // QUOTED state means that it's reading a token quoted by double quotes.
+    if (State == QUOTED) {
+      if (Src[I] == '"') {
+        // The closing quote does not end the token; more characters may
+        // follow it directly.
+        State = UNQUOTED;
+        continue;
+      }
+      if (Src[I] == '\\') {
+        I = parseBackslash(Src, I, Token);
+        continue;
+      }
+      Token.push_back(Src[I]);
+    }
+  }
+  // Append the token that was still being read when the input ended. Test the
+  // state rather than Token.empty(): an empty quoted argument ("") at the very
+  // end of the input leaves Token empty but must still produce an argument,
+  // exactly as it already does when whitespace follows it.
+  if (State != INIT)
+    NewArgv.push_back(Saver.SaveString(Token.c_str()));
+  // Mark the end of response files
+  if (MarkEOLs)
+    NewArgv.push_back(nullptr);
+}
+
+/// Read the file \p FName and tokenize its contents into \p NewArgv.
+///
+/// \returns false if the file cannot be read or if its UTF-16 contents cannot
+/// be converted to UTF-8; true once \p Tokenizer has been run on the text.
+static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
+                               TokenizerCallback Tokenizer,
+                               SmallVectorImpl<const char *> &NewArgv,
+                               bool MarkEOLs = false) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+      MemoryBuffer::getFile(FName);
+  if (!FileOrErr)
+    return false;
+  MemoryBuffer &File = *FileOrErr.get();
+
+  // Pick the text to tokenize: the raw buffer as-is, or a UTF-8 transcoding of
+  // it when the buffer starts with a UTF-16 byte order mark. Transcoded holds
+  // the converted text and must outlive Contents.
+  ArrayRef<char> RawBytes(File.getBufferStart(), File.getBufferEnd());
+  std::string Transcoded;
+  StringRef Contents;
+  if (!hasUTF16ByteOrderMark(RawBytes)) {
+    Contents = StringRef(File.getBufferStart(), File.getBufferSize());
+  } else if (convertUTF16ToUTF8String(RawBytes, Transcoded)) {
+    Contents = StringRef(Transcoded);
+  } else {
+    return false;
+  }
+
+  // Hand the text to the caller-selected tokenizer, which appends to NewArgv.
+  Tokenizer(Contents, Saver, NewArgv, MarkEOLs);
+  return true;
+}
+
+/// \brief Expand response files on a command line recursively using the given
+/// StringSaver and tokenization strategy.
+bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
+ SmallVectorImpl<const char *> &Argv,
+ bool MarkEOLs) {
+ unsigned RspFiles = 0;
+ bool AllExpanded = true;
+
+ // Don't cache Argv.size() because it can change.
+ for (unsigned I = 0; I != Argv.size();) {
+ const char *Arg = Argv[I];
+ // Check if it is an EOL marker
+ if (Arg == nullptr) {
+ ++I;
+ continue;
+ }
+ if (Arg[0] != '@') {
+ ++I;
+ continue;