2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_GEN_STRING_H
18 #error This file may only be included from folly/gen/String.h
21 #include "folly/Conv.h"
22 #include "folly/String.h"
23 #include "folly/io/IOBuf.h"
// splitPrefix: looks for the first occurrence of `delimiter` in `in` (memchr
// over in.data() / in.size()). The visible lines then assign the bytes before
// the match to `prefix` and move the start of `in` to just past the match.
// Both `in` and `prefix` are taken by reference and modified.
// NOTE(review): the null check on `p`, the return statements and the closing
// brace are not part of this listing -- confirm what happens to `prefix` and
// `in` on the not-found path against the complete file.
29 inline bool splitPrefix(StringPiece& in, StringPiece& prefix, char delimiter) {
// memchr returns void*; cast back to const char* so it can be used as a range
// endpoint below.
30 auto p = static_cast<const char*>(memchr(in.data(), delimiter, in.size()));
// prefix <- [in.data(), p): everything before the delimiter.
32 prefix.assign(in.data(), p);
// in <- [p + 1, in.end()): the delimiter byte itself is dropped.
33 in.assign(p + 1, in.end());
// ch: reinterprets a pointer to unsigned char as a pointer to const char. No
// bytes are copied; the returned pointer addresses the same memory as `p`.
// It is used in this file to build a StringPiece from buffer->data().
// NOTE(review): the closing brace is not part of this listing.
40 inline const char* ch(const unsigned char* p) {
41 return reinterpret_cast<const char*>(p);
// StringResplitter: operator that stores a single-character delimiter. Its
// nested Generator wraps a source and passes delimiter-terminated StringPiece
// values to a body, keeping an unfinished line in an IOBuf between pieces
// delivered by the source.
// NOTE(review): this listing omits lines (access specifiers, member
// declarations, some statements and closing braces). The comments below
// describe only the lines that are visible.
44 class StringResplitter : public Operator<StringResplitter> {
47 explicit StringResplitter(char delimiter) : delimiter_(delimiter) { }
// Generator: GenImpl whose value type is StringPiece, parameterised on the
// wrapped Source.
49 template <class Source>
50 class Generator : public GenImpl<StringPiece, Generator<Source>> {
// The source is taken by value and moved into source_.
54 Generator(Source source, char delimiter)
55 : source_(std::move(source)), delimiter_(delimiter) { }
// apply: runs source_ with the lambda `fn` below; `fn` calls `body` with
// StringPiece values and checks body's boolean result.
58 bool apply(Body&& body) const {
// Starts out null; created further down when an unfinished line has to be
// kept. Captured by reference in `fn`, so it persists across calls to `fn`.
59 std::unique_ptr<IOBuf> buffer;
61 auto fn = [&](StringPiece in) -> bool {
63 bool found = splitPrefix(in, prefix, this->delimiter_);
// A delimiter was found while the buffer holds earlier bytes: the prefix is
// the remainder of the buffered line.
64 if (found && buffer && buffer->length() != 0) {
65 // Append to end of buffer, return line
66 if (!prefix.empty()) {
// Presumably reserve(0, n) ensures n bytes of room at the tail before the
// memcpy into writableTail() -- confirm against the IOBuf documentation.
67 buffer->reserve(0, prefix.size());
68 memcpy(buffer->writableTail(), prefix.data(), prefix.size());
// Extends the buffer's length over the bytes just copied.
69 buffer->append(prefix.size());
// Hands the whole buffered line to body; the branch taken when body returns
// false is not visible in this listing.
71 if (!body(StringPiece(ch(buffer->data()), buffer->length()))) {
// Look for the next delimiter in what is left of `in`.
75 found = splitPrefix(in, prefix, this->delimiter_);
77 // Buffer is empty, return lines directly from input (no buffer)
82 found = splitPrefix(in, prefix, this->delimiter_);
85 // Incomplete line left, append to buffer
87 // Arbitrarily assume that we have half a line and get enough
88 // room for twice that.
89 constexpr size_t kDefaultLineSize = 256;
// Capacity is the larger of 256 and twice the size of the leftover input.
// NOTE(review): the condition guarding this creation is not visible here.
90 buffer = IOBuf::create(std::max(kDefaultLineSize, 2 * in.size()));
// Same reserve / memcpy / append sequence as above, this time for the
// leftover bytes of `in`.
92 buffer->reserve(0, in.size());
93 memcpy(buffer->writableTail(), in.data(), in.size());
94 buffer->append(in.size());
// Drive the wrapped source with fn; the branch taken when it returns false is
// not visible in this listing.
100 if (!source_.apply(std::move(fn))) {
104 // Incomplete last line
// Bytes still buffered after the source finished are passed to body as a
// final value.
105 if (buffer && buffer->length() != 0) {
106 if (!body(StringPiece(ch(buffer->data()), buffer->length()))) {
// Mirrors the wrapped Source's `infinite` constant.
113 static constexpr bool infinite = Source::infinite;
// compose (rvalue overload): builds a Generator by moving source.self().
116 template<class Source,
118 class Gen = Generator<Source>>
119 Gen compose(GenImpl<Value, Source>&& source) const {
120 return Gen(std::move(source.self()), delimiter_);
// compose (const-reference overload): builds a Generator from source.self()
// without moving from it.
123 template<class Source,
125 class Gen = Generator<Source>>
126 Gen compose(const GenImpl<Value, Source>& source) const {
127 return Gen(source.self(), delimiter_);
// SplitStringSource: GenImpl with value type StringPiece that walks a stored
// piece (source_) by calling splitPrefix repeatedly with delimiter_.
// NOTE(review): member declarations, part of the constructor, the loop body
// and the code after the loop are not part of this listing.
131 class SplitStringSource : public GenImpl<StringPiece, SplitStringSource> {
135 SplitStringSource(const StringPiece& source,
138 , delimiter_(delimiter) { }
140 template <class Body>
141 bool apply(Body&& body) const {
// Work on a local copy; apply() is const and source_ itself is not modified
// by the visible lines.
142 StringPiece rest(source_);
// Loops for as long as splitPrefix reports success on `rest`.
144 while (splitPrefix(rest, prefix, this->delimiter_)) {
159 * Unsplit - For joining tokens from a generator into a string. This is
160 * the inverse of `split` above.
162 * This type is primarily used through the 'unsplit' function.
// Unsplit: operator that stores a delimiter. compose() constructs an
// UnsplitBuffer<Delimiter, Output> over a local `outputBuffer` and runs it on
// the source; the declared return type is Output.
// NOTE(review): the remaining template parameters, the declaration of
// `outputBuffer` and the return statement are not part of this listing.
164 template<class Delimiter,
166 class Unsplit : public Operator<Unsplit<Delimiter, Output>> {
167 Delimiter delimiter_;
// NOTE(review): this single-argument constructor is not marked `explicit`,
// which permits implicit conversion from Delimiter -- confirm that is intended.
169 Unsplit(const Delimiter& delimiter)
170 : delimiter_(delimiter) {
173 template<class Source,
175 Output compose(const GenImpl<Value, Source>& source) const {
// The joining itself is delegated to UnsplitBuffer, which receives the
// address of outputBuffer.
177 UnsplitBuffer<Delimiter, Output> unsplitter(delimiter_, &outputBuffer);
178 unsplitter.compose(source);
184 * UnsplitBuffer - For joining tokens from a generator into a string,
185 * and inserting them into a custom buffer.
187 * This type is primarily used through the 'unsplit' function.
// UnsplitBuffer: operator that stores a delimiter and a pointer to an output
// buffer. compose() pipes every value of the source into a lambda that
// appends to *outputBuffer_ through toAppend, with or without the delimiter.
// NOTE(review): the remaining template parameters and the if/else that
// selects between the two toAppend calls are not part of this listing.
189 template<class Delimiter,
191 class UnsplitBuffer : public Operator<UnsplitBuffer<Delimiter, OutputBuffer>> {
192 Delimiter delimiter_;
// Raw pointer stored as passed in; the visible lines never delete it.
193 OutputBuffer* outputBuffer_;
195 UnsplitBuffer(const Delimiter& delimiter, OutputBuffer* outputBuffer)
196 : delimiter_(delimiter)
197 , outputBuffer_(outputBuffer) {
201 template<class Source,
203 void compose(const GenImpl<Value, Source>& source) const {
204 // If the output buffer is empty, we skip inserting the delimiter for the
// (the sentence above continues on a line missing from this listing;
// presumably it ends with "first element" -- TODO confirm)
// NOTE(review): outputBuffer_ is dereferenced without a null check.
206 bool skipDelim = outputBuffer_->empty();
207 source | [&](Value v) {
// Appends the value alone.
210 toAppend(std::forward<Value>(v), outputBuffer_);
// Passes the delimiter ahead of the value in a single toAppend call.
212 toAppend(delimiter_, std::forward<Value>(v), outputBuffer_);
220 * Hack for static for-like constructs
/**
 * Identity helper: returns its argument as `Target`.
 *
 * The second, unnamed template parameter is never used in the body. It gives
 * a call site a slot in which to name a parameter-pack element, so the call
 * can be repeated once per element inside a pack expansion.
 */
template <typename Target, typename = void>
inline Target passthrough(Target value) {
  return value;
}
225 #pragma GCC diagnostic push
227 // Clang isn't happy with eatField() hack below.
228 #pragma GCC diagnostic ignored "-Wreturn-stack-address"
232 * ParseToTuple - For splitting a record and immediately converting it to a
233 * target tuple type. Primarily used through the 'eachToTuple' helper, like so:
236 * = split("1:a 2:b", ' ')
237 * | eachToTuple<int, string>()
238 * | as<vector<tuple<int, string>>>();
// SplitTo: function object that stores a delimiter. operator() splits `line`
// into sizeof...(Targets) StringPiece fields via split(), then constructs a
// TargetContainer from those fields, each passed through To<Target>.
// Throws std::runtime_error("field count mismatch") when split() returns
// false.
// NOTE(review): the rest of the template header, the class header, the
// declaration of `i` and any reset of `i` between the two pack expansions are
// not part of this listing.
241 template<class TargetContainer,
245 Delimiter delimiter_;
247 explicit SplitTo(Delimiter delimiter)
248 : delimiter_(delimiter) {}
250 TargetContainer operator()(StringPiece line) const {
// One slot per element of the Targets pack.
252 StringPiece fields[sizeof...(Targets)];
253 // HACK(tjackson): Used for referencing fields[] corresponding to variadic
254 // template parameters.
// Each call returns a reference to the next slot and post-increments i.
255 auto eatField = [&]() -> StringPiece& { return fields[i++]; };
// passthrough<StringPiece&, Targets> repeats eatField() once per Target, so
// split() receives one output reference per field.
// NOTE(review): eatField() has a side effect (i++) and is called inside a
// parenthesised argument list here and in the return statement below. C++
// does not specify the order in which function arguments are evaluated, so
// the pairing of fields with Targets depends on the compiler's order -- a
// braced-init-list would guarantee left-to-right. Confirm.
256 if (!split(delimiter_,
258 detail::passthrough<StringPiece&, Targets>(eatField())...)) {
259 throw std::runtime_error("field count mismatch");
// Converts each field with To<Target> and forwards the results to
// TargetContainer's constructor.
262 return TargetContainer(To<Targets>()(eatField())...);
266 #pragma GCC diagnostic pop
268 } // namespace detail