From: Andrei Alexandrescu Date: Wed, 15 Jan 2014 18:38:22 +0000 (-0800) Subject: readFile reads an entire file into a string, vector, or similar X-Git-Tag: v0.22.0~732 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=52ba96edd198f7985f0315b034f84c6dd29234c5;p=folly.git readFile reads an entire file into a string, vector, or similar Test Plan: unittest Reviewed By: lucian@fb.com FB internal diff: D1129497 --- diff --git a/folly/File.h b/folly/File.h index ec4a2085..1c6793fa 100644 --- a/folly/File.h +++ b/folly/File.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/folly/FileUtil.h b/folly/FileUtil.h index 6b9325ae..49398033 100644 --- a/folly/FileUtil.h +++ b/folly/FileUtil.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,10 @@ #define FOLLY_FILEUTIL_H_ #include "folly/Portability.h" +#include "folly/ScopeGuard.h" +#include +#include #include #include #include @@ -102,7 +105,66 @@ ssize_t writevFull(int fd, iovec* iov, int count); ssize_t pwritevFull(int fd, iovec* iov, int count, off_t offset); #endif +/** + * Read entire file (if num_bytes is defaulted) or no more than + * num_bytes (otherwise) into container *out. The container is assumed + * to be contiguous, with element size equal to 1, and offer size(), + * reserve(), and random access (e.g. std::vector, std::string, + * fbstring). + * + * Returns: true on success or false on failure. In the latter case + * errno will be set appropriately by the failing system primitive. + */ +template +bool readFile(const char* file_name, Container& out, + size_t num_bytes = std::numeric_limits::max()) { + static_assert(sizeof(out[0]) == 1, + "readFile: only containers with byte-sized elements accepted"); + assert(file_name); + + const auto fd = open(file_name, O_RDONLY); + if (fd == -1) return false; + + size_t soFar = 0; // amount of bytes successfully read + SCOPE_EXIT { + assert(out.size() >= soFar); // resize better doesn't throw + out.resize(soFar); + // Ignore errors when closing the file + close(fd); + }; + + // Obtain file size: + struct stat buf; + if (fstat(fd, &buf) == -1) return false; + // Some files (notably under /proc and /sys on Linux) lie about + // their size, so treat the size advertised by fstat under advise + // but don't rely on it. In particular, if the size is zero, we + // should attempt to read stuff. If not zero, we'll attempt to read + // one extra byte. + constexpr size_t initialAlloc = 1024 * 4; + out.resize( + std::min( + buf.st_size ? buf.st_size + 1 : initialAlloc, + num_bytes)); + + while (soFar < out.size()) { + const auto actual = readFull(fd, &out[soFar], out.size() - soFar); + if (actual == -1) { + return false; + } + soFar += actual; + if (soFar < out.size()) { + // File exhausted + break; + } + // Ew, allocate more memory. Use exponential growth to avoid + // quadratic behavior. Cap size to num_bytes. + out.resize(std::min(out.size() * 3 / 2, num_bytes)); + } + + return true; +} + } // namespaces #endif /* FOLLY_FILEUTIL_H_ */ - diff --git a/folly/String-inl.h b/folly/String-inl.h index e61ef332..a2179209 100644 --- a/folly/String-inl.h +++ b/folly/String-inl.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -648,4 +648,3 @@ void hexDump(const void* ptr, size_t size, OutIt out) { } // namespace folly #endif /* FOLLY_STRING_INL_H_ */ - diff --git a/folly/String.h b/folly/String.h index da93cd26..31b21b1e 100644 --- a/folly/String.h +++ b/folly/String.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/folly/experimental/FileGen-inl.h b/folly/experimental/FileGen-inl.h index d28966b7..2825d68e 100644 --- a/folly/experimental/FileGen-inl.h +++ b/folly/experimental/FileGen-inl.h @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -135,4 +135,12 @@ inline auto byLine(File file, char delim = '\n') | resplit(delim); } +/** + * Ditto, take the filename and opens it + */ +inline auto byLine(const char* fileName, char delim = '\n') + -> decltype(byLine(File(fileName))) { + return byLine(File(fileName), delim); +} + }} // !folly::gen diff --git a/folly/experimental/io/HugePages.cpp b/folly/experimental/io/HugePages.cpp index 041dfa2d..21426fdc 100644 --- a/folly/experimental/io/HugePages.cpp +++ b/folly/experimental/io/HugePages.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -360,4 +360,3 @@ HugePages::File HugePages::create(ByteRange data, } } // namespace folly - diff --git a/folly/experimental/test/GenBenchmark.cpp b/folly/experimental/test/GenBenchmark.cpp index 2b03ca0f..52f3e0a2 100644 --- a/folly/experimental/test/GenBenchmark.cpp +++ b/folly/experimental/test/GenBenchmark.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -598,7 +598,7 @@ BENCHMARK(ByLine_Pipes, iters) { PCHECK(::read(rfd, &buf, 1) == 1); // wait for startup } - auto s = byLine(rfd) | eachTo() | sum; + auto s = byLine(File(rfd)) | eachTo() | sum; folly::doNotOptimizeAway(s); BENCHMARK_SUSPEND { diff --git a/folly/experimental/test/GenTest.cpp b/folly/experimental/test/GenTest.cpp index 499ab052..35c1414b 100644 --- a/folly/experimental/test/GenTest.cpp +++ b/folly/experimental/test/GenTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1349,7 +1349,7 @@ TEST_P(FileGenBufferedTest, FileWriter) { auto collect = eachTo() | as(); auto expected = src | resplit('\n') | collect; - src | eachAs() | toFile(file.fd(), bufferSize); + src | eachAs() | toFile(File(file.fd()), bufferSize); auto found = byLine(file.path().c_str()) | collect; EXPECT_TRUE(expected == found); diff --git a/folly/io/test/RecordIOTest.cpp b/folly/io/test/RecordIOTest.cpp index 4c12f817..2ffbb976 100644 --- a/folly/io/test/RecordIOTest.cpp +++ b/folly/io/test/RecordIOTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -54,12 +54,12 @@ std::unique_ptr iobufs(std::initializer_list ranges) { TEST(RecordIOTest, Simple) { TemporaryFile file; { - RecordIOWriter writer(file.fd()); + RecordIOWriter writer(File(file.fd())); writer.write(iobufs({"hello ", "world"})); writer.write(iobufs({"goodbye"})); } { - RecordIOReader reader(file.fd()); + RecordIOReader reader(File(file.fd())); auto it = reader.begin(); ASSERT_FALSE(it == reader.end()); EXPECT_EQ("hello world", sp((it++)->first)); @@ -68,12 +68,12 @@ TEST(RecordIOTest, Simple) { EXPECT_TRUE(it == reader.end()); } { - RecordIOWriter writer(file.fd()); + RecordIOWriter writer(File(file.fd())); writer.write(iobufs({"meow"})); writer.write(iobufs({"woof"})); } { - RecordIOReader reader(file.fd()); + RecordIOReader reader(File(file.fd())); auto it = reader.begin(); ASSERT_FALSE(it == reader.end()); EXPECT_EQ("hello world", sp((it++)->first)); @@ -93,13 +93,13 @@ TEST(RecordIOTest, SmallRecords) { memset(tmp, 'x', kSize); TemporaryFile file; { - RecordIOWriter writer(file.fd()); + RecordIOWriter writer(File(file.fd())); for (int i = 0; i < kSize; ++i) { // record of size 0 should be ignored writer.write(IOBuf::wrapBuffer(tmp, i)); } } { - RecordIOReader reader(file.fd()); + RecordIOReader reader(File(file.fd())); auto it = reader.begin(); for (int i = 1; i < kSize; ++i) { ASSERT_FALSE(it == reader.end()); @@ -112,19 +112,19 @@ TEST(RecordIOTest, SmallRecords) { TEST(RecordIOTest, MultipleFileIds) { TemporaryFile file; { - RecordIOWriter writer(file.fd(), 1); + RecordIOWriter writer(File(file.fd()), 1); writer.write(iobufs({"hello"})); } { - RecordIOWriter writer(file.fd(), 2); + RecordIOWriter writer(File(file.fd()), 2); writer.write(iobufs({"world"})); } { - RecordIOWriter writer(file.fd(), 1); + RecordIOWriter writer(File(file.fd()), 1); writer.write(iobufs({"goodbye"})); } { - RecordIOReader reader(file.fd(), 0); // return all + RecordIOReader reader(File(file.fd()), 0); // return all auto it = reader.begin(); ASSERT_FALSE(it == reader.end()); EXPECT_EQ("hello", sp((it++)->first)); @@ -135,7 +135,7 @@ TEST(RecordIOTest, MultipleFileIds) { EXPECT_TRUE(it == reader.end()); } { - RecordIOReader reader(file.fd(), 1); + RecordIOReader reader(File(file.fd()), 1); auto it = reader.begin(); ASSERT_FALSE(it == reader.end()); EXPECT_EQ("hello", sp((it++)->first)); @@ -144,14 +144,14 @@ TEST(RecordIOTest, MultipleFileIds) { EXPECT_TRUE(it == reader.end()); } { - RecordIOReader reader(file.fd(), 2); + RecordIOReader reader(File(file.fd()), 2); auto it = reader.begin(); ASSERT_FALSE(it == reader.end()); EXPECT_EQ("world", sp((it++)->first)); EXPECT_TRUE(it == reader.end()); } { - RecordIOReader reader(file.fd(), 3); + RecordIOReader reader(File(file.fd()), 3); auto it = reader.begin(); EXPECT_TRUE(it == reader.end()); } @@ -160,7 +160,7 @@ TEST(RecordIOTest, MultipleFileIds) { TEST(RecordIOTest, ExtraMagic) { TemporaryFile file; { - RecordIOWriter writer(file.fd()); + RecordIOWriter writer(File(file.fd())); writer.write(iobufs({"hello"})); } uint8_t buf[recordio_helpers::headerSize() + 5]; @@ -172,7 +172,7 @@ TEST(RecordIOTest, ExtraMagic) { // and an extra record EXPECT_EQ(sizeof(buf), write(file.fd(), buf, sizeof(buf))); { - RecordIOReader reader(file.fd()); + RecordIOReader reader(File(file.fd())); auto it = reader.begin(); ASSERT_FALSE(it == reader.end()); EXPECT_EQ("hello", sp((it++)->first)); @@ -213,7 +213,7 @@ TEST(RecordIOTest, Randomized) { // Recreate the writer multiple times so we test that we create a // continuous stream for (size_t i = 0; i < 3; ++i) { - RecordIOWriter writer(file.fd()); + RecordIOWriter writer(File(file.fd())); for (size_t j = 0; j < recordCount; ++j) { off_t beginPos = writer.filePos(); record.clear(); @@ -251,7 +251,7 @@ TEST(RecordIOTest, Randomized) { { size_t i = 0; - RecordIOReader reader(file.fd()); + RecordIOReader reader(File(file.fd())); for (auto& r : reader) { SCOPED_TRACE(i); ASSERT_LT(i, records.size()); @@ -270,4 +270,3 @@ int main(int argc, char *argv[]) { google::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } - diff --git a/folly/test/FileUtilTest.cpp b/folly/test/FileUtilTest.cpp index f466f11f..d40be3d5 100644 --- a/folly/test/FileUtilTest.cpp +++ b/folly/test/FileUtilTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ namespace folly { namespace test { using namespace fileutil_detail; +using namespace std; namespace { @@ -238,6 +239,49 @@ TEST_F(FileUtilTest, preadv) { } #endif +TEST(String, readFile) { + srand(time(nullptr)); + const string tmpPrefix = to("/tmp/folly-file-util-test-", + getpid(), "-", rand(), "-"); + const string afile = tmpPrefix + "myfile"; + const string emptyFile = tmpPrefix + "myfile2"; + + SCOPE_EXIT { + unlink(afile.c_str()); + unlink(emptyFile.c_str()); + }; + + auto f = fopen(emptyFile.c_str(), "wb"); + EXPECT_NE(nullptr, f); + EXPECT_EQ(0, fclose(f)); + f = fopen(afile.c_str(), "wb"); + EXPECT_NE(nullptr, f); + EXPECT_EQ(3, fwrite("bar", 1, 3, f)); + EXPECT_EQ(0, fclose(f)); + + { + string contents; + EXPECT_TRUE(readFile(emptyFile.c_str(), contents)); + EXPECT_EQ(contents, ""); + EXPECT_TRUE(readFile(afile.c_str(), contents, 0)); + EXPECT_EQ("", contents); + EXPECT_TRUE(readFile(afile.c_str(), contents, 2)); + EXPECT_EQ("ba", contents); + EXPECT_TRUE(readFile(afile.c_str(), contents)); + EXPECT_EQ("bar", contents); + } + { + vector contents; + EXPECT_TRUE(readFile(emptyFile.c_str(), contents)); + EXPECT_EQ(vector(), contents); + EXPECT_TRUE(readFile(afile.c_str(), contents, 0)); + EXPECT_EQ(vector(), contents); + EXPECT_TRUE(readFile(afile.c_str(), contents, 2)); + EXPECT_EQ(vector({'b', 'a'}), contents); + EXPECT_TRUE(readFile(afile.c_str(), contents)); + EXPECT_EQ(vector({'b', 'a', 'r'}), contents); + } +} }} // namespaces @@ -246,4 +290,3 @@ int main(int argc, char *argv[]) { google::ParseCommandLineFlags(&argc, &argv, true); return RUN_ALL_TESTS(); } - diff --git a/folly/test/MemoryMappingTest.cpp b/folly/test/MemoryMappingTest.cpp index 61dceb2a..c111073a 100644 --- a/folly/test/MemoryMappingTest.cpp +++ b/folly/test/MemoryMappingTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,16 +23,16 @@ namespace folly { TEST(MemoryMapping, Basic) { File f = File::temporary(); { - WritableMemoryMapping m(f.fd(), 0, sizeof(double)); + WritableMemoryMapping m(File(f.fd()), 0, sizeof(double)); double volatile* d = m.asWritableRange().data(); *d = 37 * M_PI; } { - MemoryMapping m(f.fd(), 0, 3); + MemoryMapping m(File(f.fd()), 0, 3); EXPECT_EQ(0, m.asRange().size()); //not big enough } { - MemoryMapping m(f.fd(), 0, sizeof(double)); + MemoryMapping m(File(f.fd()), 0, sizeof(double)); const double volatile* d = m.asRange().data(); EXPECT_EQ(*d, 37 * M_PI); } @@ -41,8 +41,8 @@ TEST(MemoryMapping, Basic) { TEST(MemoryMapping, DoublyMapped) { File f = File::temporary(); // two mappings of the same memory, different addresses. - WritableMemoryMapping mw(f.fd(), 0, sizeof(double)); - MemoryMapping mr(f.fd(), 0, sizeof(double)); + WritableMemoryMapping mw(File(f.fd()), 0, sizeof(double)); + MemoryMapping mr(File(f.fd()), 0, sizeof(double)); double volatile* dw = mw.asWritableRange().data(); const double volatile* dr = mr.asRange().data(); @@ -84,11 +84,11 @@ TEST(MemoryMapping, Simple) { writeStringToFileOrDie("hello", f.fd()); { - MemoryMapping m(f.fd()); + MemoryMapping m(File(f.fd())); EXPECT_EQ("hello", m.data()); } { - MemoryMapping m(f.fd(), 1, 2); + MemoryMapping m(File(f.fd()), 1, 2); EXPECT_EQ("el", m.data()); } } @@ -105,20 +105,20 @@ TEST(MemoryMapping, LargeFile) { writeStringToFileOrDie(fileData, f.fd()); { - MemoryMapping m(f.fd()); + MemoryMapping m(File(f.fd())); EXPECT_EQ(fileData, m.data()); } { size_t size = sysconf(_SC_PAGESIZE) * 2; StringPiece s(fileData.data() + 9, size - 9); - MemoryMapping m(f.fd(), 9, size - 9); + MemoryMapping m(File(f.fd()), 9, size - 9); EXPECT_EQ(s.toString(), m.data()); } } TEST(MemoryMapping, ZeroLength) { File f = File::temporary(); - MemoryMapping m(f.fd()); + MemoryMapping m(File(f.fd())); EXPECT_TRUE(m.mlock(MemoryMapping::LockMode::MUST_LOCK)); EXPECT_TRUE(m.mlocked()); EXPECT_EQ(0, m.data().size()); diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp index 92f6ba97..925af1fe 100644 --- a/folly/test/StringTest.cpp +++ b/folly/test/StringTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1028,4 +1028,3 @@ int main(int argc, char *argv[]) { } return ret; } - diff --git a/folly/test/SubprocessTest.cpp b/folly/test/SubprocessTest.cpp index 769eec57..ab4c4b24 100644 --- a/folly/test/SubprocessTest.cpp +++ b/folly/test/SubprocessTest.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2013 Facebook, Inc. + * Copyright 2014 Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -181,7 +181,7 @@ TEST(ParentDeathSubprocessTest, ParentDeathSignal) { TEST(PopenSubprocessTest, PopenRead) { Subprocess proc("ls /", Subprocess::pipeStdout()); int found = 0; - gen::byLine(proc.stdout()) | + gen::byLine(File(proc.stdout())) | [&] (StringPiece line) { if (line == "etc" || line == "bin" || line == "usr") { ++found;