From 6ea306c4d4bc93600ecb03702932950905111077 Mon Sep 17 00:00:00 2001 From: Tudor Bosman Date: Fri, 31 Aug 2012 19:39:12 -0700 Subject: [PATCH] Minor huge pages library changes Summary: Moved to separate library instead of :io Got rid of the mode argument when creating files -- umask is your friend Separated getSize interface from create Added FsUtil.h for some fs::path operations Switched some interfaces to fs::path instead of string / StringPiece Canonicalize paths so we don't care whether paths are absolute or relative Test Plan: fs_util_test, by hand Reviewed By: philipp@fb.com FB internal diff: D564465 --- folly/experimental/io/FsUtil.cpp | 71 +++++++++++++ folly/experimental/io/FsUtil.h | 59 +++++++++++ folly/experimental/io/HugePageUtil.cpp | 2 +- folly/experimental/io/HugePages.cpp | 116 ++++++++++++---------- folly/experimental/io/HugePages.h | 42 ++++++-- folly/experimental/io/test/FsUtilTest.cpp | 76 ++++++++++++++ 6 files changed, 307 insertions(+), 59 deletions(-) create mode 100644 folly/experimental/io/FsUtil.cpp create mode 100644 folly/experimental/io/FsUtil.h create mode 100644 folly/experimental/io/test/FsUtilTest.cpp diff --git a/folly/experimental/io/FsUtil.cpp b/folly/experimental/io/FsUtil.cpp new file mode 100644 index 00000000..b39116a5 --- /dev/null +++ b/folly/experimental/io/FsUtil.cpp @@ -0,0 +1,71 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/experimental/io/FsUtil.h" + +namespace bsys = ::boost::system; + +namespace folly { +namespace fs { + +namespace { +bool skipPrefix(const path& pth, const path& prefix, path::const_iterator& it) { + it = pth.begin(); + for (auto& p : prefix) { + if (it == pth.end()) { + return false; + } + if (p == ".") { + // Should only occur at the end, if prefix ends with a slash + continue; + } + if (*it++ != p) { + return false; + } + } + return true; +} +} // namespace + +bool starts_with(const path& pth, const path& prefix) { + path::const_iterator it; + return skipPrefix(pth, prefix, it); +} + +path remove_prefix(const path& pth, const path& prefix) { + path::const_iterator it; + if (!skipPrefix(pth, prefix, it)) { + throw filesystem_error( + "Path does not start with prefix", + pth, prefix, + bsys::errc::make_error_code(bsys::errc::invalid_argument)); + } + + path p; + for (; it != pth.end(); ++it) { + p /= *it; + } + + return p; +} + +path canonical_parent(const path& pth, const path& base) { + return canonical(pth.parent_path(), base) / pth.filename(); +} + +} // namespace fs +} // namespace folly + diff --git a/folly/experimental/io/FsUtil.h b/folly/experimental/io/FsUtil.h new file mode 100644 index 00000000..9b6ca1ed --- /dev/null +++ b/folly/experimental/io/FsUtil.h @@ -0,0 +1,59 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FOLLY_IO_FSUTIL_H_ +#define FOLLY_IO_FSUTIL_H_ + +#include + +namespace folly { +namespace fs { + +// Functions defined in this file are meant to extend the +// boost::filesystem library; functions will be named according to boost's +// naming conventions instead of ours. For convenience, import the +// boost::filesystem namespace into folly::fs. +using namespace ::boost::filesystem; + +/** + * Check whether "path" starts with "prefix". + * That is, if prefix has n path elements, then the first n elements of + * path must be the same as prefix. + * + * There is a special case if prefix ends with a slash: + * /foo/bar/ is not a prefix of /foo/bar, but both /foo/bar and /foo/bar/ + * are prefixes of /foo/bar/baz. + */ +bool starts_with(const path& p, const path& prefix); + +/** + * If "path" starts with "prefix", return "path" with "prefix" removed. + * Otherwise, throw filesystem_error. + */ +path remove_prefix(const path& p, const path& prefix); + +/** + * Canonicalize the parent path, leaving the filename (last component) + * unchanged. You may use this before creating a file instead of + * boost::filesystem::canonical, which requires that the entire path exists. + */ +path canonical_parent(const path& p, const path& basePath = current_path()); + +} // namespace fs +} // namespace folly + +#endif /* FOLLY_IO_FSUTIL_H_ */ + diff --git a/folly/experimental/io/HugePageUtil.cpp b/folly/experimental/io/HugePageUtil.cpp index 1506adb8..f608b3cd 100644 --- a/folly/experimental/io/HugePageUtil.cpp +++ b/folly/experimental/io/HugePageUtil.cpp @@ -82,7 +82,7 @@ void copy(const char* srcFile, const char* destPrefix) { void list() { HugePages hp; for (auto& p : hp.sizes()) { - std::cout << p.first << " " << p.second << "\n"; + std::cout << p.size << " " << p.mountPoint << "\n"; } } diff --git a/folly/experimental/io/HugePages.cpp b/folly/experimental/io/HugePages.cpp index 3591410d..e2deabf5 100644 --- a/folly/experimental/io/HugePages.cpp +++ b/folly/experimental/io/HugePages.cpp @@ -17,6 +17,9 @@ #include "folly/experimental/io/HugePages.h" #include +#include +#include +#include #include #include @@ -25,7 +28,6 @@ #include #include -#include #include #include @@ -38,8 +40,6 @@ #include "folly/String.h" #include "folly/experimental/io/Stream.h" -namespace fs = ::boost::filesystem; - namespace folly { namespace { @@ -70,7 +70,7 @@ HugePageSizeVec getRawHugePageSizes() { std::string filename(it->path().filename().native()); if (boost::regex_match(filename, match, regex)) { StringPiece numStr(filename.data() + match.position(1), match.length(1)); - vec.emplace_back(to(numStr) * 1024, ""); + vec.emplace_back(to(numStr) * 1024); } } return vec; @@ -113,11 +113,11 @@ HugePageSizeVec getHugePageSizes() { size_t defaultHugePageSize = getDefaultHugePageSize(); struct PageSizeLess { - bool operator()(const std::pair& a, size_t b) const { - return a.first < b; + bool operator()(const HugePageSize& a, size_t b) const { + return a.size < b; } - bool operator()(size_t a, const std::pair& b) const { - return a < b.first; + bool operator()(size_t a, const HugePageSize& b) const { + return a < b.size; } }; @@ -156,12 +156,13 @@ HugePageSizeVec getHugePageSizes() { auto pos = std::lower_bound(sizeVec.begin(), sizeVec.end(), pageSize, PageSizeLess()); - if (pos == sizeVec.end() || pos->first != pageSize) { + if (pos == sizeVec.end() || pos->size != pageSize) { throw std::runtime_error("Mount page size not found"); } - if (pos->second.empty()) { + if (pos->mountPoint.empty()) { // Store mount point - pos->second.assign(parts[1].data(), parts[1].size()); + pos->mountPoint = fs::canonical(fs::path(parts[1].begin(), + parts[1].end())); } } @@ -204,7 +205,7 @@ class ScopedFd : private boost::noncopyable { // RAII wrapper that deletes a file upon destruction unless you call release() class ScopedDeleter : private boost::noncopyable { public: - explicit ScopedDeleter(std::string name) : name_(std::move(name)) { } + explicit ScopedDeleter(fs::path name) : name_(std::move(name)) { } void release() { name_.clear(); } @@ -219,7 +220,7 @@ class ScopedDeleter : private boost::noncopyable { } } private: - std::string name_; + fs::path name_; }; // RAII wrapper around a mmap mapping, munmaps upon destruction unless you @@ -262,63 +263,78 @@ class ScopedMmap : private boost::noncopyable { HugePages::HugePages() : sizes_(getHugePageSizes()) { } -HugePages::File HugePages::create(ByteRange data, - StringPiece baseName, - size_t hugePageSize, - mode_t mode) const { - // Pick an appropriate size. - StringPiece mountPath; - if (hugePageSize == 0) { - for (auto& p : sizes_) { - if (p.second.empty()) { - continue; // not mounted - } - hugePageSize = p.first; - mountPath = StringPiece(p.second); - break; +const HugePageSize& HugePages::getSize(size_t hugePageSize) const { + // Linear search is just fine. + for (auto& p : sizes_) { + if (p.mountPoint.empty()) { + continue; // not mounted } - if (hugePageSize == 0) { - throw std::runtime_error("No huge page filesystem mounted"); - } - } else { - // Linear search is just fine - for (auto& p : sizes_) { - if (p.first == hugePageSize) { - if (p.second.empty()) { - throw std::runtime_error( - "No huge page filesystem mounted with requested page size"); - } - mountPath = StringPiece(p.second); - } - } - if (mountPath.empty()) { - throw std::runtime_error("Requested huge page size not found"); + if (hugePageSize == 0 || hugePageSize == p.size) { + return p; } } + throw std::runtime_error("Huge page not supported / not mounted"); +} + +HugePages::File HugePages::create(ByteRange data, + const fs::path& path, + HugePageSize hugePageSize) const { + namespace bsys = ::boost::system; + if (hugePageSize.size == 0) { + hugePageSize = getSize(); + } // Round size up File file; - file.size = data.size() / hugePageSize * hugePageSize; + file.size = data.size() / hugePageSize.size * hugePageSize.size; if (file.size != data.size()) { - file.size += hugePageSize; + file.size += hugePageSize.size; } - file.path = folly::format("{}/{}", mountPath, baseName).str(); - ScopedFd fd(open(file.path.c_str(), O_RDWR | O_CREAT | O_TRUNC, mode)); + { + file.path = fs::canonical_parent(path, hugePageSize.mountPoint); + if (!fs::starts_with(file.path, hugePageSize.mountPoint)) { + throw fs::filesystem_error( + "HugePages::create: path not rooted at mount point", + file.path, hugePageSize.mountPoint, + bsys::errc::make_error_code(bsys::errc::invalid_argument)); + } + } + ScopedFd fd(open(file.path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0666)); if (fd.fd() == -1) { throw std::system_error(errno, std::system_category(), "open failed"); } ScopedDeleter deleter(file.path); - ScopedMmap map(mmap(nullptr, file.size, PROT_READ | PROT_WRITE, MAP_SHARED, - fd.fd(), 0), + ScopedMmap map(mmap(nullptr, file.size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd.fd(), 0), file.size); if (map.start() == MAP_FAILED) { throw std::system_error(errno, std::system_category(), "mmap failed"); } - memcpy(map.start(), data.data(), data.size()); + // Ignore madvise return code + madvise(const_cast(data.data()), data.size(), + MADV_SEQUENTIAL); + // Why is this not memcpy, you ask? + // The SSSE3-optimized memcpy in glibc likes to copy memory backwards, + // rendering any prefetching from madvise useless (even harmful). + const unsigned char* src = data.data(); + size_t size = data.size(); + unsigned char* dest = reinterpret_cast(map.start()); + if (reinterpret_cast(src) % 8 == 0) { + const uint64_t* src8 = reinterpret_cast(src); + size_t size8 = size / 8; + uint64_t* dest8 = reinterpret_cast(dest); + while (size8--) { + *dest8++ = *src8++; + } + src = reinterpret_cast(src8); + dest = reinterpret_cast(dest8); + size %= 8; + } + memcpy(dest, src, size); map.munmap(); deleter.release(); diff --git a/folly/experimental/io/HugePages.h b/folly/experimental/io/HugePages.h index 6ec6a1fa..321024c9 100644 --- a/folly/experimental/io/HugePages.h +++ b/folly/experimental/io/HugePages.h @@ -17,25 +17,39 @@ #ifndef FOLLY_IO_HUGEPAGES_H_ #define FOLLY_IO_HUGEPAGES_H_ -#include -#include -#include - #include #include #include #include +#include + #include "folly/Range.h" +#include "folly/experimental/io/FsUtil.h" namespace folly { +struct HugePageSize : private boost::totally_ordered { + HugePageSize() : size(0) { } + explicit HugePageSize(size_t s) : size(s) { } + size_t size; + fs::path mountPoint; +}; + +inline bool operator<(const HugePageSize& a, const HugePageSize& b) { + return a.size < b.size; +} + +inline bool operator==(const HugePageSize& a, const HugePageSize& b) { + return a.size == b.size; +} + /** * Vector of (huge_page_size, mount_point), sorted by huge_page_size. * mount_point might be empty if no hugetlbfs file system is mounted for * that size. */ -typedef std::vector> HugePageSizeVec; +typedef std::vector HugePageSizeVec; /** * Class to interface with Linux huge pages (hugetlbfs). @@ -50,22 +64,34 @@ class HugePages { */ const HugePageSizeVec& sizes() const { return sizes_; } + /** + * Return the mount point for the requested huge page size. + * 0 = use smallest available. + * Throws on error. + */ + const HugePageSize& getSize(size_t hugePageSize = 0) const; + /** * Create a file on a huge page filesystem containing a copy of the data * from data. If multiple huge page sizes are allowed, we * pick the smallest huge page size available, unless you request one * explicitly with the hugePageSize argument. * + * The "path" argument must be rooted under the mount point for the + * selected huge page size. If relative, it is considered relative to the + * mount point. + * * We return a struct File structure containing the full path and size * (rounded up to a multiple of the huge page size) */ struct File { - std::string path; + File() : size(0) { } + fs::path path; size_t size; }; File create( - ByteRange data, StringPiece baseName, size_t hugePageSize = 0, - mode_t mode = 0644) const; + ByteRange data, const fs::path& path, + HugePageSize hugePageSize = HugePageSize()) const; private: HugePageSizeVec sizes_; diff --git a/folly/experimental/io/test/FsUtilTest.cpp b/folly/experimental/io/test/FsUtilTest.cpp new file mode 100644 index 00000000..28bc3273 --- /dev/null +++ b/folly/experimental/io/test/FsUtilTest.cpp @@ -0,0 +1,76 @@ +/* + * Copyright 2012 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "folly/experimental/io/FsUtil.h" + +#include +#include + +using namespace folly; +using namespace folly::fs; + +namespace { +// We cannot use EXPECT_EQ(a, b) due to a bug in gtest 1.6.0: gtest wants +// to print path as a container even though it has operator<< defined, +// and as path is a container of path, this leads to infinite +// recursion. +void expectPathEq(const path& a, const path& b) { + EXPECT_TRUE(a == b) << "expected path=" << a << "\nactual path=" << b; +} +} // namespace + +TEST(Simple, Path) { + path root("/"); + path abs1("/hello/world"); + path rel1("meow"); + EXPECT_TRUE(starts_with(abs1, root)); + EXPECT_FALSE(starts_with(rel1, root)); + expectPathEq(path("hello/world"), remove_prefix(abs1, root)); + EXPECT_THROW({remove_prefix(rel1, root);}, filesystem_error); + + path abs2("/hello"); + path abs3("/hello/"); + path abs4("/hello/world"); + path abs5("/hello/world/"); + path abs6("/hello/wo"); + EXPECT_TRUE(starts_with(abs1, abs2)); + EXPECT_TRUE(starts_with(abs1, abs3)); + EXPECT_TRUE(starts_with(abs1, abs4)); + EXPECT_FALSE(starts_with(abs1, abs5)); + EXPECT_FALSE(starts_with(abs1, abs6)); + expectPathEq(path("world"), remove_prefix(abs1, abs2)); + expectPathEq(path("world"), remove_prefix(abs1, abs3)); + expectPathEq(path(), remove_prefix(abs1, abs4)); + EXPECT_THROW({remove_prefix(abs1, abs5);}, filesystem_error); + EXPECT_THROW({remove_prefix(abs1, abs6);}, filesystem_error); +} + +TEST(Simple, CanonicalizeParent) { + path a("/usr/bin/tail"); + path b("/usr/lib/../bin/tail"); + path c("/usr/bin/DOES_NOT_EXIST_ASDF"); + path d("/usr/lib/../bin/DOES_NOT_EXIST_ASDF"); + + expectPathEq(a, canonical(a)); + expectPathEq(a, canonical_parent(b)); + expectPathEq(a, canonical(b)); + expectPathEq(a, canonical_parent(b)); + EXPECT_THROW({canonical(c);}, filesystem_error); + EXPECT_THROW({canonical(d);}, filesystem_error); + expectPathEq(c, canonical_parent(c)); + expectPathEq(c, canonical_parent(d)); +} + -- 2.34.1