2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "folly/MemoryMapping.h"
18 #include "folly/Format.h"
22 #include <sys/types.h>
23 #include <system_error>
24 #include <gflags/gflags.h>
// gflags command-line flag capping how many bytes one mlock/munlock/munmap
// call may cover. The default is 1 << 20 bytes; see memOpChunkSize() for how
// the value is turned into the effective chunk size.
26 DEFINE_int64(mlock_chunk_size, 1 << 20, // 1MB
27 "Maximum bytes to mlock/munlock/munmap at once "
28 "(will be rounded up to PAGESIZE)");
// Default constructor; the marker comment below records that it is declared
// protected. NOTE(review): presumably intended for derived classes that call
// init() themselves (as WritableMemoryMapping does) — confirm in the header.
32 /* protected constructor */
33 MemoryMapping::MemoryMapping()
// Move constructor: builds this mapping from the rvalue `other`.
40 MemoryMapping::MemoryMapping(MemoryMapping&& other)
// Constructs a mapping over an already-open File. All work is delegated to
// init() with PROT_READ protection and `false` as the final argument.
// NOTE(review): that final flag presumably controls whether init() may grow
// the file — confirm against init()'s parameter list.
48 MemoryMapping::MemoryMapping(File file, off_t offset, off_t length,
55 init(std::move(file), offset, length, pageSize, PROT_READ, false);
// Convenience constructor: wraps `name` in a File and delegates to the
// File-taking constructor with the same offset, length and pageSize.
58 MemoryMapping::MemoryMapping(const char* name, off_t offset, off_t length,
60 : MemoryMapping(File(name), offset, length, pageSize) { }
// Convenience constructor: wraps the descriptor `fd` in a File and delegates
// to the File-taking constructor with the same offset, length and pageSize.
62 MemoryMapping::MemoryMapping(int fd, off_t offset, off_t length,
64 : MemoryMapping(File(fd), offset, length, pageSize) { }
// Shared setup used by the constructors: takes ownership of `file`, works out
// the page-aligned region that covers the requested range, maps it with
// mmap(MAP_SHARED) using protection `prot`, and points data_ at the bytes the
// caller asked for. Every failing system call aborts through the CHECK/PCHECK
// macros rather than returning an error.
66 void MemoryMapping::init(File file,
67 off_t offset, off_t length,
// Page size as reported by the operating system.
72 pageSize = sysconf(_SC_PAGESIZE);
// The rounding arithmetic below relies on a positive power-of-two page size.
74 CHECK_GT(pageSize, 0);
75 CHECK_EQ(pageSize & (pageSize - 1), 0); // power of two
79 // Round down the start of the mapped region
// skipStart is the distance from the preceding page boundary to `offset`.
80 size_t skipStart = offset % pageSize;
83 file_ = std::move(file);
// A mapLength_ of -1 is a sentinel and is resolved further down; any other
// value is widened by the leading padding and rounded to whole pages.
85 if (mapLength_ != -1) {
86 mapLength_ += skipStart;
88 // Round up the end of the mapped region
89 mapLength_ = (mapLength_ + pageSize - 1) / pageSize * pageSize;
// `remaining` is the number of bytes between `offset` and the current end of
// the file, according to fstat().
94 CHECK_ERR(fstat(file_.fd(), &st));
95 off_t remaining = st.st_size - offset;
// Sentinel length: use everything that remains in the file.
96 if (mapLength_ == -1) {
97 length = mapLength_ = remaining;
// The requested range ends past end-of-file; ftruncate() extends the file to
// offset + length.
99 if (length > remaining) {
101 PCHECK(0 == ftruncate(file_.fd(), offset + length))
102 << "ftruncate() failed, couldn't grow file";
// Clamp the mapped length to `remaining`.
108 if (mapLength_ > remaining) mapLength_ = remaining;
115 unsigned char* start = static_cast<unsigned char*>(
116 mmap(nullptr, mapLength_, prot, MAP_SHARED, file_.fd(), offset));
117 PCHECK(start != MAP_FAILED)
118 << " offset=" << offset
119 << " length=" << mapLength_;
// Expose only the caller's range: step over the alignment padding at the
// front of the mapping and report `length` bytes.
121 data_.reset(start + skipStart, length);
// Chooses the chunk size for chunked memory operations. The result starts out
// as `length` and is otherwise taken from FLAGS_mlock_chunk_size, adjusted to
// a page boundary.
127 off_t memOpChunkSize(off_t length, off_t pageSize) {
128 off_t chunkSize = length;
// A non-positive flag value is handled separately from the path below.
129 if (FLAGS_mlock_chunk_size <= 0) {
133 chunkSize = FLAGS_mlock_chunk_size;
// r is how far chunkSize extends past the last page boundary; adding
// (pageSize - r) moves chunkSize up onto the next boundary.
134 off_t r = chunkSize % pageSize;
136 chunkSize += (pageSize - r);
142 * Run @op in chunks over the buffer @mem of @bufSize length.
145 * - success: true + amountSucceeded == bufSize (op success on whole buffer)
146 * - failure: false + amountSucceeded == nr bytes on which op succeeded.
// @op receives (address, byte count) and signals success by returning 0.
// @pageSize is only used to derive the chunk size via memOpChunkSize().
// @amountSucceeded is an in/out progress counter: it is advanced by each
// chunk's size only after @op has succeeded on that chunk.
148 bool memOpInChunks(std::function<int(void*, size_t)> op,
149 void* mem, size_t bufSize, off_t pageSize,
150 size_t& amountSucceeded) {
151 // unmap/mlock/munlock take a kernel semaphore and block other threads from
152 // doing other memory operations. If the size of the buffer is big the
153 // semaphore can be down for seconds (for benchmarks see
154 // http://kostja-osipov.livejournal.com/42963.html). Doing the operations in
155 // chunks breaks the locking into intervals and lets other threads do memory
156 // operations of their own.
158 size_t chunkSize = memOpChunkSize(bufSize, pageSize);
// Byte-sized pointer so the running offset can be added directly.
160 char* addr = static_cast<char*>(mem);
163 while (amountSucceeded < bufSize) {
// The last chunk may be shorter than chunkSize.
164 size_t size = std::min(chunkSize, bufSize - amountSucceeded);
// Any non-zero return from op counts as failure for this chunk.
165 if (op(addr + amountSucceeded, size) != 0) {
168 amountSucceeded += size;
174 } // anonymous namespace
// Locks the mapped region into memory with ::mlock, one chunk at a time, and
// records the outcome in locked_. On failure the error is logged and the part
// that was already locked is unlocked again.
176 bool MemoryMapping::mlock(LockMode lock) {
177 size_t amountSucceeded = 0;
178 locked_ = memOpInChunks(::mlock, mapStart_, mapLength_, pageSize_,
// Failure message: total mapping length and the number of bytes locked
// before the failure.
184 auto msg(folly::format(
185 "mlock({}) failed at {}",
186 mapLength_, amountSucceeded).str());
// In TRY_LOCK mode, EPERM and ENOMEM are reported as warnings.
// NOTE(review): errno is inspected after the message above has been
// formatted — confirm that formatting cannot overwrite the value set by
// ::mlock.
188 if (lock == LockMode::TRY_LOCK && (errno == EPERM || errno == ENOMEM)) {
189 PLOG(WARNING) << msg;
194 // only part of the buffer was mlocked, unlock it back
// A failure of this rollback is itself only logged as a warning.
195 if (!memOpInChunks(::munlock, mapStart_, amountSucceeded, pageSize_,
197 PLOG(WARNING) << "munlock()";
// Unlocks the mapped region with ::munlock, one chunk at a time. Does nothing
// unless locked_ is set. When `dontneed` is true and the mapping is non-empty,
// madvise(MADV_DONTNEED) is also applied to the region. Failures of either
// call are logged as warnings.
203 void MemoryMapping::munlock(bool dontneed) {
204 if (!locked_) return;
206 size_t amountSucceeded = 0;
207 if (!memOpInChunks(::munlock, mapStart_, mapLength_, pageSize_,
209 PLOG(WARNING) << "munlock()";
// madvise() is skipped for a zero-length mapping.
211 if (mapLength_ && dontneed &&
212 ::madvise(mapStart_, mapLength_, MADV_DONTNEED)) {
213 PLOG(WARNING) << "madvise()";
// Passes MADV_SEQUENTIAL to advise() for this mapping, i.e. tells the kernel
// to expect a front-to-back scan.
218 void MemoryMapping::hintLinearScan() {
219 advise(MADV_SEQUENTIAL);
// Destructor: unmaps the region with ::munmap, one chunk at a time. A failed
// munmap is fatal; the message reports the mapping length and how many bytes
// had been unmapped before the failure.
222 MemoryMapping::~MemoryMapping() {
224 size_t amountSucceeded = 0;
225 if (!memOpInChunks(::munmap, mapStart_, mapLength_, pageSize_,
227 PLOG(FATAL) << folly::format(
228 "munmap({}) failed at {}",
229 mapLength_, amountSucceeded).str();
// Applies madvise(`advice`) to the whole mapped region. A zero-length mapping
// is skipped, and a failing madvise() is only logged as a warning.
234 void MemoryMapping::advise(int advice) const {
235 if (mapLength_ && ::madvise(mapStart_, mapLength_, advice)) {
236 PLOG(WARNING) << "madvise()";
// Assignment operator; `other` is received by value rather than by reference.
// NOTE(review): presumably implemented by swapping with `other` — confirm.
240 MemoryMapping& MemoryMapping::operator=(MemoryMapping other) {
// Exchanges the state of this mapping with `other`, member by member:
// file_, mapStart_, mapLength_, pageSize_, locked_ and data_.
245 void MemoryMapping::swap(MemoryMapping& other) {
247 swap(this->file_, other.file_);
248 swap(this->mapStart_, other.mapStart_);
249 swap(this->mapLength_, other.mapLength_);
250 swap(this->pageSize_, other.pageSize_);
251 swap(this->locked_, other.locked_);
252 swap(this->data_, other.data_);
// Constructs a read/write mapping over `file`: delegates to init() with
// PROT_READ | PROT_WRITE and `true` as the final argument, where the read-only
// MemoryMapping constructor passes PROT_READ and `false`.
// NOTE(review): the final flag presumably allows init() to grow the file —
// confirm against init()'s parameter list.
255 WritableMemoryMapping::WritableMemoryMapping(
256 File file, off_t offset, off_t length, off_t pageSize) {
257 init(std::move(file), offset, length, pageSize, PROT_READ | PROT_WRITE, true);
// Free-function swap for MemoryMapping; forwards to the member a.swap(b).
260 void swap(MemoryMapping& a, MemoryMapping& b) { a.swap(b); }