2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <folly/Optional.h>
20 #include <folly/SocketAddress.h>
21 #include <folly/io/IOBuf.h>
22 #include <folly/io/ShutdownSocketSet.h>
23 #include <folly/io/async/AsyncSocketException.h>
24 #include <folly/io/async/AsyncTimeout.h>
25 #include <folly/io/async/AsyncTransport.h>
26 #include <folly/io/async/DelayedDestruction.h>
27 #include <folly/io/async/EventHandler.h>
28 #include <folly/portability/Sockets.h>
30 #include <sys/types.h>
39 * A class for performing asynchronous I/O on a socket.
41 * AsyncSocket allows users to asynchronously wait for data on a socket, and
42 * to asynchronously send data.
44 * The APIs for reading and writing are intentionally asymmetric. Waiting for
45 * data to read is a persistent API: a callback is installed, and is notified
46 * whenever new data is available. It continues to be notified of new events
47 * until it is uninstalled.
49 * AsyncSocket does not provide read timeout functionality, because it
50 * typically cannot determine when the timeout should be active. Generally, a
51 * timeout should only be enabled when processing is blocked waiting on data
52 * from the remote endpoint. For server sockets, the timeout should not be
53 * active if the server is currently processing one or more outstanding
54 * requests for this socket. For client sockets, the timeout should not be
55 * active if there are no requests pending on the socket. Additionally, if a
56 * client has multiple pending requests, it will usually want a separate
57 * timeout for each request, rather than a single read timeout.
59 * The write API is fairly intuitive: a user can request to send a block of
60 * data, and a callback will be informed once the entire block has been
61 * transferred to the kernel, or on error. AsyncSocket does provide a send
62 * timeout, since most callers want to give up if the remote end stops
63 * responding and no further progress can be made sending the data.
67 // We do a dynamic_cast on this, in
68 // AsyncTransportWrapper::getUnderlyingTransport so be safe and
69 // force displacements for it. See:
70 // https://msdn.microsoft.com/en-us/library/7sf3txa8.aspx
71 #pragma vtordisp(push, 2)
73 class AsyncSocket : virtual public AsyncTransportWrapper {
75 typedef std::unique_ptr<AsyncSocket, Destructor> UniquePtr;
77 class ConnectCallback {
79 virtual ~ConnectCallback() = default;
82 * connectSuccess() will be invoked when the connection has been
83 * successfully established.
85 virtual void connectSuccess() noexcept = 0;
88 * connectErr() will be invoked if the connection attempt fails.
90 * @param ex An exception describing the error that occurred.
92 virtual void connectErr(const AsyncSocketException& ex)
96 explicit AsyncSocket();
98 * Create a new unconnected AsyncSocket.
100 * connect() must later be called on this socket to establish a connection.
102 explicit AsyncSocket(EventBase* evb);
104 void setShutdownSocketSet(ShutdownSocketSet* ss);
107 * Create a new AsyncSocket and begin the connection process.
109 * @param evb EventBase that will manage this socket.
110 * @param address The address to connect to.
111 * @param connectTimeout Optional timeout in milliseconds for the connection
114 AsyncSocket(EventBase* evb,
115 const folly::SocketAddress& address,
116 uint32_t connectTimeout = 0);
119 * Create a new AsyncSocket and begin the connection process.
121 * @param evb EventBase that will manage this socket.
122 * @param ip IP address to connect to (dotted-quad).
123 * @param port Destination port in host byte order.
124 * @param connectTimeout Optional timeout in milliseconds for the connection
127 AsyncSocket(EventBase* evb,
128 const std::string& ip,
130 uint32_t connectTimeout = 0);
133 * Create an AsyncSocket from an already connected socket file descriptor.
135 * Note that while AsyncSocket enables TCP_NODELAY for sockets it creates
136 * when connecting, it does not change the socket options when given an
137 * existing file descriptor. If callers want TCP_NODELAY enabled when using
138 * this version of the constructor, they need to explicitly call
139 * setNoDelay(true) after the constructor returns.
141 * @param evb EventBase that will manage this socket.
142 * @param fd File descriptor to take over (should be a connected socket).
144 AsyncSocket(EventBase* evb, int fd);
147 * Helper function to create a shared_ptr<AsyncSocket>.
149 * This passes in the correct destructor object, since AsyncSocket's
150 * destructor is protected and cannot be invoked directly.
152 static std::shared_ptr<AsyncSocket> newSocket(EventBase* evb) {
153 return std::shared_ptr<AsyncSocket>(new AsyncSocket(evb),
158 * Helper function to create a shared_ptr<AsyncSocket>.
160 static std::shared_ptr<AsyncSocket> newSocket(
162 const folly::SocketAddress& address,
163 uint32_t connectTimeout = 0) {
164 return std::shared_ptr<AsyncSocket>(
165 new AsyncSocket(evb, address, connectTimeout),
170 * Helper function to create a shared_ptr<AsyncSocket>.
172 static std::shared_ptr<AsyncSocket> newSocket(
174 const std::string& ip,
176 uint32_t connectTimeout = 0) {
177 return std::shared_ptr<AsyncSocket>(
178 new AsyncSocket(evb, ip, port, connectTimeout),
183 * Helper function to create a shared_ptr<AsyncSocket>.
185 static std::shared_ptr<AsyncSocket> newSocket(EventBase* evb, int fd) {
186 return std::shared_ptr<AsyncSocket>(new AsyncSocket(evb, fd),
191 * Destroy the socket.
193 * AsyncSocket::destroy() must be called to destroy the socket.
194 * The normal destructor is protected, and should not be invoked directly.
195 * This prevents callers from deleting an AsyncSocket while it is invoking a
198 virtual void destroy() override;
201 * Get the EventBase used by this socket.
203 EventBase* getEventBase() const override {
208 * Get the file descriptor used by the AsyncSocket.
210 virtual int getFd() const {
215 * Extract the file descriptor from the AsyncSocket.
217 * This will immediately cause any installed callbacks to be invoked with an
218 * error. The AsyncSocket may no longer be used after the file descriptor
219 * has been extracted.
221 * Returns the file descriptor. The caller assumes ownership of the
222 * descriptor, and it will not be closed when the AsyncSocket is destroyed.
224 virtual int detachFd();
227 * Uniquely identifies a handle to a socket option value. Each
228 * combination of level and option name corresponds to one socket
233 bool operator<(const OptionKey& other) const {
234 if (level == other.level) {
235 return optname < other.optname;
237 return level < other.level;
239 int apply(int fd, int val) const {
240 return setsockopt(fd, level, optname, &val, sizeof(val));
246 // Maps from a socket option key to its value
247 typedef std::map<OptionKey, int> OptionMap;
249 static const OptionMap emptyOptionMap;
250 static const folly::SocketAddress& anyAddress();
253 * Initiate a connection.
255 * @param callback The callback to inform when the connection attempt
257 * @param address The address to connect to.
258 * @param timeout A timeout value, in milliseconds. If the connection
259 * does not succeed within this period,
260 * callback->connectErr() will be invoked.
262 virtual void connect(
263 ConnectCallback* callback,
264 const folly::SocketAddress& address,
266 const OptionMap& options = emptyOptionMap,
267 const folly::SocketAddress& bindAddr = anyAddress()) noexcept;
270 ConnectCallback* callback,
271 const std::string& ip,
274 const OptionMap& options = emptyOptionMap) noexcept;
277 * If a connect request is in-flight, cancels it and closes the socket
278 * immediately. Otherwise, this is a no-op.
280 * This does not invoke any connection related callbacks. Call this to
281 * prevent any connect callback while cleaning up, etc.
283 void cancelConnect();
286 * Set the send timeout.
288 * If write requests do not make any progress for more than the specified
289 * number of milliseconds, fail all pending writes and close the socket.
291 * If write requests are currently pending when setSendTimeout() is called,
292 * the timeout interval is immediately restarted using the new value.
294 * (See the comments for AsyncSocket for an explanation of why AsyncSocket
295 * provides setSendTimeout() but not setRecvTimeout().)
297 * @param milliseconds The timeout duration, in milliseconds. If 0, no
298 * timeout will be used.
300 void setSendTimeout(uint32_t milliseconds) override;
303 * Get the send timeout.
305 * @return Returns the current send timeout, in milliseconds. A return value
306 * of 0 indicates that no timeout is set.
308 uint32_t getSendTimeout() const override {
313 * Set the maximum number of reads to execute from the underlying
314 * socket each time the EventBase detects that new ingress data is
315 * available. The default is unlimited, but callers can use this method
316 * to limit the amount of data read from the socket per event loop
319 * @param maxReads Maximum number of reads per data-available event;
320 * a value of zero means unlimited.
322 void setMaxReadsPerEvent(uint16_t maxReads) {
323 maxReadsPerEvent_ = maxReads;
327 * Get the maximum number of reads this object will execute from
328 * the underlying socket each time the EventBase detects that new
329 * ingress data is available.
331 * @returns Maximum number of reads per data-available event; a value
332 * of zero means unlimited.
334 uint16_t getMaxReadsPerEvent() const {
335 return maxReadsPerEvent_;
338 // Read and write methods
339 void setReadCB(ReadCallback* callback) override;
340 ReadCallback* getReadCallback() const override;
342 void write(WriteCallback* callback, const void* buf, size_t bytes,
343 WriteFlags flags = WriteFlags::NONE) override;
344 void writev(WriteCallback* callback, const iovec* vec, size_t count,
345 WriteFlags flags = WriteFlags::NONE) override;
346 void writeChain(WriteCallback* callback,
347 std::unique_ptr<folly::IOBuf>&& buf,
348 WriteFlags flags = WriteFlags::NONE) override;
351 virtual void writeRequest(WriteRequest* req);
352 void writeRequestReady() {
356 // Methods inherited from AsyncTransport
357 void close() override;
358 void closeNow() override;
359 void closeWithReset() override;
360 void shutdownWrite() override;
361 void shutdownWriteNow() override;
363 bool readable() const override;
364 bool isPending() const override;
365 virtual bool hangup() const;
366 bool good() const override;
367 bool error() const override;
368 void attachEventBase(EventBase* eventBase) override;
369 void detachEventBase() override;
370 bool isDetachable() const override;
372 void getLocalAddress(
373 folly::SocketAddress* address) const override;
375 folly::SocketAddress* address) const override;
377 bool isEorTrackingEnabled() const override { return false; }
379 void setEorTracking(bool /*track*/) override {}
381 bool connecting() const override {
382 return (state_ == StateEnum::CONNECTING);
385 virtual bool isClosedByPeer() const {
386 return (state_ == StateEnum::CLOSED &&
387 (readErr_ == READ_EOF || readErr_ == READ_ERROR));
390 virtual bool isClosedBySelf() const {
391 return (state_ == StateEnum::CLOSED &&
392 (readErr_ != READ_EOF && readErr_ != READ_ERROR));
395 size_t getAppBytesWritten() const override {
396 return appBytesWritten_;
399 size_t getRawBytesWritten() const override {
400 return getAppBytesWritten();
403 size_t getAppBytesReceived() const override {
404 return appBytesReceived_;
407 size_t getRawBytesReceived() const override {
408 return getAppBytesReceived();
411 std::chrono::nanoseconds getConnectTime() const {
412 return connectEndTime_ - connectStartTime_;
415 std::chrono::milliseconds getConnectTimeout() const {
416 return connectTimeout_;
419 // Methods controlling socket options
422 * Force writes to be transmitted immediately.
424 * This controls the TCP_NODELAY socket option. When enabled, TCP segments
425 * are sent as soon as possible, even if it is not a full frame of data.
426 * When disabled, the data may be buffered briefly to try and wait for a full
429 * By default, TCP_NODELAY is enabled for AsyncSocket objects.
431 * This method will fail if the socket is not currently open.
433 * @return Returns 0 if the TCP_NODELAY flag was successfully updated,
434 * or a non-zero errno value on error.
436 int setNoDelay(bool noDelay);
440 * Set the FD_CLOEXEC flag so that the socket will be closed if the program
441 * later forks and execs.
443 void setCloseOnExec();
446 * Set the Flavor of Congestion Control to be used for this Socket
447 * Please check '/lib/modules/<kernel>/kernel/net/ipv4' for tcp_*.ko
448 * first to make sure the module is available for plugging in
449 * Alternatively you can choose from net.ipv4.tcp_allowed_congestion_control
451 int setCongestionFlavor(const std::string &cname);
454 * Forces ACKs to be sent immediately
456 * @return Returns 0 if the TCP_QUICKACK flag was successfully updated,
457 * or a non-zero errno value on error.
459 int setQuickAck(bool quickack);
462 * Set the send bufsize
464 int setSendBufSize(size_t bufsize);
467 * Set the recv bufsize
469 int setRecvBufSize(size_t bufsize);
472 * Sets a specific tcp personality
473 * Available only on kernels 3.2 and greater
475 #define SO_SET_NAMESPACE 41
476 int setTCPProfile(int profd);
479 * Generic API for reading a socket option.
481 * @param level same as the "level" parameter in getsockopt().
482 * @param optname same as the "optname" parameter in getsockopt().
483 * @param optval pointer to the variable in which the option value should
485 * @param optlen value-result argument, initially containing the size of
486 * the buffer pointed to by optval, and modified on return
487 * to indicate the actual size of the value returned.
488 * @return same as the return value of getsockopt().
490 template <typename T>
491 int getSockOpt(int level, int optname, T* optval, socklen_t* optlen) {
492 return getsockopt(fd_, level, optname, (void*) optval, optlen);
496 * Generic API for setting a socket option.
498 * @param level same as the "level" parameter in setsockopt().
499 * @param optname same as the "optname" parameter in setsockopt().
500 * @param optval the option value to set.
501 * @return same as the return value of setsockopt().
503 template <typename T>
504 int setSockOpt(int level, int optname, const T *optval) {
505 return setsockopt(fd_, level, optname, optval, sizeof(T));
508 virtual void setPeek(bool peek) {
512 enum class StateEnum : uint8_t {
520 void setBufferCallback(BufferCallback* cb);
523 * writeReturn is the total number of bytes written, or WRITE_ERROR on error.
524 * If no data has been written, 0 is returned.
525 * exception is a more specific exception that may have caused a write error.
526 * Not all writes have exceptions associated with them thus writeReturn
527 * should be checked to determine whether the operation resulted in an error.
530 explicit WriteResult(ssize_t ret) : writeReturn(ret) {}
532 WriteResult(ssize_t ret, std::unique_ptr<const AsyncSocketException> e)
533 : writeReturn(ret), exception(std::move(e)) {}
536 std::unique_ptr<const AsyncSocketException> exception;
540 * readReturn is the number of bytes read, or READ_EOF on EOF, or
541 * READ_ERROR on error, or READ_BLOCKING if the operation will
543 * exception is a more specific exception that may have caused a read error.
544 * Not all read errors have exceptions associated with them thus readReturn
545 * should be checked to determine whether the operation resulted in an error.
548 explicit ReadResult(ssize_t ret) : readReturn(ret) {}
550 ReadResult(ssize_t ret, std::unique_ptr<const AsyncSocketException> e)
551 : readReturn(ret), exception(std::move(e)) {}
554 std::unique_ptr<const AsyncSocketException> exception;
558 * A WriteRequest object tracks information about a pending write operation.
562 WriteRequest(AsyncSocket* socket, WriteCallback* callback) :
563 socket_(socket), callback_(callback) {}
565 virtual void start() {};
567 virtual void destroy() = 0;
569 virtual WriteResult performWrite() = 0;
571 virtual void consume() = 0;
573 virtual bool isComplete() = 0;
575 WriteRequest* getNext() const {
579 WriteCallback* getCallback() const {
583 uint32_t getTotalBytesWritten() const {
584 return totalBytesWritten_;
587 void append(WriteRequest* next) {
588 assert(next_ == nullptr);
592 void fail(const char* fn, const AsyncSocketException& ex) {
593 socket_->failWrite(fn, ex);
596 void bytesWritten(size_t count) {
597 totalBytesWritten_ += count;
598 socket_->appBytesWritten_ += count;
602 // protected destructor, to ensure callers use destroy()
603 virtual ~WriteRequest() {}
605 AsyncSocket* socket_; ///< parent socket
606 WriteRequest* next_{nullptr}; ///< pointer to next WriteRequest
607 WriteCallback* callback_; ///< completion callback
608 uint32_t totalBytesWritten_{0}; ///< total bytes written
612 enum ReadResultEnum {
619 enum WriteResultEnum {
624 * Protected destructor.
626 * Users of AsyncSocket must never delete it directly; invoke
627 * destroy() instead. (See the documentation in DelayedDestruction.h for
632 friend std::ostream& operator << (std::ostream& os, const StateEnum& state);
635 /// shutdownWrite() called, but we are still waiting on writes to drain
636 SHUT_WRITE_PENDING = 0x01,
637 /// writes have been completely shut down
640 * Reads have been shutdown.
642 * At the moment we don't distinguish between remote read shutdown
643 * (received EOF from the remote end) and local read shutdown. We can
644 * only receive EOF when a read callback is set, and we immediately inform
645 * it of the EOF. Therefore there doesn't seem to be any reason to have a
646 * separate state of "received EOF but the local side may still want to
649 * We also don't currently provide any API for only shutting down the read
650 * side of a socket. (This is a no-op as far as TCP is concerned, anyway.)
655 class BytesWriteRequest;
657 class WriteTimeout : public AsyncTimeout {
659 WriteTimeout(AsyncSocket* socket, EventBase* eventBase)
660 : AsyncTimeout(eventBase)
663 virtual void timeoutExpired() noexcept {
664 socket_->timeoutExpired();
668 AsyncSocket* socket_;
671 class IoHandler : public EventHandler {
673 IoHandler(AsyncSocket* socket, EventBase* eventBase)
674 : EventHandler(eventBase, -1)
676 IoHandler(AsyncSocket* socket, EventBase* eventBase, int fd)
677 : EventHandler(eventBase, fd)
680 virtual void handlerReady(uint16_t events) noexcept {
681 socket_->ioReady(events);
685 AsyncSocket* socket_;
690 class ImmediateReadCB : public folly::EventBase::LoopCallback {
692 explicit ImmediateReadCB(AsyncSocket* socket) : socket_(socket) {}
693 void runLoopCallback() noexcept override {
694 DestructorGuard dg(socket_);
695 socket_->checkForImmediateRead();
698 AsyncSocket* socket_;
702 * Schedule checkForImmediateRead to be executed in the next loop
705 void scheduleImmediateRead() noexcept {
707 eventBase_->runInLoop(&immediateReadHandler_);
711 // event notification methods
712 void ioReady(uint16_t events) noexcept;
713 virtual void checkForImmediateRead() noexcept;
714 virtual void handleInitialReadWrite() noexcept;
715 virtual void prepareReadBuffer(void** buf, size_t* buflen) noexcept;
716 virtual void handleRead() noexcept;
717 virtual void handleWrite() noexcept;
718 virtual void handleConnect() noexcept;
719 void timeoutExpired() noexcept;
722 * Attempt to read from the socket.
724 * @param buf The buffer to read data into.
725 * @param buflen The length of the buffer.
727 * @return Returns a read result. See read result for details.
729 virtual ReadResult performRead(void** buf, size_t* buflen, size_t* offset);
732 * Populate an iovec array from an IOBuf and attempt to write it.
734 * @param callback Write completion/error callback.
735 * @param vec Target iovec array; caller retains ownership.
736 * @param count Number of IOBufs to write, beginning at start of buf.
737 * @param buf Chain of IOBufs to write.
738 * @param flags set of flags for the underlying write calls, like cork
740 void writeChainImpl(WriteCallback* callback, iovec* vec,
741 size_t count, std::unique_ptr<folly::IOBuf>&& buf,
745 * Write as much data as possible to the socket without blocking,
746 * and queue up any leftover data to send when the socket can
747 * handle writes again.
749 * @param callback The callback to invoke when the write is completed.
750 * @param vec Array of buffers to write; this method will make a
751 * copy of the vector (but not the buffers themselves)
752 * if the write has to be completed asynchronously.
753 * @param count Number of elements in vec.
754 * @param buf The IOBuf that manages the buffers referenced by
755 * vec, or a pointer to nullptr if the buffers are not
756 * associated with an IOBuf. Note that ownership of
757 * the IOBuf is transferred here; upon completion of
758 * the write, the AsyncSocket deletes the IOBuf.
759 * @param flags Set of write flags.
761 void writeImpl(WriteCallback* callback, const iovec* vec, size_t count,
762 std::unique_ptr<folly::IOBuf>&& buf,
763 WriteFlags flags = WriteFlags::NONE);
766 * Attempt to write to the socket.
768 * @param vec The iovec array pointing to the buffers to write.
769 * @param count The length of the iovec array.
770 * @param flags Set of write flags.
771 * @param countWritten On return, the value pointed to by this parameter
772 * will contain the number of iovec entries that were
774 * @param partialWritten On return, the value pointed to by this parameter
775 * will contain the number of bytes written in the
776 * partially written iovec entry.
778 * @return Returns a WriteResult. See WriteResult for more details.
780 virtual WriteResult performWrite(
784 uint32_t* countWritten,
785 uint32_t* partialWritten);
787 bool updateEventRegistration();
790 * Update event registration.
792 * @param enable Flags of events to enable. Set it to 0 if no events
793 * need to be enabled in this call.
794 * @param disable Flags of events
795 * to disable. Set it to 0 if no events need to be disabled in this
798 * @return true iff the update is successful.
800 bool updateEventRegistration(uint16_t enable, uint16_t disable);
802 // Actually close the file descriptor and set it to -1 so we don't
803 // accidentally close it again.
806 // error handling methods
809 void fail(const char* fn, const AsyncSocketException& ex);
810 void failConnect(const char* fn, const AsyncSocketException& ex);
811 void failRead(const char* fn, const AsyncSocketException& ex);
812 void failWrite(const char* fn, WriteCallback* callback, size_t bytesWritten,
813 const AsyncSocketException& ex);
814 void failWrite(const char* fn, const AsyncSocketException& ex);
815 void failAllWrites(const AsyncSocketException& ex);
816 void invokeConnectErr(const AsyncSocketException& ex);
817 void invokeConnectSuccess();
818 void invalidState(ConnectCallback* callback);
819 void invalidState(ReadCallback* callback);
820 void invalidState(WriteCallback* callback);
822 std::string withAddr(const std::string& s);
824 StateEnum state_; ///< StateEnum describing current state
825 uint8_t shutdownFlags_; ///< Shutdown state (ShutdownFlags)
826 uint16_t eventFlags_; ///< EventBase::HandlerFlags settings
827 int fd_; ///< The socket file descriptor
828 mutable folly::SocketAddress addr_; ///< The address we tried to connect to
829 mutable folly::SocketAddress localAddr_;
830 ///< The address we are connecting from
831 uint32_t sendTimeout_; ///< The send timeout, in milliseconds
832 uint16_t maxReadsPerEvent_; ///< Max reads per event loop iteration
833 EventBase* eventBase_; ///< The EventBase
834 WriteTimeout writeTimeout_; ///< A timeout for connect and write
835 IoHandler ioHandler_; ///< A EventHandler to monitor the fd
836 ImmediateReadCB immediateReadHandler_; ///< LoopCallback for checking read
838 ConnectCallback* connectCallback_; ///< ConnectCallback
839 ReadCallback* readCallback_; ///< ReadCallback
840 WriteRequest* writeReqHead_; ///< Chain of WriteRequests
841 WriteRequest* writeReqTail_; ///< End of WriteRequest chain
842 ShutdownSocketSet* shutdownSocketSet_;
843 size_t appBytesReceived_; ///< Num of bytes received from socket
844 size_t appBytesWritten_; ///< Num of bytes written to socket
845 bool isBufferMovable_{false};
847 bool peek_{false}; // Peek bytes.
849 int8_t readErr_{READ_NO_ERROR}; ///< The read error encountered, if any.
851 std::chrono::steady_clock::time_point connectStartTime_;
852 std::chrono::steady_clock::time_point connectEndTime_;
854 std::chrono::milliseconds connectTimeout_{0};
856 BufferCallback* bufferCallback_{nullptr};
859 #pragma vtordisp(pop)