2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include <sys/types.h>
20 #include <sys/socket.h>
21 #include <glog/logging.h>
22 #include <folly/SocketAddress.h>
23 #include <folly/io/ShutdownSocketSet.h>
24 #include <folly/io/IOBuf.h>
25 #include <folly/io/async/AsyncTimeout.h>
26 #include <folly/io/async/AsyncSocketException.h>
27 #include <folly/io/async/AsyncTransport.h>
28 #include <folly/io/async/EventHandler.h>
29 #include <folly/io/async/DelayedDestruction.h>
37 * A class for performing asynchronous I/O on a socket.
39 * AsyncSocket allows users to asynchronously wait for data on a socket, and
40 * to asynchronously send data.
42 * The APIs for reading and writing are intentionally asymmetric. Waiting for
43 * data to read is a persistent API: a callback is installed, and is notified
44 * whenever new data is available. It continues to be notified of new events
45 * until it is uninstalled.
47 * AsyncSocket does not provide read timeout functionality, because it
48 * typically cannot determine when the timeout should be active. Generally, a
49 * timeout should only be enabled when processing is blocked waiting on data
50 * from the remote endpoint. For server sockets, the timeout should not be
51 * active if the server is currently processing one or more outstanding
52 * requests for this socket. For client sockets, the timeout should not be
53 * active if there are no requests pending on the socket. Additionally, if a
54 * client has multiple pending requests, it will usually want a separate
55 * timeout for each request, rather than a single read timeout.
57 * The write API is fairly intuitive: a user can request to send a block of
58 * data, and a callback will be informed once the entire block has been
59 * transferred to the kernel, or on error. AsyncSocket does provide a send
60 * timeout, since most callers want to give up if the remote end stops
61 * responding and no further progress can be made sending the data.
64 class AsyncSocket : virtual public AsyncTransport {
66 typedef std::unique_ptr<AsyncSocket, Destructor> UniquePtr;
68 class ConnectCallback {
70 virtual ~ConnectCallback() {}
73 * connectSuccess() will be invoked when the connection has been
74 * successfully established.
76 virtual void connectSuccess() noexcept = 0;
79 * connectErr() will be invoked if the connection attempt fails.
81 * @param ex An exception describing the error that occurred.
83 virtual void connectErr(const AsyncSocketException& ex)
89 virtual ~ReadCallback() {}
92 * When data becomes available, getReadBuffer() will be invoked to get the
93 * buffer into which data should be read.
95 * This method allows the ReadCallback to delay buffer allocation until
96 * data becomes available. This allows applications to manage large
97 * numbers of idle connections, without having to maintain a separate read
98 * buffer for each idle connection.
100 * It is possible that in some cases, getReadBuffer() may be called
101 * multiple times before readDataAvailable() is invoked. In this case, the
102 * data will be written to the buffer returned from the most recent call to
103 * getReadBuffer(). If the previous calls to getReadBuffer()
104 * returned different buffers, the ReadCallback is responsible for ensuring
105 * that they are not leaked.
107 * If getReadBuffer() throws an exception, returns a nullptr buffer, or
108 * returns a 0 length, the ReadCallback will be uninstalled and its
109 * readErr() method will be invoked.
111 * getReadBuffer() is not allowed to change the transport state before it
112 * returns. (For example, it should never uninstall the read callback, or
113 * set a different read callback.)
115 * @param bufReturn getReadBuffer() should update *bufReturn to contain the
116 * address of the read buffer. This parameter will never
118 * @param lenReturn getReadBuffer() should update *lenReturn to contain the
119 * maximum number of bytes that may be written to the read
120 * buffer. This parameter will never be nullptr.
122 virtual void getReadBuffer(void** bufReturn, size_t* lenReturn) = 0;
125 * readDataAvailable() will be invoked when data has been successfully read
126 * into the buffer returned by the last call to getReadBuffer().
128 * The read callback remains installed after readDataAvailable() returns.
129 * It must be explicitly uninstalled to stop receiving read events.
130 * getReadBuffer() will be called at least once before each call to
131 * readDataAvailable(). getReadBuffer() will also be called before any
134 * @param len The number of bytes placed in the buffer.
136 virtual void readDataAvailable(size_t len) noexcept = 0;
139 * readEOF() will be invoked when the transport is closed.
141 * The read callback will be automatically uninstalled immediately before
142 * readEOF() is invoked.
144 virtual void readEOF() noexcept = 0;
147 * readErr() will be invoked if an error occurs reading from the
150 * The read callback will be automatically uninstalled immediately before
151 * readErr() is invoked.
153 * @param ex An exception describing the error that occurred.
155 virtual void readErr(const AsyncSocketException& ex)
159 class WriteCallback {
161 virtual ~WriteCallback() {}
164 * writeSuccess() will be invoked when all of the data has been
165 * successfully written.
167 * Note that this mainly signals that the buffer containing the data to
168 * write is no longer needed and may be freed or re-used. It does not
169 * guarantee that the data has been fully transmitted to the remote
170 * endpoint. For example, on socket-based transports, writeSuccess() only
171 * indicates that the data has been given to the kernel for eventual
174 virtual void writeSuccess() noexcept = 0;
177 * writeErr() will be invoked if an error occurs writing the data.
179 * @param bytesWritten The number of bytes that were successfully written.
180 * @param ex An exception describing the error that occurred.
182 virtual void writeErr(size_t bytesWritten,
183 const AsyncSocketException& ex)
187 explicit AsyncSocket();
189 * Create a new unconnected AsyncSocket.
191 * connect() must later be called on this socket to establish a connection.
193 explicit AsyncSocket(EventBase* evb);
195 void setShutdownSocketSet(ShutdownSocketSet* ss);
198 * Create a new AsyncSocket and begin the connection process.
200 * @param evb EventBase that will manage this socket.
201 * @param address The address to connect to.
202 * @param connectTimeout Optional timeout in milliseconds for the connection
205 AsyncSocket(EventBase* evb,
206 const folly::SocketAddress& address,
207 uint32_t connectTimeout = 0);
210 * Create a new AsyncSocket and begin the connection process.
212 * @param evb EventBase that will manage this socket.
213 * @param ip IP address to connect to (dotted-quad).
214 * @param port Destination port in host byte order.
215 * @param connectTimeout Optional timeout in milliseconds for the connection
218 AsyncSocket(EventBase* evb,
219 const std::string& ip,
221 uint32_t connectTimeout = 0);
224 * Create an AsyncSocket from an already connected socket file descriptor.
226 * Note that while AsyncSocket enables TCP_NODELAY for sockets it creates
227 * when connecting, it does not change the socket options when given an
228 * existing file descriptor. If callers want TCP_NODELAY enabled when using
229 * this version of the constructor, they need to explicitly call
230 * setNoDelay(true) after the constructor returns.
232 * @param evb EventBase that will manage this socket.
233 * @param fd File descriptor to take over (should be a connected socket).
235 AsyncSocket(EventBase* evb, int fd);
238 * Helper function to create a shared_ptr<AsyncSocket>.
240 * This passes in the correct destructor object, since AsyncSocket's
241 * destructor is protected and cannot be invoked directly.
243 static std::shared_ptr<AsyncSocket> newSocket(EventBase* evb) {
244 return std::shared_ptr<AsyncSocket>(new AsyncSocket(evb),
249 * Helper function to create a shared_ptr<AsyncSocket>.
251 static std::shared_ptr<AsyncSocket> newSocket(
253 const folly::SocketAddress& address,
254 uint32_t connectTimeout = 0) {
255 return std::shared_ptr<AsyncSocket>(
256 new AsyncSocket(evb, address, connectTimeout),
261 * Helper function to create a shared_ptr<AsyncSocket>.
263 static std::shared_ptr<AsyncSocket> newSocket(
265 const std::string& ip,
267 uint32_t connectTimeout = 0) {
268 return std::shared_ptr<AsyncSocket>(
269 new AsyncSocket(evb, ip, port, connectTimeout),
274 * Helper function to create a shared_ptr<AsyncSocket>.
276 static std::shared_ptr<AsyncSocket> newSocket(EventBase* evb, int fd) {
277 return std::shared_ptr<AsyncSocket>(new AsyncSocket(evb, fd),
282 * Destroy the socket.
284 * AsyncSocket::destroy() must be called to destroy the socket.
285 * The normal destructor is protected, and should not be invoked directly.
286 * This prevents callers from deleting an AsyncSocket while it is invoking a
289 virtual void destroy();
292 * Get the EventBase used by this socket.
294 EventBase* getEventBase() const override {
299 * Get the file descriptor used by the AsyncSocket.
301 virtual int getFd() const {
306 * Extract the file descriptor from the AsyncSocket.
308 * This will immediately cause any installed callbacks to be invoked with an
309 * error. The AsyncSocket may no longer be used after the file descriptor
310 * has been extracted.
312 * Returns the file descriptor. The caller assumes ownership of the
313 * descriptor, and it will not be closed when the AsyncSocket is destroyed.
315 virtual int detachFd();
318 * Uniquely identifies a handle to a socket option value. Each
319 * combination of level and option name corresponds to one socket
324 bool operator<(const OptionKey& other) const {
325 if (level == other.level) {
326 return optname < other.optname;
328 return level < other.level;
330 int apply(int fd, int val) const {
331 return setsockopt(fd, level, optname, &val, sizeof(val));
337 // Maps from a socket option key to its value
338 typedef std::map<OptionKey, int> OptionMap;
340 static const OptionMap emptyOptionMap;
341 static const folly::SocketAddress anyAddress;
344 * Initiate a connection.
346 * @param callback The callback to inform when the connection attempt
348 * @param address The address to connect to.
349 * @param timeout A timeout value, in milliseconds. If the connection
350 * does not succeed within this period,
351 * callback->connectErr() will be invoked.
353 virtual void connect(ConnectCallback* callback,
354 const folly::SocketAddress& address,
356 const OptionMap &options = emptyOptionMap,
357 const folly::SocketAddress& bindAddr = anyAddress
359 void connect(ConnectCallback* callback, const std::string& ip, uint16_t port,
361 const OptionMap &options = emptyOptionMap) noexcept;
364 * Set the send timeout.
366 * If write requests do not make any progress for more than the specified
367 * number of milliseconds, fail all pending writes and close the socket.
369 * If write requests are currently pending when setSendTimeout() is called,
370 * the timeout interval is immediately restarted using the new value.
372 * (See the comments for AsyncSocket for an explanation of why AsyncSocket
373 * provides setSendTimeout() but not setRecvTimeout().)
375 * @param milliseconds The timeout duration, in milliseconds. If 0, no
376 * timeout will be used.
378 void setSendTimeout(uint32_t milliseconds) override;
381 * Get the send timeout.
383 * @return Returns the current send timeout, in milliseconds. A return value
384 * of 0 indicates that no timeout is set.
386 uint32_t getSendTimeout() const override {
391 * Set the maximum number of reads to execute from the underlying
392 * socket each time the EventBase detects that new ingress data is
393 * available. The default is unlimited, but callers can use this method
394 * to limit the amount of data read from the socket per event loop
397 * @param maxReads Maximum number of reads per data-available event;
398 * a value of zero means unlimited.
400 void setMaxReadsPerEvent(uint16_t maxReads) {
401 maxReadsPerEvent_ = maxReads;
405 * Get the maximum number of reads this object will execute from
406 * the underlying socket each time the EventBase detects that new
407 * ingress data is available.
409 * @returns Maximum number of reads per data-available event; a value
410 * of zero means unlimited.
412 uint16_t getMaxReadsPerEvent() const {
413 return maxReadsPerEvent_;
416 // Read and write methods
417 void setReadCB(ReadCallback* callback);
418 ReadCallback* getReadCallback() const;
420 void write(WriteCallback* callback, const void* buf, size_t bytes,
421 WriteFlags flags = WriteFlags::NONE);
422 void writev(WriteCallback* callback, const iovec* vec, size_t count,
423 WriteFlags flags = WriteFlags::NONE);
424 void writeChain(WriteCallback* callback,
425 std::unique_ptr<folly::IOBuf>&& buf,
426 WriteFlags flags = WriteFlags::NONE);
428 // Methods inherited from AsyncTransport
429 void close() override;
430 void closeNow() override;
431 void closeWithReset() override;
432 void shutdownWrite() override;
433 void shutdownWriteNow() override;
435 bool readable() const override;
436 bool isPending() const override;
437 virtual bool hangup() const;
438 bool good() const override;
439 bool error() const override;
440 void attachEventBase(EventBase* eventBase) override;
441 void detachEventBase() override;
442 bool isDetachable() const override;
444 void getLocalAddress(
445 folly::SocketAddress* address) const override;
447 folly::SocketAddress* address) const override;
449 bool isEorTrackingEnabled() const override { return false; }
451 void setEorTracking(bool track) override {}
453 bool connecting() const override {
454 return (state_ == StateEnum::CONNECTING);
457 size_t getAppBytesWritten() const override {
458 return appBytesWritten_;
461 size_t getRawBytesWritten() const override {
462 return getAppBytesWritten();
465 size_t getAppBytesReceived() const override {
466 return appBytesReceived_;
469 size_t getRawBytesReceived() const override {
470 return getAppBytesReceived();
473 // Methods controlling socket options
476 * Force writes to be transmitted immediately.
478 * This controls the TCP_NODELAY socket option. When enabled, TCP segments
479 * are sent as soon as possible, even if it is not a full frame of data.
480 * When disabled, the data may be buffered briefly to try and wait for a full
483 * By default, TCP_NODELAY is enabled for AsyncSocket objects.
485 * This method will fail if the socket is not currently open.
487 * @return Returns 0 if the TCP_NODELAY flag was successfully updated,
488 * or a non-zero errno value on error.
490 int setNoDelay(bool noDelay);
493 * Set the Flavor of Congestion Control to be used for this Socket
494 * Please check '/lib/modules/<kernel>/kernel/net/ipv4' for tcp_*.ko
495 * first to make sure the module is available for plugging in
496 * Alternatively you can choose from net.ipv4.tcp_allowed_congestion_control
498 int setCongestionFlavor(const std::string &cname);
501 * Forces ACKs to be sent immediately
503 * @return Returns 0 if the TCP_QUICKACK flag was successfully updated,
504 * or a non-zero errno value on error.
506 int setQuickAck(bool quickack);
509 * Set the send bufsize
511 int setSendBufSize(size_t bufsize);
514 * Set the recv bufsize
516 int setRecvBufSize(size_t bufsize);
519 * Sets a specific tcp personality
520 * Available only on kernels 3.2 and greater
522 #define SO_SET_NAMESPACE 41
523 int setTCPProfile(int profd);
527 * Generic API for reading a socket option.
529 * @param level same as the "level" parameter in getsockopt().
530 * @param optname same as the "optname" parameter in getsockopt().
531 * @param optval pointer to the variable in which the option value should
533 * @return same as the return value of getsockopt().
535 template <typename T>
536 int getSockOpt(int level, int optname, T *optval) {
537 return getsockopt(fd_, level, optname, optval, sizeof(T));
541 * Generic API for setting a socket option.
543 * @param level same as the "level" parameter in setsockopt().
544 * @param optname same as the "optname" parameter in setsockopt().
545 * @param optval the option value to set.
546 * @return same as the return value of setsockopt().
548 template <typename T>
549 int setSockOpt(int level, int optname, const T *optval) {
550 return setsockopt(fd_, level, optname, optval, sizeof(T));
553 enum class StateEnum : uint8_t {
562 enum ReadResultEnum {
569 * Protected destructor.
571 * Users of AsyncSocket must never delete it directly. Instead, invoke
572 * destroy(). (See the documentation in DelayedDestruction.h for
577 friend std::ostream& operator << (std::ostream& os, const StateEnum& state);
580 /// shutdownWrite() called, but we are still waiting on writes to drain
581 SHUT_WRITE_PENDING = 0x01,
582 /// writes have been completely shut down
585 * Reads have been shutdown.
587 * At the moment we don't distinguish between remote read shutdown
588 * (received EOF from the remote end) and local read shutdown. We can
589 * only receive EOF when a read callback is set, and we immediately inform
590 * it of the EOF. Therefore there doesn't seem to be any reason to have a
591 * separate state of "received EOF but the local side may still want to
594 * We also don't currently provide any API for only shutting down the read
595 * side of a socket. (This is a no-op as far as TCP is concerned, anyway.)
602 class WriteTimeout : public AsyncTimeout {
604 WriteTimeout(AsyncSocket* socket, EventBase* eventBase)
605 : AsyncTimeout(eventBase)
608 virtual void timeoutExpired() noexcept {
609 socket_->timeoutExpired();
613 AsyncSocket* socket_;
616 class IoHandler : public EventHandler {
618 IoHandler(AsyncSocket* socket, EventBase* eventBase)
619 : EventHandler(eventBase, -1)
621 IoHandler(AsyncSocket* socket, EventBase* eventBase, int fd)
622 : EventHandler(eventBase, fd)
625 virtual void handlerReady(uint16_t events) noexcept {
626 socket_->ioReady(events);
630 AsyncSocket* socket_;
635 // event notification methods
636 void ioReady(uint16_t events) noexcept;
637 virtual void checkForImmediateRead() noexcept;
638 virtual void handleInitialReadWrite() noexcept;
639 virtual void handleRead() noexcept;
640 virtual void handleWrite() noexcept;
641 virtual void handleConnect() noexcept;
642 void timeoutExpired() noexcept;
645 * Attempt to read from the socket.
647 * @param buf The buffer to read data into.
648 * @param buflen The length of the buffer.
650 * @return Returns the number of bytes read, or READ_EOF on EOF, or
651 * READ_ERROR on error, or READ_BLOCKING if the operation will
654 virtual ssize_t performRead(void* buf, size_t buflen);
657 * Populate an iovec array from an IOBuf and attempt to write it.
659 * @param callback Write completion/error callback.
660 * @param vec Target iovec array; caller retains ownership.
661 * @param count Number of IOBufs to write, beginning at start of buf.
662 * @param buf Chain of IOBufs.
663 * @param flags set of flags for the underlying write calls, like cork
665 void writeChainImpl(WriteCallback* callback, iovec* vec,
666 size_t count, std::unique_ptr<folly::IOBuf>&& buf,
670 * Write as much data as possible to the socket without blocking,
671 * and queue up any leftover data to send when the socket can
672 * handle writes again.
674 * @param callback The callback to invoke when the write is completed.
675 * @param vec Array of buffers to write; this method will make a
676 * copy of the vector (but not the buffers themselves)
677 * if the write has to be completed asynchronously.
678 * @param count Number of elements in vec.
679 * @param buf The IOBuf that manages the buffers referenced by
680 * vec, or a pointer to nullptr if the buffers are not
681 * associated with an IOBuf. Note that ownership of
682 * the IOBuf is transferred here; upon completion of
683 * the write, the AsyncSocket deletes the IOBuf.
684 * @param flags Set of write flags.
686 void writeImpl(WriteCallback* callback, const iovec* vec, size_t count,
687 std::unique_ptr<folly::IOBuf>&& buf,
688 WriteFlags flags = WriteFlags::NONE);
691 * Attempt to write to the socket.
693 * @param vec The iovec array pointing to the buffers to write.
694 * @param count The length of the iovec array.
695 * @param flags Set of write flags.
696 * @param countWritten On return, the value pointed to by this parameter
697 * will contain the number of iovec entries that were
699 * @param partialWritten On return, the value pointed to by this parameter
700 * will contain the number of bytes written in the
701 * partially written iovec entry.
703 * @return Returns the total number of bytes written, or -1 on error. If no
704 * data can be written immediately, 0 is returned.
706 virtual ssize_t performWrite(const iovec* vec, uint32_t count,
707 WriteFlags flags, uint32_t* countWritten,
708 uint32_t* partialWritten);
710 bool updateEventRegistration();
713 * Update event registration.
715 * @param enable Flags of events to enable. Set it to 0 if no events
716 * need to be enabled in this call.
717 * @param disable Flags of events
718 * to disable. Set it to 0 if no events need to be disabled in this
721 * @return true iff the update is successful.
723 bool updateEventRegistration(uint16_t enable, uint16_t disable);
725 // Actually close the file descriptor and set it to -1 so we don't
726 // accidentally close it again.
729 // error handling methods
732 void fail(const char* fn, const AsyncSocketException& ex);
733 void failConnect(const char* fn, const AsyncSocketException& ex);
734 void failRead(const char* fn, const AsyncSocketException& ex);
735 void failWrite(const char* fn, WriteCallback* callback, size_t bytesWritten,
736 const AsyncSocketException& ex);
737 void failWrite(const char* fn, const AsyncSocketException& ex);
738 void failAllWrites(const AsyncSocketException& ex);
739 void invalidState(ConnectCallback* callback);
740 void invalidState(ReadCallback* callback);
741 void invalidState(WriteCallback* callback);
743 std::string withAddr(const std::string& s);
745 StateEnum state_; ///< StateEnum describing current state
746 uint8_t shutdownFlags_; ///< Shutdown state (ShutdownFlags)
747 uint16_t eventFlags_; ///< EventBase::HandlerFlags settings
748 int fd_; ///< The socket file descriptor
750 folly::SocketAddress addr_; ///< The address we tried to connect to
751 uint32_t sendTimeout_; ///< The send timeout, in milliseconds
752 uint16_t maxReadsPerEvent_; ///< Max reads per event loop iteration
753 EventBase* eventBase_; ///< The EventBase
754 WriteTimeout writeTimeout_; ///< A timeout for connect and write
755 IoHandler ioHandler_; ///< A EventHandler to monitor the fd
757 ConnectCallback* connectCallback_; ///< ConnectCallback
758 ReadCallback* readCallback_; ///< ReadCallback
759 WriteRequest* writeReqHead_; ///< Chain of WriteRequests
760 WriteRequest* writeReqTail_; ///< End of WriteRequest chain
761 ShutdownSocketSet* shutdownSocketSet_;
762 size_t appBytesReceived_; ///< Num of bytes received from socket
763 size_t appBytesWritten_; ///< Num of bytes written to socket