2 * NET4: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid a huge number
38 * of socks hashed (this is for unix_gc()
39 * performance reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
53 * Known differences from reference BSD that was tested:
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and gives the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 #include <linux/freezer.h>
/*
 * Global table of AF_UNIX sockets.  It holds 2 * UNIX_HASH_SIZE chains;
 * unix_sockets_unbound() hands out chains from the upper half
 * (index >= UNIX_HASH_SIZE).  The table and the spinlock defined with it
 * are both exported GPL-only.
 */
121 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
122 EXPORT_SYMBOL_GPL(unix_socket_table);
123 DEFINE_SPINLOCK(unix_table_lock);
124 EXPORT_SYMBOL_GPL(unix_table_lock);
/* Count of unix sockets: bumped in unix_create1(), dropped in unix_sock_destructor(). */
125 static atomic_long_t unix_nr_socks;
/*
 * Return the hash chain used for a socket that has no bound name.  The key
 * is the numeric value of @addr, reduced modulo UNIX_HASH_SIZE and offset by
 * UNIX_HASH_SIZE, so the chain always comes from the upper half of
 * unix_socket_table.  unix_create1() passes the sock pointer itself as @addr.
 * NOTE(review): part of the body is missing from this listing; any extra
 * mixing of the hash done there is not described here.
 */
128 static struct hlist_head *unix_sockets_unbound(void *addr)
130 unsigned long hash = (unsigned long)addr;
134 hash %= UNIX_HASH_SIZE;
135 return &unix_socket_table[UNIX_HASH_SIZE + hash];
/*
 * True when the socket's bound address hash is below UNIX_HASH_SIZE.
 * unix_bind() stores UNIX_HASH_SIZE in addr->hash for names it creates
 * through unix_mknod(), so those sockets fail this test.  The macro
 * dereferences unix_sk(sk)->addr without a NULL check.
 */
138 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
/*
 * Security-id plumbing between an scm_cookie and the per-skb UNIXCB() area.
 * With CONFIG_SECURITY_NETWORK:
 *   unix_get_secdata() stores scm->secid into the skb,
 *   unix_set_secdata() loads the skb's secid back into scm,
 *   unix_secdata_eq()  reports whether the two secids match.
 * The second trio of definitions is the alternative variant.
 * NOTE(review): the preprocessor branch line and the bodies of that second
 * trio are not visible in this listing.
 */
140 #ifdef CONFIG_SECURITY_NETWORK
141 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
143 UNIXCB(skb).secid = scm->secid;
146 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
148 scm->secid = UNIXCB(skb).secid;
151 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
153 return (scm->secid == UNIXCB(skb).secid);
156 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
159 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
162 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
166 #endif /* CONFIG_SECURITY_NETWORK */
169 * SMP locking strategy:
170 * hash table is protected with spinlock unix_table_lock
171 * each socket state is protected by separate spin lock.
/*
 * Turn a partial checksum into a table index: fold it with csum_fold() and
 * mask the result with UNIX_HASH_SIZE - 1.
 * NOTE(review): the masking only yields a uniform index if UNIX_HASH_SIZE is
 * a power of two - confirm.  Some lines of the body are missing from this
 * listing.
 */
174 static inline unsigned int unix_hash_fold(__wsum n)
176 unsigned int hash = (__force unsigned int)csum_fold(n);
179 return hash&(UNIX_HASH_SIZE-1);
/* Accessor for a unix socket's peer pointer; also used as an assignment target. */
182 #define unix_peer(sk) (unix_sk(sk)->peer)
/* Non-zero when @osk's peer pointer refers to @sk. */
184 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
186 return unix_peer(osk) == sk;
/*
 * Non-zero when @sk is allowed to send to @osk: either @osk has no peer at
 * all, or its peer is @sk.
 */
189 static inline int unix_may_send(struct sock *sk, struct sock *osk)
191 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
/*
 * Non-zero when the number of skbs on @sk's receive queue is strictly
 * greater than sk_max_ack_backlog.
 */
194 static inline int unix_recvq_full(struct sock const *sk)
196 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
/*
 * Exported (GPL-only) helper that looks up the peer of @s; unix_getname()
 * uses it for the "peer" query.
 * NOTE(review): only the unix_state_unlock(s) call survives in this listing.
 * Presumably the peer is read under the state lock and returned with a
 * reference held - confirm against the full source.
 */
199 struct sock *unix_peer_get(struct sock *s)
207 unix_state_unlock(s);
210 EXPORT_SYMBOL_GPL(unix_peer_get);
/*
 * Drop one reference on @addr.
 * NOTE(review): the statement run when the count reaches zero is missing
 * from this listing (presumably it frees @addr - confirm).
 */
212 static inline void unix_release_addr(struct unix_address *addr)
214 if (atomic_dec_and_test(&addr->refcnt))
219 * Check unix socket name:
220 * - should not be zero length.
221 * - if it starts with a non-zero byte, it should be NUL terminated (FS object)
222 * - if it starts with a zero byte, it is an abstract name.
/*
 * Validate a caller-supplied AF_UNIX address and compute its hash.
 *
 * @sunaddr: address to check (must be non-NULL with family AF_UNIX)
 * @len:     claimed length; must be larger than sizeof(short) and no larger
 *           than sizeof(*sunaddr)
 * @hashp:   receives unix_hash_fold() of the checksum over @len bytes
 *
 * For a name whose first path byte is non-zero, a NUL is written at byte
 * offset @len of the buffer and the length is recomputed from strlen().
 * NOTE(review): the return statements are missing from this listing, so the
 * exact error codes and which paths reach the hash computation are not
 * documented here.
 */
225 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
227 if (len <= sizeof(short) || len > sizeof(*sunaddr))
229 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
231 if (sunaddr->sun_path[0]) {
233 * This may look like an off by one error but it is a bit more
234 * subtle. 108 is the longest valid AF_UNIX path for a binding.
235 * sun_path[108] doesn't as such exist. However in kernel space
236 * we are guaranteed that it is a valid memory location in our
237 * kernel address buffer.
239 ((char *)sunaddr)[len] = 0;
240 len = strlen(sunaddr->sun_path)+1+sizeof(short);
244 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
/*
 * Unlink @sk from its hash chain.  Takes no lock itself; the
 * unix_remove_socket() wrapper calls it under unix_table_lock.
 */
248 static void __unix_remove_socket(struct sock *sk)
250 sk_del_node_init(sk);
/*
 * Add @sk to the chain @list, warning if it is still hashed somewhere.
 * Takes no lock itself; the unix_insert_socket() wrapper calls it under
 * unix_table_lock.
 */
253 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
255 WARN_ON(!sk_unhashed(sk));
256 sk_add_node(sk, list);
/* Locked wrapper: unlink @sk from the socket table under unix_table_lock. */
259 static inline void unix_remove_socket(struct sock *sk)
261 spin_lock(&unix_table_lock);
262 __unix_remove_socket(sk);
263 spin_unlock(&unix_table_lock);
/* Locked wrapper: add @sk to chain @list under unix_table_lock. */
266 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
268 spin_lock(&unix_table_lock);
269 __unix_insert_socket(list, sk);
270 spin_unlock(&unix_table_lock);
/*
 * Search the chain unix_socket_table[hash ^ type] for a socket in namespace
 * @net whose bound address has length @len and the same bytes as @sunname.
 * Sockets belonging to a different namespace are skipped.  No lock is taken
 * here; the callers visible in this file hold unix_table_lock.
 * NOTE(review): the return statements are missing from this listing.
 */
273 static struct sock *__unix_find_socket_byname(struct net *net,
274 struct sockaddr_un *sunname,
275 int len, int type, unsigned int hash)
279 sk_for_each(s, &unix_socket_table[hash ^ type]) {
280 struct unix_sock *u = unix_sk(s);
282 if (!net_eq(sock_net(s), net))
285 if (u->addr->len == len &&
286 !memcmp(u->addr->name, sunname, len))
/*
 * Locked wrapper around __unix_find_socket_byname(): performs the lookup
 * while holding unix_table_lock.
 * NOTE(review): the rest of the parameter list and the lines between the
 * lookup and the unlock are missing from this listing; whether a reference
 * is taken on the result cannot be confirmed from here.
 */
294 static inline struct sock *unix_find_socket_byname(struct net *net,
295 struct sockaddr_un *sunname,
301 spin_lock(&unix_table_lock);
302 s = __unix_find_socket_byname(net, sunname, len, type, hash);
305 spin_unlock(&unix_table_lock);
/*
 * Find the socket bound to inode @i.  Under unix_table_lock, the chain
 * selected by i->i_ino & (UNIX_HASH_SIZE - 1) is searched for a socket whose
 * path dentry resolves (via d_real_inode()) to @i.
 * NOTE(review): the loop header and the match/return lines are missing from
 * this listing.
 */
309 static struct sock *unix_find_socket_byinode(struct inode *i)
313 spin_lock(&unix_table_lock);
315 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
316 struct dentry *dentry = unix_sk(s)->path.dentry;
318 if (dentry && d_real_inode(dentry) == i) {
325 spin_unlock(&unix_table_lock);
329 /* Support code for asymmetrically connected dgram sockets
331 * If a datagram socket is connected to a socket not itself connected
332 * to the first socket (eg, /dev/log), clients may only enqueue more
333 * messages if the present receive queue of the server socket is not
334 * "too large". This means there's a second writeability condition
335 * poll and sendmsg need to test. The dgram recv code will do a wake
336 * up on the peer_wait wait queue of a socket upon reception of a
337 * datagram which needs to be propagated to sleeping would-be writers
338 * since these might not have sent anything so far. This can't be
339 * accomplished via poll_wait because the lifetime of the server
340 * socket might be less than that of its clients if these break their
341 * association with it or if the server socket is closed while clients
342 * are still connected to it and there's no way to inform "a polling
343 * implementation" that it should let go of a certain wait queue
345 * In order to propagate a wake up, a wait_queue_t of the client
346 * socket is enqueued on the peer_wait queue of the server socket
347 * whose wake function does a wake_up on the ordinary client socket
348 * wait queue. This connection is established whenever a write (or
349 * poll for write) hit the flow control condition and broken when the
350 * association to the server socket is dissolved or after a wake up
/*
 * Wake function of a socket's peer_wake entry (installed in unix_create1()).
 * It takes the entry off the peer_wait queue of the socket recorded in
 * peer_wake.private, clears that record, and then wakes the sleepers on the
 * owning socket's own wait queue, passing @key along.
 * NOTE(review): the tail of the parameter list and the return statement are
 * missing from this listing.
 */
354 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
358 wait_queue_head_t *u_sleep;
360 u = container_of(q, struct unix_sock, peer_wake);
362 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
364 u->peer_wake.private = NULL;
366 /* relaying can only happen while the wq still exists */
367 u_sleep = sk_sleep(&u->sk);
369 wake_up_interruptible_poll(u_sleep, key);
/*
 * Hook @sk's peer_wake entry onto @other's peer_wait queue, unless @sk is
 * already hooked onto some queue (peer_wake.private non-NULL).  The check
 * and the enqueue happen under @other's peer_wait lock.
 * NOTE(review): the return value is not visible in this listing; the caller
 * stores it in a variable named "connected".
 */
374 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
376 struct unix_sock *u, *u_other;
380 u_other = unix_sk(other);
382 spin_lock(&u_other->peer_wait.lock);
384 if (!u->peer_wake.private) {
385 u->peer_wake.private = other;
386 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
391 spin_unlock(&u_other->peer_wait.lock);
/*
 * Undo unix_dgram_peer_wake_connect(): if @sk's peer_wake entry is currently
 * hooked onto @other, remove it from @other's peer_wait queue and clear the
 * record.  Done under @other's peer_wait lock.
 */
395 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
398 struct unix_sock *u, *u_other;
401 u_other = unix_sk(other);
402 spin_lock(&u_other->peer_wait.lock);
404 if (u->peer_wake.private == other) {
405 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
406 u->peer_wake.private = NULL;
409 spin_unlock(&u_other->peer_wait.lock);
/*
 * Detach @sk from @other's peer_wait queue and then wake the sleepers on
 * @sk's own wait queue.
 * NOTE(review): the poll mask passed to the wake-up is missing from this
 * listing.
 */
412 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
415 unix_dgram_peer_wake_disconnect(sk, other);
416 wake_up_interruptible_poll(sk_sleep(sk),
/*
 * Register @sk for a relayed wake-up from @other and re-check whether
 * @other's receive queue is still full; the visible code also detaches the
 * registration again via unix_dgram_peer_wake_disconnect().
 * NOTE(review): the return statements and the condition guarding the
 * disconnect are missing from this listing.  The two lines below are what
 * remains of the original precondition list.
 */
423 * - unix_peer(sk) == other
424 * - association is stable
426 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
430 connected = unix_dgram_peer_wake_connect(sk, other);
432 if (unix_recvq_full(other))
436 unix_dgram_peer_wake_disconnect(sk, other);
/*
 * Non-zero when @sk may accept more outgoing data: it is not a listening
 * socket and four times its allocated write memory does not exceed
 * sk_sndbuf.
 */
441 static int unix_writable(const struct sock *sk)
443 return sk->sk_state != TCP_LISTEN &&
444 (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
/*
 * sk_write_space callback (installed in unix_create1()).  When the socket is
 * writable it wakes any sleepers on the socket wait queue with the POLLOUT
 * family of events and sends the SOCK_WAKE_SPACE async notification.
 * NOTE(review): sk_wq is read with rcu_dereference(); the matching RCU
 * read-side lock/unlock lines are missing from this listing.
 */
447 static void unix_write_space(struct sock *sk)
449 struct socket_wq *wq;
452 if (unix_writable(sk)) {
453 wq = rcu_dereference(sk->sk_wq);
454 if (wq_has_sleeper(wq))
455 wake_up_interruptible_sync_poll(&wq->wait,
456 POLLOUT | POLLWRNORM | POLLWRBAND);
457 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
/*
 * Clean up after datagram socket @sk stopped being connected to @other:
 * pending packets on @sk are purged (waking writers blocked on @sk's
 * peer_wait), and if @other is still alive and connected back to @sk it is
 * handed ECONNRESET through its error-report callback.
 */
462 /* When dgram socket disconnects (or changes its peer), we clear its receive
463 * queue of packets arrived from previous peer. First, it allows to do
464 * flow control based only on wmem_alloc; second, sk connected to peer
465 * may receive messages only from that peer. */
466 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
468 if (!skb_queue_empty(&sk->sk_receive_queue)) {
469 skb_queue_purge(&sk->sk_receive_queue);
470 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
472 /* If one link of bidirectional dgram pipe is disconnected,
473 * we signal error. Messages are lost. Do not make this,
474 * when peer was not connected to us.
476 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
477 other->sk_err = ECONNRESET;
478 other->sk_error_report(other);
/*
 * sk_destruct callback (installed in unix_create1()).  Purges whatever is
 * left on the receive queue, warns about unexpected state (write memory
 * still allocated, still hashed, still attached to a struct socket), logs
 * an attempt to destroy a socket that is not flagged SOCK_DEAD, releases
 * the bound address and decrements the socket counters.
 * NOTE(review): the conditions and early return around the log message and
 * the address release are missing from this listing.
 */
483 static void unix_sock_destructor(struct sock *sk)
485 struct unix_sock *u = unix_sk(sk);
487 skb_queue_purge(&sk->sk_receive_queue);
489 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
490 WARN_ON(!sk_unhashed(sk));
491 WARN_ON(sk->sk_socket);
492 if (!sock_flag(sk, SOCK_DEAD)) {
493 pr_info("Attempt to release alive unix socket: %p\n", sk);
498 unix_release_addr(u->addr);
500 atomic_long_dec(&unix_nr_socks);
502 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
504 #ifdef UNIX_REFCNT_DEBUG
505 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
506 atomic_long_read(&unix_nr_socks));
/*
 * Tear down unix socket @sk.
 *
 * @embrion: non-zero when @sk is a not-yet-accepted connection that is being
 *           flushed out of a listener's queue (the recursive call near the
 *           end of this function passes 1; unix_release() passes 0).
 *
 * Visible sequence: unhash the socket; mark both directions shut down, drop
 * the path dentry and move the state to TCP_CLOSE; wake everything sleeping
 * on peer_wait; for a stream/seqpacket peer, shut it down and flag
 * ECONNRESET when unread data remains or @embrion is set; detach from the
 * peer and drop its reference; drain the receive queue (releasing queued
 * embryo sockets if @sk was listening); finally run unix_gc() when any file
 * descriptors are in flight.
 * NOTE(review): several lines (locking before the state change, path and
 * skb release, final put of @sk) are missing from this listing.
 */
510 static void unix_release_sock(struct sock *sk, int embrion)
512 struct unix_sock *u = unix_sk(sk);
518 unix_remove_socket(sk);
523 sk->sk_shutdown = SHUTDOWN_MASK;
525 u->path.dentry = NULL;
527 state = sk->sk_state;
528 sk->sk_state = TCP_CLOSE;
529 unix_state_unlock(sk);
531 wake_up_interruptible_all(&u->peer_wait);
533 skpair = unix_peer(sk);
535 if (skpair != NULL) {
536 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
537 unix_state_lock(skpair);
539 skpair->sk_shutdown = SHUTDOWN_MASK;
540 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
541 skpair->sk_err = ECONNRESET;
542 unix_state_unlock(skpair);
543 skpair->sk_state_change(skpair);
544 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
547 unix_dgram_peer_wake_disconnect(sk, skpair);
548 sock_put(skpair); /* It may now die */
549 unix_peer(sk) = NULL;
552 /* Try to flush out this socket. Throw out buffers at least */
554 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
555 if (state == TCP_LISTEN)
556 unix_release_sock(skb->sk, 1);
557 /* passed fds are erased in the kfree_skb hook */
558 UNIXCB(skb).consumed = skb->len;
567 /* ---- Socket is dead now and most probably destroyed ---- */
570 * Fixme: BSD difference: In BSD all sockets connected to us get
571 * ECONNRESET and we die on the spot. In Linux we behave
572 * like files and pipes do and wait for the last
575 * Can't we simply set sock->err?
577 * What the above comment does talk about? --ANK(980817)
580 if (unix_tot_inflight)
581 unix_gc(); /* Garbage collect fds */
/*
 * Set @sk's peer credentials to those of the calling task: drop the
 * references to any previously stored pid/cred, then take new references to
 * the current thread-group pid and the current credentials.
 */
584 static void init_peercred(struct sock *sk)
586 put_pid(sk->sk_peer_pid);
587 if (sk->sk_peer_cred)
588 put_cred(sk->sk_peer_cred);
589 sk->sk_peer_pid = get_pid(task_tgid(current));
590 sk->sk_peer_cred = get_current_cred();
/*
 * Make @sk's peer credentials a copy of @peersk's: drop the references to
 * any previously stored pid/cred, then take new references to the pid and
 * cred recorded on @peersk.
 */
593 static void copy_peercred(struct sock *sk, struct sock *peersk)
595 put_pid(sk->sk_peer_pid);
596 if (sk->sk_peer_cred)
597 put_cred(sk->sk_peer_cred);
598 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
599 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
/*
 * proto_ops .listen handler for stream and seqpacket sockets.
 *
 * Refuses other socket types, unbound sockets, and sockets that are neither
 * closed nor already listening.  Otherwise it records the new @backlog and
 * moves the socket to TCP_LISTEN.  When the backlog grows, everything
 * sleeping on peer_wait is woken first (unix_wait_for_peer() sleeps there
 * while the receive queue is full).
 * NOTE(review): the error codes, the state locking before the checks and
 * the credential setup announced by the last comment are missing from this
 * listing.
 */
602 static int unix_listen(struct socket *sock, int backlog)
605 struct sock *sk = sock->sk;
606 struct unix_sock *u = unix_sk(sk);
607 struct pid *old_pid = NULL;
610 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
611 goto out; /* Only stream/seqpacket sockets accept */
614 goto out; /* No listens on an unbound socket */
616 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
618 if (backlog > sk->sk_max_ack_backlog)
619 wake_up_interruptible_all(&u->peer_wait);
620 sk->sk_max_ack_backlog = backlog;
621 sk->sk_state = TCP_LISTEN;
622 /* set credentials so connect can copy them */
627 unix_state_unlock(sk);
/*
 * Forward declarations of the handlers referenced by the unix_stream_ops,
 * unix_dgram_ops and unix_seqpacket_ops tables.
 * NOTE(review): several prototypes below are cut short in this listing
 * (their continuation lines are missing).
 */
633 static int unix_release(struct socket *);
634 static int unix_bind(struct socket *, struct sockaddr *, int);
635 static int unix_stream_connect(struct socket *, struct sockaddr *,
636 int addr_len, int flags);
637 static int unix_socketpair(struct socket *, struct socket *);
638 static int unix_accept(struct socket *, struct socket *, int);
639 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
640 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
641 static unsigned int unix_dgram_poll(struct file *, struct socket *,
643 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
644 static int unix_shutdown(struct socket *, int);
645 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
646 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
647 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
648 size_t size, int flags);
649 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
650 struct pipe_inode_info *, size_t size,
652 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
653 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
654 static int unix_dgram_connect(struct socket *, struct sockaddr *,
656 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
657 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
/*
 * .set_peek_off handler shared by all three ops tables: store @val in
 * sk_peek_off while holding the socket's iolock.  The lock is taken
 * interruptibly.
 * NOTE(review): the return statements are missing from this listing.
 */
660 static int unix_set_peek_off(struct sock *sk, int val)
662 struct unix_sock *u = unix_sk(sk);
664 if (mutex_lock_interruptible(&u->iolock))
667 sk->sk_peek_off = val;
668 mutex_unlock(&u->iolock);
/*
 * Socket operations for SOCK_STREAM unix sockets (selected in
 * unix_create()).  Socket options and mmap are not supported; sendpage and
 * splice_read have stream-specific implementations.
 * NOTE(review): some initializers are missing from this listing.
 */
674 static const struct proto_ops unix_stream_ops = {
676 .owner = THIS_MODULE,
677 .release = unix_release,
679 .connect = unix_stream_connect,
680 .socketpair = unix_socketpair,
681 .accept = unix_accept,
682 .getname = unix_getname,
685 .listen = unix_listen,
686 .shutdown = unix_shutdown,
687 .setsockopt = sock_no_setsockopt,
688 .getsockopt = sock_no_getsockopt,
689 .sendmsg = unix_stream_sendmsg,
690 .recvmsg = unix_stream_recvmsg,
691 .mmap = sock_no_mmap,
692 .sendpage = unix_stream_sendpage,
693 .splice_read = unix_stream_splice_read,
694 .set_peek_off = unix_set_peek_off,
/*
 * Socket operations for datagram unix sockets (selected in unix_create()).
 * accept, listen, socket options, mmap and sendpage are not supported.
 * NOTE(review): some initializers are missing from this listing.
 */
697 static const struct proto_ops unix_dgram_ops = {
699 .owner = THIS_MODULE,
700 .release = unix_release,
702 .connect = unix_dgram_connect,
703 .socketpair = unix_socketpair,
704 .accept = sock_no_accept,
705 .getname = unix_getname,
706 .poll = unix_dgram_poll,
708 .listen = sock_no_listen,
709 .shutdown = unix_shutdown,
710 .setsockopt = sock_no_setsockopt,
711 .getsockopt = sock_no_getsockopt,
712 .sendmsg = unix_dgram_sendmsg,
713 .recvmsg = unix_dgram_recvmsg,
714 .mmap = sock_no_mmap,
715 .sendpage = sock_no_sendpage,
716 .set_peek_off = unix_set_peek_off,
/*
 * Socket operations for SOCK_SEQPACKET unix sockets (selected in
 * unix_create()).  Connection handling (connect/accept/listen) is shared
 * with the stream table, polling is shared with the datagram table, and
 * send/receive have seqpacket-specific entry points.
 * NOTE(review): some initializers are missing from this listing.
 */
719 static const struct proto_ops unix_seqpacket_ops = {
721 .owner = THIS_MODULE,
722 .release = unix_release,
724 .connect = unix_stream_connect,
725 .socketpair = unix_socketpair,
726 .accept = unix_accept,
727 .getname = unix_getname,
728 .poll = unix_dgram_poll,
730 .listen = unix_listen,
731 .shutdown = unix_shutdown,
732 .setsockopt = sock_no_setsockopt,
733 .getsockopt = sock_no_getsockopt,
734 .sendmsg = unix_seqpacket_sendmsg,
735 .recvmsg = unix_seqpacket_recvmsg,
736 .mmap = sock_no_mmap,
737 .sendpage = sock_no_sendpage,
738 .set_peek_off = unix_set_peek_off,
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(); obj_size is
 * the size of struct unix_sock.
 * NOTE(review): at least one initializer is missing from this listing.
 */
741 static struct proto unix_proto = {
743 .owner = THIS_MODULE,
744 .obj_size = sizeof(struct unix_sock),
748 * AF_UNIX sockets do not interact with hardware, hence they
749 * don't trigger interrupts - so it's safe for them to have
750 * bh-unsafe locking for their sk_receive_queue.lock. Split off
751 * this special lock-class by reinitializing the spinlock key:
/* Lockdep class key that unix_create1() assigns to each socket's sk_receive_queue.lock. */
753 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
/*
 * Allocate and initialise a new unix sock.
 *
 * @net:  network namespace the sock belongs to
 * @sock: struct socket to attach through sock_init_data(); unix_stream_connect()
 *        passes NULL for the server-side sock it pre-allocates
 * @kern: forwarded to sk_alloc()
 *
 * Returns the new sock; unix_create() treats a NULL result as -ENOMEM.
 * The new sock starts with no path, its own locks and wait queues, the
 * per-namespace max_dgram_qlen as its backlog limit, and is hashed on the
 * chain chosen by unix_sockets_unbound().
 * NOTE(review): the failure branches and labels are missing from this
 * listing; the atomic_long_dec() near the end presumably belongs to the
 * failure path - confirm.
 */
755 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
757 struct sock *sk = NULL;
/* Count the new socket first, then compare the total against twice get_max_files(). */
760 atomic_long_inc(&unix_nr_socks);
761 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
764 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
768 sock_init_data(sock, sk);
769 lockdep_set_class(&sk->sk_receive_queue.lock,
770 &af_unix_sk_receive_queue_lock_key);
772 sk->sk_write_space = unix_write_space;
773 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
774 sk->sk_destruct = unix_sock_destructor;
776 u->path.dentry = NULL;
778 spin_lock_init(&u->lock);
779 atomic_long_set(&u->inflight, 0);
780 INIT_LIST_HEAD(&u->link);
781 mutex_init(&u->iolock); /* single task reading lock */
782 mutex_init(&u->bindlock); /* single task binding lock */
783 init_waitqueue_head(&u->peer_wait);
/* peer_wake relays wake-ups from a peer's peer_wait queue to this socket. */
784 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
785 unix_insert_socket(unix_sockets_unbound(sk), sk);
788 atomic_long_dec(&unix_nr_socks);
791 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
/*
 * Socket-creation entry point for the family.
 *
 * @protocol must be 0 or PF_UNIX, otherwise -EPROTONOSUPPORT is returned.
 * The socket starts SS_UNCONNECTED and gets its ops table from sock->type
 * (stream, datagram or seqpacket); one branch rewrites the type to
 * SOCK_DGRAM before using the datagram table.  Unsupported types yield
 * -ESOCKTNOSUPPORT.  Returns 0 on success or -ENOMEM when unix_create1()
 * fails.
 * NOTE(review): the case labels are missing from this listing; going by the
 * surviving comment, the rewritten type is presumably SOCK_RAW - confirm.
 */
797 static int unix_create(struct net *net, struct socket *sock, int protocol,
800 if (protocol && protocol != PF_UNIX)
801 return -EPROTONOSUPPORT;
803 sock->state = SS_UNCONNECTED;
805 switch (sock->type) {
807 sock->ops = &unix_stream_ops;
810 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
814 sock->type = SOCK_DGRAM;
816 sock->ops = &unix_dgram_ops;
819 sock->ops = &unix_seqpacket_ops;
822 return -ESOCKTNOSUPPORT;
825 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
/*
 * proto_ops .release handler: tears the socket down through
 * unix_release_sock() with embrion == 0.
 * NOTE(review): the lines around the call (including the return) are
 * missing from this listing.
 */
828 static int unix_release(struct socket *sock)
830 struct sock *sk = sock->sk;
835 unix_release_sock(sk, 0);
/*
 * Bind @sock to an automatically generated name.
 *
 * The address is zero-allocated, so the first path byte stays 0; it is
 * followed by five hex digits taken from the function-static counter
 * "ordernum", which wraps at 0xFFFFF.  If the generated name is already in
 * use the attempt is repeated, giving up once the retry counter reaches
 * 0xFFFFF.  On success the socket is moved from its current chain to the
 * chain selected by the new address hash.  The whole operation runs under
 * the socket's bindlock, which is taken interruptibly.
 * NOTE(review): error codes, the retry jump and the assignment of the new
 * address to the socket are missing from this listing.
 */
841 static int unix_autobind(struct socket *sock)
843 struct sock *sk = sock->sk;
844 struct net *net = sock_net(sk);
845 struct unix_sock *u = unix_sk(sk);
846 static u32 ordernum = 1;
847 struct unix_address *addr;
849 unsigned int retries = 0;
851 err = mutex_lock_interruptible(&u->bindlock);
860 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
864 addr->name->sun_family = AF_UNIX;
865 atomic_set(&addr->refcnt, 1);
/* Name length = hex digits written + the leading zero byte + the family field. */
868 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
869 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
871 spin_lock(&unix_table_lock);
872 ordernum = (ordernum+1)&0xFFFFF;
874 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
876 spin_unlock(&unix_table_lock);
878 * __unix_find_socket_byname() may take long time if many names
879 * are already in use.
882 /* Give up if all names seems to be in use. */
883 if (retries++ == 0xFFFFF) {
890 addr->hash ^= sk->sk_type;
892 __unix_remove_socket(sk);
894 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
895 spin_unlock(&unix_table_lock);
898 out: mutex_unlock(&u->bindlock);
/*
 * Resolve an address to the socket bound to it.
 *
 * @net, @sunname, @len, @type, @hash: namespace, address, its length, the
 *         wanted socket type and the hash computed by unix_mkname()
 * @error: out-parameter for the failure reason
 *
 * A name whose first path byte is non-zero is looked up in the filesystem
 * (following symlinks): the caller needs write permission on the inode, the
 * inode must be a socket, and the socket bound to it must have type @type.
 * Any other name is looked up by name in the socket table; when found, the
 * access time of the socket's path is touched.
 * NOTE(review): the error assignments, reference handling and return
 * statements are missing from this listing.
 */
902 static struct sock *unix_find_other(struct net *net,
903 struct sockaddr_un *sunname, int len,
904 int type, unsigned int hash, int *error)
910 if (sunname->sun_path[0]) {
912 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
915 inode = d_real_inode(path.dentry);
916 err = inode_permission(inode, MAY_WRITE);
921 if (!S_ISSOCK(inode->i_mode))
923 u = unix_find_socket_byinode(inode);
927 if (u->sk_type == type)
933 if (u->sk_type != type) {
939 u = unix_find_socket_byname(net, sunname, len, type, hash);
941 struct dentry *dentry;
942 dentry = unix_sk(u)->path.dentry;
944 touch_atime(&unix_sk(u)->path);
/*
 * Create the filesystem node for a pathname socket.
 *
 * @sun_path: path to create, resolved relative to the current working
 *            directory (AT_FDCWD)
 * @mode:     mode for the new node
 * @res:      on success receives references to the mount and the new dentry
 *
 * The security_path_mknod() hook is consulted before vfs_mknod() is called.
 * NOTE(review): the conditions linking these steps and the return statement
 * are missing from this listing.
 */
957 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
959 struct dentry *dentry;
963 * Get the parent directory, calculate the hash for last
966 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
967 err = PTR_ERR(dentry);
972 * All right, let's create it.
974 err = security_path_mknod(&path, dentry, mode, 0);
976 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
978 res->mnt = mntget(path.mnt);
979 res->dentry = dget(dentry);
982 done_path_create(&path, dentry);
/*
 * proto_ops .bind handler.
 *
 * @uaddr/@addr_len: address supplied by the caller
 *
 * An address of exactly sizeof(short) bytes requests an automatic name via
 * unix_autobind().  Otherwise the address is validated and hashed by
 * unix_mkname(), copied into a freshly allocated unix_address and, under
 * the socket's bindlock, installed in one of two ways:
 *  - with a filesystem node: the node is created by unix_mknod() using
 *    S_IFSOCK plus the socket inode's mode masked by the umask; the address
 *    hash is set to UNIX_HASH_SIZE and the socket is filed under the chain
 *    selected by the new inode's number;
 *  - without one: the name must not already be present in the table, and
 *    the socket is filed under the chain selected by addr->hash.
 * In both cases the socket is removed from its old chain first.
 * NOTE(review): error codes, the branch conditions and the assignment of
 * the address/path to the socket are missing from this listing.
 */
986 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
988 struct sock *sk = sock->sk;
989 struct net *net = sock_net(sk);
990 struct unix_sock *u = unix_sk(sk);
991 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
992 char *sun_path = sunaddr->sun_path;
995 struct unix_address *addr;
996 struct hlist_head *list;
999 if (sunaddr->sun_family != AF_UNIX)
1002 if (addr_len == sizeof(short)) {
1003 err = unix_autobind(sock);
1007 err = unix_mkname(sunaddr, addr_len, &hash);
1012 err = mutex_lock_interruptible(&u->bindlock);
1021 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1025 memcpy(addr->name, sunaddr, addr_len);
1026 addr->len = addr_len;
/* The socket type is mixed into the hash, matching the hash ^ type lookup. */
1027 addr->hash = hash ^ sk->sk_type;
1028 atomic_set(&addr->refcnt, 1);
1032 umode_t mode = S_IFSOCK |
1033 (SOCK_INODE(sock)->i_mode & ~current_umask());
1034 err = unix_mknod(sun_path, mode, &path);
1038 unix_release_addr(addr);
1041 addr->hash = UNIX_HASH_SIZE;
1042 hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1043 spin_lock(&unix_table_lock);
1045 list = &unix_socket_table[hash];
1047 spin_lock(&unix_table_lock);
1049 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1050 sk->sk_type, hash)) {
1051 unix_release_addr(addr);
1055 list = &unix_socket_table[addr->hash];
1059 __unix_remove_socket(sk);
1061 __unix_insert_socket(list, sk);
1064 spin_unlock(&unix_table_lock);
1066 mutex_unlock(&u->bindlock);
/*
 * Take the state locks of two sockets.  When both arguments are the same
 * socket, or @sk2 is NULL, only @sk1 is locked.  Otherwise both are locked,
 * the second one with the "nested" lock variant.
 * NOTE(review): the condition that chooses between the two lock orders is
 * missing from this listing (presumably an address comparison, so that two
 * concurrent callers agree on the order - confirm).
 */
1071 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
1073 if (unlikely(sk1 == sk2) || !sk2) {
1074 unix_state_lock(sk1);
1078 unix_state_lock(sk1);
1079 unix_state_lock_nested(sk2);
1081 unix_state_lock(sk2);
1082 unix_state_lock_nested(sk1);
/*
 * Release the locks taken by unix_state_double_lock(): only @sk1 when both
 * arguments are the same socket or @sk2 is NULL, otherwise both.
 */
1086 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
1088 if (unlikely(sk1 == sk2) || !sk2) {
1089 unix_state_unlock(sk1);
1092 unix_state_unlock(sk1);
1093 unix_state_unlock(sk2);
/*
 * proto_ops .connect handler for datagram sockets.
 *
 * With an address family other than AF_UNSPEC the address is validated by
 * unix_mkname(), the socket is auto-bound first if SOCK_PASSCRED is set and
 * it has no address yet, and the target is resolved with
 * unix_find_other().  Both sockets are then state-locked; a target that
 * died in the meantime is unlocked again (see the retry comment), and the
 * connection is only allowed if unix_may_send() and the
 * security_unix_may_send() hook agree.  The surviving comment fragment
 * indicates the remaining branch breaks an existing association
 * (AF_UNSPEC).
 *
 * If the socket already had a peer, the new peer is installed, the
 * wake-up relay to the old peer is detached, and - when the peer actually
 * changed - unix_dgram_disconnected() cleans up after the old association.
 * NOTE(review): error codes, reference counting and labels are missing
 * from this listing.
 */
1096 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1097 int alen, int flags)
1099 struct sock *sk = sock->sk;
1100 struct net *net = sock_net(sk);
1101 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1106 if (addr->sa_family != AF_UNSPEC) {
1107 err = unix_mkname(sunaddr, alen, &hash);
1112 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1113 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1117 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1121 unix_state_double_lock(sk, other);
1123 /* Apparently VFS overslept socket death. Retry. */
1124 if (sock_flag(other, SOCK_DEAD)) {
1125 unix_state_double_unlock(sk, other);
1131 if (!unix_may_send(sk, other))
1134 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1140 * 1003.1g breaking connected state with AF_UNSPEC
1143 unix_state_double_lock(sk, other);
1147 * If it was connected, reconnect.
1149 if (unix_peer(sk)) {
1150 struct sock *old_peer = unix_peer(sk);
1151 unix_peer(sk) = other;
1152 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1154 unix_state_double_unlock(sk, other);
1156 if (other != old_peer)
1157 unix_dgram_disconnected(sk, old_peer);
1160 unix_peer(sk) = other;
1161 unix_state_double_unlock(sk, other);
1166 unix_state_double_unlock(sk, other);
/*
 * Wait, as an exclusive interruptible waiter on @other's peer_wait queue,
 * for room in @other's receive queue.
 *
 * @other: socket to wait on; its state lock is released inside this
 *         function
 * @timeo: maximum time to sleep, passed to schedule_timeout()
 *
 * Sleeping is only warranted while @other is alive, has not shut down its
 * receive side and its receive queue is full.
 * NOTE(review): the test of "sched" before sleeping and the return
 * statement are missing from this listing; the caller assigns the result
 * back to its timeout.
 */
1172 static long unix_wait_for_peer(struct sock *other, long timeo)
1174 struct unix_sock *u = unix_sk(other);
1178 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1180 sched = !sock_flag(other, SOCK_DEAD) &&
1181 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1182 unix_recvq_full(other);
1184 unix_state_unlock(other);
1187 timeo = schedule_timeout(timeo);
1189 finish_wait(&u->peer_wait, &wait);
/*
 * proto_ops .connect handler for stream and seqpacket sockets.
 *
 * Outline of the visible steps:
 *  1. Validate/hash the address (unix_mkname()); auto-bind first if
 *     SOCK_PASSCRED is set and the socket has no address.
 *  2. Pre-allocate everything before any state lock is taken: a new sock
 *     ("newsk") that will become the server side of the connection, and a
 *     one-byte skb used to queue it on the listener.
 *  3. Resolve the listener with unix_find_other() and lock its state.  A
 *     listener that died meanwhile is unlocked again (see the retry
 *     comment); one that is not in TCP_LISTEN or has shut down receiving is
 *     refused with -ECONNREFUSED.
 *  4. If the listener's queue is full, wait in unix_wait_for_peer(); a
 *     pending signal ends the wait with the error from sock_intr_errno().
 *  5. Lock our own state (nested under the listener's lock), re-check that
 *     it did not change, and ask security_unix_stream_connect().
 *  6. Wire up the pair: newsk gets us as peer, the listener's address and
 *     path, and the caller's credentials; we copy the listener's
 *     credentials, become TCP_ESTABLISHED and get newsk as peer.
 *  7. Queue the skb on the listener's receive queue and signal data ready.
 * On failure newsk is released through unix_release_sock().
 * NOTE(review): many lines (labels, gotos, error codes, reference counting,
 * the state switch head) are missing from this listing.
 */
1193 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1194 int addr_len, int flags)
1196 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1197 struct sock *sk = sock->sk;
1198 struct net *net = sock_net(sk);
1199 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1200 struct sock *newsk = NULL;
1201 struct sock *other = NULL;
1202 struct sk_buff *skb = NULL;
1208 err = unix_mkname(sunaddr, addr_len, &hash);
1213 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1214 (err = unix_autobind(sock)) != 0)
1217 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1219 /* First of all allocate resources.
1220 If we will make it after state is locked,
1221 we will have to recheck all again in any case.
1226 /* create new sock for complete connection */
1227 newsk = unix_create1(sock_net(sk), NULL, 0);
1231 /* Allocate skb for sending to listening sock */
1232 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1237 /* Find listening sock. */
1238 other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1242 /* Latch state of peer */
1243 unix_state_lock(other);
1245 /* Apparently VFS overslept socket death. Retry. */
1246 if (sock_flag(other, SOCK_DEAD)) {
1247 unix_state_unlock(other);
1252 err = -ECONNREFUSED;
1253 if (other->sk_state != TCP_LISTEN)
1255 if (other->sk_shutdown & RCV_SHUTDOWN)
1258 if (unix_recvq_full(other)) {
1263 timeo = unix_wait_for_peer(other, timeo);
1265 err = sock_intr_errno(timeo);
1266 if (signal_pending(current))
1274 It is tricky place. We need to grab our state lock and cannot
1275 drop lock on peer. It is dangerous because deadlock is
1276 possible. Connect to self case and simultaneous
1277 attempt to connect are eliminated by checking socket
1278 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1279 check this before attempt to grab lock.
1281 Well, and we have to recheck the state after socket locked.
1287 /* This is ok... continue with connect */
1289 case TCP_ESTABLISHED:
1290 /* Socket is already connected */
1298 unix_state_lock_nested(sk);
1300 if (sk->sk_state != st) {
1301 unix_state_unlock(sk);
1302 unix_state_unlock(other);
1307 err = security_unix_stream_connect(sk, other, newsk);
1309 unix_state_unlock(sk);
1313 /* The way is open! Fastly set all the necessary fields... */
1316 unix_peer(newsk) = sk;
1317 newsk->sk_state = TCP_ESTABLISHED;
1318 newsk->sk_type = sk->sk_type;
1319 init_peercred(newsk);
1320 newu = unix_sk(newsk);
1321 RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1322 otheru = unix_sk(other);
1324 /* copy address information from listening to new sock*/
1326 atomic_inc(&otheru->addr->refcnt);
1327 newu->addr = otheru->addr;
1329 if (otheru->path.dentry) {
1330 path_get(&otheru->path);
1331 newu->path = otheru->path;
1334 /* Set credentials */
1335 copy_peercred(sk, other);
1337 sock->state = SS_CONNECTED;
1338 sk->sk_state = TCP_ESTABLISHED;
1341 smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1342 unix_peer(sk) = newsk;
1344 unix_state_unlock(sk);
1346 /* take ten and send info to listening sock */
1347 spin_lock(&other->sk_receive_queue.lock);
1348 __skb_queue_tail(&other->sk_receive_queue, skb);
1349 spin_unlock(&other->sk_receive_queue.lock);
1350 unix_state_unlock(other);
1351 other->sk_data_ready(other);
1357 unix_state_unlock(other);
1362 unix_release_sock(newsk, 0);
/*
 * proto_ops .socketpair handler: make the two sockets each other's peer.
 * For every type except SOCK_DGRAM both ends are additionally marked
 * TCP_ESTABLISHED / SS_CONNECTED.
 * NOTE(review): reference counting, credential setup and the return
 * statement are missing from this listing.  Note that the local variable
 * "skb" here is a struct sock (socket "b"), not an sk_buff.
 */
1368 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1370 struct sock *ska = socka->sk, *skb = sockb->sk;
1372 /* Join our sockets back to back */
1375 unix_peer(ska) = skb;
1376 unix_peer(skb) = ska;
1380 if (ska->sk_type != SOCK_DGRAM) {
1381 ska->sk_state = TCP_ESTABLISHED;
1382 skb->sk_state = TCP_ESTABLISHED;
1383 socka->state = SS_CONNECTED;
1384 sockb->state = SS_CONNECTED;
/*
 * Copy the SOCK_PASSCRED and SOCK_PASSSEC flags from @old to the new socket
 * when they are set; used by unix_accept() so an accepted socket inherits
 * them from the listener.  Flags are only ever set here, never cleared.
 */
1389 static void unix_sock_inherit_flags(const struct socket *old,
1392 if (test_bit(SOCK_PASSCRED, &old->flags))
1393 set_bit(SOCK_PASSCRED, &new->flags);
1394 if (test_bit(SOCK_PASSSEC, &old->flags))
1395 set_bit(SOCK_PASSSEC, &new->flags);
/*
 * proto_ops .accept handler for stream and seqpacket listeners.
 *
 * Only valid on a stream/seqpacket socket in TCP_LISTEN.  A queued
 * connection is taken off the listener's receive queue with
 * skb_recv_datagram() (non-blocking when O_NONBLOCK is set in @flags), the
 * skb is freed, and anything sleeping on the listener's peer_wait is woken
 * because a queue slot became free.  The accepted sock ("tsk") is then
 * grafted onto @newsock under its state lock, inheriting the listener's
 * PASSCRED/PASSSEC flags.
 * NOTE(review): error codes and the line that obtains "tsk" are missing
 * from this listing.
 */
1398 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1400 struct sock *sk = sock->sk;
1402 struct sk_buff *skb;
1406 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1410 if (sk->sk_state != TCP_LISTEN)
1413 /* If socket state is TCP_LISTEN it cannot change (for now...),
1414 * so that no locks are necessary.
1417 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1419 /* This means receive shutdown. */
1426 skb_free_datagram(sk, skb);
1427 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1429 /* attach accepted sock to socket */
1430 unix_state_lock(tsk);
1431 newsock->state = SS_CONNECTED;
1432 unix_sock_inherit_flags(sock, newsock);
1433 sock_graft(tsk, newsock);
1434 unix_state_unlock(tsk);
/*
 * unix_getname - report a socket address to the caller.
 * @sock:      socket being queried
 * @uaddr:     buffer receiving the address (viewed as sockaddr_un)
 * @uaddr_len: receives the address length
 * @peer:      presumably selects the peer's address instead of the
 *             socket's own; the peer sock is obtained with
 *             unix_peer_get()
 *
 * Under the state lock of the chosen sock one of two results is
 * produced: an AF_UNIX address with an empty path and a length of
 * sizeof(short), or a copy of the sock's unix_address name together
 * with its stored length.
 */
1442 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1444 struct sock *sk = sock->sk;
1445 struct unix_sock *u;
1446 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1450 sk = unix_peer_get(sk);
1461 unix_state_lock(sk);
1463 sunaddr->sun_family = AF_UNIX;
1464 sunaddr->sun_path[0] = 0;
1465 *uaddr_len = sizeof(short);
1467 struct unix_address *addr = u->addr;
1469 *uaddr_len = addr->len;
1470 memcpy(sunaddr, addr->name, *uaddr_len);
1472 unix_state_unlock(sk);
/*
 * unix_detach_fds - move the passed file list from an skb to a cookie.
 * @scm: cookie that takes over the file list
 * @skb: buffer whose control block currently holds the list
 *
 * After the hand-over UNIXCB(skb).fp is NULL.  Every file in the list
 * is then reported through unix_notinflight(), walking from the last
 * entry down to the first.
 */
1478 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1482 scm->fp = UNIXCB(skb).fp;
1483 UNIXCB(skb).fp = NULL;
1485 for (i = scm->fp->count-1; i >= 0; i--)
1486 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
/*
 * unix_destruct_scm - skb destructor installed by unix_scm_to_skb().
 * @skb: buffer being destroyed
 *
 * Builds a zeroed scm_cookie on the stack, copies the pid from the skb
 * control block into it and moves the passed files over with
 * unix_detach_fds(), presumably so that both can be released together.
 */
1489 static void unix_destruct_scm(struct sk_buff *skb)
1491 struct scm_cookie scm;
1492 memset(&scm, 0, sizeof(scm));
1493 scm.pid = UNIXCB(skb).pid;
1495 unix_detach_fds(&scm, skb);
1497 /* Alas, it calls VFS */
1498 /* So fscking what? fput() had been SMP-safe since the last Summer */
1504 * The "user->unix_inflight" variable is protected by the garbage
1505 * collection lock, and we just read it locklessly here. If you go
1506 * over the limit, there might be a tiny race in actually noticing
1507 * it across threads. Tough.
/*
 * too_many_unix_fds - check the in-flight file limit.
 * @p: task whose RLIMIT_NOFILE is used as the limit
 *
 * Compares the unix_inflight counter of the current user (taken from
 * current_user(), independent of @p) with @p's RLIMIT_NOFILE.  When the
 * counter is above the limit the result is true unless the caller holds
 * CAP_SYS_RESOURCE or CAP_SYS_ADMIN.  Presumably false otherwise.
 */
1509 static inline bool too_many_unix_fds(struct task_struct *p)
1511 struct user_struct *user = current_user();
1513 if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1514 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
/* Highest recursion_level a passed unix socket may have; above it sending is refused. */
1518 #define MAX_RECURSION_LEVEL 4
/*
 * unix_attach_fds - attach the files carried by a cookie to an skb.
 * @scm: cookie holding the file list to pass
 * @skb: buffer that will carry the files
 *
 * Fails with -ETOOMANYREFS when too_many_unix_fds() reports that the
 * sender is over its limit, or when any passed file that is itself a
 * unix socket has a recursion_level above MAX_RECURSION_LEVEL.
 *
 * Otherwise the file list is duplicated into UNIXCB(skb).fp with
 * scm_fp_dup() and every file is accounted through unix_inflight().
 * Callers derive their max_level from the return value plus one, so the
 * success value is presumably the highest recursion level found.
 */
1520 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1523 unsigned char max_level = 0;
1524 int unix_sock_count = 0;
1526 if (too_many_unix_fds(current))
1527 return -ETOOMANYREFS;
1529 for (i = scm->fp->count - 1; i >= 0; i--) {
1530 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1534 max_level = max(max_level,
1535 unix_sk(sk)->recursion_level);
1538 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1539 return -ETOOMANYREFS;
1542 * Need to duplicate file references for the sake of garbage
1543 * collection. Otherwise a socket in the fps might become a
1544 * candidate for GC while the skb is not yet queued.
1546 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1547 if (!UNIXCB(skb).fp)
1550 for (i = scm->fp->count - 1; i >= 0; i--)
1551 unix_inflight(scm->fp->user, scm->fp->fp[i]);
/*
 * unix_scm_to_skb - copy control information from a cookie into an skb.
 * @scm:      source cookie
 * @skb:      buffer whose control block is filled in
 * @send_fds: whether passed files should be attached to this skb
 *
 * Stores the pid (taking a reference with get_pid()), uid, gid and
 * security data in the skb control block.  The file pointer starts out
 * NULL; files are attached through unix_attach_fds() only when @scm
 * carries some and @send_fds is true, in which case err holds that
 * call's result.  unix_destruct_scm() is installed as the destructor.
 */
1555 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1559 UNIXCB(skb).pid = get_pid(scm->pid);
1560 UNIXCB(skb).uid = scm->creds.uid;
1561 UNIXCB(skb).gid = scm->creds.gid;
1562 UNIXCB(skb).fp = NULL;
1563 unix_get_secdata(scm, skb);
1564 if (scm->fp && send_fds)
1565 err = unix_attach_fds(scm, skb);
1567 skb->destructor = unix_destruct_scm;
/*
 * unix_passcred_enabled - decide whether credentials must be sent.
 * @sock:  sending socket
 * @other: receiving sock
 *
 * True when SOCK_PASSCRED is set on @sock, when @other has no struct
 * socket attached, or when SOCK_PASSCRED is set on @other's socket.
 */
1571 static bool unix_passcred_enabled(const struct socket *sock,
1572 const struct sock *other)
1574 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1575 !other->sk_socket ||
1576 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1580 * Some apps rely on write() giving SCM_CREDENTIALS
1581 * We include credentials if source or destination socket
1582 * asserted SOCK_PASSCRED.
/*
 * maybe_add_creds - stamp an skb with the sender's credentials if needed.
 * @skb:   buffer about to be queued
 * @sock:  sending socket
 * @other: receiving sock
 *
 * An skb that already carries a pid is tested for first (presumably it
 * is then left alone).  When unix_passcred_enabled() holds, the skb gets
 * a referenced pid for the current thread group plus the current uid
 * and gid.
 */
1584 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1585 const struct sock *other)
1587 if (UNIXCB(skb).pid)
1589 if (unix_passcred_enabled(sock, other)) {
1590 UNIXCB(skb).pid = get_pid(task_tgid(current));
1591 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
/*
 * maybe_init_creds - initialise a cookie for a send without cmsg data.
 * @scm:    cookie to initialise
 * @socket: sending socket
 * @other:  receiving sock
 *
 * Runs scm_send() with a message header whose control length is zero,
 * then, when unix_passcred_enabled() holds, fills in a referenced pid
 * for the current thread group and the current uid and gid.
 */
1595 static int maybe_init_creds(struct scm_cookie *scm,
1596 struct socket *socket,
1597 const struct sock *other)
1600 struct msghdr msg = { .msg_controllen = 0 };
1602 err = scm_send(socket, &msg, scm, false);
1606 if (unix_passcred_enabled(socket, other)) {
1607 scm->pid = get_pid(task_tgid(current));
1608 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
/*
 * unix_skb_scm_eq - compare an skb's credentials with a cookie.
 * @skb: buffer whose control block is examined
 * @scm: cookie to compare against
 *
 * True only when pid, uid, gid and security data all match.
 */
1613 static bool unix_skb_scm_eq(struct sk_buff *skb,
1614 struct scm_cookie *scm)
1616 const struct unix_skb_parms *u = &UNIXCB(skb);
1618 return u->pid == scm->pid &&
1619 uid_eq(u->uid, scm->creds.uid) &&
1620 gid_eq(u->gid, scm->creds.gid) &&
1621 unix_secdata_eq(scm, skb);
1625 * Send AF_UNIX data.
/*
 * unix_dgram_sendmsg - send one datagram on an AF_UNIX socket.
 * @sock: sending socket
 * @msg:  message header; a non-zero msg_namelen selects the destination
 *        by address, otherwise the connected peer is used
 *
 * Outline of the steps:
 *  - scm_send() parses ancillary data into a scm_cookie and MSG_OOB is
 *    tested up front.
 *  - The destination comes from the supplied address (unix_mkname(),
 *    later unix_find_other()) or from unix_peer_get().
 *  - A sender with SOCK_PASSCRED set and no address is autobound.
 *  - len is checked against sk_sndbuf - 32.  Messages larger than
 *    SKB_MAX_ALLOC put part of the payload into page fragments (at most
 *    MAX_SKB_FRAGS pages, page aligned).
 *  - Credentials and passed files are attached with unix_scm_to_skb()
 *    and the payload is copied from msg->msg_iter.
 *  - A packet rejected by sk_filter() is tossed without reporting an
 *    error to the sender.
 *  - With the receiver's state lock held the code checks
 *    unix_may_send(), a dead receiver (the sender is disconnected from
 *    it and err becomes -ECONNREFUSED), RCV_SHUTDOWN and, for
 *    non-SEQPACKET sockets, security_unix_may_send().
 *  - When the receiver is not connected back to the sender and its queue
 *    is full, the code either waits in unix_wait_for_peer() or re-takes
 *    both state locks and re-checks via unix_dgram_peer_wake_me().
 *  - Finally the skb is timestamped if SOCK_RCVTSTAMP is set, gets
 *    credentials through maybe_add_creds(), is queued on the receiver,
 *    the receiver's recursion_level is raised to max_level if lower, and
 *    sk_data_ready() is called after the state lock is dropped.
 */
1628 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1631 struct sock *sk = sock->sk;
1632 struct net *net = sock_net(sk);
1633 struct unix_sock *u = unix_sk(sk);
1634 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1635 struct sock *other = NULL;
1636 int namelen = 0; /* fake GCC */
1639 struct sk_buff *skb;
1641 struct scm_cookie scm;
1647 err = scm_send(sock, msg, &scm, false);
1652 if (msg->msg_flags&MSG_OOB)
1655 if (msg->msg_namelen) {
1656 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1663 other = unix_peer_get(sk);
1668 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1669 && (err = unix_autobind(sock)) != 0)
1673 if (len > sk->sk_sndbuf - 32)
1676 if (len > SKB_MAX_ALLOC) {
1677 data_len = min_t(size_t,
1678 len - SKB_MAX_ALLOC,
1679 MAX_SKB_FRAGS * PAGE_SIZE);
1680 data_len = PAGE_ALIGN(data_len);
1682 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1685 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1686 msg->msg_flags & MSG_DONTWAIT, &err,
1687 PAGE_ALLOC_COSTLY_ORDER);
1691 err = unix_scm_to_skb(&scm, skb, true);
1694 max_level = err + 1;
1696 skb_put(skb, len - data_len);
1697 skb->data_len = data_len;
1699 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1703 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1708 if (sunaddr == NULL)
1711 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1717 if (sk_filter(other, skb) < 0) {
1718 /* Toss the packet but do not return any error to the sender */
1724 unix_state_lock(other);
1727 if (!unix_may_send(sk, other))
1730 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1732 * Check with 1003.1g - what should
1735 unix_state_unlock(other);
1739 unix_state_lock(sk);
1742 if (unix_peer(sk) == other) {
1743 unix_peer(sk) = NULL;
1744 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1746 unix_state_unlock(sk);
1748 unix_dgram_disconnected(sk, other);
1750 err = -ECONNREFUSED;
1752 unix_state_unlock(sk);
1762 if (other->sk_shutdown & RCV_SHUTDOWN)
1765 if (sk->sk_type != SOCK_SEQPACKET) {
1766 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1771 /* other == sk && unix_peer(other) != sk if
1772 * - unix_peer(sk) == NULL, destination address bound to sk
1773 * - unix_peer(sk) == sk by time of get but disconnected before lock
1776 unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1778 timeo = unix_wait_for_peer(other, timeo);
1780 err = sock_intr_errno(timeo);
1781 if (signal_pending(current))
1788 unix_state_unlock(other);
1789 unix_state_double_lock(sk, other);
1792 if (unix_peer(sk) != other ||
1793 unix_dgram_peer_wake_me(sk, other)) {
1801 goto restart_locked;
1805 if (unlikely(sk_locked))
1806 unix_state_unlock(sk);
1808 if (sock_flag(other, SOCK_RCVTSTAMP))
1809 __net_timestamp(skb);
1810 maybe_add_creds(skb, sock, other);
1811 skb_queue_tail(&other->sk_receive_queue, skb);
1812 if (max_level > unix_sk(other)->recursion_level)
1813 unix_sk(other)->recursion_level = max_level;
1814 unix_state_unlock(other);
1815 other->sk_data_ready(other);
1822 unix_state_unlock(sk);
1823 unix_state_unlock(other);
1833 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1834 * bytes, and a minimum of a full page.
1836 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
/*
 * unix_stream_sendmsg - send data on a connected stream socket.
 * @sock: sending socket
 * @msg:  message header with payload iterator and optional cmsg data
 *
 * Ancillary data is parsed with scm_send() and MSG_OOB is tested first.
 * Supplying a destination address yields -EISCONN when the socket is
 * TCP_ESTABLISHED and -EOPNOTSUPP otherwise.  Without an address the
 * peer is read with unix_peer().  A sender with SEND_SHUTDOWN set is
 * also tested for before the loop.
 *
 * The payload is sent in a loop until sent reaches len.  Each chunk is
 * capped at half the send buffer minus 64 bytes and at
 * SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ.  The part above SKB_MAX_HEAD(0)
 * goes into page fragments.  Passed files travel with the first skb only
 * (fds_sent).  Every skb is queued under the peer's state lock after
 * checking that the peer is neither SOCK_DEAD nor RCV_SHUTDOWN, the
 * peer's recursion_level is raised to max_level if lower, and the peer
 * is notified through sk_data_ready().
 *
 * The error tail raises SIGPIPE when nothing was sent and MSG_NOSIGNAL
 * is not set.  Returns the number of bytes sent or, if that is zero,
 * err.
 */
1838 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1841 struct sock *sk = sock->sk;
1842 struct sock *other = NULL;
1844 struct sk_buff *skb;
1846 struct scm_cookie scm;
1847 bool fds_sent = false;
1852 err = scm_send(sock, msg, &scm, false);
1857 if (msg->msg_flags&MSG_OOB)
1860 if (msg->msg_namelen) {
1861 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1865 other = unix_peer(sk);
1870 if (sk->sk_shutdown & SEND_SHUTDOWN)
1873 while (sent < len) {
1876 /* Keep two messages in the pipe so it schedules better */
1877 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1879 /* allow fallback to order-0 allocations */
1880 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1882 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1884 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1886 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1887 msg->msg_flags & MSG_DONTWAIT, &err,
1888 get_order(UNIX_SKB_FRAGS_SZ));
1892 /* Only send the fds in the first buffer */
1893 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1898 max_level = err + 1;
1901 skb_put(skb, size - data_len);
1902 skb->data_len = data_len;
1904 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1910 unix_state_lock(other);
1912 if (sock_flag(other, SOCK_DEAD) ||
1913 (other->sk_shutdown & RCV_SHUTDOWN))
1916 maybe_add_creds(skb, sock, other);
1917 skb_queue_tail(&other->sk_receive_queue, skb);
1918 if (max_level > unix_sk(other)->recursion_level)
1919 unix_sk(other)->recursion_level = max_level;
1920 unix_state_unlock(other);
1921 other->sk_data_ready(other);
1930 unix_state_unlock(other);
1933 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1934 send_sig(SIGPIPE, current, 0);
1938 return sent ? : err;
/*
 * unix_stream_sendpage - append a page to the peer's receive queue.
 * @socket: sending socket
 * @page:   page holding the data
 * @offset: offset of the data within @page
 * @size:   number of bytes to send
 * @flags:  MSG_* flags (MSG_OOB, MSG_DONTWAIT and MSG_NOSIGNAL are
 *          examined)
 *
 * MSG_OOB is tested first, and a peer plus TCP_ESTABLISHED state are
 * required.  The peer's iolock mutex is taken interruptibly because skbs
 * already sitting in the peer's receive queue get modified.  A failed
 * lock attempt maps to -EAGAIN with MSG_DONTWAIT and to -ERESTARTSYS
 * otherwise.  SIGPIPE is flagged when the sender has SEND_SHUTDOWN set
 * or the peer is SOCK_DEAD or has RCV_SHUTDOWN set, and it is delivered
 * at the end unless MSG_NOSIGNAL is given.
 *
 * Credentials are prepared with maybe_init_creds() (presumably once,
 * tracked by init_scm).  The tail skb of the peer's queue is reused when
 * its credentials match the cookie (unix_skb_scm_eq()).  Otherwise a
 * new skb from sock_alloc_send_pskb() is used, and that allocation is
 * reached with the peer's state lock and iolock released.  After
 * skb_append_pagefrags() succeeds, the skb's data_len and truesize and
 * the sender's sk_wmem_alloc grow by @size.  A new skb is given
 * credentials through unix_scm_to_skb() and queued at the tail under
 * the queue lock.  The peer is notified via sk_data_ready() once both
 * locks are dropped.
 */
1941 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1942 int offset, size_t size, int flags)
1945 bool send_sigpipe = false;
1946 bool init_scm = true;
1947 struct scm_cookie scm;
1948 struct sock *other, *sk = socket->sk;
1949 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1951 if (flags & MSG_OOB)
1954 other = unix_peer(sk);
1955 if (!other || sk->sk_state != TCP_ESTABLISHED)
1960 unix_state_unlock(other);
1961 mutex_unlock(&unix_sk(other)->iolock);
1962 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1968 /* we must acquire iolock as we modify already present
1969 * skbs in the sk_receive_queue and mess with skb->len
1971 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1973 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1977 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1979 send_sigpipe = true;
1983 unix_state_lock(other);
1985 if (sock_flag(other, SOCK_DEAD) ||
1986 other->sk_shutdown & RCV_SHUTDOWN) {
1988 send_sigpipe = true;
1989 goto err_state_unlock;
1993 err = maybe_init_creds(&scm, socket, other);
1995 goto err_state_unlock;
1999 skb = skb_peek_tail(&other->sk_receive_queue);
2000 if (tail && tail == skb) {
2002 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2009 } else if (newskb) {
2010 /* this is fast path, we don't necessarily need to
2011 * call to kfree_skb even though with newskb == NULL
2012 * this - does no harm
2014 consume_skb(newskb);
2018 if (skb_append_pagefrags(skb, page, offset, size)) {
2024 skb->data_len += size;
2025 skb->truesize += size;
2026 atomic_add(size, &sk->sk_wmem_alloc);
2029 err = unix_scm_to_skb(&scm, skb, false);
2031 goto err_state_unlock;
2032 spin_lock(&other->sk_receive_queue.lock);
2033 __skb_queue_tail(&other->sk_receive_queue, newskb);
2034 spin_unlock(&other->sk_receive_queue.lock);
2037 unix_state_unlock(other);
2038 mutex_unlock(&unix_sk(other)->iolock);
2040 other->sk_data_ready(other);
2045 unix_state_unlock(other);
2047 mutex_unlock(&unix_sk(other)->iolock);
2050 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2051 send_sig(SIGPIPE, current, 0);
/*
 * unix_seqpacket_sendmsg - send on a SOCK_SEQPACKET socket.
 * @sock: sending socket
 * @msg:  message header
 *
 * Fetches a pending socket error with sock_error(), checks that the
 * sock is TCP_ESTABLISHED, clears any destination address length in
 * @msg so the connected peer is used, and hands the message to
 * unix_dgram_sendmsg().
 */
2057 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2061 struct sock *sk = sock->sk;
2063 err = sock_error(sk);
2067 if (sk->sk_state != TCP_ESTABLISHED)
2070 if (msg->msg_namelen)
2071 msg->msg_namelen = 0;
2073 return unix_dgram_sendmsg(sock, msg, len);
/*
 * unix_seqpacket_recvmsg - receive on a SOCK_SEQPACKET socket.
 * @sock:  receiving socket
 * @msg:   message header to fill
 * @size:  size of the caller's buffer
 * @flags: MSG_* flags
 *
 * Checks that the sock is TCP_ESTABLISHED and then delegates to
 * unix_dgram_recvmsg().
 */
2076 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2077 size_t size, int flags)
2079 struct sock *sk = sock->sk;
2081 if (sk->sk_state != TCP_ESTABLISHED)
2084 return unix_dgram_recvmsg(sock, msg, size, flags);
/*
 * unix_copy_addr - report a sock's address through a message header.
 * @msg: message header whose msg_name buffer and msg_namelen are written
 * @sk:  sock whose address is reported
 *
 * Sets msg->msg_namelen to the length stored in @sk's unix_address and
 * copies that many bytes of its name into msg->msg_name.
 */
2087 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2089 struct unix_sock *u = unix_sk(sk);
2092 msg->msg_namelen = u->addr->len;
2093 memcpy(msg->msg_name, u->addr->name, u->addr->len);
/*
 * unix_dgram_recvmsg - receive one datagram.
 * @sock:  receiving socket
 * @msg:   message header to fill
 * @size:  size of the caller's buffer
 * @flags: MSG_* flags (MSG_DONTWAIT, MSG_PEEK and MSG_TRUNC are examined)
 *
 * Takes the socket's iolock interruptibly.  A failed attempt maps to
 * -EAGAIN for MSG_DONTWAIT callers and -ERESTARTSYS otherwise.  An skb
 * is then fetched with __skb_recv_datagram(), starting at the current
 * peek offset.  A SEQPACKET socket with RCV_SHUTDOWN set that got
 * -EAGAIN is special-cased to signal EOF.
 *
 * With an skb in hand: sleepers on peer_wait are woken for writing, the
 * sender address is reported with unix_copy_addr(), the copy size is
 * clamped to the remaining skb length (MSG_TRUNC is set in
 * msg->msg_flags when the buffer is too small), the data is copied, a
 * receive timestamp is reported if SOCK_RCVTSTAMP is set, and
 * credentials and security data go into a scm_cookie.  Without MSG_PEEK
 * the passed files are detached from the skb and the peek offset is
 * moved back.  With MSG_PEEK the peek offset advances and the file list
 * is duplicated instead.
 *
 * err ends up as the full remaining length when the caller passed
 * MSG_TRUNC, else as the number of bytes copied.  The skb is released
 * and iolock dropped before returning.
 */
2097 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2098 size_t size, int flags)
2100 struct scm_cookie scm;
2101 struct sock *sk = sock->sk;
2102 struct unix_sock *u = unix_sk(sk);
2103 int noblock = flags & MSG_DONTWAIT;
2104 struct sk_buff *skb;
2112 err = mutex_lock_interruptible(&u->iolock);
2113 if (unlikely(err)) {
2114 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
2115 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2117 err = noblock ? -EAGAIN : -ERESTARTSYS;
2121 skip = sk_peek_offset(sk, flags);
2123 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
2125 unix_state_lock(sk);
2126 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2127 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2128 (sk->sk_shutdown & RCV_SHUTDOWN))
2130 unix_state_unlock(sk);
2134 wake_up_interruptible_sync_poll(&u->peer_wait,
2135 POLLOUT | POLLWRNORM | POLLWRBAND);
2138 unix_copy_addr(msg, skb->sk);
2140 if (size > skb->len - skip)
2141 size = skb->len - skip;
2142 else if (size < skb->len - skip)
2143 msg->msg_flags |= MSG_TRUNC;
2145 err = skb_copy_datagram_msg(skb, skip, msg, size);
2149 if (sock_flag(sk, SOCK_RCVTSTAMP))
2150 __sock_recv_timestamp(msg, sk, skb);
2152 memset(&scm, 0, sizeof(scm));
2154 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2155 unix_set_secdata(&scm, skb);
2157 if (!(flags & MSG_PEEK)) {
2159 unix_detach_fds(&scm, skb);
2161 sk_peek_offset_bwd(sk, skb->len);
2163 /* It is questionable: on PEEK we could:
2164 - do not return fds - good, but too simple 8)
2165 - return fds, and do not return them on read (old strategy,
2167 - clone fds (I chose it for now, it is the most universal
2170 POSIX 1003.1g does not actually define this clearly
2171 at all. POSIX 1003.1g doesn't define a lot of things
2176 sk_peek_offset_fwd(sk, size);
2179 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2181 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2183 scm_recv(sock, msg, &scm, flags);
2186 skb_free_datagram(sk, skb);
2188 mutex_unlock(&u->iolock);
2194 * Sleep until more data has arrived. But check for races..
/*
 * unix_stream_data_wait - sleep until the receive queue changes.
 * @sk:       sock to wait on
 * @timeo:    remaining timeout
 * @last:     last skb the caller has seen
 * @last_len: length of @last as seen by the caller
 *
 * Runs under the state lock and loops on
 * prepare_to_wait(TASK_INTERRUPTIBLE).  The wake-up conditions include
 * a tail skb whose length differs from @last_len, RCV_SHUTDOWN being
 * set, and a pending signal.  Otherwise SOCKWQ_ASYNC_WAITDATA is set
 * and the task sleeps in freezable_schedule_timeout() with the state
 * lock released.  After waking, a SOCK_DEAD sock is tested for before
 * the bit is cleared again.  Presumably returns the remaining timeout.
 */
2196 static long unix_stream_data_wait(struct sock *sk, long timeo,
2197 struct sk_buff *last, unsigned int last_len)
2199 struct sk_buff *tail;
2202 unix_state_lock(sk);
2205 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2207 tail = skb_peek_tail(&sk->sk_receive_queue);
2209 (tail && tail->len != last_len) ||
2211 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2212 signal_pending(current) ||
2216 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2217 unix_state_unlock(sk);
2218 timeo = freezable_schedule_timeout(timeo);
2219 unix_state_lock(sk);
2221 if (sock_flag(sk, SOCK_DEAD))
2224 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2227 finish_wait(sk_sleep(sk), &wait);
2228 unix_state_unlock(sk);
/*
 * unix_skb_len - bytes of an skb not yet consumed by a stream reader.
 * @skb: buffer to measure
 *
 * Returns skb->len minus the consumed counter kept in the control block.
 */
2232 static unsigned int unix_skb_len(const struct sk_buff *skb)
2234 return skb->len - UNIXCB(skb).consumed;
/*
 * struct unix_stream_read_state - parameters of one stream read request.
 * @recv_actor:   callback that moves data out of an skb; it is called
 *                with the skb, a skip offset, a chunk length and this
 *                state
 * @socket:       socket being read
 * @pipe:         destination pipe, used by the splice actor
 * @splice_flags: flags handed to skb_splice_bits() by the splice actor
 *
 * Consumed by unix_stream_read_generic().
 */
2237 struct unix_stream_read_state {
2238 int (*recv_actor)(struct sk_buff *, int, int,
2239 struct unix_stream_read_state *);
2240 struct socket *socket;
2242 struct pipe_inode_info *pipe;
2245 unsigned int splice_flags;
/*
 * unix_stream_read_generic - common receive path for recvmsg and splice.
 * @state: request description (socket, flags, size, recv_actor, ...)
 *
 * TCP_ESTABLISHED state is required and MSG_OOB is tested up front.
 * The iolock mutex serialises readers so the queue is not reordered
 * while data is being copied.  Main loop:
 *  - under the state lock, test for SOCK_DEAD, peek the first skb and
 *    reset recursion_level;
 *  - with no usable skb: stop once copied reaches the low-water target
 *    from sock_rcvlowat(), otherwise check sock_error() and
 *    RCV_SHUTDOWN, then drop iolock and sleep in
 *    unix_stream_data_wait() (a pending signal ends the wait with
 *    sock_intr_errno());
 *  - skip over skbs already covered by the peek offset;
 *  - never glue skbs whose credentials differ (unix_skb_scm_eq()); with
 *    SOCK_PASSCRED set, credentials and security data are copied into
 *    the scm cookie;
 *  - report the sender address once, if a name buffer was supplied;
 *  - hand min(remaining skb bytes, size) to state->recv_actor();
 *  - without MSG_PEEK: add the chunk to the skb's consumed counter, move
 *    the peek offset back, detach passed files and unlink the skb once
 *    nothing is left in it;
 *  - with MSG_PEEK: duplicate the file list, advance the peek offset and
 *    step to the next skb under the state lock.
 *
 * After iolock is dropped, scm_recv() delivers the collected control
 * data.  Returns the number of bytes copied or, if none, err.
 */
2248 static int unix_stream_read_generic(struct unix_stream_read_state *state)
2250 struct scm_cookie scm;
2251 struct socket *sock = state->socket;
2252 struct sock *sk = sock->sk;
2253 struct unix_sock *u = unix_sk(sk);
2255 int flags = state->flags;
2256 int noblock = flags & MSG_DONTWAIT;
2257 bool check_creds = false;
2262 size_t size = state->size;
2263 unsigned int last_len;
2265 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2270 if (unlikely(flags & MSG_OOB)) {
2275 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2276 timeo = sock_rcvtimeo(sk, noblock);
2278 memset(&scm, 0, sizeof(scm));
2280 /* Lock the socket to prevent queue disordering
2281 * while sleeps in memcpy_tomsg
2283 mutex_lock(&u->iolock);
2285 if (flags & MSG_PEEK)
2286 skip = sk_peek_offset(sk, flags);
2293 struct sk_buff *skb, *last;
2295 unix_state_lock(sk);
2296 if (sock_flag(sk, SOCK_DEAD)) {
2300 last = skb = skb_peek(&sk->sk_receive_queue);
2301 last_len = last ? last->len : 0;
2304 unix_sk(sk)->recursion_level = 0;
2305 if (copied >= target)
2309 * POSIX 1003.1g mandates this order.
2312 err = sock_error(sk);
2315 if (sk->sk_shutdown & RCV_SHUTDOWN)
2318 unix_state_unlock(sk);
2324 mutex_unlock(&u->iolock);
2326 timeo = unix_stream_data_wait(sk, timeo, last,
2329 if (signal_pending(current)) {
2330 err = sock_intr_errno(timeo);
2335 mutex_lock(&u->iolock);
2338 unix_state_unlock(sk);
2342 while (skip >= unix_skb_len(skb)) {
2343 skip -= unix_skb_len(skb);
2345 last_len = skb->len;
2346 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2351 unix_state_unlock(sk);
2354 /* Never glue messages from different writers */
2355 if (!unix_skb_scm_eq(skb, &scm))
2357 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2358 /* Copy credentials */
2359 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2360 unix_set_secdata(&scm, skb);
2364 /* Copy address just once */
2365 if (state->msg && state->msg->msg_name) {
2366 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2367 state->msg->msg_name);
2368 unix_copy_addr(state->msg, skb->sk);
2372 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2374 chunk = state->recv_actor(skb, skip, chunk, state);
2375 drop_skb = !unix_skb_len(skb);
2376 /* skb is only safe to use if !drop_skb */
2387 /* the skb was touched by a concurrent reader;
2388 * we should not expect anything from this skb
2389 * anymore and assume it invalid - we can be
2390 * sure it was dropped from the socket queue
2392 * let's report a short read
2398 /* Mark read part of skb as used */
2399 if (!(flags & MSG_PEEK)) {
2400 UNIXCB(skb).consumed += chunk;
2402 sk_peek_offset_bwd(sk, chunk);
2405 unix_detach_fds(&scm, skb);
2407 if (unix_skb_len(skb))
2410 skb_unlink(skb, &sk->sk_receive_queue);
2416 /* It is questionable, see note in unix_dgram_recvmsg.
2419 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2421 sk_peek_offset_fwd(sk, chunk);
2428 last_len = skb->len;
2429 unix_state_lock(sk);
2430 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2433 unix_state_unlock(sk);
2438 mutex_unlock(&u->iolock);
2440 scm_recv(sock, state->msg, &scm, flags);
2444 return copied ? : err;
/*
 * unix_stream_read_actor - recv_actor used by unix_stream_recvmsg().
 * @skb:   source buffer
 * @skip:  extra offset on top of the bytes already consumed
 * @chunk: requested length
 * @state: read request
 *
 * Copies from @skb starting at offset consumed + @skip with
 * skb_copy_datagram_msg().  Returns the copy's non-zero result if there
 * is one, otherwise @chunk.
 */
2447 static int unix_stream_read_actor(struct sk_buff *skb,
2448 int skip, int chunk,
2449 struct unix_stream_read_state *state)
2453 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2455 return ret ?: chunk;
/*
 * unix_stream_recvmsg - recvmsg entry point for stream sockets.
 * @sock:  receiving socket
 * @msg:   message header to fill
 * @size:  size of the caller's buffer
 * @flags: MSG_* flags
 *
 * Builds a unix_stream_read_state that uses unix_stream_read_actor() as
 * its actor and runs unix_stream_read_generic() on it.
 */
2458 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2459 size_t size, int flags)
2461 struct unix_stream_read_state state = {
2462 .recv_actor = unix_stream_read_actor,
2469 return unix_stream_read_generic(&state);
/*
 * skb_unix_socket_splice - splice callback passed to skb_splice_bits().
 * @sk:   sock being read
 * @pipe: destination pipe
 * @spd:  description of the pages to splice
 *
 * Releases the sock's iolock around splice_to_pipe() and takes it again
 * afterwards, so the caller still holds the mutex on return.
 */
2472 static ssize_t skb_unix_socket_splice(struct sock *sk,
2473 struct pipe_inode_info *pipe,
2474 struct splice_pipe_desc *spd)
2477 struct unix_sock *u = unix_sk(sk);
2479 mutex_unlock(&u->iolock);
2480 ret = splice_to_pipe(pipe, spd);
2481 mutex_lock(&u->iolock);
/*
 * unix_stream_splice_actor - recv_actor used by unix_stream_splice_read().
 * @skb:   source buffer
 * @skip:  extra offset on top of the bytes already consumed
 * @chunk: requested length
 * @state: read request supplying the socket, pipe and splice flags
 *
 * Splices @chunk bytes starting at offset consumed + @skip into
 * state->pipe through skb_splice_bits(), with skb_unix_socket_splice()
 * as the pipe-filling callback.  Returns that call's result.
 */
2486 static int unix_stream_splice_actor(struct sk_buff *skb,
2487 int skip, int chunk,
2488 struct unix_stream_read_state *state)
2490 return skb_splice_bits(skb, state->socket->sk,
2491 UNIXCB(skb).consumed + skip,
2492 state->pipe, chunk, state->splice_flags,
2493 skb_unix_socket_splice);
/*
 * unix_stream_splice_read - splice data from a stream socket to a pipe.
 * @sock:  source socket
 * @ppos:  file position; a non-zero value is tested for up front
 * @pipe:  destination pipe
 * @size:  maximum number of bytes
 * @flags: SPLICE_F_* flags
 *
 * Builds a unix_stream_read_state with unix_stream_splice_actor() as
 * actor.  The read becomes MSG_DONTWAIT when either the file has
 * O_NONBLOCK set or SPLICE_F_NONBLOCK was passed.  The actual work is
 * done by unix_stream_read_generic().
 */
2496 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2497 struct pipe_inode_info *pipe,
2498 size_t size, unsigned int flags)
2500 struct unix_stream_read_state state = {
2501 .recv_actor = unix_stream_splice_actor,
2505 .splice_flags = flags,
2508 if (unlikely(*ppos))
2511 if (sock->file->f_flags & O_NONBLOCK ||
2512 flags & SPLICE_F_NONBLOCK)
2513 state.flags = MSG_DONTWAIT;
2515 return unix_stream_read_generic(&state);
/*
 * unix_shutdown - shut down one or both directions of a socket.
 * @sock: socket to shut down
 * @mode: SHUT_RD, SHUT_WR or SHUT_RDWR; other values are tested for
 *        first
 *
 * Under the state lock the shutdown bits are OR-ed into sk_shutdown and
 * the peer is read.  sk_state_change() is then called on the sock.  For
 * a SOCK_STREAM or SOCK_SEQPACKET sock the shutdown is mirrored onto the
 * peer with the directions swapped (receive becomes send and vice
 * versa), the peer's sk_state_change() is called, and async waiters on
 * the peer are woken: with POLL_HUP when both directions are now shut,
 * with POLL_IN when only its receive side is.
 */
2518 static int unix_shutdown(struct socket *sock, int mode)
2520 struct sock *sk = sock->sk;
2523 if (mode < SHUT_RD || mode > SHUT_RDWR)
2526 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2527 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2528 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2532 unix_state_lock(sk);
2533 sk->sk_shutdown |= mode;
2534 other = unix_peer(sk);
2537 unix_state_unlock(sk);
2538 sk->sk_state_change(sk);
2541 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2545 if (mode&RCV_SHUTDOWN)
2546 peer_mode |= SEND_SHUTDOWN;
2547 if (mode&SEND_SHUTDOWN)
2548 peer_mode |= RCV_SHUTDOWN;
2549 unix_state_lock(other);
2550 other->sk_shutdown |= peer_mode;
2551 unix_state_unlock(other);
2552 other->sk_state_change(other);
2553 if (peer_mode == SHUTDOWN_MASK)
2554 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2555 else if (peer_mode & RCV_SHUTDOWN)
2556 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
/*
 * unix_inq_len - amount of data waiting in a sock's receive queue.
 * @sk: sock to inspect
 *
 * A listening sock is tested for first.  With the receive queue lock
 * held, SOCK_STREAM and SOCK_SEQPACKET socks sum the unconsumed bytes
 * (unix_skb_len()) of every queued skb.  Other types look only at the
 * first skb in the queue.  Exported for GPL modules.
 */
2564 long unix_inq_len(struct sock *sk)
2566 struct sk_buff *skb;
2569 if (sk->sk_state == TCP_LISTEN)
2572 spin_lock(&sk->sk_receive_queue.lock);
2573 if (sk->sk_type == SOCK_STREAM ||
2574 sk->sk_type == SOCK_SEQPACKET) {
2575 skb_queue_walk(&sk->sk_receive_queue, skb)
2576 amount += unix_skb_len(skb);
2578 skb = skb_peek(&sk->sk_receive_queue);
2582 spin_unlock(&sk->sk_receive_queue.lock);
2586 EXPORT_SYMBOL_GPL(unix_inq_len);
/*
 * unix_outq_len - amount of send memory currently charged to a sock.
 * @sk: sock to inspect
 *
 * Returns sk_wmem_alloc_get(@sk).  Exported for GPL modules.
 */
2588 long unix_outq_len(struct sock *sk)
2590 return sk_wmem_alloc_get(sk);
2592 EXPORT_SYMBOL_GPL(unix_outq_len);
/*
 * unix_ioctl - ioctl handler for AF_UNIX sockets.
 * @sock: socket the request is for
 * @cmd:  request code
 * @arg:  user pointer that receives the result as an int
 *
 * The two queries handled here report the outgoing amount from
 * unix_outq_len() and the incoming amount from unix_inq_len().  Either
 * value is written to user space with put_user().
 */
2594 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2596 struct sock *sk = sock->sk;
2602 amount = unix_outq_len(sk);
2603 err = put_user(amount, (int __user *)arg);
2606 amount = unix_inq_len(sk);
2610 err = put_user(amount, (int __user *)arg);
/*
 * unix_poll - poll handler for AF_UNIX sockets.
 * @file: file being polled
 * @sock: socket behind @file
 * @wait: poll table to register with
 *
 * Registers on the sock's wait queue and builds the event mask:
 *  - both directions shut down (SHUTDOWN_MASK) is treated as an
 *    exceptional event;
 *  - RCV_SHUTDOWN adds POLLRDHUP | POLLIN | POLLRDNORM;
 *  - a non-empty receive queue adds POLLIN | POLLRDNORM;
 *  - a SOCK_STREAM or SOCK_SEQPACKET sock in TCP_CLOSE state is checked
 *    for termination;
 *  - unix_writable() adds POLLOUT | POLLWRNORM | POLLWRBAND.
 */
2619 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2621 struct sock *sk = sock->sk;
2624 sock_poll_wait(file, sk_sleep(sk), wait);
2627 /* exceptional events? */
2630 if (sk->sk_shutdown == SHUTDOWN_MASK)
2632 if (sk->sk_shutdown & RCV_SHUTDOWN)
2633 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2636 if (!skb_queue_empty(&sk->sk_receive_queue))
2637 mask |= POLLIN | POLLRDNORM;
2639 /* Connection-based need to check for termination and startup */
2640 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2641 sk->sk_state == TCP_CLOSE)
2645 * we set writable also when the other side has shut down the
2646 * connection. This prevents stuck sockets.
2648 if (unix_writable(sk))
2649 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
/*
 * unix_dgram_poll - poll handler for datagram-style AF_UNIX sockets.
 * @file: file being polled
 * @sock: socket behind @file
 *
 * Registers on the sock's wait queue and builds the event mask:
 *  - a pending error or a non-empty error queue is an exceptional
 *    event, with POLLPRI added when SOCK_SELECT_ERR_QUEUE is set;
 *  - RCV_SHUTDOWN adds POLLRDHUP | POLLIN | POLLRDNORM, and
 *    SHUTDOWN_MASK is tested separately;
 *  - a non-empty receive queue adds POLLIN | POLLRDNORM;
 *  - SOCK_SEQPACKET socks are checked for TCP_CLOSE and TCP_SYN_SENT.
 *
 * The write-side tests are skipped when the caller did not request
 * POLLWRBAND, POLLWRNORM or POLLOUT.  Otherwise writability starts from
 * unix_writable().  Under the state lock, a peer that is not connected
 * back to this sock, has a full receive queue and for which
 * unix_dgram_peer_wake_me() returns true is examined as well.  The mask
 * gains POLLOUT | POLLWRNORM | POLLWRBAND on one outcome, and
 * SOCKWQ_ASYNC_NOSPACE is set on the other.
 */
2654 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2657 struct sock *sk = sock->sk, *other;
2658 unsigned int mask, writable;
2660 sock_poll_wait(file, sk_sleep(sk), wait);
2663 /* exceptional events? */
2664 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2666 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2668 if (sk->sk_shutdown & RCV_SHUTDOWN)
2669 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2670 if (sk->sk_shutdown == SHUTDOWN_MASK)
2674 if (!skb_queue_empty(&sk->sk_receive_queue))
2675 mask |= POLLIN | POLLRDNORM;
2677 /* Connection-based need to check for termination and startup */
2678 if (sk->sk_type == SOCK_SEQPACKET) {
2679 if (sk->sk_state == TCP_CLOSE)
2681 /* connection hasn't started yet? */
2682 if (sk->sk_state == TCP_SYN_SENT)
2686 /* No write status requested, avoid expensive OUT tests. */
2687 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2690 writable = unix_writable(sk);
2692 unix_state_lock(sk);
2694 other = unix_peer(sk);
2695 if (other && unix_peer(other) != sk &&
2696 unix_recvq_full(other) &&
2697 unix_dgram_peer_wake_me(sk, other))
2700 unix_state_unlock(sk);
2704 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2706 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2711 #ifdef CONFIG_PROC_FS
/*
 * A seq_file position packs two values: the hash bucket index in the
 * high bits and an offset within that bucket in the low BUCKET_SPACE
 * bits.  get_bucket() and get_offset() extract the two parts and
 * set_bucket_offset() combines them again.  Offsets count from 1, as
 * the iterator starts each new bucket at offset 1.
 */
2713 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2715 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2716 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2717 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
/*
 * unix_from_bucket - find the sock a seq_file position refers to.
 * @seq: seq_file, used to determine the net namespace
 * @pos: position encoding bucket and offset
 *
 * Walks the socket list of the encoded bucket, tests each sock's net
 * namespace against the seq_file's, and counts up until the count
 * equals the encoded offset.
 */
2719 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2721 unsigned long offset = get_offset(*pos);
2722 unsigned long bucket = get_bucket(*pos);
2724 unsigned long count = 0;
2726 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2727 if (sock_net(sk) != seq_file_net(seq))
2729 if (++count == offset)
/*
 * unix_next_socket - advance the /proc iterator to the next sock.
 * @seq: seq_file, used to determine the net namespace
 *
 * When handed a real sock (a pointer above SEQ_START_TOKEN) the first
 * loop looks for a following sock that belongs to the seq_file's net
 * namespace.  Otherwise, or when that search runs out, the sock at the
 * current position is looked up with unix_from_bucket().  On a miss the
 * position moves to offset 1 of the next bucket until the end of
 * unix_socket_table is reached.
 */
2736 static struct sock *unix_next_socket(struct seq_file *seq,
2740 unsigned long bucket;
2742 while (sk > (struct sock *)SEQ_START_TOKEN) {
2746 if (sock_net(sk) == seq_file_net(seq))
2751 sk = unix_from_bucket(seq, pos);
2756 bucket = get_bucket(*pos) + 1;
2757 *pos = set_bucket_offset(bucket, 1);
2758 } while (bucket < ARRAY_SIZE(unix_socket_table));
/*
 * unix_seq_start - seq_file start callback.
 * @seq: seq_file being read
 * @pos: position to start from
 *
 * Takes unix_table_lock, which stays held until unix_seq_stop().
 * Returns SEQ_START_TOKEN in one case (presumably position zero), tests
 * for a bucket index beyond unix_socket_table, and otherwise returns
 * the sock found by unix_next_socket().
 */
2763 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2764 __acquires(unix_table_lock)
2766 spin_lock(&unix_table_lock);
2769 return SEQ_START_TOKEN;
2771 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2774 return unix_next_socket(seq, NULL, pos);
/*
 * unix_seq_next - seq_file next callback.
 * @seq: seq_file being read
 * @v:   element returned by the previous start/next call
 * @pos: current position
 *
 * Returns the element following @v as found by unix_next_socket().
 */
2777 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2780 return unix_next_socket(seq, v, pos);
/*
 * unix_seq_stop - seq_file stop callback.
 * @seq: seq_file being read
 * @v:   last element returned
 *
 * Releases unix_table_lock taken in unix_seq_start().
 */
2783 static void unix_seq_stop(struct seq_file *seq, void *v)
2784 __releases(unix_table_lock)
2786 spin_unlock(&unix_table_lock);
/*
 * unix_seq_show - emit one line of the unix proc file.
 * @seq: seq_file being written
 * @v:   SEQ_START_TOKEN or the sock to describe
 *
 * For SEQ_START_TOKEN the column header is printed.  Otherwise one sock
 * is formatted: its kernel pointer (%pK), reference count, a flags
 * column that is __SO_ACCEPTCON for a listening sock and 0 otherwise,
 * and a socket-state value derived from sk_state.  The characters of
 * the sock's address path follow, one seq_putc() each.  The length is
 * the stored address length minus sizeof(short), and socks that are not
 * UNIX_ABSTRACT are tested for separately.  The sock's state lock is
 * released before the trailing newline is written.
 */
2789 static int unix_seq_show(struct seq_file *seq, void *v)
2792 if (v == SEQ_START_TOKEN)
2793 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2797 struct unix_sock *u = unix_sk(s);
2800 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2802 atomic_read(&s->sk_refcnt),
2804 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2807 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2808 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2816 len = u->addr->len - sizeof(short);
2817 if (!UNIX_ABSTRACT(s))
2823 for ( ; i < len; i++)
2824 seq_putc(seq, u->addr->name->sun_path[i]);
2826 unix_state_unlock(s);
2827 seq_putc(seq, '\n');
/* seq_file iterator callbacks for the unix proc file. */
2833 static const struct seq_operations unix_seq_ops = {
2834 .start = unix_seq_start,
2835 .next = unix_seq_next,
2836 .stop = unix_seq_stop,
2837 .show = unix_seq_show,
/*
 * unix_seq_open - open callback of the unix proc file.
 * @inode: inode of the proc entry
 * @file:  file being opened
 *
 * Opens a per-net seq_file using unix_seq_ops, with a
 * struct seq_net_private as private data.
 */
2840 static int unix_seq_open(struct inode *inode, struct file *file)
2842 return seq_open_net(inode, file, &unix_seq_ops,
2843 sizeof(struct seq_net_private));
/* File operations of the unix proc file, registered in unix_net_init(). */
2846 static const struct file_operations unix_seq_fops = {
2847 .owner = THIS_MODULE,
2848 .open = unix_seq_open,
2850 .llseek = seq_lseek,
2851 .release = seq_release_net,
/* Protocol family descriptor handed to sock_register() in af_unix_init(). */
2856 static const struct net_proto_family unix_family_ops = {
2858 .create = unix_create,
2859 .owner = THIS_MODULE,
/*
 * unix_net_init - per-namespace initialisation.
 * @net: network namespace being set up
 *
 * Sets the default sysctl_max_dgram_qlen to 10 and registers the sysctl
 * table.  With CONFIG_PROC_FS the unix proc entry is created, and a
 * failure there unregisters the sysctl table again.  error starts out
 * as -ENOMEM.
 */
2863 static int __net_init unix_net_init(struct net *net)
2865 int error = -ENOMEM;
2867 net->unx.sysctl_max_dgram_qlen = 10;
2868 if (unix_sysctl_register(net))
2871 #ifdef CONFIG_PROC_FS
2872 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2873 unix_sysctl_unregister(net);
/*
 * unix_net_exit - per-namespace teardown.
 * @net: network namespace being destroyed
 *
 * Unregisters the sysctl table and removes the unix proc entry.
 */
2882 static void __net_exit unix_net_exit(struct net *net)
2884 unix_sysctl_unregister(net);
2885 remove_proc_entry("unix", net->proc_net);
/* Per-namespace init/exit hooks, registered in af_unix_init(). */
2888 static struct pernet_operations unix_net_ops = {
2889 .init = unix_net_init,
2890 .exit = unix_net_exit,
/*
 * af_unix_init - register the AF_UNIX protocol family.
 *
 * A build-time check guarantees that struct unix_skb_parms fits into
 * the cb area of struct sk_buff.  The proto is registered first, with a
 * critical log message on failure.  Then the socket family and the
 * per-namespace operations are registered.  The return values of those
 * last two calls are not examined.
 */
2893 static int __init af_unix_init(void)
2897 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2899 rc = proto_register(&unix_proto, 1);
2901 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2905 sock_register(&unix_family_ops);
2906 register_pernet_subsys(&unix_net_ops);
/*
 * af_unix_exit - module exit: undo af_unix_init().
 *
 * Unregisters the PF_UNIX socket family, the proto and the
 * per-namespace operations, in that order.
 */
2911 static void __exit af_unix_exit(void)
2913 sock_unregister(PF_UNIX);
2914 proto_unregister(&unix_proto);
2915 unregister_pernet_subsys(&unix_net_ops);
2918 /* Earlier than device_initcall() so that other drivers invoking
2919 request_module() don't end up in a loop when modprobe tries
2920 to use a UNIX socket. But later than subsys_initcall() because
2921 we depend on stuff initialised there */
2922 fs_initcall(af_unix_init);
2923 module_exit(af_unix_exit);
2925 MODULE_LICENSE("GPL");
2926 MODULE_ALIAS_NETPROTO(PF_UNIX);