ipvs: rename functions related to dst_cache reset
[firefly-linux-kernel-4.4.55.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57
58 /* lock for service table */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
63 #ifdef CONFIG_IP_VS_DEBUG
64 static int sysctl_ip_vs_debug_level = 0;
65
66 int ip_vs_get_debug_level(void)
67 {
68         return sysctl_ip_vs_debug_level;
69 }
70 #endif
71
72
73 /*  Protos */
74 static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
77 #ifdef CONFIG_IP_VS_IPV6
78 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
79 static bool __ip_vs_addr_is_local_v6(struct net *net,
80                                      const struct in6_addr *addr)
81 {
82         struct flowi6 fl6 = {
83                 .daddr = *addr,
84         };
85         struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
86         bool is_local;
87
88         is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
89
90         dst_release(dst);
91         return is_local;
92 }
93 #endif
94
95 #ifdef CONFIG_SYSCTL
96 /*
97  *      update_defense_level is called from keventd and from sysctl,
98  *      so it needs to protect itself from softirqs
99  */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102         struct sysinfo i;
103         static int old_secure_tcp = 0;
104         int availmem;
105         int nomem;
106         int to_change = -1;
107
108         /* we only count free and buffered memory (in pages) */
109         si_meminfo(&i);
110         availmem = i.freeram + i.bufferram;
111         /* however in linux 2.5 the i.bufferram is total page cache size,
112            we need adjust it */
113         /* si_swapinfo(&i); */
114         /* availmem = availmem - (i.totalswap - i.freeswap); */
115
116         nomem = (availmem < ipvs->sysctl_amemthresh);
117
118         local_bh_disable();
119
120         /* drop_entry */
121         spin_lock(&ipvs->dropentry_lock);
122         switch (ipvs->sysctl_drop_entry) {
123         case 0:
124                 atomic_set(&ipvs->dropentry, 0);
125                 break;
126         case 1:
127                 if (nomem) {
128                         atomic_set(&ipvs->dropentry, 1);
129                         ipvs->sysctl_drop_entry = 2;
130                 } else {
131                         atomic_set(&ipvs->dropentry, 0);
132                 }
133                 break;
134         case 2:
135                 if (nomem) {
136                         atomic_set(&ipvs->dropentry, 1);
137                 } else {
138                         atomic_set(&ipvs->dropentry, 0);
139                         ipvs->sysctl_drop_entry = 1;
140                 };
141                 break;
142         case 3:
143                 atomic_set(&ipvs->dropentry, 1);
144                 break;
145         }
146         spin_unlock(&ipvs->dropentry_lock);
147
148         /* drop_packet */
149         spin_lock(&ipvs->droppacket_lock);
150         switch (ipvs->sysctl_drop_packet) {
151         case 0:
152                 ipvs->drop_rate = 0;
153                 break;
154         case 1:
155                 if (nomem) {
156                         ipvs->drop_rate = ipvs->drop_counter
157                                 = ipvs->sysctl_amemthresh /
158                                 (ipvs->sysctl_amemthresh-availmem);
159                         ipvs->sysctl_drop_packet = 2;
160                 } else {
161                         ipvs->drop_rate = 0;
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         ipvs->drop_rate = ipvs->drop_counter
167                                 = ipvs->sysctl_amemthresh /
168                                 (ipvs->sysctl_amemthresh-availmem);
169                 } else {
170                         ipvs->drop_rate = 0;
171                         ipvs->sysctl_drop_packet = 1;
172                 }
173                 break;
174         case 3:
175                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
176                 break;
177         }
178         spin_unlock(&ipvs->droppacket_lock);
179
180         /* secure_tcp */
181         spin_lock(&ipvs->securetcp_lock);
182         switch (ipvs->sysctl_secure_tcp) {
183         case 0:
184                 if (old_secure_tcp >= 2)
185                         to_change = 0;
186                 break;
187         case 1:
188                 if (nomem) {
189                         if (old_secure_tcp < 2)
190                                 to_change = 1;
191                         ipvs->sysctl_secure_tcp = 2;
192                 } else {
193                         if (old_secure_tcp >= 2)
194                                 to_change = 0;
195                 }
196                 break;
197         case 2:
198                 if (nomem) {
199                         if (old_secure_tcp < 2)
200                                 to_change = 1;
201                 } else {
202                         if (old_secure_tcp >= 2)
203                                 to_change = 0;
204                         ipvs->sysctl_secure_tcp = 1;
205                 }
206                 break;
207         case 3:
208                 if (old_secure_tcp < 2)
209                         to_change = 1;
210                 break;
211         }
212         old_secure_tcp = ipvs->sysctl_secure_tcp;
213         if (to_change >= 0)
214                 ip_vs_protocol_timeout_change(ipvs,
215                                               ipvs->sysctl_secure_tcp > 1);
216         spin_unlock(&ipvs->securetcp_lock);
217
218         local_bh_enable();
219 }
220
221
222 /*
223  *      Timer for checking the defense
224  */
/* Parenthesized so the macro stays one operand inside larger expressions. */
#define DEFENSE_TIMER_PERIOD    (1*HZ)
226
227 static void defense_work_handler(struct work_struct *work)
228 {
229         struct netns_ipvs *ipvs =
230                 container_of(work, struct netns_ipvs, defense_work.work);
231
232         update_defense_level(ipvs);
233         if (atomic_read(&ipvs->dropentry))
234                 ip_vs_random_dropentry(ipvs->net);
235         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238
239 int
240 ip_vs_use_count_inc(void)
241 {
242         return try_module_get(THIS_MODULE);
243 }
244
245 void
246 ip_vs_use_count_dec(void)
247 {
248         module_put(THIS_MODULE);
249 }
250
251
252 /*
253  *      Hash table: for virtual service lookups
254  */
255 #define IP_VS_SVC_TAB_BITS 8
256 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258
259 /* the service table hashed by <protocol, addr, port> */
260 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261 /* the service table hashed by fwmark */
262 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
264
265 /*
266  *      Returns hash value for virtual service
267  */
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
270                   const union nf_inet_addr *addr, __be16 port)
271 {
272         register unsigned int porth = ntohs(port);
273         __be32 addr_fold = addr->ip;
274         __u32 ahash;
275
276 #ifdef CONFIG_IP_VS_IPV6
277         if (af == AF_INET6)
278                 addr_fold = addr->ip6[0]^addr->ip6[1]^
279                             addr->ip6[2]^addr->ip6[3];
280 #endif
281         ahash = ntohl(addr_fold);
282         ahash ^= ((size_t) net >> 8);
283
284         return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
285                IP_VS_SVC_TAB_MASK;
286 }
287
288 /*
289  *      Returns hash value of fwmark for virtual service lookup
290  */
291 static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
292 {
293         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
294 }
295
296 /*
297  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
298  *      or in the ip_vs_svc_fwm_table by fwmark.
299  *      Should be called with locked tables.
300  */
301 static int ip_vs_svc_hash(struct ip_vs_service *svc)
302 {
303         unsigned int hash;
304
305         if (svc->flags & IP_VS_SVC_F_HASHED) {
306                 pr_err("%s(): request for already hashed, called from %pF\n",
307                        __func__, __builtin_return_address(0));
308                 return 0;
309         }
310
311         if (svc->fwmark == 0) {
312                 /*
313                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
314                  */
315                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
316                                          &svc->addr, svc->port);
317                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
318         } else {
319                 /*
320                  *  Hash it by fwmark in svc_fwm_table
321                  */
322                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
323                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
324         }
325
326         svc->flags |= IP_VS_SVC_F_HASHED;
327         /* increase its refcnt because it is referenced by the svc table */
328         atomic_inc(&svc->refcnt);
329         return 1;
330 }
331
332
333 /*
334  *      Unhashes a service from svc_table / svc_fwm_table.
335  *      Should be called with locked tables.
336  */
337 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
338 {
339         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
340                 pr_err("%s(): request for unhash flagged, called from %pF\n",
341                        __func__, __builtin_return_address(0));
342                 return 0;
343         }
344
345         if (svc->fwmark == 0) {
346                 /* Remove it from the svc_table table */
347                 list_del(&svc->s_list);
348         } else {
349                 /* Remove it from the svc_fwm_table table */
350                 list_del(&svc->f_list);
351         }
352
353         svc->flags &= ~IP_VS_SVC_F_HASHED;
354         atomic_dec(&svc->refcnt);
355         return 1;
356 }
357
358
359 /*
360  *      Get service by {netns, proto,addr,port} in the service table.
361  */
362 static inline struct ip_vs_service *
363 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
364                      const union nf_inet_addr *vaddr, __be16 vport)
365 {
366         unsigned int hash;
367         struct ip_vs_service *svc;
368
369         /* Check for "full" addressed entries */
370         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
371
372         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
373                 if ((svc->af == af)
374                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
375                     && (svc->port == vport)
376                     && (svc->protocol == protocol)
377                     && net_eq(svc->net, net)) {
378                         /* HIT */
379                         return svc;
380                 }
381         }
382
383         return NULL;
384 }
385
386
387 /*
388  *      Get service by {fwmark} in the service table.
389  */
390 static inline struct ip_vs_service *
391 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
392 {
393         unsigned int hash;
394         struct ip_vs_service *svc;
395
396         /* Check for fwmark addressed entries */
397         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
398
399         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
400                 if (svc->fwmark == fwmark && svc->af == af
401                     && net_eq(svc->net, net)) {
402                         /* HIT */
403                         return svc;
404                 }
405         }
406
407         return NULL;
408 }
409
410 struct ip_vs_service *
411 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
412                   const union nf_inet_addr *vaddr, __be16 vport)
413 {
414         struct ip_vs_service *svc;
415         struct netns_ipvs *ipvs = net_ipvs(net);
416
417         read_lock(&__ip_vs_svc_lock);
418
419         /*
420          *      Check the table hashed by fwmark first
421          */
422         if (fwmark) {
423                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
424                 if (svc)
425                         goto out;
426         }
427
428         /*
429          *      Check the table hashed by <protocol,addr,port>
430          *      for "full" addressed entries
431          */
432         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
433
434         if (svc == NULL
435             && protocol == IPPROTO_TCP
436             && atomic_read(&ipvs->ftpsvc_counter)
437             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
438                 /*
439                  * Check if ftp service entry exists, the packet
440                  * might belong to FTP data connections.
441                  */
442                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
443         }
444
445         if (svc == NULL
446             && atomic_read(&ipvs->nullsvc_counter)) {
447                 /*
448                  * Check if the catch-all port (port zero) exists
449                  */
450                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
451         }
452
453   out:
454         if (svc)
455                 atomic_inc(&svc->usecnt);
456         read_unlock(&__ip_vs_svc_lock);
457
458         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
459                       fwmark, ip_vs_proto_name(protocol),
460                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
461                       svc ? "hit" : "not hit");
462
463         return svc;
464 }
465
466
467 static inline void
468 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
469 {
470         atomic_inc(&svc->refcnt);
471         dest->svc = svc;
472 }
473
474 static void
475 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
476 {
477         struct ip_vs_service *svc = dest->svc;
478
479         dest->svc = NULL;
480         if (atomic_dec_and_test(&svc->refcnt)) {
481                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
482                               svc->fwmark,
483                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
484                               ntohs(svc->port), atomic_read(&svc->usecnt));
485                 free_percpu(svc->stats.cpustats);
486                 kfree(svc);
487         }
488 }
489
490
491 /*
492  *      Returns hash value for real service
493  */
494 static inline unsigned int ip_vs_rs_hashkey(int af,
495                                             const union nf_inet_addr *addr,
496                                             __be16 port)
497 {
498         register unsigned int porth = ntohs(port);
499         __be32 addr_fold = addr->ip;
500
501 #ifdef CONFIG_IP_VS_IPV6
502         if (af == AF_INET6)
503                 addr_fold = addr->ip6[0]^addr->ip6[1]^
504                             addr->ip6[2]^addr->ip6[3];
505 #endif
506
507         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
508                 & IP_VS_RTAB_MASK;
509 }
510
511 /*
512  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
513  *      should be called with locked tables.
514  */
515 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
516 {
517         unsigned int hash;
518
519         if (!list_empty(&dest->d_list)) {
520                 return 0;
521         }
522
523         /*
524          *      Hash by proto,addr,port,
525          *      which are the parameters of the real service.
526          */
527         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
528
529         list_add(&dest->d_list, &ipvs->rs_table[hash]);
530
531         return 1;
532 }
533
534 /*
535  *      UNhashes ip_vs_dest from rs_table.
536  *      should be called with locked tables.
537  */
538 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
539 {
540         /*
541          * Remove it from the rs_table table.
542          */
543         if (!list_empty(&dest->d_list)) {
544                 list_del_init(&dest->d_list);
545         }
546
547         return 1;
548 }
549
550 /*
551  *      Lookup real service by <proto,addr,port> in the real service table.
552  */
553 struct ip_vs_dest *
554 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
555                           const union nf_inet_addr *daddr,
556                           __be16 dport)
557 {
558         struct netns_ipvs *ipvs = net_ipvs(net);
559         unsigned int hash;
560         struct ip_vs_dest *dest;
561
562         /*
563          *      Check for "full" addressed entries
564          *      Return the first found entry
565          */
566         hash = ip_vs_rs_hashkey(af, daddr, dport);
567
568         read_lock(&ipvs->rs_lock);
569         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
570                 if ((dest->af == af)
571                     && ip_vs_addr_equal(af, &dest->addr, daddr)
572                     && (dest->port == dport)
573                     && ((dest->protocol == protocol) ||
574                         dest->vfwmark)) {
575                         /* HIT */
576                         read_unlock(&ipvs->rs_lock);
577                         return dest;
578                 }
579         }
580         read_unlock(&ipvs->rs_lock);
581
582         return NULL;
583 }
584
585 /*
586  *      Lookup destination by {addr,port} in the given service
587  */
588 static struct ip_vs_dest *
589 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
590                   __be16 dport)
591 {
592         struct ip_vs_dest *dest;
593
594         /*
595          * Find the destination for the given service
596          */
597         list_for_each_entry(dest, &svc->destinations, n_list) {
598                 if ((dest->af == svc->af)
599                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
600                     && (dest->port == dport)) {
601                         /* HIT */
602                         return dest;
603                 }
604         }
605
606         return NULL;
607 }
608
609 /*
610  * Find destination by {daddr,dport,vaddr,protocol}
611  * Cretaed to be used in ip_vs_process_message() in
612  * the backup synchronization daemon. It finds the
613  * destination to be bound to the received connection
614  * on the backup.
615  *
616  * ip_vs_lookup_real_service() looked promissing, but
617  * seems not working as expected.
618  */
619 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
620                                    const union nf_inet_addr *daddr,
621                                    __be16 dport,
622                                    const union nf_inet_addr *vaddr,
623                                    __be16 vport, __u16 protocol, __u32 fwmark,
624                                    __u32 flags)
625 {
626         struct ip_vs_dest *dest;
627         struct ip_vs_service *svc;
628         __be16 port = dport;
629
630         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
631         if (!svc)
632                 return NULL;
633         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
634                 port = 0;
635         dest = ip_vs_lookup_dest(svc, daddr, port);
636         if (!dest)
637                 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
638         if (dest)
639                 atomic_inc(&dest->refcnt);
640         ip_vs_service_put(svc);
641         return dest;
642 }
643
644 /* Release dst_cache for dest in user context */
645 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
646 {
647         struct dst_entry *old_dst;
648
649         old_dst = dest->dst_cache;
650         dest->dst_cache = NULL;
651         dst_release(old_dst);
652         dest->dst_saddr.ip = 0;
653 }
654
655 /*
656  *  Lookup dest by {svc,addr,port} in the destination trash.
657  *  The destination trash is used to hold the destinations that are removed
658  *  from the service table but are still referenced by some conn entries.
659  *  The reason to add the destination trash is when the dest is temporary
660  *  down (either by administrator or by monitor program), the dest can be
661  *  picked back from the trash, the remaining connections to the dest can
662  *  continue, and the counting information of the dest is also useful for
663  *  scheduling.
664  */
665 static struct ip_vs_dest *
666 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
667                      __be16 dport)
668 {
669         struct ip_vs_dest *dest, *nxt;
670         struct netns_ipvs *ipvs = net_ipvs(svc->net);
671
672         /*
673          * Find the destination in trash
674          */
675         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
676                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
677                               "dest->refcnt=%d\n",
678                               dest->vfwmark,
679                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
680                               ntohs(dest->port),
681                               atomic_read(&dest->refcnt));
682                 if (dest->af == svc->af &&
683                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
684                     dest->port == dport &&
685                     dest->vfwmark == svc->fwmark &&
686                     dest->protocol == svc->protocol &&
687                     (svc->fwmark ||
688                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
689                       dest->vport == svc->port))) {
690                         /* HIT */
691                         return dest;
692                 }
693
694                 /*
695                  * Try to purge the destination from trash if not referenced
696                  */
697                 if (atomic_read(&dest->refcnt) == 1) {
698                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
699                                       "from trash\n",
700                                       dest->vfwmark,
701                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
702                                       ntohs(dest->port));
703                         list_del(&dest->n_list);
704                         __ip_vs_dst_cache_reset(dest);
705                         __ip_vs_unbind_svc(dest);
706                         free_percpu(dest->stats.cpustats);
707                         kfree(dest);
708                 }
709         }
710
711         return NULL;
712 }
713
714
715 /*
716  *  Clean up all the destinations in the trash
717  *  Called by the ip_vs_control_cleanup()
718  *
719  *  When the ip_vs_control_clearup is activated by ipvs module exit,
720  *  the service tables must have been flushed and all the connections
721  *  are expired, and the refcnt of each destination in the trash must
722  *  be 1, so we simply release them here.
723  */
724 static void ip_vs_trash_cleanup(struct net *net)
725 {
726         struct ip_vs_dest *dest, *nxt;
727         struct netns_ipvs *ipvs = net_ipvs(net);
728
729         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
730                 list_del(&dest->n_list);
731                 __ip_vs_dst_cache_reset(dest);
732                 __ip_vs_unbind_svc(dest);
733                 free_percpu(dest->stats.cpustats);
734                 kfree(dest);
735         }
736 }
737
738 static void
739 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
740 {
741 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
742
743         spin_lock_bh(&src->lock);
744
745         IP_VS_SHOW_STATS_COUNTER(conns);
746         IP_VS_SHOW_STATS_COUNTER(inpkts);
747         IP_VS_SHOW_STATS_COUNTER(outpkts);
748         IP_VS_SHOW_STATS_COUNTER(inbytes);
749         IP_VS_SHOW_STATS_COUNTER(outbytes);
750
751         ip_vs_read_estimator(dst, src);
752
753         spin_unlock_bh(&src->lock);
754 }
755
756 static void
757 ip_vs_zero_stats(struct ip_vs_stats *stats)
758 {
759         spin_lock_bh(&stats->lock);
760
761         /* get current counters as zero point, rates are zeroed */
762
763 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
764
765         IP_VS_ZERO_STATS_COUNTER(conns);
766         IP_VS_ZERO_STATS_COUNTER(inpkts);
767         IP_VS_ZERO_STATS_COUNTER(outpkts);
768         IP_VS_ZERO_STATS_COUNTER(inbytes);
769         IP_VS_ZERO_STATS_COUNTER(outbytes);
770
771         ip_vs_zero_estimator(stats);
772
773         spin_unlock_bh(&stats->lock);
774 }
775
776 /*
777  *      Update a destination in the given service
778  */
779 static void
780 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
781                     struct ip_vs_dest_user_kern *udest, int add)
782 {
783         struct netns_ipvs *ipvs = net_ipvs(svc->net);
784         int conn_flags;
785
786         /* set the weight and the flags */
787         atomic_set(&dest->weight, udest->weight);
788         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
789         conn_flags |= IP_VS_CONN_F_INACTIVE;
790
791         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
793                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794         } else {
795                 /*
796                  *    Put the real service in rs_table if not present.
797                  *    For now only for NAT!
798                  */
799                 write_lock_bh(&ipvs->rs_lock);
800                 ip_vs_rs_hash(ipvs, dest);
801                 write_unlock_bh(&ipvs->rs_lock);
802         }
803         atomic_set(&dest->conn_flags, conn_flags);
804
805         /* bind the service */
806         if (!dest->svc) {
807                 __ip_vs_bind_svc(dest, svc);
808         } else {
809                 if (dest->svc != svc) {
810                         __ip_vs_unbind_svc(dest);
811                         ip_vs_zero_stats(&dest->stats);
812                         __ip_vs_bind_svc(dest, svc);
813                 }
814         }
815
816         /* set the dest status flags */
817         dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821         dest->u_threshold = udest->u_threshold;
822         dest->l_threshold = udest->l_threshold;
823
824         spin_lock_bh(&dest->dst_lock);
825         __ip_vs_dst_cache_reset(dest);
826         spin_unlock_bh(&dest->dst_lock);
827
828         if (add)
829                 ip_vs_start_estimator(svc->net, &dest->stats);
830
831         write_lock_bh(&__ip_vs_svc_lock);
832
833         /* Wait until all other svc users go away */
834         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
835
836         if (add) {
837                 list_add(&dest->n_list, &svc->destinations);
838                 svc->num_dests++;
839         }
840
841         /* call the update_service, because server weight may be changed */
842         if (svc->scheduler->update_service)
843                 svc->scheduler->update_service(svc);
844
845         write_unlock_bh(&__ip_vs_svc_lock);
846 }
847
848
849 /*
850  *      Create a destination for the given service
851  */
852 static int
853 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
854                struct ip_vs_dest **dest_p)
855 {
856         struct ip_vs_dest *dest;
857         unsigned int atype;
858
859         EnterFunction(2);
860
861 #ifdef CONFIG_IP_VS_IPV6
862         if (svc->af == AF_INET6) {
863                 atype = ipv6_addr_type(&udest->addr.in6);
864                 if ((!(atype & IPV6_ADDR_UNICAST) ||
865                         atype & IPV6_ADDR_LINKLOCAL) &&
866                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
867                         return -EINVAL;
868         } else
869 #endif
870         {
871                 atype = inet_addr_type(svc->net, udest->addr.ip);
872                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
873                         return -EINVAL;
874         }
875
876         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
877         if (dest == NULL)
878                 return -ENOMEM;
879
880         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
881         if (!dest->stats.cpustats)
882                 goto err_alloc;
883
884         dest->af = svc->af;
885         dest->protocol = svc->protocol;
886         dest->vaddr = svc->addr;
887         dest->vport = svc->port;
888         dest->vfwmark = svc->fwmark;
889         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
890         dest->port = udest->port;
891
892         atomic_set(&dest->activeconns, 0);
893         atomic_set(&dest->inactconns, 0);
894         atomic_set(&dest->persistconns, 0);
895         atomic_set(&dest->refcnt, 1);
896
897         INIT_LIST_HEAD(&dest->d_list);
898         spin_lock_init(&dest->dst_lock);
899         spin_lock_init(&dest->stats.lock);
900         __ip_vs_update_dest(svc, dest, udest, 1);
901
902         *dest_p = dest;
903
904         LeaveFunction(2);
905         return 0;
906
907 err_alloc:
908         kfree(dest);
909         return -ENOMEM;
910 }
911
912
913 /*
914  *      Add a destination into an existing service
915  */
916 static int
917 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
918 {
919         struct ip_vs_dest *dest;
920         union nf_inet_addr daddr;
921         __be16 dport = udest->port;
922         int ret;
923
924         EnterFunction(2);
925
926         if (udest->weight < 0) {
927                 pr_err("%s(): server weight less than zero\n", __func__);
928                 return -ERANGE;
929         }
930
931         if (udest->l_threshold > udest->u_threshold) {
932                 pr_err("%s(): lower threshold is higher than upper threshold\n",
933                         __func__);
934                 return -ERANGE;
935         }
936
937         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
938
939         /*
940          * Check if the dest already exists in the list
941          */
942         dest = ip_vs_lookup_dest(svc, &daddr, dport);
943
944         if (dest != NULL) {
945                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
946                 return -EEXIST;
947         }
948
949         /*
950          * Check if the dest already exists in the trash and
951          * is from the same service
952          */
953         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
954
955         if (dest != NULL) {
956                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
957                               "dest->refcnt=%d, service %u/%s:%u\n",
958                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
959                               atomic_read(&dest->refcnt),
960                               dest->vfwmark,
961                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
962                               ntohs(dest->vport));
963
964                 /*
965                  * Get the destination from the trash
966                  */
967                 list_del(&dest->n_list);
968
969                 __ip_vs_update_dest(svc, dest, udest, 1);
970                 ret = 0;
971         } else {
972                 /*
973                  * Allocate and initialize the dest structure
974                  */
975                 ret = ip_vs_new_dest(svc, udest, &dest);
976         }
977         LeaveFunction(2);
978
979         return ret;
980 }
981
982
983 /*
984  *      Edit a destination in the given service
985  */
986 static int
987 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
988 {
989         struct ip_vs_dest *dest;
990         union nf_inet_addr daddr;
991         __be16 dport = udest->port;
992
993         EnterFunction(2);
994
995         if (udest->weight < 0) {
996                 pr_err("%s(): server weight less than zero\n", __func__);
997                 return -ERANGE;
998         }
999
1000         if (udest->l_threshold > udest->u_threshold) {
1001                 pr_err("%s(): lower threshold is higher than upper threshold\n",
1002                         __func__);
1003                 return -ERANGE;
1004         }
1005
1006         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1007
1008         /*
1009          *  Lookup the destination list
1010          */
1011         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1012
1013         if (dest == NULL) {
1014                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1015                 return -ENOENT;
1016         }
1017
1018         __ip_vs_update_dest(svc, dest, udest, 0);
1019         LeaveFunction(2);
1020
1021         return 0;
1022 }
1023
1024
1025 /*
1026  *      Delete a destination (must be already unlinked from the service)
1027  */
1028 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1029 {
1030         struct netns_ipvs *ipvs = net_ipvs(net);
1031
1032         ip_vs_stop_estimator(net, &dest->stats);
1033
1034         /*
1035          *  Remove it from the d-linked list with the real services.
1036          */
1037         write_lock_bh(&ipvs->rs_lock);
1038         ip_vs_rs_unhash(dest);
1039         write_unlock_bh(&ipvs->rs_lock);
1040
1041         /*
1042          *  Decrease the refcnt of the dest, and free the dest
1043          *  if nobody refers to it (refcnt=0). Otherwise, throw
1044          *  the destination into the trash.
1045          */
1046         if (atomic_dec_and_test(&dest->refcnt)) {
1047                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1048                               dest->vfwmark,
1049                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1050                               ntohs(dest->port));
1051                 __ip_vs_dst_cache_reset(dest);
1052                 /* simply decrease svc->refcnt here, let the caller check
1053                    and release the service if nobody refers to it.
1054                    Only user context can release destination and service,
1055                    and only one user context can update virtual service at a
1056                    time, so the operation here is OK */
1057                 atomic_dec(&dest->svc->refcnt);
1058                 free_percpu(dest->stats.cpustats);
1059                 kfree(dest);
1060         } else {
1061                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1062                               "dest->refcnt=%d\n",
1063                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1064                               ntohs(dest->port),
1065                               atomic_read(&dest->refcnt));
1066                 list_add(&dest->n_list, &ipvs->dest_trash);
1067                 atomic_inc(&dest->refcnt);
1068         }
1069 }
1070
1071
1072 /*
1073  *      Unlink a destination from the given service
1074  */
1075 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1076                                 struct ip_vs_dest *dest,
1077                                 int svcupd)
1078 {
1079         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1080
1081         /*
1082          *  Remove it from the d-linked destination list.
1083          */
1084         list_del(&dest->n_list);
1085         svc->num_dests--;
1086
1087         /*
1088          *  Call the update_service function of its scheduler
1089          */
1090         if (svcupd && svc->scheduler->update_service)
1091                         svc->scheduler->update_service(svc);
1092 }
1093
1094
1095 /*
1096  *      Delete a destination server in the given service
1097  */
1098 static int
1099 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1100 {
1101         struct ip_vs_dest *dest;
1102         __be16 dport = udest->port;
1103
1104         EnterFunction(2);
1105
1106         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1107
1108         if (dest == NULL) {
1109                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1110                 return -ENOENT;
1111         }
1112
1113         write_lock_bh(&__ip_vs_svc_lock);
1114
1115         /*
1116          *      Wait until all other svc users go away.
1117          */
1118         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1119
1120         /*
1121          *      Unlink dest from the service
1122          */
1123         __ip_vs_unlink_dest(svc, dest, 1);
1124
1125         write_unlock_bh(&__ip_vs_svc_lock);
1126
1127         /*
1128          *      Delete the destination
1129          */
1130         __ip_vs_del_dest(svc->net, dest);
1131
1132         LeaveFunction(2);
1133
1134         return 0;
1135 }
1136
1137
1138 /*
1139  *      Add a service into the service hash table
1140  */
1141 static int
1142 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1143                   struct ip_vs_service **svc_p)
1144 {
1145         int ret = 0;
1146         struct ip_vs_scheduler *sched = NULL;
1147         struct ip_vs_pe *pe = NULL;
1148         struct ip_vs_service *svc = NULL;
1149         struct netns_ipvs *ipvs = net_ipvs(net);
1150
1151         /* increase the module use count */
1152         ip_vs_use_count_inc();
1153
1154         /* Lookup the scheduler by 'u->sched_name' */
1155         sched = ip_vs_scheduler_get(u->sched_name);
1156         if (sched == NULL) {
1157                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1158                 ret = -ENOENT;
1159                 goto out_err;
1160         }
1161
1162         if (u->pe_name && *u->pe_name) {
1163                 pe = ip_vs_pe_getbyname(u->pe_name);
1164                 if (pe == NULL) {
1165                         pr_info("persistence engine module ip_vs_pe_%s "
1166                                 "not found\n", u->pe_name);
1167                         ret = -ENOENT;
1168                         goto out_err;
1169                 }
1170         }
1171
1172 #ifdef CONFIG_IP_VS_IPV6
1173         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1174                 ret = -EINVAL;
1175                 goto out_err;
1176         }
1177 #endif
1178
1179         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1180         if (svc == NULL) {
1181                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1182                 ret = -ENOMEM;
1183                 goto out_err;
1184         }
1185         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1186         if (!svc->stats.cpustats) {
1187                 ret = -ENOMEM;
1188                 goto out_err;
1189         }
1190
1191         /* I'm the first user of the service */
1192         atomic_set(&svc->usecnt, 0);
1193         atomic_set(&svc->refcnt, 0);
1194
1195         svc->af = u->af;
1196         svc->protocol = u->protocol;
1197         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1198         svc->port = u->port;
1199         svc->fwmark = u->fwmark;
1200         svc->flags = u->flags;
1201         svc->timeout = u->timeout * HZ;
1202         svc->netmask = u->netmask;
1203         svc->net = net;
1204
1205         INIT_LIST_HEAD(&svc->destinations);
1206         rwlock_init(&svc->sched_lock);
1207         spin_lock_init(&svc->stats.lock);
1208
1209         /* Bind the scheduler */
1210         ret = ip_vs_bind_scheduler(svc, sched);
1211         if (ret)
1212                 goto out_err;
1213         sched = NULL;
1214
1215         /* Bind the ct retriever */
1216         ip_vs_bind_pe(svc, pe);
1217         pe = NULL;
1218
1219         /* Update the virtual service counters */
1220         if (svc->port == FTPPORT)
1221                 atomic_inc(&ipvs->ftpsvc_counter);
1222         else if (svc->port == 0)
1223                 atomic_inc(&ipvs->nullsvc_counter);
1224
1225         ip_vs_start_estimator(net, &svc->stats);
1226
1227         /* Count only IPv4 services for old get/setsockopt interface */
1228         if (svc->af == AF_INET)
1229                 ipvs->num_services++;
1230
1231         /* Hash the service into the service table */
1232         write_lock_bh(&__ip_vs_svc_lock);
1233         ip_vs_svc_hash(svc);
1234         write_unlock_bh(&__ip_vs_svc_lock);
1235
1236         *svc_p = svc;
1237         /* Now there is a service - full throttle */
1238         ipvs->enable = 1;
1239         return 0;
1240
1241
1242  out_err:
1243         if (svc != NULL) {
1244                 ip_vs_unbind_scheduler(svc);
1245                 if (svc->inc) {
1246                         local_bh_disable();
1247                         ip_vs_app_inc_put(svc->inc);
1248                         local_bh_enable();
1249                 }
1250                 if (svc->stats.cpustats)
1251                         free_percpu(svc->stats.cpustats);
1252                 kfree(svc);
1253         }
1254         ip_vs_scheduler_put(sched);
1255         ip_vs_pe_put(pe);
1256
1257         /* decrease the module use count */
1258         ip_vs_use_count_dec();
1259
1260         return ret;
1261 }
1262
1263
1264 /*
1265  *      Edit a service and bind it with a new scheduler
1266  */
1267 static int
1268 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1269 {
1270         struct ip_vs_scheduler *sched, *old_sched;
1271         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1272         int ret = 0;
1273
1274         /*
1275          * Lookup the scheduler, by 'u->sched_name'
1276          */
1277         sched = ip_vs_scheduler_get(u->sched_name);
1278         if (sched == NULL) {
1279                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1280                 return -ENOENT;
1281         }
1282         old_sched = sched;
1283
1284         if (u->pe_name && *u->pe_name) {
1285                 pe = ip_vs_pe_getbyname(u->pe_name);
1286                 if (pe == NULL) {
1287                         pr_info("persistence engine module ip_vs_pe_%s "
1288                                 "not found\n", u->pe_name);
1289                         ret = -ENOENT;
1290                         goto out;
1291                 }
1292                 old_pe = pe;
1293         }
1294
1295 #ifdef CONFIG_IP_VS_IPV6
1296         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1297                 ret = -EINVAL;
1298                 goto out;
1299         }
1300 #endif
1301
1302         write_lock_bh(&__ip_vs_svc_lock);
1303
1304         /*
1305          * Wait until all other svc users go away.
1306          */
1307         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1308
1309         /*
1310          * Set the flags and timeout value
1311          */
1312         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1313         svc->timeout = u->timeout * HZ;
1314         svc->netmask = u->netmask;
1315
1316         old_sched = svc->scheduler;
1317         if (sched != old_sched) {
1318                 /*
1319                  * Unbind the old scheduler
1320                  */
1321                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1322                         old_sched = sched;
1323                         goto out_unlock;
1324                 }
1325
1326                 /*
1327                  * Bind the new scheduler
1328                  */
1329                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1330                         /*
1331                          * If ip_vs_bind_scheduler fails, restore the old
1332                          * scheduler.
1333                          * The main reason of failure is out of memory.
1334                          *
1335                          * The question is if the old scheduler can be
1336                          * restored all the time. TODO: if it cannot be
1337                          * restored some time, we must delete the service,
1338                          * otherwise the system may crash.
1339                          */
1340                         ip_vs_bind_scheduler(svc, old_sched);
1341                         old_sched = sched;
1342                         goto out_unlock;
1343                 }
1344         }
1345
1346         old_pe = svc->pe;
1347         if (pe != old_pe) {
1348                 ip_vs_unbind_pe(svc);
1349                 ip_vs_bind_pe(svc, pe);
1350         }
1351
1352 out_unlock:
1353         write_unlock_bh(&__ip_vs_svc_lock);
1354 out:
1355         ip_vs_scheduler_put(old_sched);
1356         ip_vs_pe_put(old_pe);
1357         return ret;
1358 }
1359
1360
1361 /*
1362  *      Delete a service from the service list
1363  *      - The service must be unlinked, unlocked and not referenced!
1364  *      - We are called under _bh lock
1365  */
1366 static void __ip_vs_del_service(struct ip_vs_service *svc)
1367 {
1368         struct ip_vs_dest *dest, *nxt;
1369         struct ip_vs_scheduler *old_sched;
1370         struct ip_vs_pe *old_pe;
1371         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1372
1373         pr_info("%s: enter\n", __func__);
1374
1375         /* Count only IPv4 services for old get/setsockopt interface */
1376         if (svc->af == AF_INET)
1377                 ipvs->num_services--;
1378
1379         ip_vs_stop_estimator(svc->net, &svc->stats);
1380
1381         /* Unbind scheduler */
1382         old_sched = svc->scheduler;
1383         ip_vs_unbind_scheduler(svc);
1384         ip_vs_scheduler_put(old_sched);
1385
1386         /* Unbind persistence engine */
1387         old_pe = svc->pe;
1388         ip_vs_unbind_pe(svc);
1389         ip_vs_pe_put(old_pe);
1390
1391         /* Unbind app inc */
1392         if (svc->inc) {
1393                 ip_vs_app_inc_put(svc->inc);
1394                 svc->inc = NULL;
1395         }
1396
1397         /*
1398          *    Unlink the whole destination list
1399          */
1400         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1401                 __ip_vs_unlink_dest(svc, dest, 0);
1402                 __ip_vs_del_dest(svc->net, dest);
1403         }
1404
1405         /*
1406          *    Update the virtual service counters
1407          */
1408         if (svc->port == FTPPORT)
1409                 atomic_dec(&ipvs->ftpsvc_counter);
1410         else if (svc->port == 0)
1411                 atomic_dec(&ipvs->nullsvc_counter);
1412
1413         /*
1414          *    Free the service if nobody refers to it
1415          */
1416         if (atomic_read(&svc->refcnt) == 0) {
1417                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1418                               svc->fwmark,
1419                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1420                               ntohs(svc->port), atomic_read(&svc->usecnt));
1421                 free_percpu(svc->stats.cpustats);
1422                 kfree(svc);
1423         }
1424
1425         /* decrease the module use count */
1426         ip_vs_use_count_dec();
1427 }
1428
1429 /*
1430  * Unlink a service from list and try to delete it if its refcnt reached 0
1431  */
1432 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1433 {
1434         /*
1435          * Unhash it from the service table
1436          */
1437         write_lock_bh(&__ip_vs_svc_lock);
1438
1439         ip_vs_svc_unhash(svc);
1440
1441         /*
1442          * Wait until all the svc users go away.
1443          */
1444         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1445
1446         __ip_vs_del_service(svc);
1447
1448         write_unlock_bh(&__ip_vs_svc_lock);
1449 }
1450
1451 /*
1452  *      Delete a service from the service list
1453  */
1454 static int ip_vs_del_service(struct ip_vs_service *svc)
1455 {
1456         if (svc == NULL)
1457                 return -EEXIST;
1458         ip_vs_unlink_service(svc);
1459
1460         return 0;
1461 }
1462
1463
1464 /*
1465  *      Flush all the virtual services
1466  */
1467 static int ip_vs_flush(struct net *net)
1468 {
1469         int idx;
1470         struct ip_vs_service *svc, *nxt;
1471
1472         /*
1473          * Flush the service table hashed by <netns,protocol,addr,port>
1474          */
1475         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1476                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1477                                          s_list) {
1478                         if (net_eq(svc->net, net))
1479                                 ip_vs_unlink_service(svc);
1480                 }
1481         }
1482
1483         /*
1484          * Flush the service table hashed by fwmark
1485          */
1486         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487                 list_for_each_entry_safe(svc, nxt,
1488                                          &ip_vs_svc_fwm_table[idx], f_list) {
1489                         if (net_eq(svc->net, net))
1490                                 ip_vs_unlink_service(svc);
1491                 }
1492         }
1493
1494         return 0;
1495 }
1496
1497 /*
1498  *      Delete service by {netns} in the service table.
1499  *      Called by __ip_vs_cleanup()
1500  */
1501 void ip_vs_service_net_cleanup(struct net *net)
1502 {
1503         EnterFunction(2);
1504         /* Check for "full" addressed entries */
1505         mutex_lock(&__ip_vs_mutex);
1506         ip_vs_flush(net);
1507         mutex_unlock(&__ip_vs_mutex);
1508         LeaveFunction(2);
1509 }
1510
1511 /* Put all references for device (dst_cache) */
1512 static inline void
1513 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)
1514 {
1515         spin_lock_bh(&dest->dst_lock);
1516         if (dest->dst_cache && dest->dst_cache->dev == dev) {
1517                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1518                               dev->name,
1519                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1520                               ntohs(dest->port),
1521                               atomic_read(&dest->refcnt));
1522                 __ip_vs_dst_cache_reset(dest);
1523         }
1524         spin_unlock_bh(&dest->dst_lock);
1525
1526 }
1527 /* Netdev event receiver
1528  * Currently only NETDEV_DOWN is handled to release refs to cached dsts
1529  */
1530 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1531                             void *ptr)
1532 {
1533         struct net_device *dev = ptr;
1534         struct net *net = dev_net(dev);
1535         struct netns_ipvs *ipvs = net_ipvs(net);
1536         struct ip_vs_service *svc;
1537         struct ip_vs_dest *dest;
1538         unsigned int idx;
1539
1540         if (event != NETDEV_DOWN || !ipvs)
1541                 return NOTIFY_DONE;
1542         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1543         EnterFunction(2);
1544         mutex_lock(&__ip_vs_mutex);
1545         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1546                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1547                         if (net_eq(svc->net, net)) {
1548                                 list_for_each_entry(dest, &svc->destinations,
1549                                                     n_list) {
1550                                         ip_vs_forget_dev(dest, dev);
1551                                 }
1552                         }
1553                 }
1554
1555                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1556                         if (net_eq(svc->net, net)) {
1557                                 list_for_each_entry(dest, &svc->destinations,
1558                                                     n_list) {
1559                                         ip_vs_forget_dev(dest, dev);
1560                                 }
1561                         }
1562
1563                 }
1564         }
1565
1566         list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
1567                 ip_vs_forget_dev(dest, dev);
1568         }
1569         mutex_unlock(&__ip_vs_mutex);
1570         LeaveFunction(2);
1571         return NOTIFY_DONE;
1572 }
1573
1574 /*
1575  *      Zero counters in a service or all services
1576  */
1577 static int ip_vs_zero_service(struct ip_vs_service *svc)
1578 {
1579         struct ip_vs_dest *dest;
1580
1581         write_lock_bh(&__ip_vs_svc_lock);
1582         list_for_each_entry(dest, &svc->destinations, n_list) {
1583                 ip_vs_zero_stats(&dest->stats);
1584         }
1585         ip_vs_zero_stats(&svc->stats);
1586         write_unlock_bh(&__ip_vs_svc_lock);
1587         return 0;
1588 }
1589
1590 static int ip_vs_zero_all(struct net *net)
1591 {
1592         int idx;
1593         struct ip_vs_service *svc;
1594
1595         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1596                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1597                         if (net_eq(svc->net, net))
1598                                 ip_vs_zero_service(svc);
1599                 }
1600         }
1601
1602         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1603                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1604                         if (net_eq(svc->net, net))
1605                                 ip_vs_zero_service(svc);
1606                 }
1607         }
1608
1609         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1610         return 0;
1611 }
1612
1613 #ifdef CONFIG_SYSCTL
1614
/* Limits referenced through .extra1/.extra2 of the "sync_retries" entry */
static int zero, three = 3;
1617
1618 static int
1619 proc_do_defense_mode(ctl_table *table, int write,
1620                      void __user *buffer, size_t *lenp, loff_t *ppos)
1621 {
1622         struct net *net = current->nsproxy->net_ns;
1623         int *valp = table->data;
1624         int val = *valp;
1625         int rc;
1626
1627         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1628         if (write && (*valp != val)) {
1629                 if ((*valp < 0) || (*valp > 3)) {
1630                         /* Restore the correct value */
1631                         *valp = val;
1632                 } else {
1633                         update_defense_level(net_ipvs(net));
1634                 }
1635         }
1636         return rc;
1637 }
1638
1639 static int
1640 proc_do_sync_threshold(ctl_table *table, int write,
1641                        void __user *buffer, size_t *lenp, loff_t *ppos)
1642 {
1643         int *valp = table->data;
1644         int val[2];
1645         int rc;
1646
1647         /* backup the value first */
1648         memcpy(val, valp, sizeof(val));
1649
1650         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1651         if (write && (valp[0] < 0 || valp[1] < 0 ||
1652             (valp[0] >= valp[1] && valp[1]))) {
1653                 /* Restore the correct value */
1654                 memcpy(valp, val, sizeof(val));
1655         }
1656         return rc;
1657 }
1658
1659 static int
1660 proc_do_sync_mode(ctl_table *table, int write,
1661                      void __user *buffer, size_t *lenp, loff_t *ppos)
1662 {
1663         int *valp = table->data;
1664         int val = *valp;
1665         int rc;
1666
1667         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1668         if (write && (*valp != val)) {
1669                 if ((*valp < 0) || (*valp > 1)) {
1670                         /* Restore the correct value */
1671                         *valp = val;
1672                 }
1673         }
1674         return rc;
1675 }
1676
1677 static int
1678 proc_do_sync_ports(ctl_table *table, int write,
1679                    void __user *buffer, size_t *lenp, loff_t *ppos)
1680 {
1681         int *valp = table->data;
1682         int val = *valp;
1683         int rc;
1684
1685         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1686         if (write && (*valp != val)) {
1687                 if (*valp < 1 || !is_power_of_2(*valp)) {
1688                         /* Restore the correct value */
1689                         *valp = val;
1690                 }
1691         }
1692         return rc;
1693 }
1694
1695 /*
1696  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1697  *      Do not change order or insert new entries without
1698  *      align with netns init in ip_vs_control_net_init()
1699  */
1700
1701 static struct ctl_table vs_vars[] = {
1702         {
1703                 .procname       = "amemthresh",
1704                 .maxlen         = sizeof(int),
1705                 .mode           = 0644,
1706                 .proc_handler   = proc_dointvec,
1707         },
1708         {
1709                 .procname       = "am_droprate",
1710                 .maxlen         = sizeof(int),
1711                 .mode           = 0644,
1712                 .proc_handler   = proc_dointvec,
1713         },
1714         {
1715                 .procname       = "drop_entry",
1716                 .maxlen         = sizeof(int),
1717                 .mode           = 0644,
1718                 .proc_handler   = proc_do_defense_mode,
1719         },
1720         {
1721                 .procname       = "drop_packet",
1722                 .maxlen         = sizeof(int),
1723                 .mode           = 0644,
1724                 .proc_handler   = proc_do_defense_mode,
1725         },
1726 #ifdef CONFIG_IP_VS_NFCT
1727         {
1728                 .procname       = "conntrack",
1729                 .maxlen         = sizeof(int),
1730                 .mode           = 0644,
1731                 .proc_handler   = &proc_dointvec,
1732         },
1733 #endif
1734         {
1735                 .procname       = "secure_tcp",
1736                 .maxlen         = sizeof(int),
1737                 .mode           = 0644,
1738                 .proc_handler   = proc_do_defense_mode,
1739         },
1740         {
1741                 .procname       = "snat_reroute",
1742                 .maxlen         = sizeof(int),
1743                 .mode           = 0644,
1744                 .proc_handler   = &proc_dointvec,
1745         },
1746         {
1747                 .procname       = "sync_version",
1748                 .maxlen         = sizeof(int),
1749                 .mode           = 0644,
1750                 .proc_handler   = &proc_do_sync_mode,
1751         },
1752         {
1753                 .procname       = "sync_ports",
1754                 .maxlen         = sizeof(int),
1755                 .mode           = 0644,
1756                 .proc_handler   = &proc_do_sync_ports,
1757         },
1758         {
1759                 .procname       = "sync_qlen_max",
1760                 .maxlen         = sizeof(int),
1761                 .mode           = 0644,
1762                 .proc_handler   = proc_dointvec,
1763         },
1764         {
1765                 .procname       = "sync_sock_size",
1766                 .maxlen         = sizeof(int),
1767                 .mode           = 0644,
1768                 .proc_handler   = proc_dointvec,
1769         },
1770         {
1771                 .procname       = "cache_bypass",
1772                 .maxlen         = sizeof(int),
1773                 .mode           = 0644,
1774                 .proc_handler   = proc_dointvec,
1775         },
1776         {
1777                 .procname       = "expire_nodest_conn",
1778                 .maxlen         = sizeof(int),
1779                 .mode           = 0644,
1780                 .proc_handler   = proc_dointvec,
1781         },
1782         {
1783                 .procname       = "expire_quiescent_template",
1784                 .maxlen         = sizeof(int),
1785                 .mode           = 0644,
1786                 .proc_handler   = proc_dointvec,
1787         },
1788         {
1789                 .procname       = "sync_threshold",
1790                 .maxlen         =
1791                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1792                 .mode           = 0644,
1793                 .proc_handler   = proc_do_sync_threshold,
1794         },
1795         {
1796                 .procname       = "sync_refresh_period",
1797                 .maxlen         = sizeof(int),
1798                 .mode           = 0644,
1799                 .proc_handler   = proc_dointvec_jiffies,
1800         },
1801         {
1802                 .procname       = "sync_retries",
1803                 .maxlen         = sizeof(int),
1804                 .mode           = 0644,
1805                 .proc_handler   = proc_dointvec_minmax,
1806                 .extra1         = &zero,
1807                 .extra2         = &three,
1808         },
1809         {
1810                 .procname       = "nat_icmp_send",
1811                 .maxlen         = sizeof(int),
1812                 .mode           = 0644,
1813                 .proc_handler   = proc_dointvec,
1814         },
1815         {
1816                 .procname       = "pmtu_disc",
1817                 .maxlen         = sizeof(int),
1818                 .mode           = 0644,
1819                 .proc_handler   = proc_dointvec,
1820         },
1821         {
1822                 .procname       = "backup_only",
1823                 .maxlen         = sizeof(int),
1824                 .mode           = 0644,
1825                 .proc_handler   = proc_dointvec,
1826         },
1827 #ifdef CONFIG_IP_VS_DEBUG
1828         {
1829                 .procname       = "debug_level",
1830                 .data           = &sysctl_ip_vs_debug_level,
1831                 .maxlen         = sizeof(int),
1832                 .mode           = 0644,
1833                 .proc_handler   = proc_dointvec,
1834         },
1835 #endif
1836 #if 0
1837         {
1838                 .procname       = "timeout_established",
1839                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1840                 .maxlen         = sizeof(int),
1841                 .mode           = 0644,
1842                 .proc_handler   = proc_dointvec_jiffies,
1843         },
1844         {
1845                 .procname       = "timeout_synsent",
1846                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1847                 .maxlen         = sizeof(int),
1848                 .mode           = 0644,
1849                 .proc_handler   = proc_dointvec_jiffies,
1850         },
1851         {
1852                 .procname       = "timeout_synrecv",
1853                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1854                 .maxlen         = sizeof(int),
1855                 .mode           = 0644,
1856                 .proc_handler   = proc_dointvec_jiffies,
1857         },
1858         {
1859                 .procname       = "timeout_finwait",
1860                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1861                 .maxlen         = sizeof(int),
1862                 .mode           = 0644,
1863                 .proc_handler   = proc_dointvec_jiffies,
1864         },
1865         {
1866                 .procname       = "timeout_timewait",
1867                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1868                 .maxlen         = sizeof(int),
1869                 .mode           = 0644,
1870                 .proc_handler   = proc_dointvec_jiffies,
1871         },
1872         {
1873                 .procname       = "timeout_close",
1874                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1875                 .maxlen         = sizeof(int),
1876                 .mode           = 0644,
1877                 .proc_handler   = proc_dointvec_jiffies,
1878         },
1879         {
1880                 .procname       = "timeout_closewait",
1881                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1882                 .maxlen         = sizeof(int),
1883                 .mode           = 0644,
1884                 .proc_handler   = proc_dointvec_jiffies,
1885         },
1886         {
1887                 .procname       = "timeout_lastack",
1888                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1889                 .maxlen         = sizeof(int),
1890                 .mode           = 0644,
1891                 .proc_handler   = proc_dointvec_jiffies,
1892         },
1893         {
1894                 .procname       = "timeout_listen",
1895                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1896                 .maxlen         = sizeof(int),
1897                 .mode           = 0644,
1898                 .proc_handler   = proc_dointvec_jiffies,
1899         },
1900         {
1901                 .procname       = "timeout_synack",
1902                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1903                 .maxlen         = sizeof(int),
1904                 .mode           = 0644,
1905                 .proc_handler   = proc_dointvec_jiffies,
1906         },
1907         {
1908                 .procname       = "timeout_udp",
1909                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1910                 .maxlen         = sizeof(int),
1911                 .mode           = 0644,
1912                 .proc_handler   = proc_dointvec_jiffies,
1913         },
1914         {
1915                 .procname       = "timeout_icmp",
1916                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1917                 .maxlen         = sizeof(int),
1918                 .mode           = 0644,
1919                 .proc_handler   = proc_dointvec_jiffies,
1920         },
1921 #endif
1922         { }
1923 };
1924
1925 #endif
1926
1927 #ifdef CONFIG_PROC_FS
1928
1929 struct ip_vs_iter {
1930         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1931         struct list_head *table;
1932         int bucket;
1933 };
1934
1935 /*
1936  *      Write the contents of the VS rule table to a PROCfs file.
1937  *      (It is kept just for backward compatibility)
1938  */
1939 static inline const char *ip_vs_fwd_name(unsigned int flags)
1940 {
1941         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1942         case IP_VS_CONN_F_LOCALNODE:
1943                 return "Local";
1944         case IP_VS_CONN_F_TUNNEL:
1945                 return "Tunnel";
1946         case IP_VS_CONN_F_DROUTE:
1947                 return "Route";
1948         default:
1949                 return "Masq";
1950         }
1951 }
1952
1953
1954 /* Get the Nth entry in the two lists */
1955 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1956 {
1957         struct net *net = seq_file_net(seq);
1958         struct ip_vs_iter *iter = seq->private;
1959         int idx;
1960         struct ip_vs_service *svc;
1961
1962         /* look in hash by protocol */
1963         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1964                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1965                         if (net_eq(svc->net, net) && pos-- == 0) {
1966                                 iter->table = ip_vs_svc_table;
1967                                 iter->bucket = idx;
1968                                 return svc;
1969                         }
1970                 }
1971         }
1972
1973         /* keep looking in fwmark */
1974         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1975                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1976                         if (net_eq(svc->net, net) && pos-- == 0) {
1977                                 iter->table = ip_vs_svc_fwm_table;
1978                                 iter->bucket = idx;
1979                                 return svc;
1980                         }
1981                 }
1982         }
1983
1984         return NULL;
1985 }
1986
1987 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1988 __acquires(__ip_vs_svc_lock)
1989 {
1990
1991         read_lock_bh(&__ip_vs_svc_lock);
1992         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1993 }
1994
1995
1996 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1997 {
1998         struct list_head *e;
1999         struct ip_vs_iter *iter;
2000         struct ip_vs_service *svc;
2001
2002         ++*pos;
2003         if (v == SEQ_START_TOKEN)
2004                 return ip_vs_info_array(seq,0);
2005
2006         svc = v;
2007         iter = seq->private;
2008
2009         if (iter->table == ip_vs_svc_table) {
2010                 /* next service in table hashed by protocol */
2011                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
2012                         return list_entry(e, struct ip_vs_service, s_list);
2013
2014
2015                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2016                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2017                                             s_list) {
2018                                 return svc;
2019                         }
2020                 }
2021
2022                 iter->table = ip_vs_svc_fwm_table;
2023                 iter->bucket = -1;
2024                 goto scan_fwmark;
2025         }
2026
2027         /* next service in hashed by fwmark */
2028         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2029                 return list_entry(e, struct ip_vs_service, f_list);
2030
2031  scan_fwmark:
2032         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2033                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2034                                     f_list)
2035                         return svc;
2036         }
2037
2038         return NULL;
2039 }
2040
2041 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2042 __releases(__ip_vs_svc_lock)
2043 {
2044         read_unlock_bh(&__ip_vs_svc_lock);
2045 }
2046
2047
2048 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2049 {
2050         if (v == SEQ_START_TOKEN) {
2051                 seq_printf(seq,
2052                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
2053                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2054                 seq_puts(seq,
2055                          "Prot LocalAddress:Port Scheduler Flags\n");
2056                 seq_puts(seq,
2057                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2058         } else {
2059                 const struct ip_vs_service *svc = v;
2060                 const struct ip_vs_iter *iter = seq->private;
2061                 const struct ip_vs_dest *dest;
2062
2063                 if (iter->table == ip_vs_svc_table) {
2064 #ifdef CONFIG_IP_VS_IPV6
2065                         if (svc->af == AF_INET6)
2066                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
2067                                            ip_vs_proto_name(svc->protocol),
2068                                            &svc->addr.in6,
2069                                            ntohs(svc->port),
2070                                            svc->scheduler->name);
2071                         else
2072 #endif
2073                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
2074                                            ip_vs_proto_name(svc->protocol),
2075                                            ntohl(svc->addr.ip),
2076                                            ntohs(svc->port),
2077                                            svc->scheduler->name,
2078                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2079                 } else {
2080                         seq_printf(seq, "FWM  %08X %s %s",
2081                                    svc->fwmark, svc->scheduler->name,
2082                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2083                 }
2084
2085                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2086                         seq_printf(seq, "persistent %d %08X\n",
2087                                 svc->timeout,
2088                                 ntohl(svc->netmask));
2089                 else
2090                         seq_putc(seq, '\n');
2091
2092                 list_for_each_entry(dest, &svc->destinations, n_list) {
2093 #ifdef CONFIG_IP_VS_IPV6
2094                         if (dest->af == AF_INET6)
2095                                 seq_printf(seq,
2096                                            "  -> [%pI6]:%04X"
2097                                            "      %-7s %-6d %-10d %-10d\n",
2098                                            &dest->addr.in6,
2099                                            ntohs(dest->port),
2100                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2101                                            atomic_read(&dest->weight),
2102                                            atomic_read(&dest->activeconns),
2103                                            atomic_read(&dest->inactconns));
2104                         else
2105 #endif
2106                                 seq_printf(seq,
2107                                            "  -> %08X:%04X      "
2108                                            "%-7s %-6d %-10d %-10d\n",
2109                                            ntohl(dest->addr.ip),
2110                                            ntohs(dest->port),
2111                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2112                                            atomic_read(&dest->weight),
2113                                            atomic_read(&dest->activeconns),
2114                                            atomic_read(&dest->inactconns));
2115
2116                 }
2117         }
2118         return 0;
2119 }
2120
2121 static const struct seq_operations ip_vs_info_seq_ops = {
2122         .start = ip_vs_info_seq_start,
2123         .next  = ip_vs_info_seq_next,
2124         .stop  = ip_vs_info_seq_stop,
2125         .show  = ip_vs_info_seq_show,
2126 };
2127
2128 static int ip_vs_info_open(struct inode *inode, struct file *file)
2129 {
2130         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2131                         sizeof(struct ip_vs_iter));
2132 }
2133
2134 static const struct file_operations ip_vs_info_fops = {
2135         .owner   = THIS_MODULE,
2136         .open    = ip_vs_info_open,
2137         .read    = seq_read,
2138         .llseek  = seq_lseek,
2139         .release = seq_release_net,
2140 };
2141
2142 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2143 {
2144         struct net *net = seq_file_single_net(seq);
2145         struct ip_vs_stats_user show;
2146
2147 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2148         seq_puts(seq,
2149                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2150         seq_printf(seq,
2151                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2152
2153         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2154         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2155                    show.inpkts, show.outpkts,
2156                    (unsigned long long) show.inbytes,
2157                    (unsigned long long) show.outbytes);
2158
2159 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2160         seq_puts(seq,
2161                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2162         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2163                         show.cps, show.inpps, show.outpps,
2164                         show.inbps, show.outbps);
2165
2166         return 0;
2167 }
2168
2169 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2170 {
2171         return single_open_net(inode, file, ip_vs_stats_show);
2172 }
2173
2174 static const struct file_operations ip_vs_stats_fops = {
2175         .owner = THIS_MODULE,
2176         .open = ip_vs_stats_seq_open,
2177         .read = seq_read,
2178         .llseek = seq_lseek,
2179         .release = single_release_net,
2180 };
2181
2182 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2183 {
2184         struct net *net = seq_file_single_net(seq);
2185         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2186         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2187         struct ip_vs_stats_user rates;
2188         int i;
2189
2190 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2191         seq_puts(seq,
2192                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2193         seq_printf(seq,
2194                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2195
2196         for_each_possible_cpu(i) {
2197                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2198                 unsigned int start;
2199                 __u64 inbytes, outbytes;
2200
2201                 do {
2202                         start = u64_stats_fetch_begin_bh(&u->syncp);
2203                         inbytes = u->ustats.inbytes;
2204                         outbytes = u->ustats.outbytes;
2205                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2206
2207                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2208                            i, u->ustats.conns, u->ustats.inpkts,
2209                            u->ustats.outpkts, (__u64)inbytes,
2210                            (__u64)outbytes);
2211         }
2212
2213         spin_lock_bh(&tot_stats->lock);
2214
2215         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2216                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2217                    tot_stats->ustats.outpkts,
2218                    (unsigned long long) tot_stats->ustats.inbytes,
2219                    (unsigned long long) tot_stats->ustats.outbytes);
2220
2221         ip_vs_read_estimator(&rates, tot_stats);
2222
2223         spin_unlock_bh(&tot_stats->lock);
2224
2225 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2226         seq_puts(seq,
2227                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2228         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2229                         rates.cps,
2230                         rates.inpps,
2231                         rates.outpps,
2232                         rates.inbps,
2233                         rates.outbps);
2234
2235         return 0;
2236 }
2237
2238 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2239 {
2240         return single_open_net(inode, file, ip_vs_stats_percpu_show);
2241 }
2242
2243 static const struct file_operations ip_vs_stats_percpu_fops = {
2244         .owner = THIS_MODULE,
2245         .open = ip_vs_stats_percpu_seq_open,
2246         .read = seq_read,
2247         .llseek = seq_lseek,
2248         .release = single_release_net,
2249 };
2250 #endif
2251
2252 /*
2253  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2254  */
2255 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2256 {
2257 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2258         struct ip_vs_proto_data *pd;
2259 #endif
2260
2261         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2262                   u->tcp_timeout,
2263                   u->tcp_fin_timeout,
2264                   u->udp_timeout);
2265
2266 #ifdef CONFIG_IP_VS_PROTO_TCP
2267         if (u->tcp_timeout) {
2268                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2269                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2270                         = u->tcp_timeout * HZ;
2271         }
2272
2273         if (u->tcp_fin_timeout) {
2274                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2275                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2276                         = u->tcp_fin_timeout * HZ;
2277         }
2278 #endif
2279
2280 #ifdef CONFIG_IP_VS_PROTO_UDP
2281         if (u->udp_timeout) {
2282                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2283                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2284                         = u->udp_timeout * HZ;
2285         }
2286 #endif
2287         return 0;
2288 }
2289
2290
2291 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2292 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2293 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2294                                  sizeof(struct ip_vs_dest_user))
2295 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2296 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2297 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
2298
2299 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2300         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2301         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2302         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2303         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2304         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2305         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2306         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2307         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2308         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2309         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2310         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2311 };
2312
2313 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2314                                   struct ip_vs_service_user *usvc_compat)
2315 {
2316         memset(usvc, 0, sizeof(*usvc));
2317
2318         usvc->af                = AF_INET;
2319         usvc->protocol          = usvc_compat->protocol;
2320         usvc->addr.ip           = usvc_compat->addr;
2321         usvc->port              = usvc_compat->port;
2322         usvc->fwmark            = usvc_compat->fwmark;
2323
2324         /* Deep copy of sched_name is not needed here */
2325         usvc->sched_name        = usvc_compat->sched_name;
2326
2327         usvc->flags             = usvc_compat->flags;
2328         usvc->timeout           = usvc_compat->timeout;
2329         usvc->netmask           = usvc_compat->netmask;
2330 }
2331
2332 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2333                                    struct ip_vs_dest_user *udest_compat)
2334 {
2335         memset(udest, 0, sizeof(*udest));
2336
2337         udest->addr.ip          = udest_compat->addr;
2338         udest->port             = udest_compat->port;
2339         udest->conn_flags       = udest_compat->conn_flags;
2340         udest->weight           = udest_compat->weight;
2341         udest->u_threshold      = udest_compat->u_threshold;
2342         udest->l_threshold      = udest_compat->l_threshold;
2343 }
2344
2345 static int
2346 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2347 {
2348         struct net *net = sock_net(sk);
2349         int ret;
2350         unsigned char arg[MAX_ARG_LEN];
2351         struct ip_vs_service_user *usvc_compat;
2352         struct ip_vs_service_user_kern usvc;
2353         struct ip_vs_service *svc;
2354         struct ip_vs_dest_user *udest_compat;
2355         struct ip_vs_dest_user_kern udest;
2356         struct netns_ipvs *ipvs = net_ipvs(net);
2357
2358         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2359                 return -EPERM;
2360
2361         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2362                 return -EINVAL;
2363         if (len < 0 || len >  MAX_ARG_LEN)
2364                 return -EINVAL;
2365         if (len != set_arglen[SET_CMDID(cmd)]) {
2366                 pr_err("set_ctl: len %u != %u\n",
2367                        len, set_arglen[SET_CMDID(cmd)]);
2368                 return -EINVAL;
2369         }
2370
2371         if (copy_from_user(arg, user, len) != 0)
2372                 return -EFAULT;
2373
2374         /* increase the module use count */
2375         ip_vs_use_count_inc();
2376
2377         /* Handle daemons since they have another lock */
2378         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2379             cmd == IP_VS_SO_SET_STOPDAEMON) {
2380                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2381
2382                 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2383                         ret = -ERESTARTSYS;
2384                         goto out_dec;
2385                 }
2386                 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2387                         ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2388                                                 dm->syncid);
2389                 else
2390                         ret = stop_sync_thread(net, dm->state);
2391                 mutex_unlock(&ipvs->sync_mutex);
2392                 goto out_dec;
2393         }
2394
2395         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2396                 ret = -ERESTARTSYS;
2397                 goto out_dec;
2398         }
2399
2400         if (cmd == IP_VS_SO_SET_FLUSH) {
2401                 /* Flush the virtual service */
2402                 ret = ip_vs_flush(net);
2403                 goto out_unlock;
2404         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2405                 /* Set timeout values for (tcp tcpfin udp) */
2406                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2407                 goto out_unlock;
2408         }
2409
2410         usvc_compat = (struct ip_vs_service_user *)arg;
2411         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2412
2413         /* We only use the new structs internally, so copy userspace compat
2414          * structs to extended internal versions */
2415         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2416         ip_vs_copy_udest_compat(&udest, udest_compat);
2417
2418         if (cmd == IP_VS_SO_SET_ZERO) {
2419                 /* if no service address is set, zero counters in all */
2420                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2421                         ret = ip_vs_zero_all(net);
2422                         goto out_unlock;
2423                 }
2424         }
2425
2426         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2427         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2428             usvc.protocol != IPPROTO_SCTP) {
2429                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2430                        usvc.protocol, &usvc.addr.ip,
2431                        ntohs(usvc.port), usvc.sched_name);
2432                 ret = -EFAULT;
2433                 goto out_unlock;
2434         }
2435
2436         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2437         if (usvc.fwmark == 0)
2438                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2439                                            &usvc.addr, usvc.port);
2440         else
2441                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2442
2443         if (cmd != IP_VS_SO_SET_ADD
2444             && (svc == NULL || svc->protocol != usvc.protocol)) {
2445                 ret = -ESRCH;
2446                 goto out_unlock;
2447         }
2448
2449         switch (cmd) {
2450         case IP_VS_SO_SET_ADD:
2451                 if (svc != NULL)
2452                         ret = -EEXIST;
2453                 else
2454                         ret = ip_vs_add_service(net, &usvc, &svc);
2455                 break;
2456         case IP_VS_SO_SET_EDIT:
2457                 ret = ip_vs_edit_service(svc, &usvc);
2458                 break;
2459         case IP_VS_SO_SET_DEL:
2460                 ret = ip_vs_del_service(svc);
2461                 if (!ret)
2462                         goto out_unlock;
2463                 break;
2464         case IP_VS_SO_SET_ZERO:
2465                 ret = ip_vs_zero_service(svc);
2466                 break;
2467         case IP_VS_SO_SET_ADDDEST:
2468                 ret = ip_vs_add_dest(svc, &udest);
2469                 break;
2470         case IP_VS_SO_SET_EDITDEST:
2471                 ret = ip_vs_edit_dest(svc, &udest);
2472                 break;
2473         case IP_VS_SO_SET_DELDEST:
2474                 ret = ip_vs_del_dest(svc, &udest);
2475                 break;
2476         default:
2477                 ret = -EINVAL;
2478         }
2479
2480   out_unlock:
2481         mutex_unlock(&__ip_vs_mutex);
2482   out_dec:
2483         /* decrease the module use count */
2484         ip_vs_use_count_dec();
2485
2486         return ret;
2487 }
2488
2489
2490 static void
2491 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2492 {
2493         dst->protocol = src->protocol;
2494         dst->addr = src->addr.ip;
2495         dst->port = src->port;
2496         dst->fwmark = src->fwmark;
2497         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2498         dst->flags = src->flags;
2499         dst->timeout = src->timeout / HZ;
2500         dst->netmask = src->netmask;
2501         dst->num_dests = src->num_dests;
2502         ip_vs_copy_stats(&dst->stats, &src->stats);
2503 }
2504
2505 static inline int
2506 __ip_vs_get_service_entries(struct net *net,
2507                             const struct ip_vs_get_services *get,
2508                             struct ip_vs_get_services __user *uptr)
2509 {
2510         int idx, count=0;
2511         struct ip_vs_service *svc;
2512         struct ip_vs_service_entry entry;
2513         int ret = 0;
2514
2515         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2516                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2517                         /* Only expose IPv4 entries to old interface */
2518                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2519                                 continue;
2520
2521                         if (count >= get->num_services)
2522                                 goto out;
2523                         memset(&entry, 0, sizeof(entry));
2524                         ip_vs_copy_service(&entry, svc);
2525                         if (copy_to_user(&uptr->entrytable[count],
2526                                          &entry, sizeof(entry))) {
2527                                 ret = -EFAULT;
2528                                 goto out;
2529                         }
2530                         count++;
2531                 }
2532         }
2533
2534         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2535                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2536                         /* Only expose IPv4 entries to old interface */
2537                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2538                                 continue;
2539
2540                         if (count >= get->num_services)
2541                                 goto out;
2542                         memset(&entry, 0, sizeof(entry));
2543                         ip_vs_copy_service(&entry, svc);
2544                         if (copy_to_user(&uptr->entrytable[count],
2545                                          &entry, sizeof(entry))) {
2546                                 ret = -EFAULT;
2547                                 goto out;
2548                         }
2549                         count++;
2550                 }
2551         }
2552 out:
2553         return ret;
2554 }
2555
2556 static inline int
2557 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2558                          struct ip_vs_get_dests __user *uptr)
2559 {
2560         struct ip_vs_service *svc;
2561         union nf_inet_addr addr = { .ip = get->addr };
2562         int ret = 0;
2563
2564         if (get->fwmark)
2565                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2566         else
2567                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2568                                            get->port);
2569
2570         if (svc) {
2571                 int count = 0;
2572                 struct ip_vs_dest *dest;
2573                 struct ip_vs_dest_entry entry;
2574
2575                 list_for_each_entry(dest, &svc->destinations, n_list) {
2576                         if (count >= get->num_dests)
2577                                 break;
2578
2579                         entry.addr = dest->addr.ip;
2580                         entry.port = dest->port;
2581                         entry.conn_flags = atomic_read(&dest->conn_flags);
2582                         entry.weight = atomic_read(&dest->weight);
2583                         entry.u_threshold = dest->u_threshold;
2584                         entry.l_threshold = dest->l_threshold;
2585                         entry.activeconns = atomic_read(&dest->activeconns);
2586                         entry.inactconns = atomic_read(&dest->inactconns);
2587                         entry.persistconns = atomic_read(&dest->persistconns);
2588                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2589                         if (copy_to_user(&uptr->entrytable[count],
2590                                          &entry, sizeof(entry))) {
2591                                 ret = -EFAULT;
2592                                 break;
2593                         }
2594                         count++;
2595                 }
2596         } else
2597                 ret = -ESRCH;
2598         return ret;
2599 }
2600
2601 static inline void
2602 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2603 {
2604 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2605         struct ip_vs_proto_data *pd;
2606 #endif
2607
2608         memset(u, 0, sizeof (*u));
2609
2610 #ifdef CONFIG_IP_VS_PROTO_TCP
2611         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2612         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2613         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2614 #endif
2615 #ifdef CONFIG_IP_VS_PROTO_UDP
2616         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2617         u->udp_timeout =
2618                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2619 #endif
2620 }
2621
2622
2623 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2624 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2625 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2626 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2627 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2628 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2629 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2630
2631 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2632         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2633         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2634         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2635         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2636         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2637         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2638         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2639 };
2640
2641 static int
2642 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2643 {
2644         unsigned char arg[128];
2645         int ret = 0;
2646         unsigned int copylen;
2647         struct net *net = sock_net(sk);
2648         struct netns_ipvs *ipvs = net_ipvs(net);
2649
2650         BUG_ON(!net);
2651         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2652                 return -EPERM;
2653
2654         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2655                 return -EINVAL;
2656
2657         if (*len < get_arglen[GET_CMDID(cmd)]) {
2658                 pr_err("get_ctl: len %u < %u\n",
2659                        *len, get_arglen[GET_CMDID(cmd)]);
2660                 return -EINVAL;
2661         }
2662
2663         copylen = get_arglen[GET_CMDID(cmd)];
2664         if (copylen > 128)
2665                 return -EINVAL;
2666
2667         if (copy_from_user(arg, user, copylen) != 0)
2668                 return -EFAULT;
2669         /*
2670          * Handle daemons first since it has its own locking
2671          */
2672         if (cmd == IP_VS_SO_GET_DAEMON) {
2673                 struct ip_vs_daemon_user d[2];
2674
2675                 memset(&d, 0, sizeof(d));
2676                 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2677                         return -ERESTARTSYS;
2678
2679                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2680                         d[0].state = IP_VS_STATE_MASTER;
2681                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2682                                 sizeof(d[0].mcast_ifn));
2683                         d[0].syncid = ipvs->master_syncid;
2684                 }
2685                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2686                         d[1].state = IP_VS_STATE_BACKUP;
2687                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2688                                 sizeof(d[1].mcast_ifn));
2689                         d[1].syncid = ipvs->backup_syncid;
2690                 }
2691                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2692                         ret = -EFAULT;
2693                 mutex_unlock(&ipvs->sync_mutex);
2694                 return ret;
2695         }
2696
2697         if (mutex_lock_interruptible(&__ip_vs_mutex))
2698                 return -ERESTARTSYS;
2699
2700         switch (cmd) {
2701         case IP_VS_SO_GET_VERSION:
2702         {
2703                 char buf[64];
2704
2705                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2706                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2707                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2708                         ret = -EFAULT;
2709                         goto out;
2710                 }
2711                 *len = strlen(buf)+1;
2712         }
2713         break;
2714
2715         case IP_VS_SO_GET_INFO:
2716         {
2717                 struct ip_vs_getinfo info;
2718                 info.version = IP_VS_VERSION_CODE;
2719                 info.size = ip_vs_conn_tab_size;
2720                 info.num_services = ipvs->num_services;
2721                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2722                         ret = -EFAULT;
2723         }
2724         break;
2725
2726         case IP_VS_SO_GET_SERVICES:
2727         {
2728                 struct ip_vs_get_services *get;
2729                 int size;
2730
2731                 get = (struct ip_vs_get_services *)arg;
2732                 size = sizeof(*get) +
2733                         sizeof(struct ip_vs_service_entry) * get->num_services;
2734                 if (*len != size) {
2735                         pr_err("length: %u != %u\n", *len, size);
2736                         ret = -EINVAL;
2737                         goto out;
2738                 }
2739                 ret = __ip_vs_get_service_entries(net, get, user);
2740         }
2741         break;
2742
2743         case IP_VS_SO_GET_SERVICE:
2744         {
2745                 struct ip_vs_service_entry *entry;
2746                 struct ip_vs_service *svc;
2747                 union nf_inet_addr addr;
2748
2749                 entry = (struct ip_vs_service_entry *)arg;
2750                 addr.ip = entry->addr;
2751                 if (entry->fwmark)
2752                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2753                 else
2754                         svc = __ip_vs_service_find(net, AF_INET,
2755                                                    entry->protocol, &addr,
2756                                                    entry->port);
2757                 if (svc) {
2758                         ip_vs_copy_service(entry, svc);
2759                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2760                                 ret = -EFAULT;
2761                 } else
2762                         ret = -ESRCH;
2763         }
2764         break;
2765
2766         case IP_VS_SO_GET_DESTS:
2767         {
2768                 struct ip_vs_get_dests *get;
2769                 int size;
2770
2771                 get = (struct ip_vs_get_dests *)arg;
2772                 size = sizeof(*get) +
2773                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2774                 if (*len != size) {
2775                         pr_err("length: %u != %u\n", *len, size);
2776                         ret = -EINVAL;
2777                         goto out;
2778                 }
2779                 ret = __ip_vs_get_dest_entries(net, get, user);
2780         }
2781         break;
2782
2783         case IP_VS_SO_GET_TIMEOUT:
2784         {
2785                 struct ip_vs_timeout_user t;
2786
2787                 __ip_vs_get_timeouts(net, &t);
2788                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2789                         ret = -EFAULT;
2790         }
2791         break;
2792
2793         default:
2794                 ret = -EINVAL;
2795         }
2796
2797 out:
2798         mutex_unlock(&__ip_vs_mutex);
2799         return ret;
2800 }
2801
2802
2803 static struct nf_sockopt_ops ip_vs_sockopts = {
2804         .pf             = PF_INET,
2805         .set_optmin     = IP_VS_BASE_CTL,
2806         .set_optmax     = IP_VS_SO_SET_MAX+1,
2807         .set            = do_ip_vs_set_ctl,
2808         .get_optmin     = IP_VS_BASE_CTL,
2809         .get_optmax     = IP_VS_SO_GET_MAX+1,
2810         .get            = do_ip_vs_get_ctl,
2811         .owner          = THIS_MODULE,
2812 };
2813
2814 /*
2815  * Generic Netlink interface
2816  */
2817
2818 /* IPVS genetlink family */
2819 static struct genl_family ip_vs_genl_family = {
2820         .id             = GENL_ID_GENERATE,
2821         .hdrsize        = 0,
2822         .name           = IPVS_GENL_NAME,
2823         .version        = IPVS_GENL_VERSION,
2824         .maxattr        = IPVS_CMD_MAX,
2825         .netnsok        = true,         /* Make ipvsadm to work on netns */
2826 };
2827
2828 /* Policy used for first-level command attributes */
2829 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2830         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2831         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2832         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2833         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2834         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2835         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2836 };
2837
2838 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2839 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2840         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2841         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2842                                             .len = IP_VS_IFNAME_MAXLEN },
2843         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2844 };
2845
2846 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2847 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2848         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2849         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2850         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2851                                             .len = sizeof(union nf_inet_addr) },
2852         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2853         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2854         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2855                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2856         [IPVS_SVC_ATTR_PE_NAME]         = { .type = NLA_NUL_STRING,
2857                                             .len = IP_VS_PENAME_MAXLEN },
2858         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2859                                             .len = sizeof(struct ip_vs_flags) },
2860         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2861         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2862         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2863 };
2864
2865 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2866 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2867         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2868                                             .len = sizeof(union nf_inet_addr) },
2869         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2870         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2871         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2872         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2873         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2874         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2875         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2876         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2877         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2878 };
2879
2880 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2881                                  struct ip_vs_stats *stats)
2882 {
2883         struct ip_vs_stats_user ustats;
2884         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2885         if (!nl_stats)
2886                 return -EMSGSIZE;
2887
2888         ip_vs_copy_stats(&ustats, stats);
2889
2890         if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2891             nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2892             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2893             nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2894             nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2895             nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2896             nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2897             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2898             nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2899             nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2900                 goto nla_put_failure;
2901         nla_nest_end(skb, nl_stats);
2902
2903         return 0;
2904
2905 nla_put_failure:
2906         nla_nest_cancel(skb, nl_stats);
2907         return -EMSGSIZE;
2908 }
2909
2910 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2911                                    struct ip_vs_service *svc)
2912 {
2913         struct nlattr *nl_service;
2914         struct ip_vs_flags flags = { .flags = svc->flags,
2915                                      .mask = ~0 };
2916
2917         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2918         if (!nl_service)
2919                 return -EMSGSIZE;
2920
2921         if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2922                 goto nla_put_failure;
2923         if (svc->fwmark) {
2924                 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2925                         goto nla_put_failure;
2926         } else {
2927                 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2928                     nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2929                     nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2930                         goto nla_put_failure;
2931         }
2932
2933         if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2934             (svc->pe &&
2935              nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2936             nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2937             nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2938             nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2939                 goto nla_put_failure;
2940         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2941                 goto nla_put_failure;
2942
2943         nla_nest_end(skb, nl_service);
2944
2945         return 0;
2946
2947 nla_put_failure:
2948         nla_nest_cancel(skb, nl_service);
2949         return -EMSGSIZE;
2950 }
2951
2952 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2953                                    struct ip_vs_service *svc,
2954                                    struct netlink_callback *cb)
2955 {
2956         void *hdr;
2957
2958         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2959                           &ip_vs_genl_family, NLM_F_MULTI,
2960                           IPVS_CMD_NEW_SERVICE);
2961         if (!hdr)
2962                 return -EMSGSIZE;
2963
2964         if (ip_vs_genl_fill_service(skb, svc) < 0)
2965                 goto nla_put_failure;
2966
2967         return genlmsg_end(skb, hdr);
2968
2969 nla_put_failure:
2970         genlmsg_cancel(skb, hdr);
2971         return -EMSGSIZE;
2972 }
2973
2974 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2975                                     struct netlink_callback *cb)
2976 {
2977         int idx = 0, i;
2978         int start = cb->args[0];
2979         struct ip_vs_service *svc;
2980         struct net *net = skb_sknet(skb);
2981
2982         mutex_lock(&__ip_vs_mutex);
2983         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2984                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2985                         if (++idx <= start || !net_eq(svc->net, net))
2986                                 continue;
2987                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2988                                 idx--;
2989                                 goto nla_put_failure;
2990                         }
2991                 }
2992         }
2993
2994         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2995                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2996                         if (++idx <= start || !net_eq(svc->net, net))
2997                                 continue;
2998                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2999                                 idx--;
3000                                 goto nla_put_failure;
3001                         }
3002                 }
3003         }
3004
3005 nla_put_failure:
3006         mutex_unlock(&__ip_vs_mutex);
3007         cb->args[0] = idx;
3008
3009         return skb->len;
3010 }
3011
3012 static int ip_vs_genl_parse_service(struct net *net,
3013                                     struct ip_vs_service_user_kern *usvc,
3014                                     struct nlattr *nla, int full_entry,
3015                                     struct ip_vs_service **ret_svc)
3016 {
3017         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3018         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3019         struct ip_vs_service *svc;
3020
3021         /* Parse mandatory identifying service fields first */
3022         if (nla == NULL ||
3023             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3024                 return -EINVAL;
3025
3026         nla_af          = attrs[IPVS_SVC_ATTR_AF];
3027         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3028         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3029         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3030         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3031
3032         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3033                 return -EINVAL;
3034
3035         memset(usvc, 0, sizeof(*usvc));
3036
3037         usvc->af = nla_get_u16(nla_af);
3038 #ifdef CONFIG_IP_VS_IPV6
3039         if (usvc->af != AF_INET && usvc->af != AF_INET6)
3040 #else
3041         if (usvc->af != AF_INET)
3042 #endif
3043                 return -EAFNOSUPPORT;
3044
3045         if (nla_fwmark) {
3046                 usvc->protocol = IPPROTO_TCP;
3047                 usvc->fwmark = nla_get_u32(nla_fwmark);
3048         } else {
3049                 usvc->protocol = nla_get_u16(nla_protocol);
3050                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3051                 usvc->port = nla_get_u16(nla_port);
3052                 usvc->fwmark = 0;
3053         }
3054
3055         if (usvc->fwmark)
3056                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3057         else
3058                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3059                                            &usvc->addr, usvc->port);
3060         *ret_svc = svc;
3061
3062         /* If a full entry was requested, check for the additional fields */
3063         if (full_entry) {
3064                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3065                               *nla_netmask;
3066                 struct ip_vs_flags flags;
3067
3068                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3069                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3070                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3071                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3072                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3073
3074                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3075                         return -EINVAL;
3076
3077                 nla_memcpy(&flags, nla_flags, sizeof(flags));
3078
3079                 /* prefill flags from service if it already exists */
3080                 if (svc)
3081                         usvc->flags = svc->flags;
3082
3083                 /* set new flags from userland */
3084                 usvc->flags = (usvc->flags & ~flags.mask) |
3085                               (flags.flags & flags.mask);
3086                 usvc->sched_name = nla_data(nla_sched);
3087                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3088                 usvc->timeout = nla_get_u32(nla_timeout);
3089                 usvc->netmask = nla_get_u32(nla_netmask);
3090         }
3091
3092         return 0;
3093 }
3094
3095 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3096                                                      struct nlattr *nla)
3097 {
3098         struct ip_vs_service_user_kern usvc;
3099         struct ip_vs_service *svc;
3100         int ret;
3101
3102         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3103         return ret ? ERR_PTR(ret) : svc;
3104 }
3105
3106 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3107 {
3108         struct nlattr *nl_dest;
3109
3110         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3111         if (!nl_dest)
3112                 return -EMSGSIZE;
3113
3114         if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3115             nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3116             nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3117                         (atomic_read(&dest->conn_flags) &
3118                          IP_VS_CONN_F_FWD_MASK)) ||
3119             nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3120                         atomic_read(&dest->weight)) ||
3121             nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3122             nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3123             nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3124                         atomic_read(&dest->activeconns)) ||
3125             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3126                         atomic_read(&dest->inactconns)) ||
3127             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3128                         atomic_read(&dest->persistconns)))
3129                 goto nla_put_failure;
3130         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3131                 goto nla_put_failure;
3132
3133         nla_nest_end(skb, nl_dest);
3134
3135         return 0;
3136
3137 nla_put_failure:
3138         nla_nest_cancel(skb, nl_dest);
3139         return -EMSGSIZE;
3140 }
3141
3142 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3143                                 struct netlink_callback *cb)
3144 {
3145         void *hdr;
3146
3147         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3148                           &ip_vs_genl_family, NLM_F_MULTI,
3149                           IPVS_CMD_NEW_DEST);
3150         if (!hdr)
3151                 return -EMSGSIZE;
3152
3153         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3154                 goto nla_put_failure;
3155
3156         return genlmsg_end(skb, hdr);
3157
3158 nla_put_failure:
3159         genlmsg_cancel(skb, hdr);
3160         return -EMSGSIZE;
3161 }
3162
3163 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3164                                  struct netlink_callback *cb)
3165 {
3166         int idx = 0;
3167         int start = cb->args[0];
3168         struct ip_vs_service *svc;
3169         struct ip_vs_dest *dest;
3170         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3171         struct net *net = skb_sknet(skb);
3172
3173         mutex_lock(&__ip_vs_mutex);
3174
3175         /* Try to find the service for which to dump destinations */
3176         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3177                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3178                 goto out_err;
3179
3180
3181         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3182         if (IS_ERR(svc) || svc == NULL)
3183                 goto out_err;
3184
3185         /* Dump the destinations */
3186         list_for_each_entry(dest, &svc->destinations, n_list) {
3187                 if (++idx <= start)
3188                         continue;
3189                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3190                         idx--;
3191                         goto nla_put_failure;
3192                 }
3193         }
3194
3195 nla_put_failure:
3196         cb->args[0] = idx;
3197
3198 out_err:
3199         mutex_unlock(&__ip_vs_mutex);
3200
3201         return skb->len;
3202 }
3203
3204 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3205                                  struct nlattr *nla, int full_entry)
3206 {
3207         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3208         struct nlattr *nla_addr, *nla_port;
3209
3210         /* Parse mandatory identifying destination fields first */
3211         if (nla == NULL ||
3212             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3213                 return -EINVAL;
3214
3215         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3216         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3217
3218         if (!(nla_addr && nla_port))
3219                 return -EINVAL;
3220
3221         memset(udest, 0, sizeof(*udest));
3222
3223         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3224         udest->port = nla_get_u16(nla_port);
3225
3226         /* If a full entry was requested, check for the additional fields */
3227         if (full_entry) {
3228                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3229                               *nla_l_thresh;
3230
3231                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3232                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3233                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3234                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3235
3236                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3237                         return -EINVAL;
3238
3239                 udest->conn_flags = nla_get_u32(nla_fwd)
3240                                     & IP_VS_CONN_F_FWD_MASK;
3241                 udest->weight = nla_get_u32(nla_weight);
3242                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3243                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3244         }
3245
3246         return 0;
3247 }
3248
3249 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3250                                   const char *mcast_ifn, __be32 syncid)
3251 {
3252         struct nlattr *nl_daemon;
3253
3254         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3255         if (!nl_daemon)
3256                 return -EMSGSIZE;
3257
3258         if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3259             nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3260             nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3261                 goto nla_put_failure;
3262         nla_nest_end(skb, nl_daemon);
3263
3264         return 0;
3265
3266 nla_put_failure:
3267         nla_nest_cancel(skb, nl_daemon);
3268         return -EMSGSIZE;
3269 }
3270
3271 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3272                                   const char *mcast_ifn, __be32 syncid,
3273                                   struct netlink_callback *cb)
3274 {
3275         void *hdr;
3276         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3277                           &ip_vs_genl_family, NLM_F_MULTI,
3278                           IPVS_CMD_NEW_DAEMON);
3279         if (!hdr)
3280                 return -EMSGSIZE;
3281
3282         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3283                 goto nla_put_failure;
3284
3285         return genlmsg_end(skb, hdr);
3286
3287 nla_put_failure:
3288         genlmsg_cancel(skb, hdr);
3289         return -EMSGSIZE;
3290 }
3291
3292 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3293                                    struct netlink_callback *cb)
3294 {
3295         struct net *net = skb_sknet(skb);
3296         struct netns_ipvs *ipvs = net_ipvs(net);
3297
3298         mutex_lock(&ipvs->sync_mutex);
3299         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3300                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3301                                            ipvs->master_mcast_ifn,
3302                                            ipvs->master_syncid, cb) < 0)
3303                         goto nla_put_failure;
3304
3305                 cb->args[0] = 1;
3306         }
3307
3308         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3309                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3310                                            ipvs->backup_mcast_ifn,
3311                                            ipvs->backup_syncid, cb) < 0)
3312                         goto nla_put_failure;
3313
3314                 cb->args[1] = 1;
3315         }
3316
3317 nla_put_failure:
3318         mutex_unlock(&ipvs->sync_mutex);
3319
3320         return skb->len;
3321 }
3322
3323 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3324 {
3325         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3326               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3327               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3328                 return -EINVAL;
3329
3330         return start_sync_thread(net,
3331                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3332                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3333                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3334 }
3335
3336 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3337 {
3338         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3339                 return -EINVAL;
3340
3341         return stop_sync_thread(net,
3342                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3343 }
3344
3345 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3346 {
3347         struct ip_vs_timeout_user t;
3348
3349         __ip_vs_get_timeouts(net, &t);
3350
3351         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3352                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3353
3354         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3355                 t.tcp_fin_timeout =
3356                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3357
3358         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3359                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3360
3361         return ip_vs_set_timeout(net, &t);
3362 }
3363
3364 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3365 {
3366         int ret = 0, cmd;
3367         struct net *net;
3368         struct netns_ipvs *ipvs;
3369
3370         net = skb_sknet(skb);
3371         ipvs = net_ipvs(net);
3372         cmd = info->genlhdr->cmd;
3373
3374         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3375                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3376
3377                 mutex_lock(&ipvs->sync_mutex);
3378                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3379                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3380                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3381                                      ip_vs_daemon_policy)) {
3382                         ret = -EINVAL;
3383                         goto out;
3384                 }
3385
3386                 if (cmd == IPVS_CMD_NEW_DAEMON)
3387                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3388                 else
3389                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3390 out:
3391                 mutex_unlock(&ipvs->sync_mutex);
3392         }
3393         return ret;
3394 }
3395
3396 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3397 {
3398         struct ip_vs_service *svc = NULL;
3399         struct ip_vs_service_user_kern usvc;
3400         struct ip_vs_dest_user_kern udest;
3401         int ret = 0, cmd;
3402         int need_full_svc = 0, need_full_dest = 0;
3403         struct net *net;
3404
3405         net = skb_sknet(skb);
3406         cmd = info->genlhdr->cmd;
3407
3408         mutex_lock(&__ip_vs_mutex);
3409
3410         if (cmd == IPVS_CMD_FLUSH) {
3411                 ret = ip_vs_flush(net);
3412                 goto out;
3413         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3414                 ret = ip_vs_genl_set_config(net, info->attrs);
3415                 goto out;
3416         } else if (cmd == IPVS_CMD_ZERO &&
3417                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3418                 ret = ip_vs_zero_all(net);
3419                 goto out;
3420         }
3421
3422         /* All following commands require a service argument, so check if we
3423          * received a valid one. We need a full service specification when
3424          * adding / editing a service. Only identifying members otherwise. */
3425         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3426                 need_full_svc = 1;
3427
3428         ret = ip_vs_genl_parse_service(net, &usvc,
3429                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3430                                        need_full_svc, &svc);
3431         if (ret)
3432                 goto out;
3433
3434         /* Unless we're adding a new service, the service must already exist */
3435         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3436                 ret = -ESRCH;
3437                 goto out;
3438         }
3439
3440         /* Destination commands require a valid destination argument. For
3441          * adding / editing a destination, we need a full destination
3442          * specification. */
3443         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3444             cmd == IPVS_CMD_DEL_DEST) {
3445                 if (cmd != IPVS_CMD_DEL_DEST)
3446                         need_full_dest = 1;
3447
3448                 ret = ip_vs_genl_parse_dest(&udest,
3449                                             info->attrs[IPVS_CMD_ATTR_DEST],
3450                                             need_full_dest);
3451                 if (ret)
3452                         goto out;
3453         }
3454
3455         switch (cmd) {
3456         case IPVS_CMD_NEW_SERVICE:
3457                 if (svc == NULL)
3458                         ret = ip_vs_add_service(net, &usvc, &svc);
3459                 else
3460                         ret = -EEXIST;
3461                 break;
3462         case IPVS_CMD_SET_SERVICE:
3463                 ret = ip_vs_edit_service(svc, &usvc);
3464                 break;
3465         case IPVS_CMD_DEL_SERVICE:
3466                 ret = ip_vs_del_service(svc);
3467                 /* do not use svc, it can be freed */
3468                 break;
3469         case IPVS_CMD_NEW_DEST:
3470                 ret = ip_vs_add_dest(svc, &udest);
3471                 break;
3472         case IPVS_CMD_SET_DEST:
3473                 ret = ip_vs_edit_dest(svc, &udest);
3474                 break;
3475         case IPVS_CMD_DEL_DEST:
3476                 ret = ip_vs_del_dest(svc, &udest);
3477                 break;
3478         case IPVS_CMD_ZERO:
3479                 ret = ip_vs_zero_service(svc);
3480                 break;
3481         default:
3482                 ret = -EINVAL;
3483         }
3484
3485 out:
3486         mutex_unlock(&__ip_vs_mutex);
3487
3488         return ret;
3489 }
3490
3491 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3492 {
3493         struct sk_buff *msg;
3494         void *reply;
3495         int ret, cmd, reply_cmd;
3496         struct net *net;
3497
3498         net = skb_sknet(skb);
3499         cmd = info->genlhdr->cmd;
3500
3501         if (cmd == IPVS_CMD_GET_SERVICE)
3502                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3503         else if (cmd == IPVS_CMD_GET_INFO)
3504                 reply_cmd = IPVS_CMD_SET_INFO;
3505         else if (cmd == IPVS_CMD_GET_CONFIG)
3506                 reply_cmd = IPVS_CMD_SET_CONFIG;
3507         else {
3508                 pr_err("unknown Generic Netlink command\n");
3509                 return -EINVAL;
3510         }
3511
3512         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3513         if (!msg)
3514                 return -ENOMEM;
3515
3516         mutex_lock(&__ip_vs_mutex);
3517
3518         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3519         if (reply == NULL)
3520                 goto nla_put_failure;
3521
3522         switch (cmd) {
3523         case IPVS_CMD_GET_SERVICE:
3524         {
3525                 struct ip_vs_service *svc;
3526
3527                 svc = ip_vs_genl_find_service(net,
3528                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3529                 if (IS_ERR(svc)) {
3530                         ret = PTR_ERR(svc);
3531                         goto out_err;
3532                 } else if (svc) {
3533                         ret = ip_vs_genl_fill_service(msg, svc);
3534                         if (ret)
3535                                 goto nla_put_failure;
3536                 } else {
3537                         ret = -ESRCH;
3538                         goto out_err;
3539                 }
3540
3541                 break;
3542         }
3543
3544         case IPVS_CMD_GET_CONFIG:
3545         {
3546                 struct ip_vs_timeout_user t;
3547
3548                 __ip_vs_get_timeouts(net, &t);
3549 #ifdef CONFIG_IP_VS_PROTO_TCP
3550                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3551                                 t.tcp_timeout) ||
3552                     nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3553                                 t.tcp_fin_timeout))
3554                         goto nla_put_failure;
3555 #endif
3556 #ifdef CONFIG_IP_VS_PROTO_UDP
3557                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3558                         goto nla_put_failure;
3559 #endif
3560
3561                 break;
3562         }
3563
3564         case IPVS_CMD_GET_INFO:
3565                 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3566                                 IP_VS_VERSION_CODE) ||
3567                     nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3568                                 ip_vs_conn_tab_size))
3569                         goto nla_put_failure;
3570                 break;
3571         }
3572
3573         genlmsg_end(msg, reply);
3574         ret = genlmsg_reply(msg, info);
3575         goto out;
3576
3577 nla_put_failure:
3578         pr_err("not enough space in Netlink message\n");
3579         ret = -EMSGSIZE;
3580
3581 out_err:
3582         nlmsg_free(msg);
3583 out:
3584         mutex_unlock(&__ip_vs_mutex);
3585
3586         return ret;
3587 }
3588
3589
3590 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3591         {
3592                 .cmd    = IPVS_CMD_NEW_SERVICE,
3593                 .flags  = GENL_ADMIN_PERM,
3594                 .policy = ip_vs_cmd_policy,
3595                 .doit   = ip_vs_genl_set_cmd,
3596         },
3597         {
3598                 .cmd    = IPVS_CMD_SET_SERVICE,
3599                 .flags  = GENL_ADMIN_PERM,
3600                 .policy = ip_vs_cmd_policy,
3601                 .doit   = ip_vs_genl_set_cmd,
3602         },
3603         {
3604                 .cmd    = IPVS_CMD_DEL_SERVICE,
3605                 .flags  = GENL_ADMIN_PERM,
3606                 .policy = ip_vs_cmd_policy,
3607                 .doit   = ip_vs_genl_set_cmd,
3608         },
3609         {
3610                 .cmd    = IPVS_CMD_GET_SERVICE,
3611                 .flags  = GENL_ADMIN_PERM,
3612                 .doit   = ip_vs_genl_get_cmd,
3613                 .dumpit = ip_vs_genl_dump_services,
3614                 .policy = ip_vs_cmd_policy,
3615         },
3616         {
3617                 .cmd    = IPVS_CMD_NEW_DEST,
3618                 .flags  = GENL_ADMIN_PERM,
3619                 .policy = ip_vs_cmd_policy,
3620                 .doit   = ip_vs_genl_set_cmd,
3621         },
3622         {
3623                 .cmd    = IPVS_CMD_SET_DEST,
3624                 .flags  = GENL_ADMIN_PERM,
3625                 .policy = ip_vs_cmd_policy,
3626                 .doit   = ip_vs_genl_set_cmd,
3627         },
3628         {
3629                 .cmd    = IPVS_CMD_DEL_DEST,
3630                 .flags  = GENL_ADMIN_PERM,
3631                 .policy = ip_vs_cmd_policy,
3632                 .doit   = ip_vs_genl_set_cmd,
3633         },
3634         {
3635                 .cmd    = IPVS_CMD_GET_DEST,
3636                 .flags  = GENL_ADMIN_PERM,
3637                 .policy = ip_vs_cmd_policy,
3638                 .dumpit = ip_vs_genl_dump_dests,
3639         },
3640         {
3641                 .cmd    = IPVS_CMD_NEW_DAEMON,
3642                 .flags  = GENL_ADMIN_PERM,
3643                 .policy = ip_vs_cmd_policy,
3644                 .doit   = ip_vs_genl_set_daemon,
3645         },
3646         {
3647                 .cmd    = IPVS_CMD_DEL_DAEMON,
3648                 .flags  = GENL_ADMIN_PERM,
3649                 .policy = ip_vs_cmd_policy,
3650                 .doit   = ip_vs_genl_set_daemon,
3651         },
3652         {
3653                 .cmd    = IPVS_CMD_GET_DAEMON,
3654                 .flags  = GENL_ADMIN_PERM,
3655                 .dumpit = ip_vs_genl_dump_daemons,
3656         },
3657         {
3658                 .cmd    = IPVS_CMD_SET_CONFIG,
3659                 .flags  = GENL_ADMIN_PERM,
3660                 .policy = ip_vs_cmd_policy,
3661                 .doit   = ip_vs_genl_set_cmd,
3662         },
3663         {
3664                 .cmd    = IPVS_CMD_GET_CONFIG,
3665                 .flags  = GENL_ADMIN_PERM,
3666                 .doit   = ip_vs_genl_get_cmd,
3667         },
3668         {
3669                 .cmd    = IPVS_CMD_GET_INFO,
3670                 .flags  = GENL_ADMIN_PERM,
3671                 .doit   = ip_vs_genl_get_cmd,
3672         },
3673         {
3674                 .cmd    = IPVS_CMD_ZERO,
3675                 .flags  = GENL_ADMIN_PERM,
3676                 .policy = ip_vs_cmd_policy,
3677                 .doit   = ip_vs_genl_set_cmd,
3678         },
3679         {
3680                 .cmd    = IPVS_CMD_FLUSH,
3681                 .flags  = GENL_ADMIN_PERM,
3682                 .doit   = ip_vs_genl_set_cmd,
3683         },
3684 };
3685
3686 static int __init ip_vs_genl_register(void)
3687 {
3688         return genl_register_family_with_ops(&ip_vs_genl_family,
3689                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3690 }
3691
3692 static void ip_vs_genl_unregister(void)
3693 {
3694         genl_unregister_family(&ip_vs_genl_family);
3695 }
3696
3697 /* End of Generic Netlink interface definitions */
3698
/*
 * Per-netns init/exit functions.
 */
3702 #ifdef CONFIG_SYSCTL
3703 static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3704 {
3705         int idx;
3706         struct netns_ipvs *ipvs = net_ipvs(net);
3707         struct ctl_table *tbl;
3708
3709         atomic_set(&ipvs->dropentry, 0);
3710         spin_lock_init(&ipvs->dropentry_lock);
3711         spin_lock_init(&ipvs->droppacket_lock);
3712         spin_lock_init(&ipvs->securetcp_lock);
3713
3714         if (!net_eq(net, &init_net)) {
3715                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3716                 if (tbl == NULL)
3717                         return -ENOMEM;
3718
3719                 /* Don't export sysctls to unprivileged users */
3720                 if (net->user_ns != &init_user_ns)
3721                         tbl[0].procname = NULL;
3722         } else
3723                 tbl = vs_vars;
3724         /* Initialize sysctl defaults */
3725         idx = 0;
3726         ipvs->sysctl_amemthresh = 1024;
3727         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3728         ipvs->sysctl_am_droprate = 10;
3729         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3730         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3731         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3732 #ifdef CONFIG_IP_VS_NFCT
3733         tbl[idx++].data = &ipvs->sysctl_conntrack;
3734 #endif
3735         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3736         ipvs->sysctl_snat_reroute = 1;
3737         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3738         ipvs->sysctl_sync_ver = 1;
3739         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3740         ipvs->sysctl_sync_ports = 1;
3741         tbl[idx++].data = &ipvs->sysctl_sync_ports;
3742         ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3743         tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3744         ipvs->sysctl_sync_sock_size = 0;
3745         tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3746         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3747         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3748         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3749         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3750         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3751         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3752         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3753         ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3754         tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3755         ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3756         tbl[idx++].data = &ipvs->sysctl_sync_retries;
3757         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3758         ipvs->sysctl_pmtu_disc = 1;
3759         tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3760         tbl[idx++].data = &ipvs->sysctl_backup_only;
3761
3762
3763         ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3764         if (ipvs->sysctl_hdr == NULL) {
3765                 if (!net_eq(net, &init_net))
3766                         kfree(tbl);
3767                 return -ENOMEM;
3768         }
3769         ip_vs_start_estimator(net, &ipvs->tot_stats);
3770         ipvs->sysctl_tbl = tbl;
3771         /* Schedule defense work */
3772         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3773         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3774
3775         return 0;
3776 }
3777
3778 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3779 {
3780         struct netns_ipvs *ipvs = net_ipvs(net);
3781
3782         cancel_delayed_work_sync(&ipvs->defense_work);
3783         cancel_work_sync(&ipvs->defense_work.work);
3784         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3785 }
3786
3787 #else
3788
3789 static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3790 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3791
3792 #endif
3793
3794 static struct notifier_block ip_vs_dst_notifier = {
3795         .notifier_call = ip_vs_dst_event,
3796 };
3797
3798 int __net_init ip_vs_control_net_init(struct net *net)
3799 {
3800         int idx;
3801         struct netns_ipvs *ipvs = net_ipvs(net);
3802
3803         rwlock_init(&ipvs->rs_lock);
3804
3805         /* Initialize rs_table */
3806         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3807                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3808
3809         INIT_LIST_HEAD(&ipvs->dest_trash);
3810         atomic_set(&ipvs->ftpsvc_counter, 0);
3811         atomic_set(&ipvs->nullsvc_counter, 0);
3812
3813         /* procfs stats */
3814         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3815         if (!ipvs->tot_stats.cpustats)
3816                 return -ENOMEM;
3817
3818         spin_lock_init(&ipvs->tot_stats.lock);
3819
3820         proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
3821         proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops);
3822         proc_create("ip_vs_stats_percpu", 0, net->proc_net,
3823                     &ip_vs_stats_percpu_fops);
3824
3825         if (ip_vs_control_net_init_sysctl(net))
3826                 goto err;
3827
3828         return 0;
3829
3830 err:
3831         free_percpu(ipvs->tot_stats.cpustats);
3832         return -ENOMEM;
3833 }
3834
3835 void __net_exit ip_vs_control_net_cleanup(struct net *net)
3836 {
3837         struct netns_ipvs *ipvs = net_ipvs(net);
3838
3839         ip_vs_trash_cleanup(net);
3840         ip_vs_stop_estimator(net, &ipvs->tot_stats);
3841         ip_vs_control_net_cleanup_sysctl(net);
3842         remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
3843         remove_proc_entry("ip_vs_stats", net->proc_net);
3844         remove_proc_entry("ip_vs", net->proc_net);
3845         free_percpu(ipvs->tot_stats.cpustats);
3846 }
3847
3848 int __init ip_vs_register_nl_ioctl(void)
3849 {
3850         int ret;
3851
3852         ret = nf_register_sockopt(&ip_vs_sockopts);
3853         if (ret) {
3854                 pr_err("cannot register sockopt.\n");
3855                 goto err_sock;
3856         }
3857
3858         ret = ip_vs_genl_register();
3859         if (ret) {
3860                 pr_err("cannot register Generic Netlink interface.\n");
3861                 goto err_genl;
3862         }
3863         return 0;
3864
3865 err_genl:
3866         nf_unregister_sockopt(&ip_vs_sockopts);
3867 err_sock:
3868         return ret;
3869 }
3870
3871 void ip_vs_unregister_nl_ioctl(void)
3872 {
3873         ip_vs_genl_unregister();
3874         nf_unregister_sockopt(&ip_vs_sockopts);
3875 }
3876
3877 int __init ip_vs_control_init(void)
3878 {
3879         int idx;
3880         int ret;
3881
3882         EnterFunction(2);
3883
3884         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3885         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3886                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3887                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3888         }
3889
3890         smp_wmb();      /* Do we really need it now ? */
3891
3892         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3893         if (ret < 0)
3894                 return ret;
3895
3896         LeaveFunction(2);
3897         return 0;
3898 }
3899
3900
3901 void ip_vs_control_cleanup(void)
3902 {
3903         EnterFunction(2);
3904         unregister_netdevice_notifier(&ip_vs_dst_notifier);
3905         LeaveFunction(2);
3906 }