ipvs: unify the formula to estimate the overhead of processing connections
net/netfilter/ipvs/ip_vs_lblcr.c
/*
 * IPVS:        Locality-Based Least-Connection with Replication scheduler
 *
 * Authors:     Wensong Zhang <wensong@gnuchina.org>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *     Julian Anastasov        :    Added the missing (dest->weight>0)
 *                                  condition in the ip_vs_dest_set_max.
 *
 */

/*
 * The lblc/r algorithm is as follows (pseudo code):
 *
 *       if serverSet[dest_ip] is null then
 *               n, serverSet[dest_ip] <- {weighted least-conn node};
 *       else
 *               n <- {least-conn (alive) node in serverSet[dest_ip]};
 *               if (n is null) OR
 *                  (n.conns>n.weight AND
 *                   there is a node m with m.conns<m.weight/2) then
 *                   n <- {weighted least-conn node};
 *                   add n to serverSet[dest_ip];
 *               if |serverSet[dest_ip]| > 1 AND
 *                   now - serverSet[dest_ip].lastMod > T then
 *                   m <- {most conn node in serverSet[dest_ip]};
 *                   remove m from serverSet[dest_ip];
 *       if serverSet[dest_ip] changed then
 *               serverSet[dest_ip].lastMod <- now;
 *
 *       return n;
 *
 */
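/*
 * Note: T in the pseudo code above corresponds to the lblcr_expiration
 * sysctl period, which __ip_vs_lblcr_init() below defaults to 24 hours.
 */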

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/ip.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/slab.h>

/* for sysctl */
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>

#include <net/ip_vs.h>


/*
 *    For garbage collection of stale IPVS lblcr entries,
 *    which runs when the table is full.
 */
#define CHECK_EXPIRE_INTERVAL   (60*HZ)
#define ENTRY_TIMEOUT           (6*60*HZ)

/*
 *    Full expiration check: when no partial expiration check (garbage
 *    collection) has run within half an hour, do a full expiration check
 *    to collect stale entries that haven't been touched for a day.
 */
#define COUNT_FOR_FULL_EXPIRATION   30

/*
 *     for the IPVS lblcr entry hash table
 */
#ifndef CONFIG_IP_VS_LBLCR_TAB_BITS
#define CONFIG_IP_VS_LBLCR_TAB_BITS      10
#endif
#define IP_VS_LBLCR_TAB_BITS     CONFIG_IP_VS_LBLCR_TAB_BITS
#define IP_VS_LBLCR_TAB_SIZE     (1 << IP_VS_LBLCR_TAB_BITS)
#define IP_VS_LBLCR_TAB_MASK     (IP_VS_LBLCR_TAB_SIZE - 1)


/*
 *      IPVS destination set structure and operations
 */
struct ip_vs_dest_set_elem {
        struct list_head        list;           /* list link */
        struct ip_vs_dest       *dest;          /* destination server */
};

struct ip_vs_dest_set {
        atomic_t                size;           /* set size */
        unsigned long           lastmod;        /* last modified time */
        struct list_head        list;           /* destination list */
        rwlock_t                lock;           /* lock for this list */
};


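/*
 * Add a destination to the set. Returns NULL if the destination is
 * already present or if no memory is available; otherwise the new
 * element is linked in, a reference to dest is taken and lastmod is
 * updated.
 */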
static struct ip_vs_dest_set_elem *
ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
        struct ip_vs_dest_set_elem *e;

        list_for_each_entry(e, &set->list, list) {
                if (e->dest == dest)
                        /* already in the set */
                        return NULL;
        }

        e = kmalloc(sizeof(*e), GFP_ATOMIC);
        if (e == NULL) {
                pr_err("%s(): no memory\n", __func__);
                return NULL;
        }

        atomic_inc(&dest->refcnt);
        e->dest = dest;

        list_add(&e->list, &set->list);
        atomic_inc(&set->size);

        set->lastmod = jiffies;
        return e;
}

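/*
 * Remove a destination from the set and drop the reference taken when
 * it was inserted. Does nothing if the destination is not in the set.
 */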
static void
ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
        struct ip_vs_dest_set_elem *e;

        list_for_each_entry(e, &set->list, list) {
                if (e->dest == dest) {
                        /* HIT */
                        atomic_dec(&set->size);
                        set->lastmod = jiffies;
                        atomic_dec(&e->dest->refcnt);
                        list_del(&e->list);
                        kfree(e);
                        break;
                }
        }
}

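/*
 * Empty the set: drop every element and the destination reference it
 * holds. Used when an lblcr entry is freed.
 */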
static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
{
        struct ip_vs_dest_set_elem *e, *ep;

        write_lock(&set->lock);
        list_for_each_entry_safe(e, ep, &set->list, list) {
                /*
                 * We don't kfree dest because it is referred to either
                 * by its service or by the trash dest list.
                 */
                atomic_dec(&e->dest->refcnt);
                list_del(&e->list);
                kfree(e);
        }
        write_unlock(&set->lock);
}

/* get weighted least-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
        register struct ip_vs_dest_set_elem *e;
        struct ip_vs_dest *dest, *least;
        int loh, doh;

        if (set == NULL)
                return NULL;

        /* select the first destination server whose weight > 0 */
        list_for_each_entry(e, &set->list, list) {
                least = e->dest;
                if (least->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                if ((atomic_read(&least->weight) > 0)
                    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
                        loh = ip_vs_dest_conn_overhead(least);
                        goto nextstage;
                }
        }
        return NULL;

        /* find the destination with the weighted least load */
  nextstage:
        list_for_each_entry(e, &set->list, list) {
                dest = e->dest;
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                doh = ip_vs_dest_conn_overhead(dest);
                if ((loh * atomic_read(&dest->weight) >
                     doh * atomic_read(&least->weight))
                    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
                        least = dest;
                        loh = doh;
                }
        }

        IP_VS_DBG_BUF(6, "%s(): server %s:%d "
                      "activeconns %d refcnt %d weight %d overhead %d\n",
                      __func__,
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
                      atomic_read(&least->refcnt),
                      atomic_read(&least->weight), loh);
        return least;
}


/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
        register struct ip_vs_dest_set_elem *e;
        struct ip_vs_dest *dest, *most;
        int moh, doh;

        if (set == NULL)
                return NULL;

        /* select the first destination server whose weight > 0 */
        list_for_each_entry(e, &set->list, list) {
                most = e->dest;
                if (atomic_read(&most->weight) > 0) {
                        moh = ip_vs_dest_conn_overhead(most);
                        goto nextstage;
                }
        }
        return NULL;

        /* find the destination with the weighted most load */
  nextstage:
        list_for_each_entry(e, &set->list, list) {
                dest = e->dest;
                doh = ip_vs_dest_conn_overhead(dest);
                /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
                if ((moh * atomic_read(&dest->weight) <
                     doh * atomic_read(&most->weight))
                    && (atomic_read(&dest->weight) > 0)) {
                        most = dest;
                        moh = doh;
                }
        }

        IP_VS_DBG_BUF(6, "%s(): server %s:%d "
                      "activeconns %d refcnt %d weight %d overhead %d\n",
                      __func__,
                      IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
                      atomic_read(&most->activeconns),
                      atomic_read(&most->refcnt),
                      atomic_read(&most->weight), moh);
        return most;
}


/*
 *      IPVS lblcr entry represents an association between destination
 *      IP address and its destination server set
 */
struct ip_vs_lblcr_entry {
        struct list_head        list;
        int                     af;             /* address family */
        union nf_inet_addr      addr;           /* destination IP address */
        struct ip_vs_dest_set   set;            /* destination server set */
        unsigned long           lastuse;        /* last used time */
};


/*
 *      IPVS lblcr hash table
 */
struct ip_vs_lblcr_table {
        struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */
        atomic_t                entries;        /* number of entries */
        int                     max_size;       /* maximum size of entries */
        struct timer_list       periodic_timer; /* collect stale entries */
        int                     rover;          /* rover for expire check */
        int                     counter;        /* counter for no expire */
};


/*
 *      IPVS LBLCR sysctl table
 */

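/* The .data pointer below is filled in per netns by __ip_vs_lblcr_init() */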
static ctl_table vs_vars_table[] = {
        {
                .procname       = "lblcr_expiration",
                .data           = NULL,
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_jiffies,
        },
        { }
};

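/*
 * Unlink an lblcr entry from its hash bucket, release its destination
 * set and free the entry itself.
 */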
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
        list_del(&en->list);
        ip_vs_dest_set_eraseall(&en->set);
        kfree(en);
}


/*
 *      Returns hash value for IPVS LBLCR entry
 */
static inline unsigned
ip_vs_lblcr_hashkey(int af, const union nf_inet_addr *addr)
{
        __be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
        if (af == AF_INET6)
                addr_fold = addr->ip6[0]^addr->ip6[1]^
                            addr->ip6[2]^addr->ip6[3];
#endif
        return (ntohl(addr_fold)*2654435761UL) & IP_VS_LBLCR_TAB_MASK;
}


/*
 *      Hash an entry into the ip_vs_lblcr_table (no return value).
 */
static void
ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)
{
        unsigned hash = ip_vs_lblcr_hashkey(en->af, &en->addr);

        list_add(&en->list, &tbl->bucket[hash]);
        atomic_inc(&tbl->entries);
}


/*
 *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under
 *  read lock.
 */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,
                const union nf_inet_addr *addr)
{
        unsigned hash = ip_vs_lblcr_hashkey(af, addr);
        struct ip_vs_lblcr_entry *en;

        list_for_each_entry(en, &tbl->bucket[hash], list)
                if (ip_vs_addr_equal(af, &en->addr, addr))
                        return en;

        return NULL;
}


/*
 * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination
 * IP address to a server. Called under write lock.
 */
static inline struct ip_vs_lblcr_entry *
ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,
                struct ip_vs_dest *dest)
{
        struct ip_vs_lblcr_entry *en;

        en = ip_vs_lblcr_get(dest->af, tbl, daddr);
        if (!en) {
                en = kmalloc(sizeof(*en), GFP_ATOMIC);
                if (!en) {
                        pr_err("%s(): no memory\n", __func__);
                        return NULL;
                }

                en->af = dest->af;
                ip_vs_addr_copy(dest->af, &en->addr, daddr);
                en->lastuse = jiffies;

                /* initialize its dest set */
                atomic_set(&(en->set.size), 0);
                INIT_LIST_HEAD(&en->set.list);
                rwlock_init(&en->set.lock);

                ip_vs_lblcr_hash(tbl, en);
        }

        write_lock(&en->set.lock);
        ip_vs_dest_set_insert(&en->set, dest);
        write_unlock(&en->set.lock);

        return en;
}


/*
 *      Flush all the entries of the specified table.
 */
static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
{
        int i;
        struct ip_vs_lblcr_entry *en, *nxt;

        /* No locking required, only called during cleanup. */
        for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
                        ip_vs_lblcr_free(en);
                }
        }
}


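/*
 * Full expiration pass: walk every bucket and free entries whose last
 * use is older than the lblcr_expiration period.
 */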
static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        unsigned long now = jiffies;
        int i, j;
        struct ip_vs_lblcr_entry *en, *nxt;
        struct netns_ipvs *ipvs = net_ipvs(svc->net);

        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_after(en->lastuse
                                        + ipvs->sysctl_lblcr_expiration, now))
                                continue;

                        ip_vs_lblcr_free(en);
                        atomic_dec(&tbl->entries);
                }
                write_unlock(&svc->sched_lock);
        }
        tbl->rover = j;
}


/*
 *      Periodic timer handler for the IPVS lblcr table.
 *      It is used to collect stale entries when the number of entries
 *      exceeds the maximum size of the table.
 *
 *      Fixme: we probably need a more complicated algorithm to collect
 *             entries that have not been used for a long time even
 *             if the number of entries doesn't exceed the maximum size
 *             of the table.
 *      The full expiration check is for this purpose now.
 */
static void ip_vs_lblcr_check_expire(unsigned long data)
{
        struct ip_vs_service *svc = (struct ip_vs_service *) data;
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        unsigned long now = jiffies;
        int goal;
        int i, j;
        struct ip_vs_lblcr_entry *en, *nxt;

        if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
                /* do full expiration check */
                ip_vs_lblcr_full_check(svc);
                tbl->counter = 1;
                goto out;
        }

        if (atomic_read(&tbl->entries) <= tbl->max_size) {
                tbl->counter++;
                goto out;
        }

        goal = (atomic_read(&tbl->entries) - tbl->max_size)*4/3;
        if (goal > tbl->max_size/2)
                goal = tbl->max_size/2;

        for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                j = (j + 1) & IP_VS_LBLCR_TAB_MASK;

                write_lock(&svc->sched_lock);
                list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
                        if (time_before(now, en->lastuse+ENTRY_TIMEOUT))
                                continue;

                        ip_vs_lblcr_free(en);
                        atomic_dec(&tbl->entries);
                        goal--;
                }
                write_unlock(&svc->sched_lock);
                if (goal <= 0)
                        break;
        }
        tbl->rover = j;

  out:
        mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
}

static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
{
        int i;
        struct ip_vs_lblcr_table *tbl;

        /*
         *    Allocate the ip_vs_lblcr_table for this service
         */
        tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
        if (tbl == NULL) {
                pr_err("%s(): no memory\n", __func__);
                return -ENOMEM;
        }
        svc->sched_data = tbl;
        IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for "
                  "current service\n", sizeof(*tbl));

        /*
         *    Initialize the hash buckets
         */
        for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
                INIT_LIST_HEAD(&tbl->bucket[i]);
        }
        tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
        tbl->rover = 0;
        tbl->counter = 1;

        /*
         *    Hook periodic timer for garbage collection
         */
        setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
                        (unsigned long)svc);
        mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);

        return 0;
}


static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;

        /* remove the periodic timer */
        del_timer_sync(&tbl->periodic_timer);

        /* clean up the table entries */
        ip_vs_lblcr_flush(tbl);

        /* release the table itself */
        kfree(tbl);
        IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",
                  sizeof(*tbl));

        return 0;
}


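/*
 * Weighted least-connection selection over all destinations of the
 * service. Used when there is no cache entry for the target IP yet or
 * when the cached destination set cannot supply a usable server.
 */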
static inline struct ip_vs_dest *
__ip_vs_lblcr_schedule(struct ip_vs_service *svc)
{
        struct ip_vs_dest *dest, *least;
        int loh, doh;

        /*
         * We use the following formula to estimate the load:
         *                (dest overhead) / dest->weight
         *
         * Remember -- no floats in kernel mode!!!
         * The comparison of h1*w2 > h2*w1 is equivalent to that of
         *                h1/w1 > h2/w2
         * if every weight is larger than zero.
         *
         * The server with weight=0 is quiesced and will not receive any
         * new connection.
         */
        list_for_each_entry(dest, &svc->destinations, n_list) {
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                if (atomic_read(&dest->weight) > 0) {
                        least = dest;
                        loh = ip_vs_dest_conn_overhead(least);
                        goto nextstage;
                }
        }
        return NULL;

        /*
         *    Find the destination with the least load.
         */
  nextstage:
        list_for_each_entry_continue(dest, &svc->destinations, n_list) {
                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
                        continue;

                doh = ip_vs_dest_conn_overhead(dest);
                if (loh * atomic_read(&dest->weight) >
                    doh * atomic_read(&least->weight)) {
                        least = dest;
                        loh = doh;
                }
        }

        IP_VS_DBG_BUF(6, "LBLCR: server %s:%d "
                      "activeconns %d refcnt %d weight %d overhead %d\n",
                      IP_VS_DBG_ADDR(least->af, &least->addr),
                      ntohs(least->port),
                      atomic_read(&least->activeconns),
                      atomic_read(&least->refcnt),
                      atomic_read(&least->weight), loh);

        return least;
}


/*
 *   If this destination server is overloaded and there is a less loaded
 *   server, then return true.
 */
static inline int
is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
        if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {
                struct ip_vs_dest *d;

                list_for_each_entry(d, &svc->destinations, n_list) {
                        if (atomic_read(&d->activeconns)*2
                            < atomic_read(&d->weight)) {
                                return 1;
                        }
                }
        }
        return 0;
}


/*
 *    Locality-Based (weighted) Least-Connection with Replication scheduling
 */
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
        struct ip_vs_lblcr_table *tbl = svc->sched_data;
        struct ip_vs_iphdr iph;
        struct ip_vs_dest *dest = NULL;
        struct ip_vs_lblcr_entry *en;

        ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);

        IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);

        /* First look in our cache */
        read_lock(&svc->sched_lock);
        en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
        if (en) {
                struct netns_ipvs *ipvs = net_ipvs(svc->net);
                /* We only hold a read lock, but this is atomic */
                en->lastuse = jiffies;

                /* Get the least loaded destination */
                read_lock(&en->set.lock);
                dest = ip_vs_dest_set_min(&en->set);
                read_unlock(&en->set.lock);

                /* More than one destination + enough time passed by, cleanup */
                if (atomic_read(&en->set.size) > 1 &&
                                time_after(jiffies, en->set.lastmod +
                                ipvs->sysctl_lblcr_expiration)) {
                        struct ip_vs_dest *m;

                        write_lock(&en->set.lock);
                        m = ip_vs_dest_set_max(&en->set);
                        if (m)
                                ip_vs_dest_set_erase(&en->set, m);
                        write_unlock(&en->set.lock);
                }

                /* If the destination is not overloaded, use it */
                if (dest && !is_overloaded(dest, svc)) {
                        read_unlock(&svc->sched_lock);
                        goto out;
                }

                /* The cache entry is invalid, time to schedule */
                dest = __ip_vs_lblcr_schedule(svc);
                if (!dest) {
                        ip_vs_scheduler_err(svc, "no destination available");
                        read_unlock(&svc->sched_lock);
                        return NULL;
                }

                /* Update our cache entry */
                write_lock(&en->set.lock);
                ip_vs_dest_set_insert(&en->set, dest);
                write_unlock(&en->set.lock);
        }
        read_unlock(&svc->sched_lock);

        if (dest)
                goto out;

        /* No cache entry, time to schedule */
        dest = __ip_vs_lblcr_schedule(svc);
        if (!dest) {
                IP_VS_DBG(1, "no destination available\n");
                return NULL;
        }

        /* If we fail to create a cache entry, we'll just use the valid dest */
        write_lock(&svc->sched_lock);
        ip_vs_lblcr_new(tbl, &iph.daddr, dest);
        write_unlock(&svc->sched_lock);

out:
        IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n",
                      IP_VS_DBG_ADDR(svc->af, &iph.daddr),
                      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port));

        return dest;
}


/*
 *      IPVS LBLCR Scheduler structure
 */
static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
{
        .name =                 "lblcr",
        .refcnt =               ATOMIC_INIT(0),
        .module =               THIS_MODULE,
        .n_list =               LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
        .init_service =         ip_vs_lblcr_init_svc,
        .done_service =         ip_vs_lblcr_done_svc,
        .schedule =             ip_vs_lblcr_schedule,
};

/*
 *  per netns init.
 */
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
        struct netns_ipvs *ipvs = net_ipvs(net);

        if (!net_eq(net, &init_net)) {
                ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
                                                sizeof(vs_vars_table),
                                                GFP_KERNEL);
                if (ipvs->lblcr_ctl_table == NULL)
                        return -ENOMEM;
        } else
                ipvs->lblcr_ctl_table = vs_vars_table;
        ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
        ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;

#ifdef CONFIG_SYSCTL
        ipvs->lblcr_ctl_header =
                register_net_sysctl_table(net, net_vs_ctl_path,
                                          ipvs->lblcr_ctl_table);
        if (!ipvs->lblcr_ctl_header) {
                if (!net_eq(net, &init_net))
                        kfree(ipvs->lblcr_ctl_table);
                return -ENOMEM;
        }
#endif

        return 0;
}

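/*
 *  per netns cleanup: unregister the sysctl table and free the per-netns
 *  copy (the init_net table is the static vs_vars_table and is not freed).
 */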
static void __net_exit __ip_vs_lblcr_exit(struct net *net)
{
        struct netns_ipvs *ipvs = net_ipvs(net);

#ifdef CONFIG_SYSCTL
        unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
#endif

        if (!net_eq(net, &init_net))
                kfree(ipvs->lblcr_ctl_table);
}

static struct pernet_operations ip_vs_lblcr_ops = {
        .init = __ip_vs_lblcr_init,
        .exit = __ip_vs_lblcr_exit,
};

static int __init ip_vs_lblcr_init(void)
{
        int ret;

        ret = register_pernet_subsys(&ip_vs_lblcr_ops);
        if (ret)
                return ret;

        ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
        if (ret)
                unregister_pernet_subsys(&ip_vs_lblcr_ops);
        return ret;
}

static void __exit ip_vs_lblcr_cleanup(void)
{
        unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
        unregister_pernet_subsys(&ip_vs_lblcr_ops);
}


module_init(ip_vs_lblcr_init);
module_exit(ip_vs_lblcr_cleanup);
MODULE_LICENSE("GPL");