2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 Anyone checking this code against the LBL version should
42 take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than the MTU, which is always the case when real-time classes
48 have small rates. Note that the statement in [3] is
49 incomplete: delay may actually be estimated even if the class
50 per-round allotment is less than the MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places that look like wrong translations
61 from NS. Readers are encouraged to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate true class
65 idle time. The workaround is to treat the next dequeue event
66 as a sign that the previous packet has finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it holds.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
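/* For orientation, a minimal sketch (illustration only, not kernel code) of
   the per-class link-sharing bookkeeping that cbq_update() below performs
   after each transmission; locking, statistics and several corrections are
   omitted:

	idle     = now - cl->last - len/rate;          // real idle minus "virtual" busy time
	avgidle += idle - (avgidle >> cl->ewma_log);   // scaled EWMA of idle time
	if (avgidle <= 0) {                            // class is at or over its rate
		delay = (-avgidle) - ((-avgidle) >> cl->ewma_log);
		cl->undertime = now + delay;           // earliest unregulated send time
	} else {
		cl->undertime = PSCHED_PASTPERFECT;    // under limit, no regulation
	}
 */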
71 struct cbq_sched_data;
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
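/* defaults[]: default class per TC_PRIO value, used when this class is a
   split node; kept in sync by cbq_sync_defmap() */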
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
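/* L2T ("length to time"): transmission time of a len-byte packet at this
   class's configured rate, looked up in its rate table. */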
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
210 Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_BYPASS;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
270 * Step 3+n. If the classifier selected a link-sharing class,
271 * apply the agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
292 A packet has just been enqueued on an empty class.
293 cbq_activate_class adds the class to the tail of the active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
316 Unlink the class from the active chain.
317 Note that the same procedure is done directly in cbq_dequeue*
318 during the round-robin pass.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
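/* Top-level link-sharing support (see [1]): record in q->toplevel the level
   of the lowest under-limit class along this class's borrow chain, so that
   cbq_under_limit() will not let overlimit classes borrow from ancestors
   above that level. */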
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
374 int uninitialized_var(ret);
375 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
377 #ifdef CONFIG_NET_CLS_ACT
381 if (ret == NET_XMIT_BYPASS)
387 #ifdef CONFIG_NET_CLS_ACT
388 cl->q->__parent = sch;
390 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
392 sch->bstats.packets++;
393 sch->bstats.bytes+=len;
394 cbq_mark_toplevel(q, cl);
396 cbq_activate_class(cl);
401 cbq_mark_toplevel(q, cl);
407 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
409 struct cbq_sched_data *q = qdisc_priv(sch);
410 struct cbq_class *cl;
413 if ((cl = q->tx_class) == NULL) {
420 cbq_mark_toplevel(q, cl);
422 #ifdef CONFIG_NET_CLS_ACT
424 cl->q->__parent = sch;
426 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
428 sch->qstats.requeues++;
430 cbq_activate_class(cl);
438 /* Overlimit actions */
440 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
442 static void cbq_ovl_classic(struct cbq_class *cl)
444 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
445 psched_tdiff_t delay = cl->undertime - q->now;
448 delay += cl->offtime;
451 The class goes to sleep, so it will have no
452 chance to work off avgidle. Let's forgive it 8)
454 BTW cbq-2.0 has a bug in this
455 place: apparently they forgot to shift it by cl->ewma_log.
458 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
459 if (cl->avgidle < cl->minidle)
460 cl->avgidle = cl->minidle;
463 cl->undertime = q->now + delay;
465 cl->xstats.overactions++;
468 if (q->wd_expires == 0 || q->wd_expires > delay)
469 q->wd_expires = delay;
471 /* Dirty work! We must schedule wakeups based on
472 real available rate, rather than leaf rate,
473 which may be tiny (even zero).
475 if (q->toplevel == TC_CBQ_MAXLEVEL) {
477 psched_tdiff_t base_delay = q->wd_expires;
479 for (b = cl->borrow; b; b = b->borrow) {
480 delay = b->undertime - q->now;
481 if (delay < base_delay) {
488 q->wd_expires = base_delay;
492 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime, when
496 static void cbq_ovl_rclassic(struct cbq_class *cl)
498 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
499 struct cbq_class *this = cl;
502 if (cl->level > q->toplevel) {
506 } while ((cl = cl->borrow) != NULL);
513 /* TC_CBQ_OVL_DELAY: delay until the class goes under limit */
515 static void cbq_ovl_delay(struct cbq_class *cl)
517 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
518 psched_tdiff_t delay = cl->undertime - q->now;
521 psched_time_t sched = q->now;
524 delay += cl->offtime;
526 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
527 if (cl->avgidle < cl->minidle)
528 cl->avgidle = cl->minidle;
529 cl->undertime = q->now + delay;
532 sched += delay + cl->penalty;
533 cl->penalized = sched;
534 cl->cpriority = TC_CBQ_MAXPRIO;
535 q->pmask |= (1<<TC_CBQ_MAXPRIO);
537 expires = ktime_set(0, 0);
538 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
539 if (hrtimer_try_to_cancel(&q->delay_timer) &&
540 ktime_to_ns(ktime_sub(q->delay_timer.expires,
542 q->delay_timer.expires = expires;
543 hrtimer_restart(&q->delay_timer);
545 cl->xstats.overactions++;
550 if (q->wd_expires == 0 || q->wd_expires > delay)
551 q->wd_expires = delay;
554 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
556 static void cbq_ovl_lowprio(struct cbq_class *cl)
558 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
560 cl->penalized = q->now + cl->penalty;
562 if (cl->cpriority != cl->priority2) {
563 cl->cpriority = cl->priority2;
564 q->pmask |= (1<<cl->cpriority);
565 cl->xstats.overactions++;
570 /* TC_CBQ_OVL_DROP: penalize class by dropping */
572 static void cbq_ovl_drop(struct cbq_class *cl)
574 if (cl->q->ops->drop)
575 if (cl->q->ops->drop(cl->q))
577 cl->xstats.overactions++;
581 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
584 struct cbq_class *cl;
585 struct cbq_class *cl_prev = q->active[prio];
586 psched_time_t sched = now;
592 cl = cl_prev->next_alive;
593 if (now - cl->penalized > 0) {
594 cl_prev->next_alive = cl->next_alive;
595 cl->next_alive = NULL;
596 cl->cpriority = cl->priority;
598 cbq_activate_class(cl);
600 if (cl == q->active[prio]) {
601 q->active[prio] = cl_prev;
602 if (cl == q->active[prio]) {
603 q->active[prio] = NULL;
608 cl = cl_prev->next_alive;
609 } else if (sched - cl->penalized > 0)
610 sched = cl->penalized;
611 } while ((cl_prev = cl) != q->active[prio]);
616 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
618 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
620 struct Qdisc *sch = q->watchdog.qdisc;
622 psched_tdiff_t delay = 0;
625 now = psched_get_time();
631 int prio = ffz(~pmask);
636 tmp = cbq_undelay_prio(q, prio, now);
639 if (tmp < delay || delay == 0)
647 time = ktime_set(0, 0);
648 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
649 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
652 sch->flags &= ~TCQ_F_THROTTLED;
653 netif_schedule(sch->dev);
654 return HRTIMER_NORESTART;
657 #ifdef CONFIG_NET_CLS_ACT
658 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
661 struct Qdisc *sch = child->__parent;
662 struct cbq_sched_data *q = qdisc_priv(sch);
663 struct cbq_class *cl = q->rx_class;
667 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
669 cbq_mark_toplevel(q, cl);
672 cl->q->__parent = sch;
674 if (cl->q->enqueue(skb, cl->q) == 0) {
676 sch->bstats.packets++;
677 sch->bstats.bytes+=len;
679 cbq_activate_class(cl);
692 This is a mission-critical procedure.
694 We "regenerate" the toplevel cutoff if the transmitting class
695 has backlog and is not regulated. This is not part of the
696 original CBQ description, but looks more reasonable.
697 It may well be wrong; this question needs further investigation.
700 static __inline__ void
701 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
702 struct cbq_class *borrowed)
704 if (cl && q->toplevel >= borrowed->level) {
705 if (cl->q->q.qlen > 1) {
707 if (borrowed->undertime == PSCHED_PASTPERFECT) {
708 q->toplevel = borrowed->level;
711 } while ((borrowed=borrowed->borrow) != NULL);
714 /* It is not necessary now. Uncommenting it
715 will save CPU cycles, but decrease fairness.
717 q->toplevel = TC_CBQ_MAXLEVEL;
723 cbq_update(struct cbq_sched_data *q)
725 struct cbq_class *this = q->tx_class;
726 struct cbq_class *cl = this;
731 for ( ; cl; cl = cl->share) {
732 long avgidle = cl->avgidle;
735 cl->bstats.packets++;
736 cl->bstats.bytes += len;
739 (now - last) is the total time between packet right edges.
740 (last_pktlen/rate) is the "virtual" busy time, so that
742 idle = (now - last) - last_pktlen/rate
745 idle = q->now - cl->last;
746 if ((unsigned long)idle > 128*1024*1024) {
747 avgidle = cl->maxidle;
749 idle -= L2T(cl, len);
751 /* true_avgidle := (1-W)*true_avgidle + W*idle,
752 where W=2^{-ewma_log}. But cl->avgidle is scaled:
753 cl->avgidle == true_avgidle/W,
756 avgidle += idle - (avgidle>>cl->ewma_log);
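/* Example: with ewma_log = 5 (W = 1/32), true_avgidle' =
   (31/32)*true_avgidle + (1/32)*idle.  Dividing through by W gives
   avgidle' = avgidle + idle - (avgidle >> 5), i.e. exactly the update
   above, with no per-packet multiplication needed. */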
760 /* Overlimit or at-limit */
762 if (avgidle < cl->minidle)
763 avgidle = cl->minidle;
765 cl->avgidle = avgidle;
767 /* Calculate the expected time when this class
768 will be allowed to send.
770 (1-W)*true_avgidle + W*delay = 0, i.e.
771 idle = (1/W - 1)*(-true_avgidle)
773 idle = (1 - W)*(-cl->avgidle);
775 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
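/* i.e. (1 - W) * (-cl->avgidle), computed with a shift and a subtraction
   since W = 2^{-ewma_log} */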
779 To maintain the rate allocated to the class,
780 we add to undertime the virtual clock time
781 necessary to complete the transmitted packet.
782 (len/phys_bandwidth has already elapsed
783 by the time cbq_update is called.)
786 idle -= L2T(&q->link, len);
787 idle += L2T(cl, len);
789 cl->undertime = q->now + idle;
793 cl->undertime = PSCHED_PASTPERFECT;
794 if (avgidle > cl->maxidle)
795 cl->avgidle = cl->maxidle;
797 cl->avgidle = avgidle;
802 cbq_update_toplevel(q, this, q->tx_borrowed);
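/* Return cl itself if it may send now, otherwise the nearest under-limit
   ancestor it may borrow from.  Return NULL if the borrow chain is exhausted
   (in which case the overlimit action is invoked) or the top-level cutoff
   forbids borrowing. */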
805 static __inline__ struct cbq_class *
806 cbq_under_limit(struct cbq_class *cl)
808 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
809 struct cbq_class *this_cl = cl;
811 if (cl->tparent == NULL)
814 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
820 /* This is a very suspicious place. Currently the overlimit
821 action is generated for non-bounded classes
822 only if the link is completely congested.
823 Though this agrees with the ancestor-only paradigm,
824 it looks very questionable. In particular,
825 it means that this chunk of code will either
826 never be called or will strongly amplify
827 burstiness. Dangerous, silly, and yet
828 no other solution exists.
830 if ((cl = cl->borrow) == NULL) {
831 this_cl->qstats.overlimits++;
832 this_cl->overlimit(this_cl);
835 if (cl->level > q->toplevel)
837 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
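/* One weighted round-robin pass over the active classes of one priority
   band; each class may send up to its accumulated deficit, which is
   refilled by cl->quantum per round. */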
843 static __inline__ struct sk_buff *
844 cbq_dequeue_prio(struct Qdisc *sch, int prio)
846 struct cbq_sched_data *q = qdisc_priv(sch);
847 struct cbq_class *cl_tail, *cl_prev, *cl;
851 cl_tail = cl_prev = q->active[prio];
852 cl = cl_prev->next_alive;
859 struct cbq_class *borrow = cl;
862 (borrow = cbq_under_limit(cl)) == NULL)
865 if (cl->deficit <= 0) {
866 /* Class exhausted its allotment per
867 this round. Switch to the next one.
870 cl->deficit += cl->quantum;
874 skb = cl->q->dequeue(cl->q);
876 /* Class did not give us any skb :-(
877 It can occur even if cl->q->q.qlen != 0,
878 e.g. if cl->q == "tbf"
883 cl->deficit -= skb->len;
885 q->tx_borrowed = borrow;
887 #ifndef CBQ_XSTATS_BORROWS_BYTES
888 borrow->xstats.borrows++;
889 cl->xstats.borrows++;
891 borrow->xstats.borrows += skb->len;
892 cl->xstats.borrows += skb->len;
895 q->tx_len = skb->len;
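/* remembered for the EOS time integrator in cbq_dequeue() */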
897 if (cl->deficit <= 0) {
898 q->active[prio] = cl;
900 cl->deficit += cl->quantum;
905 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
906 /* Class is empty or penalized.
907 Unlink it from active chain.
909 cl_prev->next_alive = cl->next_alive;
910 cl->next_alive = NULL;
912 /* Did cl_tail point to it? */
917 /* Was it the last class in this band? */
920 q->active[prio] = NULL;
921 q->activemask &= ~(1<<prio);
923 cbq_activate_class(cl);
927 q->active[prio] = cl_tail;
930 cbq_activate_class(cl);
938 } while (cl_prev != cl_tail);
941 q->active[prio] = cl_prev;
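/* Serve priority bands in order, highest priority (lowest prio number)
   first, taking one WRR pass over each active band. */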
946 static __inline__ struct sk_buff *
947 cbq_dequeue_1(struct Qdisc *sch)
949 struct cbq_sched_data *q = qdisc_priv(sch);
953 activemask = q->activemask&0xFF;
955 int prio = ffz(~activemask);
956 activemask &= ~(1<<prio);
957 skb = cbq_dequeue_prio(sch, prio);
964 static struct sk_buff *
965 cbq_dequeue(struct Qdisc *sch)
968 struct cbq_sched_data *q = qdisc_priv(sch);
972 now = psched_get_time();
973 incr = now - q->now_rt;
976 psched_tdiff_t incr2;
977 /* Time integrator. We calculate the EOS (end of service) time
978 by adding the expected packet transmission time.
979 If real time is greater, we warp the artificial clock,
982 cbq_time = max(real_time, work);
984 incr2 = L2T(&q->link, q->tx_len);
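/* incr2: expected transmission time of the last dequeued packet on the
   link; q->now is advanced by at least this much. */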
987 if ((incr -= incr2) < 0)
996 skb = cbq_dequeue_1(sch);
999 sch->flags &= ~TCQ_F_THROTTLED;
1003 /* All the classes are overlimit.
1007 1. Scheduler is empty.
1008 2. Toplevel cutoff inhibited borrowing.
1009 3. Root class is overlimit.
1011 Reset conditions 2 and 3 and retry.
1013 Note that NS and cbq-2.0 are buggy: peeking at
1014 an arbitrary class is appropriate for ancestor-only
1015 sharing, but not for the toplevel algorithm.
1017 Our version is better but slower, because it requires
1018 two passes; that is unavoidable with top-level sharing.
1021 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1022 q->link.undertime == PSCHED_PASTPERFECT)
1025 q->toplevel = TC_CBQ_MAXLEVEL;
1026 q->link.undertime = PSCHED_PASTPERFECT;
1029 /* No packets in scheduler or nobody wants to give them to us :-(
1030 Sigh... start the watchdog timer in the latter case. */
1033 sch->qstats.overlimits++;
1035 qdisc_watchdog_schedule(&q->watchdog,
1036 now + q->wd_expires);
1041 /* CBQ class maintenance routines */
1043 static void cbq_adjust_levels(struct cbq_class *this)
1050 struct cbq_class *cl;
1052 if ((cl = this->children) != NULL) {
1054 if (cl->level > level)
1056 } while ((cl = cl->sibling) != this->children);
1058 this->level = level+1;
1059 } while ((this = this->tparent) != NULL);
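/* Rescale the per-round quanta of all classes in one priority band so that
   quantum = weight * allot * nclasses[prio] / (sum of weights in the band),
   i.e. proportional to the class weight relative to the band average. */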
1062 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1064 struct cbq_class *cl;
1065 struct hlist_node *n;
1068 if (q->quanta[prio] == 0)
1071 for (h = 0; h < q->clhash.hashsize; h++) {
1072 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1073 /* BUGGGG... Beware! This expression suffers from
1074 arithmetic overflow!
1076 if (cl->priority == prio) {
1077 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1080 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1081 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1082 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1088 static void cbq_sync_defmap(struct cbq_class *cl)
1090 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1091 struct cbq_class *split = cl->split;
1098 for (i=0; i<=TC_PRIO_MAX; i++) {
1099 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1100 split->defaults[i] = NULL;
1103 for (i=0; i<=TC_PRIO_MAX; i++) {
1104 int level = split->level;
1106 if (split->defaults[i])
1109 for (h = 0; h < q->clhash.hashsize; h++) {
1110 struct hlist_node *n;
1111 struct cbq_class *c;
1113 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1115 if (c->split == split && c->level < level &&
1117 split->defaults[i] = c;
1125 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1127 struct cbq_class *split = NULL;
1130 if ((split = cl->split) == NULL)
1132 splitid = split->common.classid;
1135 if (split == NULL || split->common.classid != splitid) {
1136 for (split = cl->tparent; split; split = split->tparent)
1137 if (split->common.classid == splitid)
1144 if (cl->split != split) {
1146 cbq_sync_defmap(cl);
1148 cl->defmap = def&mask;
1150 cl->defmap = (cl->defmap&~mask)|(def&mask);
1152 cbq_sync_defmap(cl);
1155 static void cbq_unlink_class(struct cbq_class *this)
1157 struct cbq_class *cl, **clp;
1158 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1160 qdisc_class_hash_remove(&q->clhash, &this->common);
1162 if (this->tparent) {
1171 } while ((cl = *clp) != this->sibling);
1173 if (this->tparent->children == this) {
1174 this->tparent->children = this->sibling;
1175 if (this->sibling == this)
1176 this->tparent->children = NULL;
1179 BUG_TRAP(this->sibling == this);
1183 static void cbq_link_class(struct cbq_class *this)
1185 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1186 struct cbq_class *parent = this->tparent;
1188 this->sibling = this;
1189 qdisc_class_hash_insert(&q->clhash, &this->common);
1194 if (parent->children == NULL) {
1195 parent->children = this;
1197 this->sibling = parent->children->sibling;
1198 parent->children->sibling = this;
1202 static unsigned int cbq_drop(struct Qdisc* sch)
1204 struct cbq_sched_data *q = qdisc_priv(sch);
1205 struct cbq_class *cl, *cl_head;
1209 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1210 if ((cl_head = q->active[prio]) == NULL)
1215 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1218 cbq_deactivate_class(cl);
1221 } while ((cl = cl->next_alive) != cl_head);
1227 cbq_reset(struct Qdisc* sch)
1229 struct cbq_sched_data *q = qdisc_priv(sch);
1230 struct cbq_class *cl;
1231 struct hlist_node *n;
1238 q->tx_borrowed = NULL;
1239 qdisc_watchdog_cancel(&q->watchdog);
1240 hrtimer_cancel(&q->delay_timer);
1241 q->toplevel = TC_CBQ_MAXLEVEL;
1242 q->now = psched_get_time();
1245 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1246 q->active[prio] = NULL;
1248 for (h = 0; h < q->clhash.hashsize; h++) {
1249 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1252 cl->next_alive = NULL;
1253 cl->undertime = PSCHED_PASTPERFECT;
1254 cl->avgidle = cl->maxidle;
1255 cl->deficit = cl->quantum;
1256 cl->cpriority = cl->priority;
1263 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1265 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1266 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1267 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1269 if (lss->change&TCF_CBQ_LSS_EWMA)
1270 cl->ewma_log = lss->ewma_log;
1271 if (lss->change&TCF_CBQ_LSS_AVPKT)
1272 cl->avpkt = lss->avpkt;
1273 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1274 cl->minidle = -(long)lss->minidle;
1275 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1276 cl->maxidle = lss->maxidle;
1277 cl->avgidle = lss->maxidle;
1279 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1280 cl->offtime = lss->offtime;
1284 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1286 q->nclasses[cl->priority]--;
1287 q->quanta[cl->priority] -= cl->weight;
1288 cbq_normalize_quanta(q, cl->priority);
1291 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1293 q->nclasses[cl->priority]++;
1294 q->quanta[cl->priority] += cl->weight;
1295 cbq_normalize_quanta(q, cl->priority);
1298 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1300 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1303 cl->allot = wrr->allot;
1305 cl->weight = wrr->weight;
1306 if (wrr->priority) {
1307 cl->priority = wrr->priority-1;
1308 cl->cpriority = cl->priority;
1309 if (cl->priority >= cl->priority2)
1310 cl->priority2 = TC_CBQ_MAXPRIO-1;
1317 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1319 switch (ovl->strategy) {
1320 case TC_CBQ_OVL_CLASSIC:
1321 cl->overlimit = cbq_ovl_classic;
1323 case TC_CBQ_OVL_DELAY:
1324 cl->overlimit = cbq_ovl_delay;
1326 case TC_CBQ_OVL_LOWPRIO:
1327 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1328 ovl->priority2-1 <= cl->priority)
1330 cl->priority2 = ovl->priority2-1;
1331 cl->overlimit = cbq_ovl_lowprio;
1333 case TC_CBQ_OVL_DROP:
1334 cl->overlimit = cbq_ovl_drop;
1336 case TC_CBQ_OVL_RCLASSIC:
1337 cl->overlimit = cbq_ovl_rclassic;
1342 cl->penalty = ovl->penalty;
1346 #ifdef CONFIG_NET_CLS_ACT
1347 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1349 cl->police = p->police;
1351 if (cl->q->handle) {
1352 if (p->police == TC_POLICE_RECLASSIFY)
1353 cl->q->reshape_fail = cbq_reshape_fail;
1355 cl->q->reshape_fail = NULL;
1361 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1363 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1367 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1368 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1369 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1370 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1371 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1372 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1373 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1374 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1377 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1379 struct cbq_sched_data *q = qdisc_priv(sch);
1380 struct nlattr *tb[TCA_CBQ_MAX + 1];
1381 struct tc_ratespec *r;
1384 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1388 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1391 r = nla_data(tb[TCA_CBQ_RATE]);
1393 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1396 err = qdisc_class_hash_init(&q->clhash);
1401 q->link.sibling = &q->link;
1402 q->link.common.classid = sch->handle;
1403 q->link.qdisc = sch;
1404 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1406 q->link.q = &noop_qdisc;
1408 q->link.priority = TC_CBQ_MAXPRIO-1;
1409 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1410 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1411 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1412 q->link.overlimit = cbq_ovl_classic;
1413 q->link.allot = psched_mtu(sch->dev);
1414 q->link.quantum = q->link.allot;
1415 q->link.weight = q->link.R_tab->rate.rate;
1417 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1418 q->link.avpkt = q->link.allot/2;
1419 q->link.minidle = -0x7FFFFFFF;
1421 qdisc_watchdog_init(&q->watchdog, sch);
1422 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1423 q->delay_timer.function = cbq_undelay;
1424 q->toplevel = TC_CBQ_MAXLEVEL;
1425 q->now = psched_get_time();
1428 cbq_link_class(&q->link);
1430 if (tb[TCA_CBQ_LSSOPT])
1431 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1433 cbq_addprio(q, &q->link);
1437 qdisc_put_rtab(q->link.R_tab);
1441 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1443 unsigned char *b = skb_tail_pointer(skb);
1445 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1453 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1455 unsigned char *b = skb_tail_pointer(skb);
1456 struct tc_cbq_lssopt opt;
1459 if (cl->borrow == NULL)
1460 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1461 if (cl->share == NULL)
1462 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1463 opt.ewma_log = cl->ewma_log;
1464 opt.level = cl->level;
1465 opt.avpkt = cl->avpkt;
1466 opt.maxidle = cl->maxidle;
1467 opt.minidle = (u32)(-cl->minidle);
1468 opt.offtime = cl->offtime;
1470 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1478 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1480 unsigned char *b = skb_tail_pointer(skb);
1481 struct tc_cbq_wrropt opt;
1484 opt.allot = cl->allot;
1485 opt.priority = cl->priority+1;
1486 opt.cpriority = cl->cpriority+1;
1487 opt.weight = cl->weight;
1488 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1496 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1498 unsigned char *b = skb_tail_pointer(skb);
1499 struct tc_cbq_ovl opt;
1501 opt.strategy = cl->ovl_strategy;
1502 opt.priority2 = cl->priority2+1;
1504 opt.penalty = cl->penalty;
1505 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1513 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1515 unsigned char *b = skb_tail_pointer(skb);
1516 struct tc_cbq_fopt opt;
1518 if (cl->split || cl->defmap) {
1519 opt.split = cl->split ? cl->split->common.classid : 0;
1520 opt.defmap = cl->defmap;
1522 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1531 #ifdef CONFIG_NET_CLS_ACT
1532 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1534 unsigned char *b = skb_tail_pointer(skb);
1535 struct tc_cbq_police opt;
1538 opt.police = cl->police;
1541 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1551 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1553 if (cbq_dump_lss(skb, cl) < 0 ||
1554 cbq_dump_rate(skb, cl) < 0 ||
1555 cbq_dump_wrr(skb, cl) < 0 ||
1556 cbq_dump_ovl(skb, cl) < 0 ||
1557 #ifdef CONFIG_NET_CLS_ACT
1558 cbq_dump_police(skb, cl) < 0 ||
1560 cbq_dump_fopt(skb, cl) < 0)
1565 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1567 struct cbq_sched_data *q = qdisc_priv(sch);
1568 struct nlattr *nest;
1570 nest = nla_nest_start(skb, TCA_OPTIONS);
1572 goto nla_put_failure;
1573 if (cbq_dump_attr(skb, &q->link) < 0)
1574 goto nla_put_failure;
1575 nla_nest_end(skb, nest);
1579 nla_nest_cancel(skb, nest);
1584 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1586 struct cbq_sched_data *q = qdisc_priv(sch);
1588 q->link.xstats.avgidle = q->link.avgidle;
1589 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1593 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1594 struct sk_buff *skb, struct tcmsg *tcm)
1596 struct cbq_class *cl = (struct cbq_class*)arg;
1597 struct nlattr *nest;
1600 tcm->tcm_parent = cl->tparent->common.classid;
1602 tcm->tcm_parent = TC_H_ROOT;
1603 tcm->tcm_handle = cl->common.classid;
1604 tcm->tcm_info = cl->q->handle;
1606 nest = nla_nest_start(skb, TCA_OPTIONS);
1608 goto nla_put_failure;
1609 if (cbq_dump_attr(skb, cl) < 0)
1610 goto nla_put_failure;
1611 nla_nest_end(skb, nest);
1615 nla_nest_cancel(skb, nest);
1620 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1621 struct gnet_dump *d)
1623 struct cbq_sched_data *q = qdisc_priv(sch);
1624 struct cbq_class *cl = (struct cbq_class*)arg;
1626 cl->qstats.qlen = cl->q->q.qlen;
1627 cl->xstats.avgidle = cl->avgidle;
1628 cl->xstats.undertime = 0;
1630 if (cl->undertime != PSCHED_PASTPERFECT)
1631 cl->xstats.undertime = cl->undertime - q->now;
1633 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1634 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1635 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1638 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1641 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1644 struct cbq_class *cl = (struct cbq_class*)arg;
1648 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1649 cl->common.classid);
1653 #ifdef CONFIG_NET_CLS_ACT
1654 if (cl->police == TC_POLICE_RECLASSIFY)
1655 new->reshape_fail = cbq_reshape_fail;
1659 *old = xchg(&cl->q, new);
1660 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1662 sch_tree_unlock(sch);
1669 static struct Qdisc *
1670 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1672 struct cbq_class *cl = (struct cbq_class*)arg;
1674 return cl ? cl->q : NULL;
1677 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1679 struct cbq_class *cl = (struct cbq_class *)arg;
1681 if (cl->q->q.qlen == 0)
1682 cbq_deactivate_class(cl);
1685 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1687 struct cbq_sched_data *q = qdisc_priv(sch);
1688 struct cbq_class *cl = cbq_class_lookup(q, classid);
1692 return (unsigned long)cl;
1697 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1699 struct cbq_sched_data *q = qdisc_priv(sch);
1701 BUG_TRAP(!cl->filters);
1703 tcf_destroy_chain(&cl->filter_list);
1704 qdisc_destroy(cl->q);
1705 qdisc_put_rtab(cl->R_tab);
1706 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1712 cbq_destroy(struct Qdisc* sch)
1714 struct cbq_sched_data *q = qdisc_priv(sch);
1715 struct hlist_node *n, *next;
1716 struct cbq_class *cl;
1719 #ifdef CONFIG_NET_CLS_ACT
1723 * Filters must be destroyed first because we don't destroy the
1724 * classes from root to leaves, which means that filters can still
1725 * be bound to classes which have been destroyed already. --TGR '04
1727 for (h = 0; h < q->clhash.hashsize; h++) {
1728 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1729 tcf_destroy_chain(&cl->filter_list);
1731 for (h = 0; h < q->clhash.hashsize; h++) {
1732 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1734 cbq_destroy_class(sch, cl);
1736 qdisc_class_hash_destroy(&q->clhash);
1739 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1741 struct cbq_class *cl = (struct cbq_class*)arg;
1743 if (--cl->refcnt == 0) {
1744 #ifdef CONFIG_NET_CLS_ACT
1745 struct cbq_sched_data *q = qdisc_priv(sch);
1747 spin_lock_bh(&sch->dev->queue_lock);
1748 if (q->rx_class == cl)
1750 spin_unlock_bh(&sch->dev->queue_lock);
1753 cbq_destroy_class(sch, cl);
1758 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1762 struct cbq_sched_data *q = qdisc_priv(sch);
1763 struct cbq_class *cl = (struct cbq_class*)*arg;
1764 struct nlattr *opt = tca[TCA_OPTIONS];
1765 struct nlattr *tb[TCA_CBQ_MAX + 1];
1766 struct cbq_class *parent;
1767 struct qdisc_rate_table *rtab = NULL;
1772 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1780 cl->tparent->common.classid != parentid)
1782 if (!cl->tparent && parentid != TC_H_ROOT)
1786 if (tb[TCA_CBQ_RATE]) {
1787 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1792 /* Change class parameters */
1795 if (cl->next_alive != NULL)
1796 cbq_deactivate_class(cl);
1799 rtab = xchg(&cl->R_tab, rtab);
1800 qdisc_put_rtab(rtab);
1803 if (tb[TCA_CBQ_LSSOPT])
1804 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1806 if (tb[TCA_CBQ_WRROPT]) {
1808 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1811 if (tb[TCA_CBQ_OVL_STRATEGY])
1812 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1814 #ifdef CONFIG_NET_CLS_ACT
1815 if (tb[TCA_CBQ_POLICE])
1816 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1819 if (tb[TCA_CBQ_FOPT])
1820 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1823 cbq_activate_class(cl);
1825 sch_tree_unlock(sch);
1828 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1829 &sch->dev->queue_lock,
1834 if (parentid == TC_H_ROOT)
1837 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1838 tb[TCA_CBQ_LSSOPT] == NULL)
1841 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1847 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1851 classid = TC_H_MAKE(sch->handle,0x8000);
1853 for (i=0; i<0x8000; i++) {
1854 if (++q->hgenerator >= 0x8000)
1856 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1862 classid = classid|q->hgenerator;
1867 parent = cbq_class_lookup(q, parentid);
1874 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1880 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1881 cl->q = &noop_qdisc;
1882 cl->common.classid = classid;
1883 cl->tparent = parent;
1885 cl->allot = parent->allot;
1886 cl->quantum = cl->allot;
1887 cl->weight = cl->R_tab->rate.rate;
1891 cl->borrow = cl->tparent;
1892 if (cl->tparent != &q->link)
1893 cl->share = cl->tparent;
1894 cbq_adjust_levels(parent);
1895 cl->minidle = -0x7FFFFFFF;
1896 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1897 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1898 if (cl->ewma_log==0)
1899 cl->ewma_log = q->link.ewma_log;
1901 cl->maxidle = q->link.maxidle;
1903 cl->avpkt = q->link.avpkt;
1904 cl->overlimit = cbq_ovl_classic;
1905 if (tb[TCA_CBQ_OVL_STRATEGY])
1906 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1907 #ifdef CONFIG_NET_CLS_ACT
1908 if (tb[TCA_CBQ_POLICE])
1909 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1911 if (tb[TCA_CBQ_FOPT])
1912 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1913 sch_tree_unlock(sch);
1915 qdisc_class_hash_grow(sch, &q->clhash);
1918 gen_new_estimator(&cl->bstats, &cl->rate_est,
1919 &sch->dev->queue_lock, tca[TCA_RATE]);
1921 *arg = (unsigned long)cl;
1925 qdisc_put_rtab(rtab);
1929 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1931 struct cbq_sched_data *q = qdisc_priv(sch);
1932 struct cbq_class *cl = (struct cbq_class*)arg;
1935 if (cl->filters || cl->children || cl == &q->link)
1940 qlen = cl->q->q.qlen;
1942 qdisc_tree_decrease_qlen(cl->q, qlen);
1945 cbq_deactivate_class(cl);
1947 if (q->tx_borrowed == cl)
1948 q->tx_borrowed = q->tx_class;
1949 if (q->tx_class == cl) {
1951 q->tx_borrowed = NULL;
1953 #ifdef CONFIG_NET_CLS_ACT
1954 if (q->rx_class == cl)
1958 cbq_unlink_class(cl);
1959 cbq_adjust_levels(cl->tparent);
1961 cbq_sync_defmap(cl);
1964 sch_tree_unlock(sch);
1966 if (--cl->refcnt == 0)
1967 cbq_destroy_class(sch, cl);
1972 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1974 struct cbq_sched_data *q = qdisc_priv(sch);
1975 struct cbq_class *cl = (struct cbq_class *)arg;
1980 return &cl->filter_list;
1983 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1986 struct cbq_sched_data *q = qdisc_priv(sch);
1987 struct cbq_class *p = (struct cbq_class*)parent;
1988 struct cbq_class *cl = cbq_class_lookup(q, classid);
1991 if (p && p->level <= cl->level)
1994 return (unsigned long)cl;
1999 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2001 struct cbq_class *cl = (struct cbq_class*)arg;
2006 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2008 struct cbq_sched_data *q = qdisc_priv(sch);
2009 struct cbq_class *cl;
2010 struct hlist_node *n;
2016 for (h = 0; h < q->clhash.hashsize; h++) {
2017 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2018 if (arg->count < arg->skip) {
2022 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2031 static const struct Qdisc_class_ops cbq_class_ops = {
2034 .qlen_notify = cbq_qlen_notify,
2037 .change = cbq_change_class,
2038 .delete = cbq_delete,
2040 .tcf_chain = cbq_find_tcf,
2041 .bind_tcf = cbq_bind_filter,
2042 .unbind_tcf = cbq_unbind_filter,
2043 .dump = cbq_dump_class,
2044 .dump_stats = cbq_dump_class_stats,
2047 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2049 .cl_ops = &cbq_class_ops,
2051 .priv_size = sizeof(struct cbq_sched_data),
2052 .enqueue = cbq_enqueue,
2053 .dequeue = cbq_dequeue,
2054 .requeue = cbq_requeue,
2058 .destroy = cbq_destroy,
2061 .dump_stats = cbq_dump_stats,
2062 .owner = THIS_MODULE,
2065 static int __init cbq_module_init(void)
2067 return register_qdisc(&cbq_qdisc_ops);
2069 static void __exit cbq_module_exit(void)
2071 unregister_qdisc(&cbq_qdisc_ops);
2073 module_init(cbq_module_init)
2074 module_exit(cbq_module_exit)
2075 MODULE_LICENSE("GPL");