2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real-time classes
48 have small rates. Note that the statement of [3] is
49 incomplete: delay may actually be estimated even if the class
50 per-round allotment is less than MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
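   As a purely illustrative plug-in of invented numbers (they are not
   taken from the code or from [1]-[4], and B is assumed to denote the
   link bandwidth): with MTU = 1500 bytes, B = 1.25e6 bytes/sec
   (a 10 Mbit/s link), k = 2, W = 1000 bytes, r_i = 0.1 and r = 0.5,
   we get W*r_i = 100 bytes and [MTU/(W*r_i)] = 15, hence

   delay_i <= (15*500 + 500 + 2*1500)/1.25e6 = 11000/1.25e6 ~= 8.8 msec.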
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate true class
65 idle time. The workaround is to treat the next dequeue event
66 as a sign that the previous packet has finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it holds.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
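/* A small hypothetical illustration of the idle-time estimate described
   above (the numbers are invented, not derived from the code): suppose a
   1000-byte packet is dequeued at t = 0, the class rate table says it
   takes 0.8 msec to transmit at the class's allocated rate, and the next
   dequeue happens at t = 2.0 msec. The estimated class idle time is then

       idle = (2.0 - 0) msec - 0.8 msec = 1.2 msec,

   i.e. (time between dequeues) minus (virtual transmission time),
   essentially the quantity cbq_update() computes below as
   (now - last) - L2T(cl, len). */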
71 struct cbq_sched_data;
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
210 Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
211 so that classification resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
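   A purely illustrative walk-through (the handles are hypothetical):
   suppose the qdisc handle is 1:0. A packet whose skb->priority equals
   classid 1:10 is matched directly at Step 1. Otherwise the filter list
   of the current head is consulted; a filter may return a class
   directly, a full classid to look up, or a bare priority, in which
   case the head's defmap (indexed by that value as a TC_PRIO_* index)
   supplies the class, falling back to the TC_PRIO_BESTEFFORT entry.
   If the result is not a leaf, the procedure repeats with it as the
   new head (Step 3+n below).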
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_BYPASS;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
270 * Step 3+n. If classifier selected a link sharing class,
271 * apply the agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
292 A packet has just been enqueued on an empty class.
293 cbq_activate_class adds the class to the tail of the active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
316 Unlink the class from the active chain.
317 Note that the same unlinking is also done directly in cbq_dequeue*
318 during the round-robin procedure.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
374 int uninitialized_var(ret);
375 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
377 #ifdef CONFIG_NET_CLS_ACT
381 if (ret == NET_XMIT_BYPASS)
387 #ifdef CONFIG_NET_CLS_ACT
388 cl->q->__parent = sch;
390 ret = qdisc_enqueue(skb, cl->q);
391 if (ret == NET_XMIT_SUCCESS) {
393 sch->bstats.packets++;
394 sch->bstats.bytes += len;
395 cbq_mark_toplevel(q, cl);
397 cbq_activate_class(cl);
402 cbq_mark_toplevel(q, cl);
408 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
410 struct cbq_sched_data *q = qdisc_priv(sch);
411 struct cbq_class *cl;
414 if ((cl = q->tx_class) == NULL) {
421 cbq_mark_toplevel(q, cl);
423 #ifdef CONFIG_NET_CLS_ACT
425 cl->q->__parent = sch;
427 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
429 sch->qstats.requeues++;
431 cbq_activate_class(cl);
439 /* Overlimit actions */
441 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
443 static void cbq_ovl_classic(struct cbq_class *cl)
445 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
446 psched_tdiff_t delay = cl->undertime - q->now;
449 delay += cl->offtime;
452 Class goes to sleep, so that it will have no
453 chance to work off its avgidle. Let's forgive it 8)
455 BTW cbq-2.0 has a bug in this
456 place; apparently they forgot to shift it by cl->ewma_log.
459 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
460 if (cl->avgidle < cl->minidle)
461 cl->avgidle = cl->minidle;
464 cl->undertime = q->now + delay;
466 cl->xstats.overactions++;
469 if (q->wd_expires == 0 || q->wd_expires > delay)
470 q->wd_expires = delay;
472 /* Dirty work! We must schedule wakeups based on
473 real available rate, rather than leaf rate,
474 which may be tiny (even zero).
476 if (q->toplevel == TC_CBQ_MAXLEVEL) {
478 psched_tdiff_t base_delay = q->wd_expires;
480 for (b = cl->borrow; b; b = b->borrow) {
481 delay = b->undertime - q->now;
482 if (delay < base_delay) {
489 q->wd_expires = base_delay;
493 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
497 static void cbq_ovl_rclassic(struct cbq_class *cl)
499 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
500 struct cbq_class *this = cl;
503 if (cl->level > q->toplevel) {
507 } while ((cl = cl->borrow) != NULL);
514 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
516 static void cbq_ovl_delay(struct cbq_class *cl)
518 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
519 psched_tdiff_t delay = cl->undertime - q->now;
522 psched_time_t sched = q->now;
525 delay += cl->offtime;
527 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
528 if (cl->avgidle < cl->minidle)
529 cl->avgidle = cl->minidle;
530 cl->undertime = q->now + delay;
533 sched += delay + cl->penalty;
534 cl->penalized = sched;
535 cl->cpriority = TC_CBQ_MAXPRIO;
536 q->pmask |= (1<<TC_CBQ_MAXPRIO);
538 expires = ktime_set(0, 0);
539 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
540 if (hrtimer_try_to_cancel(&q->delay_timer) &&
541 ktime_to_ns(ktime_sub(q->delay_timer.expires,
543 q->delay_timer.expires = expires;
544 hrtimer_restart(&q->delay_timer);
546 cl->xstats.overactions++;
551 if (q->wd_expires == 0 || q->wd_expires > delay)
552 q->wd_expires = delay;
555 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
557 static void cbq_ovl_lowprio(struct cbq_class *cl)
559 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
561 cl->penalized = q->now + cl->penalty;
563 if (cl->cpriority != cl->priority2) {
564 cl->cpriority = cl->priority2;
565 q->pmask |= (1<<cl->cpriority);
566 cl->xstats.overactions++;
571 /* TC_CBQ_OVL_DROP: penalize class by dropping */
573 static void cbq_ovl_drop(struct cbq_class *cl)
575 if (cl->q->ops->drop)
576 if (cl->q->ops->drop(cl->q))
578 cl->xstats.overactions++;
582 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
585 struct cbq_class *cl;
586 struct cbq_class *cl_prev = q->active[prio];
587 psched_time_t sched = now;
593 cl = cl_prev->next_alive;
594 if (now - cl->penalized > 0) {
595 cl_prev->next_alive = cl->next_alive;
596 cl->next_alive = NULL;
597 cl->cpriority = cl->priority;
599 cbq_activate_class(cl);
601 if (cl == q->active[prio]) {
602 q->active[prio] = cl_prev;
603 if (cl == q->active[prio]) {
604 q->active[prio] = NULL;
609 cl = cl_prev->next_alive;
610 } else if (sched - cl->penalized > 0)
611 sched = cl->penalized;
612 } while ((cl_prev = cl) != q->active[prio]);
617 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
619 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
621 struct Qdisc *sch = q->watchdog.qdisc;
623 psched_tdiff_t delay = 0;
626 now = psched_get_time();
632 int prio = ffz(~pmask);
637 tmp = cbq_undelay_prio(q, prio, now);
640 if (tmp < delay || delay == 0)
648 time = ktime_set(0, 0);
649 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
650 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
653 sch->flags &= ~TCQ_F_THROTTLED;
654 __netif_schedule(sch);
655 return HRTIMER_NORESTART;
658 #ifdef CONFIG_NET_CLS_ACT
659 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
662 struct Qdisc *sch = child->__parent;
663 struct cbq_sched_data *q = qdisc_priv(sch);
664 struct cbq_class *cl = q->rx_class;
668 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
670 cbq_mark_toplevel(q, cl);
673 cl->q->__parent = sch;
675 if (qdisc_enqueue(skb, cl->q) == 0) {
677 sch->bstats.packets++;
678 sch->bstats.bytes += len;
680 cbq_activate_class(cl);
693 It is a mission-critical procedure.
695 We "regenerate" the toplevel cutoff if the transmitting class
696 has backlog and is not regulated. This is not part of the
697 original CBQ description, but it looks more reasonable.
698 Probably it is wrong; this question needs further investigation.
701 static __inline__ void
702 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
703 struct cbq_class *borrowed)
705 if (cl && q->toplevel >= borrowed->level) {
706 if (cl->q->q.qlen > 1) {
708 if (borrowed->undertime == PSCHED_PASTPERFECT) {
709 q->toplevel = borrowed->level;
712 } while ((borrowed = borrowed->borrow) != NULL);
715 /* This is not necessary now. Uncommenting it
716 would save CPU cycles but decrease fairness.
718 q->toplevel = TC_CBQ_MAXLEVEL;
724 cbq_update(struct cbq_sched_data *q)
726 struct cbq_class *this = q->tx_class;
727 struct cbq_class *cl = this;
732 for ( ; cl; cl = cl->share) {
733 long avgidle = cl->avgidle;
736 cl->bstats.packets++;
737 cl->bstats.bytes += len;
740 (now - last) is the total time between packet right edges.
741 (last_pktlen/rate) is the "virtual" busy time, so that
743 idle = (now - last) - last_pktlen/rate
746 idle = q->now - cl->last;
747 if ((unsigned long)idle > 128*1024*1024) {
748 avgidle = cl->maxidle;
750 idle -= L2T(cl, len);
752 /* true_avgidle := (1-W)*true_avgidle + W*idle,
753 where W=2^{-ewma_log}. But cl->avgidle is scaled:
754 cl->avgidle == true_avgidle/W,
757 avgidle += idle - (avgidle>>cl->ewma_log);
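/* A hypothetical worked instance of the scaled EWMA above (numbers
   invented for illustration): with ewma_log = 5, W = 1/32. Starting
   from avgidle = 0, one overlimit packet with idle = -64 (in psched
   ticks) gives

       avgidle = 0 + (-64) - (0 >> 5) = -64,

   i.e. true_avgidle = avgidle * W = -2. The undertime step further
   below then computes

       (-avgidle) - ((-avgidle) >> ewma_log) = 64 - 2 = 62,

   which equals (1/W - 1) * (-true_avgidle) = 31 * 2 = 62, consistent
   with the derivation in the comment below. */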
761 /* Overlimit or at-limit */
763 if (avgidle < cl->minidle)
764 avgidle = cl->minidle;
766 cl->avgidle = avgidle;
768 /* Calculate the expected time when this class
769 will be allowed to send.
771 (1-W)*true_avgidle + W*delay = 0, i.e.
772 idle = (1/W - 1)*(-true_avgidle)
774 idle = (1 - W)*(-cl->avgidle);
776 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
780 To maintain the rate allocated to the class,
781 we add to undertime the virtual clock time
782 necessary to complete the transmitted packet.
783 (len/phys_bandwidth has already elapsed by
784 the moment cbq_update is called.)
787 idle -= L2T(&q->link, len);
788 idle += L2T(cl, len);
790 cl->undertime = q->now + idle;
794 cl->undertime = PSCHED_PASTPERFECT;
795 if (avgidle > cl->maxidle)
796 cl->avgidle = cl->maxidle;
798 cl->avgidle = avgidle;
803 cbq_update_toplevel(q, this, q->tx_borrowed);
806 static __inline__ struct cbq_class *
807 cbq_under_limit(struct cbq_class *cl)
809 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
810 struct cbq_class *this_cl = cl;
812 if (cl->tparent == NULL)
815 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
821 /* This is a very suspicious place. Currently the overlimit
822 action is generated for non-bounded classes
823 only if the link is completely congested.
824 Though this agrees with the ancestor-only paradigm,
825 it looks very stupid. In particular,
826 it means that this chunk of code will either
827 never be called or will result in strong amplification
828 of burstiness. Dangerous and silly, but
829 no other solution exists.
831 if ((cl = cl->borrow) == NULL) {
832 this_cl->qstats.overlimits++;
833 this_cl->overlimit(this_cl);
836 if (cl->level > q->toplevel)
838 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
844 static __inline__ struct sk_buff *
845 cbq_dequeue_prio(struct Qdisc *sch, int prio)
847 struct cbq_sched_data *q = qdisc_priv(sch);
848 struct cbq_class *cl_tail, *cl_prev, *cl;
852 cl_tail = cl_prev = q->active[prio];
853 cl = cl_prev->next_alive;
860 struct cbq_class *borrow = cl;
863 (borrow = cbq_under_limit(cl)) == NULL)
866 if (cl->deficit <= 0) {
867 /* Class exhausted its allotment for
868 this round. Switch to the next one.
871 cl->deficit += cl->quantum;
875 skb = cl->q->dequeue(cl->q);
877 /* Class did not give us any skb :-(
878 This can occur even if cl->q->q.qlen != 0,
879 e.g. if cl->q is a "tbf" qdisc.
884 cl->deficit -= skb->len;
886 q->tx_borrowed = borrow;
888 #ifndef CBQ_XSTATS_BORROWS_BYTES
889 borrow->xstats.borrows++;
890 cl->xstats.borrows++;
892 borrow->xstats.borrows += skb->len;
893 cl->xstats.borrows += skb->len;
896 q->tx_len = skb->len;
898 if (cl->deficit <= 0) {
899 q->active[prio] = cl;
901 cl->deficit += cl->quantum;
906 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
907 /* Class is empty or penalized.
908 Unlink it from the active chain.
910 cl_prev->next_alive = cl->next_alive;
911 cl->next_alive = NULL;
913 /* Did cl_tail point to it? */
918 /* Was it the last class in this band? */
921 q->active[prio] = NULL;
922 q->activemask &= ~(1<<prio);
924 cbq_activate_class(cl);
928 q->active[prio] = cl_tail;
931 cbq_activate_class(cl);
939 } while (cl_prev != cl_tail);
942 q->active[prio] = cl_prev;
947 static __inline__ struct sk_buff *
948 cbq_dequeue_1(struct Qdisc *sch)
950 struct cbq_sched_data *q = qdisc_priv(sch);
954 activemask = q->activemask&0xFF;
956 int prio = ffz(~activemask);
957 activemask &= ~(1<<prio);
958 skb = cbq_dequeue_prio(sch, prio);
965 static struct sk_buff *
966 cbq_dequeue(struct Qdisc *sch)
969 struct cbq_sched_data *q = qdisc_priv(sch);
973 now = psched_get_time();
974 incr = now - q->now_rt;
977 psched_tdiff_t incr2;
978 /* Time integrator. We calculate EOS time
979 by adding the expected packet transmission time.
980 If real time is greater, we warp the artificial clock, so that
983 cbq_time = max(real_time, work);
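   As a hypothetical illustration (numbers invented): if the last
   transmitted packet represents 0.8 msec of work on the link but only
   0.5 msec of real time has elapsed since the previous dequeue, the
   artificial clock is pushed forward by the work time; if instead
   1.2 msec of real time has elapsed, real time wins. Either way the
   artificial clock tracks max(real_time, work).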
985 incr2 = L2T(&q->link, q->tx_len);
988 if ((incr -= incr2) < 0)
997 skb = cbq_dequeue_1(sch);
1000 sch->flags &= ~TCQ_F_THROTTLED;
1004 /* All the classes are overlimit.
1008 1. Scheduler is empty.
1009 2. Toplevel cutoff inhibited borrowing.
1010 3. Root class is overlimit.
1012 Reset the 2nd and 3rd conditions and retry.
1014 Note that NS and cbq-2.0 are buggy here: peeking
1015 an arbitrary class is appropriate for ancestor-only
1016 sharing, but not for the toplevel algorithm.
1018 Our version is better but slower, because it requires
1019 two passes; that is unavoidable with top-level sharing.
1022 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1023 q->link.undertime == PSCHED_PASTPERFECT)
1026 q->toplevel = TC_CBQ_MAXLEVEL;
1027 q->link.undertime = PSCHED_PASTPERFECT;
1030 /* No packets in the scheduler, or nobody wants to give them to us :-(
1031 Sigh... start the watchdog timer in the latter case. */
1034 sch->qstats.overlimits++;
1036 qdisc_watchdog_schedule(&q->watchdog,
1037 now + q->wd_expires);
1042 /* CBQ class maintenance routines */
1044 static void cbq_adjust_levels(struct cbq_class *this)
1051 struct cbq_class *cl;
1053 if ((cl = this->children) != NULL) {
1055 if (cl->level > level)
1057 } while ((cl = cl->sibling) != this->children);
1059 this->level = level+1;
1060 } while ((this = this->tparent) != NULL);
1063 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1065 struct cbq_class *cl;
1066 struct hlist_node *n;
1069 if (q->quanta[prio] == 0)
1072 for (h = 0; h < q->clhash.hashsize; h++) {
1073 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1074 /* BUGGGG... Beware! This expression suffers from
1075 arithmetic overflows!
1077 if (cl->priority == prio) {
1078 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1081 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1082 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1083 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
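/* A hypothetical worked example of the normalization above (all values
   invented): with two classes in this priority band (nclasses = 2),
   weights 2 and 1 (so quanta[prio] = 3) and allot = 1514, the formula
   yields quanta of (2*1514*2)/3 = 2018 and (1*1514*2)/3 = 1009 bytes,
   i.e. per-round allotments proportional to weight and summing to
   roughly nclasses * allot. */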
1089 static void cbq_sync_defmap(struct cbq_class *cl)
1091 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1092 struct cbq_class *split = cl->split;
1099 for (i=0; i<=TC_PRIO_MAX; i++) {
1100 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1101 split->defaults[i] = NULL;
1104 for (i=0; i<=TC_PRIO_MAX; i++) {
1105 int level = split->level;
1107 if (split->defaults[i])
1110 for (h = 0; h < q->clhash.hashsize; h++) {
1111 struct hlist_node *n;
1112 struct cbq_class *c;
1114 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1116 if (c->split == split && c->level < level &&
1118 split->defaults[i] = c;
1126 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1128 struct cbq_class *split = NULL;
1131 if ((split = cl->split) == NULL)
1133 splitid = split->common.classid;
1136 if (split == NULL || split->common.classid != splitid) {
1137 for (split = cl->tparent; split; split = split->tparent)
1138 if (split->common.classid == splitid)
1145 if (cl->split != split) {
1147 cbq_sync_defmap(cl);
1149 cl->defmap = def&mask;
1151 cl->defmap = (cl->defmap&~mask)|(def&mask);
1153 cbq_sync_defmap(cl);
1156 static void cbq_unlink_class(struct cbq_class *this)
1158 struct cbq_class *cl, **clp;
1159 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1161 qdisc_class_hash_remove(&q->clhash, &this->common);
1163 if (this->tparent) {
1172 } while ((cl = *clp) != this->sibling);
1174 if (this->tparent->children == this) {
1175 this->tparent->children = this->sibling;
1176 if (this->sibling == this)
1177 this->tparent->children = NULL;
1180 BUG_TRAP(this->sibling == this);
1184 static void cbq_link_class(struct cbq_class *this)
1186 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1187 struct cbq_class *parent = this->tparent;
1189 this->sibling = this;
1190 qdisc_class_hash_insert(&q->clhash, &this->common);
1195 if (parent->children == NULL) {
1196 parent->children = this;
1198 this->sibling = parent->children->sibling;
1199 parent->children->sibling = this;
1203 static unsigned int cbq_drop(struct Qdisc* sch)
1205 struct cbq_sched_data *q = qdisc_priv(sch);
1206 struct cbq_class *cl, *cl_head;
1210 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1211 if ((cl_head = q->active[prio]) == NULL)
1216 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1219 cbq_deactivate_class(cl);
1222 } while ((cl = cl->next_alive) != cl_head);
1228 cbq_reset(struct Qdisc* sch)
1230 struct cbq_sched_data *q = qdisc_priv(sch);
1231 struct cbq_class *cl;
1232 struct hlist_node *n;
1239 q->tx_borrowed = NULL;
1240 qdisc_watchdog_cancel(&q->watchdog);
1241 hrtimer_cancel(&q->delay_timer);
1242 q->toplevel = TC_CBQ_MAXLEVEL;
1243 q->now = psched_get_time();
1246 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1247 q->active[prio] = NULL;
1249 for (h = 0; h < q->clhash.hashsize; h++) {
1250 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1253 cl->next_alive = NULL;
1254 cl->undertime = PSCHED_PASTPERFECT;
1255 cl->avgidle = cl->maxidle;
1256 cl->deficit = cl->quantum;
1257 cl->cpriority = cl->priority;
1264 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1266 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1267 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1268 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1270 if (lss->change&TCF_CBQ_LSS_EWMA)
1271 cl->ewma_log = lss->ewma_log;
1272 if (lss->change&TCF_CBQ_LSS_AVPKT)
1273 cl->avpkt = lss->avpkt;
1274 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1275 cl->minidle = -(long)lss->minidle;
1276 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1277 cl->maxidle = lss->maxidle;
1278 cl->avgidle = lss->maxidle;
1280 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1281 cl->offtime = lss->offtime;
1285 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1287 q->nclasses[cl->priority]--;
1288 q->quanta[cl->priority] -= cl->weight;
1289 cbq_normalize_quanta(q, cl->priority);
1292 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1294 q->nclasses[cl->priority]++;
1295 q->quanta[cl->priority] += cl->weight;
1296 cbq_normalize_quanta(q, cl->priority);
1299 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1301 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1304 cl->allot = wrr->allot;
1306 cl->weight = wrr->weight;
1307 if (wrr->priority) {
1308 cl->priority = wrr->priority-1;
1309 cl->cpriority = cl->priority;
1310 if (cl->priority >= cl->priority2)
1311 cl->priority2 = TC_CBQ_MAXPRIO-1;
1318 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1320 switch (ovl->strategy) {
1321 case TC_CBQ_OVL_CLASSIC:
1322 cl->overlimit = cbq_ovl_classic;
1324 case TC_CBQ_OVL_DELAY:
1325 cl->overlimit = cbq_ovl_delay;
1327 case TC_CBQ_OVL_LOWPRIO:
1328 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1329 ovl->priority2-1 <= cl->priority)
1331 cl->priority2 = ovl->priority2-1;
1332 cl->overlimit = cbq_ovl_lowprio;
1334 case TC_CBQ_OVL_DROP:
1335 cl->overlimit = cbq_ovl_drop;
1337 case TC_CBQ_OVL_RCLASSIC:
1338 cl->overlimit = cbq_ovl_rclassic;
1343 cl->penalty = ovl->penalty;
1347 #ifdef CONFIG_NET_CLS_ACT
1348 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1350 cl->police = p->police;
1352 if (cl->q->handle) {
1353 if (p->police == TC_POLICE_RECLASSIFY)
1354 cl->q->reshape_fail = cbq_reshape_fail;
1356 cl->q->reshape_fail = NULL;
1362 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1364 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1368 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1369 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1370 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1371 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1372 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1373 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1374 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1375 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1378 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1380 struct cbq_sched_data *q = qdisc_priv(sch);
1381 struct nlattr *tb[TCA_CBQ_MAX + 1];
1382 struct tc_ratespec *r;
1385 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1389 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1392 r = nla_data(tb[TCA_CBQ_RATE]);
1394 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1397 err = qdisc_class_hash_init(&q->clhash);
1402 q->link.sibling = &q->link;
1403 q->link.common.classid = sch->handle;
1404 q->link.qdisc = sch;
1405 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1408 q->link.q = &noop_qdisc;
1410 q->link.priority = TC_CBQ_MAXPRIO-1;
1411 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1412 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1413 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1414 q->link.overlimit = cbq_ovl_classic;
1415 q->link.allot = psched_mtu(qdisc_dev(sch));
1416 q->link.quantum = q->link.allot;
1417 q->link.weight = q->link.R_tab->rate.rate;
1419 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1420 q->link.avpkt = q->link.allot/2;
1421 q->link.minidle = -0x7FFFFFFF;
1423 qdisc_watchdog_init(&q->watchdog, sch);
1424 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1425 q->delay_timer.function = cbq_undelay;
1426 q->toplevel = TC_CBQ_MAXLEVEL;
1427 q->now = psched_get_time();
1430 cbq_link_class(&q->link);
1432 if (tb[TCA_CBQ_LSSOPT])
1433 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1435 cbq_addprio(q, &q->link);
1439 qdisc_put_rtab(q->link.R_tab);
1443 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1445 unsigned char *b = skb_tail_pointer(skb);
1447 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1455 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1457 unsigned char *b = skb_tail_pointer(skb);
1458 struct tc_cbq_lssopt opt;
1461 if (cl->borrow == NULL)
1462 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1463 if (cl->share == NULL)
1464 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1465 opt.ewma_log = cl->ewma_log;
1466 opt.level = cl->level;
1467 opt.avpkt = cl->avpkt;
1468 opt.maxidle = cl->maxidle;
1469 opt.minidle = (u32)(-cl->minidle);
1470 opt.offtime = cl->offtime;
1472 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1480 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1482 unsigned char *b = skb_tail_pointer(skb);
1483 struct tc_cbq_wrropt opt;
1486 opt.allot = cl->allot;
1487 opt.priority = cl->priority+1;
1488 opt.cpriority = cl->cpriority+1;
1489 opt.weight = cl->weight;
1490 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1498 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1500 unsigned char *b = skb_tail_pointer(skb);
1501 struct tc_cbq_ovl opt;
1503 opt.strategy = cl->ovl_strategy;
1504 opt.priority2 = cl->priority2+1;
1506 opt.penalty = cl->penalty;
1507 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1515 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1517 unsigned char *b = skb_tail_pointer(skb);
1518 struct tc_cbq_fopt opt;
1520 if (cl->split || cl->defmap) {
1521 opt.split = cl->split ? cl->split->common.classid : 0;
1522 opt.defmap = cl->defmap;
1524 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1533 #ifdef CONFIG_NET_CLS_ACT
1534 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1536 unsigned char *b = skb_tail_pointer(skb);
1537 struct tc_cbq_police opt;
1540 opt.police = cl->police;
1543 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1553 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1555 if (cbq_dump_lss(skb, cl) < 0 ||
1556 cbq_dump_rate(skb, cl) < 0 ||
1557 cbq_dump_wrr(skb, cl) < 0 ||
1558 cbq_dump_ovl(skb, cl) < 0 ||
1559 #ifdef CONFIG_NET_CLS_ACT
1560 cbq_dump_police(skb, cl) < 0 ||
1562 cbq_dump_fopt(skb, cl) < 0)
1567 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1569 struct cbq_sched_data *q = qdisc_priv(sch);
1570 struct nlattr *nest;
1572 nest = nla_nest_start(skb, TCA_OPTIONS);
1574 goto nla_put_failure;
1575 if (cbq_dump_attr(skb, &q->link) < 0)
1576 goto nla_put_failure;
1577 nla_nest_end(skb, nest);
1581 nla_nest_cancel(skb, nest);
1586 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1588 struct cbq_sched_data *q = qdisc_priv(sch);
1590 q->link.xstats.avgidle = q->link.avgidle;
1591 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1595 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1596 struct sk_buff *skb, struct tcmsg *tcm)
1598 struct cbq_class *cl = (struct cbq_class*)arg;
1599 struct nlattr *nest;
1602 tcm->tcm_parent = cl->tparent->common.classid;
1604 tcm->tcm_parent = TC_H_ROOT;
1605 tcm->tcm_handle = cl->common.classid;
1606 tcm->tcm_info = cl->q->handle;
1608 nest = nla_nest_start(skb, TCA_OPTIONS);
1610 goto nla_put_failure;
1611 if (cbq_dump_attr(skb, cl) < 0)
1612 goto nla_put_failure;
1613 nla_nest_end(skb, nest);
1617 nla_nest_cancel(skb, nest);
1622 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1623 struct gnet_dump *d)
1625 struct cbq_sched_data *q = qdisc_priv(sch);
1626 struct cbq_class *cl = (struct cbq_class*)arg;
1628 cl->qstats.qlen = cl->q->q.qlen;
1629 cl->xstats.avgidle = cl->avgidle;
1630 cl->xstats.undertime = 0;
1632 if (cl->undertime != PSCHED_PASTPERFECT)
1633 cl->xstats.undertime = cl->undertime - q->now;
1635 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1636 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1637 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1640 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1643 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1646 struct cbq_class *cl = (struct cbq_class*)arg;
1650 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1652 cl->common.classid);
1656 #ifdef CONFIG_NET_CLS_ACT
1657 if (cl->police == TC_POLICE_RECLASSIFY)
1658 new->reshape_fail = cbq_reshape_fail;
1662 *old = xchg(&cl->q, new);
1663 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1665 sch_tree_unlock(sch);
1672 static struct Qdisc *
1673 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1675 struct cbq_class *cl = (struct cbq_class*)arg;
1677 return cl ? cl->q : NULL;
1680 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1682 struct cbq_class *cl = (struct cbq_class *)arg;
1684 if (cl->q->q.qlen == 0)
1685 cbq_deactivate_class(cl);
1688 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1690 struct cbq_sched_data *q = qdisc_priv(sch);
1691 struct cbq_class *cl = cbq_class_lookup(q, classid);
1695 return (unsigned long)cl;
1700 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1702 struct cbq_sched_data *q = qdisc_priv(sch);
1704 BUG_TRAP(!cl->filters);
1706 tcf_destroy_chain(&cl->filter_list);
1707 qdisc_destroy(cl->q);
1708 qdisc_put_rtab(cl->R_tab);
1709 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1715 cbq_destroy(struct Qdisc* sch)
1717 struct cbq_sched_data *q = qdisc_priv(sch);
1718 struct hlist_node *n, *next;
1719 struct cbq_class *cl;
1722 #ifdef CONFIG_NET_CLS_ACT
1726 * Filters must be destroyed first because we don't destroy the
1727 * classes from root to leaves, which means that filters can still
1728 * be bound to classes which have been destroyed already. --TGR '04
1730 for (h = 0; h < q->clhash.hashsize; h++) {
1731 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1732 tcf_destroy_chain(&cl->filter_list);
1734 for (h = 0; h < q->clhash.hashsize; h++) {
1735 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1737 cbq_destroy_class(sch, cl);
1739 qdisc_class_hash_destroy(&q->clhash);
1742 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1744 struct cbq_class *cl = (struct cbq_class*)arg;
1746 if (--cl->refcnt == 0) {
1747 #ifdef CONFIG_NET_CLS_ACT
1748 spinlock_t *root_lock = qdisc_root_lock(sch);
1749 struct cbq_sched_data *q = qdisc_priv(sch);
1751 spin_lock_bh(root_lock);
1752 if (q->rx_class == cl)
1754 spin_unlock_bh(root_lock);
1757 cbq_destroy_class(sch, cl);
1762 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1766 struct cbq_sched_data *q = qdisc_priv(sch);
1767 struct cbq_class *cl = (struct cbq_class*)*arg;
1768 struct nlattr *opt = tca[TCA_OPTIONS];
1769 struct nlattr *tb[TCA_CBQ_MAX + 1];
1770 struct cbq_class *parent;
1771 struct qdisc_rate_table *rtab = NULL;
1776 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1784 cl->tparent->common.classid != parentid)
1786 if (!cl->tparent && parentid != TC_H_ROOT)
1790 if (tb[TCA_CBQ_RATE]) {
1791 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1796 /* Change class parameters */
1799 if (cl->next_alive != NULL)
1800 cbq_deactivate_class(cl);
1803 rtab = xchg(&cl->R_tab, rtab);
1804 qdisc_put_rtab(rtab);
1807 if (tb[TCA_CBQ_LSSOPT])
1808 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1810 if (tb[TCA_CBQ_WRROPT]) {
1812 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1815 if (tb[TCA_CBQ_OVL_STRATEGY])
1816 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1818 #ifdef CONFIG_NET_CLS_ACT
1819 if (tb[TCA_CBQ_POLICE])
1820 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1823 if (tb[TCA_CBQ_FOPT])
1824 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1827 cbq_activate_class(cl);
1829 sch_tree_unlock(sch);
1832 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1833 qdisc_root_lock(sch),
1838 if (parentid == TC_H_ROOT)
1841 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1842 tb[TCA_CBQ_LSSOPT] == NULL)
1845 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1851 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1855 classid = TC_H_MAKE(sch->handle,0x8000);
1857 for (i=0; i<0x8000; i++) {
1858 if (++q->hgenerator >= 0x8000)
1860 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1866 classid = classid|q->hgenerator;
1871 parent = cbq_class_lookup(q, parentid);
1878 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1884 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1885 &pfifo_qdisc_ops, classid)))
1886 cl->q = &noop_qdisc;
1887 cl->common.classid = classid;
1888 cl->tparent = parent;
1890 cl->allot = parent->allot;
1891 cl->quantum = cl->allot;
1892 cl->weight = cl->R_tab->rate.rate;
1896 cl->borrow = cl->tparent;
1897 if (cl->tparent != &q->link)
1898 cl->share = cl->tparent;
1899 cbq_adjust_levels(parent);
1900 cl->minidle = -0x7FFFFFFF;
1901 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1902 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1903 if (cl->ewma_log==0)
1904 cl->ewma_log = q->link.ewma_log;
1906 cl->maxidle = q->link.maxidle;
1908 cl->avpkt = q->link.avpkt;
1909 cl->overlimit = cbq_ovl_classic;
1910 if (tb[TCA_CBQ_OVL_STRATEGY])
1911 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1912 #ifdef CONFIG_NET_CLS_ACT
1913 if (tb[TCA_CBQ_POLICE])
1914 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1916 if (tb[TCA_CBQ_FOPT])
1917 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1918 sch_tree_unlock(sch);
1920 qdisc_class_hash_grow(sch, &q->clhash);
1923 gen_new_estimator(&cl->bstats, &cl->rate_est,
1924 qdisc_root_lock(sch), tca[TCA_RATE]);
1926 *arg = (unsigned long)cl;
1930 qdisc_put_rtab(rtab);
1934 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1936 struct cbq_sched_data *q = qdisc_priv(sch);
1937 struct cbq_class *cl = (struct cbq_class*)arg;
1940 if (cl->filters || cl->children || cl == &q->link)
1945 qlen = cl->q->q.qlen;
1947 qdisc_tree_decrease_qlen(cl->q, qlen);
1950 cbq_deactivate_class(cl);
1952 if (q->tx_borrowed == cl)
1953 q->tx_borrowed = q->tx_class;
1954 if (q->tx_class == cl) {
1956 q->tx_borrowed = NULL;
1958 #ifdef CONFIG_NET_CLS_ACT
1959 if (q->rx_class == cl)
1963 cbq_unlink_class(cl);
1964 cbq_adjust_levels(cl->tparent);
1966 cbq_sync_defmap(cl);
1969 sch_tree_unlock(sch);
1971 if (--cl->refcnt == 0)
1972 cbq_destroy_class(sch, cl);
1977 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1979 struct cbq_sched_data *q = qdisc_priv(sch);
1980 struct cbq_class *cl = (struct cbq_class *)arg;
1985 return &cl->filter_list;
1988 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1991 struct cbq_sched_data *q = qdisc_priv(sch);
1992 struct cbq_class *p = (struct cbq_class*)parent;
1993 struct cbq_class *cl = cbq_class_lookup(q, classid);
1996 if (p && p->level <= cl->level)
1999 return (unsigned long)cl;
2004 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2006 struct cbq_class *cl = (struct cbq_class*)arg;
2011 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2013 struct cbq_sched_data *q = qdisc_priv(sch);
2014 struct cbq_class *cl;
2015 struct hlist_node *n;
2021 for (h = 0; h < q->clhash.hashsize; h++) {
2022 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2023 if (arg->count < arg->skip) {
2027 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2036 static const struct Qdisc_class_ops cbq_class_ops = {
2039 .qlen_notify = cbq_qlen_notify,
2042 .change = cbq_change_class,
2043 .delete = cbq_delete,
2045 .tcf_chain = cbq_find_tcf,
2046 .bind_tcf = cbq_bind_filter,
2047 .unbind_tcf = cbq_unbind_filter,
2048 .dump = cbq_dump_class,
2049 .dump_stats = cbq_dump_class_stats,
2052 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2054 .cl_ops = &cbq_class_ops,
2056 .priv_size = sizeof(struct cbq_sched_data),
2057 .enqueue = cbq_enqueue,
2058 .dequeue = cbq_dequeue,
2059 .requeue = cbq_requeue,
2063 .destroy = cbq_destroy,
2066 .dump_stats = cbq_dump_stats,
2067 .owner = THIS_MODULE,
2070 static int __init cbq_module_init(void)
2072 return register_qdisc(&cbq_qdisc_ops);
2074 static void __exit cbq_module_exit(void)
2076 unregister_qdisc(&cbq_qdisc_ops);
2078 module_init(cbq_module_init)
2079 module_exit(cbq_module_exit)
2080 MODULE_LICENSE("GPL");