2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real-time classes
48 have small rates. Note that the statement of [3] is
49 incomplete: delay may actually be estimated even if the class
50 per-round allotment is less than MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
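   As a purely illustrative plug-in of invented numbers (they are not
   taken from the code or from [1]-[4], and B is assumed to denote the
   link bandwidth): with MTU = 1500 bytes, B = 1.25e6 bytes/sec
   (a 10 Mbit/s link), k = 2, W = 1000 bytes, r_i = 0.1 and r = 0.5,
   we get W*r_i = 100 bytes and [MTU/(W*r_i)] = 15, hence

   delay_i <= (15*500 + 500 + 2*1500)/1.25e6 = 11000/1.25e6 ~= 8.8 msec.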
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate true class
65 idle time. The workaround is to treat the next dequeue event
66 as a sign that the previous packet has finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it holds.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
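/* A small hypothetical illustration of the idle-time estimate described
   above (the numbers are invented, not derived from the code): suppose a
   1000-byte packet is dequeued at t = 0, the class rate table says it
   takes 0.8 msec to transmit at the class's allocated rate, and the next
   dequeue happens at t = 2.0 msec. The estimated class idle time is then

       idle = (2.0 - 0) msec - 0.8 msec = 1.2 msec,

   i.e. (time between dequeues) minus (virtual transmission time),
   essentially the quantity cbq_update() computes below as
   (now - last) - L2T(cl, len). */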
71 struct cbq_sched_data;
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
210 Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
211 so that classification resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
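   A purely illustrative walk-through (the handles are hypothetical):
   suppose the qdisc handle is 1:0. A packet whose skb->priority equals
   classid 1:10 is matched directly at Step 1. Otherwise the filter list
   of the current head is consulted; a filter may return a class
   directly, a full classid to look up, or a bare priority, in which
   case the head's defmap (indexed by that value as a TC_PRIO_* index)
   supplies the class, falling back to the TC_PRIO_BESTEFFORT entry.
   If the result is not a leaf, the procedure repeats with it as the
   new head (Step 3+n below).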
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_BYPASS;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
270 * Step 3+n. If classifier selected a link sharing class,
271 * apply the agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
292 A packet has just been enqueued on an empty class.
293 cbq_activate_class adds the class to the tail of the active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
316 Unlink the class from the active chain.
317 Note that the same unlinking is also done directly in cbq_dequeue*
318 during the round-robin procedure.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
374 int uninitialized_var(ret);
375 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
377 #ifdef CONFIG_NET_CLS_ACT
381 if (ret == NET_XMIT_BYPASS)
387 #ifdef CONFIG_NET_CLS_ACT
388 cl->q->__parent = sch;
390 ret = qdisc_enqueue(skb, cl->q);
391 if (ret == NET_XMIT_SUCCESS) {
393 sch->bstats.packets++;
394 sch->bstats.bytes += len;
395 cbq_mark_toplevel(q, cl);
397 cbq_activate_class(cl);
402 cbq_mark_toplevel(q, cl);
408 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
410 struct cbq_sched_data *q = qdisc_priv(sch);
411 struct cbq_class *cl;
414 if ((cl = q->tx_class) == NULL) {
421 cbq_mark_toplevel(q, cl);
423 #ifdef CONFIG_NET_CLS_ACT
425 cl->q->__parent = sch;
427 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
429 sch->qstats.requeues++;
431 cbq_activate_class(cl);
439 /* Overlimit actions */
441 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
443 static void cbq_ovl_classic(struct cbq_class *cl)
445 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
446 psched_tdiff_t delay = cl->undertime - q->now;
449 delay += cl->offtime;
452 Class goes to sleep, so that it will have no
453 chance to work off its avgidle. Let's forgive it 8)
455 BTW cbq-2.0 has a bug in this
456 place; apparently they forgot to shift it by cl->ewma_log.
459 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
460 if (cl->avgidle < cl->minidle)
461 cl->avgidle = cl->minidle;
464 cl->undertime = q->now + delay;
466 cl->xstats.overactions++;
469 if (q->wd_expires == 0 || q->wd_expires > delay)
470 q->wd_expires = delay;
472 /* Dirty work! We must schedule wakeups based on
473 real available rate, rather than leaf rate,
474 which may be tiny (even zero).
476 if (q->toplevel == TC_CBQ_MAXLEVEL) {
478 psched_tdiff_t base_delay = q->wd_expires;
480 for (b = cl->borrow; b; b = b->borrow) {
481 delay = b->undertime - q->now;
482 if (delay < base_delay) {
489 q->wd_expires = base_delay;
493 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
497 static void cbq_ovl_rclassic(struct cbq_class *cl)
499 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
500 struct cbq_class *this = cl;
503 if (cl->level > q->toplevel) {
507 } while ((cl = cl->borrow) != NULL);
514 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
516 static void cbq_ovl_delay(struct cbq_class *cl)
518 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
519 psched_tdiff_t delay = cl->undertime - q->now;
522 psched_time_t sched = q->now;
525 delay += cl->offtime;
527 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
528 if (cl->avgidle < cl->minidle)
529 cl->avgidle = cl->minidle;
530 cl->undertime = q->now + delay;
533 sched += delay + cl->penalty;
534 cl->penalized = sched;
535 cl->cpriority = TC_CBQ_MAXPRIO;
536 q->pmask |= (1<<TC_CBQ_MAXPRIO);
538 expires = ktime_set(0, 0);
539 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
540 if (hrtimer_try_to_cancel(&q->delay_timer) &&
541 ktime_to_ns(ktime_sub(q->delay_timer.expires,
543 q->delay_timer.expires = expires;
544 hrtimer_restart(&q->delay_timer);
546 cl->xstats.overactions++;
551 if (q->wd_expires == 0 || q->wd_expires > delay)
552 q->wd_expires = delay;
555 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
557 static void cbq_ovl_lowprio(struct cbq_class *cl)
559 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
561 cl->penalized = q->now + cl->penalty;
563 if (cl->cpriority != cl->priority2) {
564 cl->cpriority = cl->priority2;
565 q->pmask |= (1<<cl->cpriority);
566 cl->xstats.overactions++;
571 /* TC_CBQ_OVL_DROP: penalize class by dropping */
573 static void cbq_ovl_drop(struct cbq_class *cl)
575 if (cl->q->ops->drop)
576 if (cl->q->ops->drop(cl->q))
578 cl->xstats.overactions++;
582 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
585 struct cbq_class *cl;
586 struct cbq_class *cl_prev = q->active[prio];
587 psched_time_t sched = now;
593 cl = cl_prev->next_alive;
594 if (now - cl->penalized > 0) {
595 cl_prev->next_alive = cl->next_alive;
596 cl->next_alive = NULL;
597 cl->cpriority = cl->priority;
599 cbq_activate_class(cl);
601 if (cl == q->active[prio]) {
602 q->active[prio] = cl_prev;
603 if (cl == q->active[prio]) {
604 q->active[prio] = NULL;
609 cl = cl_prev->next_alive;
610 } else if (sched - cl->penalized > 0)
611 sched = cl->penalized;
612 } while ((cl_prev = cl) != q->active[prio]);
617 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
619 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
621 struct Qdisc *sch = q->watchdog.qdisc;
623 psched_tdiff_t delay = 0;
626 now = psched_get_time();
632 int prio = ffz(~pmask);
637 tmp = cbq_undelay_prio(q, prio, now);
640 if (tmp < delay || delay == 0)
648 time = ktime_set(0, 0);
649 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
650 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
653 sch->flags &= ~TCQ_F_THROTTLED;
654 __netif_schedule(sch);
655 return HRTIMER_NORESTART;
658 #ifdef CONFIG_NET_CLS_ACT
659 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
662 struct Qdisc *sch = child->__parent;
663 struct cbq_sched_data *q = qdisc_priv(sch);
664 struct cbq_class *cl = q->rx_class;
668 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
670 cbq_mark_toplevel(q, cl);
673 cl->q->__parent = sch;
675 if (qdisc_enqueue(skb, cl->q) == 0) {
677 sch->bstats.packets++;
678 sch->bstats.bytes += len;
680 cbq_activate_class(cl);
693 It is a mission-critical procedure.
695 We "regenerate" the toplevel cutoff if the transmitting class
696 has backlog and is not regulated. This is not part of the
697 original CBQ description, but it looks more reasonable.
698 Probably it is wrong; this question needs further investigation.
701 static __inline__ void
702 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
703 struct cbq_class *borrowed)
705 if (cl && q->toplevel >= borrowed->level) {
706 if (cl->q->q.qlen > 1) {
708 if (borrowed->undertime == PSCHED_PASTPERFECT) {
709 q->toplevel = borrowed->level;
712 } while ((borrowed = borrowed->borrow) != NULL);
715 /* This is not necessary now. Uncommenting it
716 would save CPU cycles but decrease fairness.
718 q->toplevel = TC_CBQ_MAXLEVEL;
724 cbq_update(struct cbq_sched_data *q)
726 struct cbq_class *this = q->tx_class;
727 struct cbq_class *cl = this;
732 for ( ; cl; cl = cl->share) {
733 long avgidle = cl->avgidle;
736 cl->bstats.packets++;
737 cl->bstats.bytes += len;
740 (now - last) is the total time between packet right edges.
741 (last_pktlen/rate) is the "virtual" busy time, so that
743 idle = (now - last) - last_pktlen/rate
746 idle = q->now - cl->last;
747 if ((unsigned long)idle > 128*1024*1024) {
748 avgidle = cl->maxidle;
750 idle -= L2T(cl, len);
752 /* true_avgidle := (1-W)*true_avgidle + W*idle,
753 where W=2^{-ewma_log}. But cl->avgidle is scaled:
754 cl->avgidle == true_avgidle/W,
757 avgidle += idle - (avgidle>>cl->ewma_log);
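/* A hypothetical worked instance of the scaled EWMA above (numbers
   invented for illustration): with ewma_log = 5, W = 1/32. Starting
   from avgidle = 0, one overlimit packet with idle = -64 (in psched
   ticks) gives

       avgidle = 0 + (-64) - (0 >> 5) = -64,

   i.e. true_avgidle = avgidle * W = -2. The undertime step further
   below then computes

       (-avgidle) - ((-avgidle) >> ewma_log) = 64 - 2 = 62,

   which equals (1/W - 1) * (-true_avgidle) = 31 * 2 = 62, consistent
   with the derivation in the comment below. */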
761 /* Overlimit or at-limit */
763 if (avgidle < cl->minidle)
764 avgidle = cl->minidle;
766 cl->avgidle = avgidle;
768 /* Calculate the expected time when this class
769 will be allowed to send.
771 (1-W)*true_avgidle + W*delay = 0, i.e.
772 idle = (1/W - 1)*(-true_avgidle)
774 idle = (1 - W)*(-cl->avgidle);
776 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
780 To maintain the rate allocated to the class,
781 we add to undertime the virtual clock time
782 necessary to complete the transmitted packet.
783 (len/phys_bandwidth has already elapsed by
784 the moment cbq_update is called.)
787 idle -= L2T(&q->link, len);
788 idle += L2T(cl, len);
790 cl->undertime = q->now + idle;
794 cl->undertime = PSCHED_PASTPERFECT;
795 if (avgidle > cl->maxidle)
796 cl->avgidle = cl->maxidle;
798 cl->avgidle = avgidle;
803 cbq_update_toplevel(q, this, q->tx_borrowed);
806 static __inline__ struct cbq_class *
807 cbq_under_limit(struct cbq_class *cl)
809 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
810 struct cbq_class *this_cl = cl;
812 if (cl->tparent == NULL)
815 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
821 /* This is a very suspicious place. Currently the overlimit
822 action is generated for non-bounded classes
823 only if the link is completely congested.
824 Though this agrees with the ancestor-only paradigm,
825 it looks very stupid. In particular,
826 it means that this chunk of code will either
827 never be called or will result in strong amplification
828 of burstiness. Dangerous and silly, but
829 no other solution exists.
831 if ((cl = cl->borrow) == NULL) {
832 this_cl->qstats.overlimits++;
833 this_cl->overlimit(this_cl);
836 if (cl->level > q->toplevel)
838 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
844 static __inline__ struct sk_buff *
845 cbq_dequeue_prio(struct Qdisc *sch, int prio)
847 struct cbq_sched_data *q = qdisc_priv(sch);
848 struct cbq_class *cl_tail, *cl_prev, *cl;
852 cl_tail = cl_prev = q->active[prio];
853 cl = cl_prev->next_alive;
860 struct cbq_class *borrow = cl;
863 (borrow = cbq_under_limit(cl)) == NULL)
866 if (cl->deficit <= 0) {
867 /* Class exhausted its allotment for
868 this round. Switch to the next one.
871 cl->deficit += cl->quantum;
875 skb = cl->q->dequeue(cl->q);
877 /* Class did not give us any skb :-(
878 This can occur even if cl->q->q.qlen != 0,
879 e.g. if cl->q is a "tbf" qdisc.
884 cl->deficit -= skb->len;
886 q->tx_borrowed = borrow;
888 #ifndef CBQ_XSTATS_BORROWS_BYTES
889 borrow->xstats.borrows++;
890 cl->xstats.borrows++;
892 borrow->xstats.borrows += skb->len;
893 cl->xstats.borrows += skb->len;
896 q->tx_len = skb->len;
898 if (cl->deficit <= 0) {
899 q->active[prio] = cl;
901 cl->deficit += cl->quantum;
906 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
907 /* Class is empty or penalized.
908 Unlink it from the active chain.
910 cl_prev->next_alive = cl->next_alive;
911 cl->next_alive = NULL;
913 /* Did cl_tail point to it? */
918 /* Was it the last class in this band? */
921 q->active[prio] = NULL;
922 q->activemask &= ~(1<<prio);
924 cbq_activate_class(cl);
928 q->active[prio] = cl_tail;
931 cbq_activate_class(cl);
939 } while (cl_prev != cl_tail);
942 q->active[prio] = cl_prev;
947 static __inline__ struct sk_buff *
948 cbq_dequeue_1(struct Qdisc *sch)
950 struct cbq_sched_data *q = qdisc_priv(sch);
954 activemask = q->activemask&0xFF;
956 int prio = ffz(~activemask);
957 activemask &= ~(1<<prio);
958 skb = cbq_dequeue_prio(sch, prio);
965 static struct sk_buff *
966 cbq_dequeue(struct Qdisc *sch)
969 struct cbq_sched_data *q = qdisc_priv(sch);
973 now = psched_get_time();
974 incr = now - q->now_rt;
977 psched_tdiff_t incr2;
978 /* Time integrator. We calculate EOS time
979 by adding the expected packet transmission time.
980 If real time is greater, we warp the artificial clock, so that
983 cbq_time = max(real_time, work);
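   As a hypothetical illustration (numbers invented): if the last
   transmitted packet represents 0.8 msec of work on the link but only
   0.5 msec of real time has elapsed since the previous dequeue, the
   artificial clock is pushed forward by the work time; if instead
   1.2 msec of real time has elapsed, real time wins. Either way the
   artificial clock tracks max(real_time, work).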
985 incr2 = L2T(&q->link, q->tx_len);
988 if ((incr -= incr2) < 0)
997 skb = cbq_dequeue_1(sch);
1000 sch->flags &= ~TCQ_F_THROTTLED;
1004 /* All the classes are overlimit.
1008 1. Scheduler is empty.
1009 2. Toplevel cutoff inhibited borrowing.
1010 3. Root class is overlimit.
1012 Reset the 2nd and 3rd conditions and retry.
1014 Note that NS and cbq-2.0 are buggy here: peeking
1015 an arbitrary class is appropriate for ancestor-only
1016 sharing, but not for the toplevel algorithm.
1018 Our version is better but slower, because it requires
1019 two passes; that is unavoidable with top-level sharing.
1022 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1023 q->link.undertime == PSCHED_PASTPERFECT)
1026 q->toplevel = TC_CBQ_MAXLEVEL;
1027 q->link.undertime = PSCHED_PASTPERFECT;
1030 /* No packets in the scheduler, or nobody wants to give them to us :-(
1031 Sigh... start the watchdog timer in the latter case. */
1034 sch->qstats.overlimits++;
1036 qdisc_watchdog_schedule(&q->watchdog,
1037 now + q->wd_expires);
1042 /* CBQ class maintenance routines */
1044 static void cbq_adjust_levels(struct cbq_class *this)
1051 struct cbq_class *cl;
1053 if ((cl = this->children) != NULL) {
1055 if (cl->level > level)
1057 } while ((cl = cl->sibling) != this->children);
1059 this->level = level+1;
1060 } while ((this = this->tparent) != NULL);
1063 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1065 struct cbq_class *cl;
1066 struct hlist_node *n;
1069 if (q->quanta[prio] == 0)
1072 for (h = 0; h < q->clhash.hashsize; h++) {
1073 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1074 /* BUGGGG... Beware! This expression suffers from
1075 arithmetic overflows!
1077 if (cl->priority == prio) {
1078 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1081 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1082 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1083 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
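/* A hypothetical worked example of the normalization above (all values
   invented): with two classes in this priority band (nclasses = 2),
   weights 2 and 1 (so quanta[prio] = 3) and allot = 1514, the formula
   yields quanta of (2*1514*2)/3 = 2018 and (1*1514*2)/3 = 1009 bytes,
   i.e. per-round allotments proportional to weight and summing to
   roughly nclasses * allot. */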
1089 static void cbq_sync_defmap(struct cbq_class *cl)
1091 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1092 struct cbq_class *split = cl->split;
1099 for (i=0; i<=TC_PRIO_MAX; i++) {
1100 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1101 split->defaults[i] = NULL;
1104 for (i=0; i<=TC_PRIO_MAX; i++) {
1105 int level = split->level;
1107 if (split->defaults[i])
1110 for (h = 0; h < q->clhash.hashsize; h++) {
1111 struct hlist_node *n;
1112 struct cbq_class *c;
1114 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1116 if (c->split == split && c->level < level &&
1118 split->defaults[i] = c;
1126 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1128 struct cbq_class *split = NULL;
1131 if ((split = cl->split) == NULL)
1133 splitid = split->common.classid;
1136 if (split == NULL || split->common.classid != splitid) {
1137 for (split = cl->tparent; split; split = split->tparent)
1138 if (split->common.classid == splitid)
1145 if (cl->split != split) {
1147 cbq_sync_defmap(cl);
1149 cl->defmap = def&mask;
1151 cl->defmap = (cl->defmap&~mask)|(def&mask);
1153 cbq_sync_defmap(cl);
1156 static void cbq_unlink_class(struct cbq_class *this)
1158 struct cbq_class *cl, **clp;
1159 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1161 qdisc_class_hash_remove(&q->clhash, &this->common);
1163 if (this->tparent) {
1172 } while ((cl = *clp) != this->sibling);
1174 if (this->tparent->children == this) {
1175 this->tparent->children = this->sibling;
1176 if (this->sibling == this)
1177 this->tparent->children = NULL;
1180 BUG_TRAP(this->sibling == this);
1184 static void cbq_link_class(struct cbq_class *this)
1186 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1187 struct cbq_class *parent = this->tparent;
1189 this->sibling = this;
1190 qdisc_class_hash_insert(&q->clhash, &this->common);
1195 if (parent->children == NULL) {
1196 parent->children = this;
1198 this->sibling = parent->children->sibling;
1199 parent->children->sibling = this;
1203 static unsigned int cbq_drop(struct Qdisc* sch)
1205 struct cbq_sched_data *q = qdisc_priv(sch);
1206 struct cbq_class *cl, *cl_head;
1210 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1211 if ((cl_head = q->active[prio]) == NULL)
1216 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1219 cbq_deactivate_class(cl);
1222 } while ((cl = cl->next_alive) != cl_head);
1228 cbq_reset(struct Qdisc* sch)
1230 struct cbq_sched_data *q = qdisc_priv(sch);
1231 struct cbq_class *cl;
1232 struct hlist_node *n;
1239 q->tx_borrowed = NULL;
1240 qdisc_watchdog_cancel(&q->watchdog);
1241 hrtimer_cancel(&q->delay_timer);
1242 q->toplevel = TC_CBQ_MAXLEVEL;
1243 q->now = psched_get_time();
1246 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1247 q->active[prio] = NULL;
1249 for (h = 0; h < q->clhash.hashsize; h++) {
1250 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1253 cl->next_alive = NULL;
1254 cl->undertime = PSCHED_PASTPERFECT;
1255 cl->avgidle = cl->maxidle;
1256 cl->deficit = cl->quantum;
1257 cl->cpriority = cl->priority;
1264 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1266 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1267 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1268 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1270 if (lss->change&TCF_CBQ_LSS_EWMA)
1271 cl->ewma_log = lss->ewma_log;
1272 if (lss->change&TCF_CBQ_LSS_AVPKT)
1273 cl->avpkt = lss->avpkt;
1274 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1275 cl->minidle = -(long)lss->minidle;
1276 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1277 cl->maxidle = lss->maxidle;
1278 cl->avgidle = lss->maxidle;
1280 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1281 cl->offtime = lss->offtime;
1285 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1287 q->nclasses[cl->priority]--;
1288 q->quanta[cl->priority] -= cl->weight;
1289 cbq_normalize_quanta(q, cl->priority);
1292 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1294 q->nclasses[cl->priority]++;
1295 q->quanta[cl->priority] += cl->weight;
1296 cbq_normalize_quanta(q, cl->priority);
1299 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1301 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1304 cl->allot = wrr->allot;
1306 cl->weight = wrr->weight;
1307 if (wrr->priority) {
1308 cl->priority = wrr->priority-1;
1309 cl->cpriority = cl->priority;
1310 if (cl->priority >= cl->priority2)
1311 cl->priority2 = TC_CBQ_MAXPRIO-1;
1318 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1320 switch (ovl->strategy) {
1321 case TC_CBQ_OVL_CLASSIC:
1322 cl->overlimit = cbq_ovl_classic;
1324 case TC_CBQ_OVL_DELAY:
1325 cl->overlimit = cbq_ovl_delay;
1327 case TC_CBQ_OVL_LOWPRIO:
1328 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1329 ovl->priority2-1 <= cl->priority)
1331 cl->priority2 = ovl->priority2-1;
1332 cl->overlimit = cbq_ovl_lowprio;
1334 case TC_CBQ_OVL_DROP:
1335 cl->overlimit = cbq_ovl_drop;
1337 case TC_CBQ_OVL_RCLASSIC:
1338 cl->overlimit = cbq_ovl_rclassic;
1343 cl->penalty = ovl->penalty;
1347 #ifdef CONFIG_NET_CLS_ACT
1348 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1350 cl->police = p->police;
1352 if (cl->q->handle) {
1353 if (p->police == TC_POLICE_RECLASSIFY)
1354 cl->q->reshape_fail = cbq_reshape_fail;
1356 cl->q->reshape_fail = NULL;
1362 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1364 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1368 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1369 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1370 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1371 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1372 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1373 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1374 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1375 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1378 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1380 struct cbq_sched_data *q = qdisc_priv(sch);
1381 struct nlattr *tb[TCA_CBQ_MAX + 1];
1382 struct tc_ratespec *r;
1385 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1389 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1392 r = nla_data(tb[TCA_CBQ_RATE]);
1394 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1397 err = qdisc_class_hash_init(&q->clhash);
1402 q->link.sibling = &q->link;
1403 q->link.common.classid = sch->handle;
1404 q->link.qdisc = sch;
1405 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1408 q->link.q = &noop_qdisc;
1410 q->link.priority = TC_CBQ_MAXPRIO-1;
1411 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1412 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1413 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1414 q->link.overlimit = cbq_ovl_classic;
1415 q->link.allot = psched_mtu(qdisc_dev(sch));
1416 q->link.quantum = q->link.allot;
1417 q->link.weight = q->link.R_tab->rate.rate;
1419 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1420 q->link.avpkt = q->link.allot/2;
1421 q->link.minidle = -0x7FFFFFFF;
1423 qdisc_watchdog_init(&q->watchdog, sch);
1424 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1425 q->delay_timer.function = cbq_undelay;
1426 q->toplevel = TC_CBQ_MAXLEVEL;
1427 q->now = psched_get_time();
1430 cbq_link_class(&q->link);
1432 if (tb[TCA_CBQ_LSSOPT])
1433 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1435 cbq_addprio(q, &q->link);
1439 qdisc_put_rtab(q->link.R_tab);
1443 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1445 unsigned char *b = skb_tail_pointer(skb);
1447 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1455 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1457 unsigned char *b = skb_tail_pointer(skb);
1458 struct tc_cbq_lssopt opt;
1461 if (cl->borrow == NULL)
1462 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1463 if (cl->share == NULL)
1464 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1465 opt.ewma_log = cl->ewma_log;
1466 opt.level = cl->level;
1467 opt.avpkt = cl->avpkt;
1468 opt.maxidle = cl->maxidle;
1469 opt.minidle = (u32)(-cl->minidle);
1470 opt.offtime = cl->offtime;
1472 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1480 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1482 unsigned char *b = skb_tail_pointer(skb);
1483 struct tc_cbq_wrropt opt;
1486 opt.allot = cl->allot;
1487 opt.priority = cl->priority+1;
1488 opt.cpriority = cl->cpriority+1;
1489 opt.weight = cl->weight;
1490 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1498 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1500 unsigned char *b = skb_tail_pointer(skb);
1501 struct tc_cbq_ovl opt;
1503 opt.strategy = cl->ovl_strategy;
1504 opt.priority2 = cl->priority2+1;
1506 opt.penalty = cl->penalty;
1507 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1515 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1517 unsigned char *b = skb_tail_pointer(skb);
1518 struct tc_cbq_fopt opt;
1520 if (cl->split || cl->defmap) {
1521 opt.split = cl->split ? cl->split->common.classid : 0;
1522 opt.defmap = cl->defmap;
1524 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1533 #ifdef CONFIG_NET_CLS_ACT
1534 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1536 unsigned char *b = skb_tail_pointer(skb);
1537 struct tc_cbq_police opt;
1540 opt.police = cl->police;
1543 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1553 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1555 if (cbq_dump_lss(skb, cl) < 0 ||
1556 cbq_dump_rate(skb, cl) < 0 ||
1557 cbq_dump_wrr(skb, cl) < 0 ||
1558 cbq_dump_ovl(skb, cl) < 0 ||
1559 #ifdef CONFIG_NET_CLS_ACT
1560 cbq_dump_police(skb, cl) < 0 ||
1562 cbq_dump_fopt(skb, cl) < 0)
1567 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1569 struct cbq_sched_data *q = qdisc_priv(sch);
1570 struct nlattr *nest;
1572 nest = nla_nest_start(skb, TCA_OPTIONS);
1574 goto nla_put_failure;
1575 if (cbq_dump_attr(skb, &q->link) < 0)
1576 goto nla_put_failure;
1577 nla_nest_end(skb, nest);
1581 nla_nest_cancel(skb, nest);
1586 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1588 struct cbq_sched_data *q = qdisc_priv(sch);
1590 q->link.xstats.avgidle = q->link.avgidle;
1591 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1595 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1596 struct sk_buff *skb, struct tcmsg *tcm)
1598 struct cbq_class *cl = (struct cbq_class*)arg;
1599 struct nlattr *nest;
1602 tcm->tcm_parent = cl->tparent->common.classid;
1604 tcm->tcm_parent = TC_H_ROOT;
1605 tcm->tcm_handle = cl->common.classid;
1606 tcm->tcm_info = cl->q->handle;
1608 nest = nla_nest_start(skb, TCA_OPTIONS);
1610 goto nla_put_failure;
1611 if (cbq_dump_attr(skb, cl) < 0)
1612 goto nla_put_failure;
1613 nla_nest_end(skb, nest);
1617 nla_nest_cancel(skb, nest);
1622 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1623 struct gnet_dump *d)
1625 struct cbq_sched_data *q = qdisc_priv(sch);
1626 struct cbq_class *cl = (struct cbq_class*)arg;
1628 cl->qstats.qlen = cl->q->q.qlen;
1629 cl->xstats.avgidle = cl->avgidle;
1630 cl->xstats.undertime = 0;
1632 if (cl->undertime != PSCHED_PASTPERFECT)
1633 cl->xstats.undertime = cl->undertime - q->now;
1635 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1636 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1637 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1640 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1643 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1646 struct cbq_class *cl = (struct cbq_class*)arg;
1650 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1652 cl->common.classid);
1656 #ifdef CONFIG_NET_CLS_ACT
1657 if (cl->police == TC_POLICE_RECLASSIFY)
1658 new->reshape_fail = cbq_reshape_fail;
1662 *old = xchg(&cl->q, new);
1663 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1665 sch_tree_unlock(sch);
1672 static struct Qdisc *
1673 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1675 struct cbq_class *cl = (struct cbq_class*)arg;
1677 return cl ? cl->q : NULL;
1680 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1682 struct cbq_class *cl = (struct cbq_class *)arg;
1684 if (cl->q->q.qlen == 0)
1685 cbq_deactivate_class(cl);
1688 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1690 struct cbq_sched_data *q = qdisc_priv(sch);
1691 struct cbq_class *cl = cbq_class_lookup(q, classid);
1695 return (unsigned long)cl;
1700 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1702 struct cbq_sched_data *q = qdisc_priv(sch);
1704 BUG_TRAP(!cl->filters);
1706 tcf_destroy_chain(&cl->filter_list);
1707 qdisc_destroy(cl->q);
1708 qdisc_put_rtab(cl->R_tab);
1709 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1715 cbq_destroy(struct Qdisc* sch)
1717 struct cbq_sched_data *q = qdisc_priv(sch);
1718 struct hlist_node *n, *next;
1719 struct cbq_class *cl;
1722 #ifdef CONFIG_NET_CLS_ACT
1726 * Filters must be destroyed first because we don't destroy the
1727 * classes from root to leaves, which means that filters can still
1728 * be bound to classes which have been destroyed already. --TGR '04
1730 for (h = 0; h < q->clhash.hashsize; h++) {
1731 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1732 tcf_destroy_chain(&cl->filter_list);
1734 for (h = 0; h < q->clhash.hashsize; h++) {
1735 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1737 cbq_destroy_class(sch, cl);
1739 qdisc_class_hash_destroy(&q->clhash);
1742 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1744 struct cbq_class *cl = (struct cbq_class*)arg;
1746 if (--cl->refcnt == 0) {
1747 #ifdef CONFIG_NET_CLS_ACT
1748 spinlock_t *root_lock = qdisc_root_lock(sch);
1749 struct cbq_sched_data *q = qdisc_priv(sch);
1751 spin_lock_bh(root_lock);
1752 if (q->rx_class == cl)
1754 spin_unlock_bh(root_lock);
1757 cbq_destroy_class(sch, cl);
1762 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1766 struct cbq_sched_data *q = qdisc_priv(sch);
1767 struct cbq_class *cl = (struct cbq_class*)*arg;
1768 struct nlattr *opt = tca[TCA_OPTIONS];
1769 struct nlattr *tb[TCA_CBQ_MAX + 1];
1770 struct cbq_class *parent;
1771 struct qdisc_rate_table *rtab = NULL;
1776 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1784 cl->tparent->common.classid != parentid)
1786 if (!cl->tparent && parentid != TC_H_ROOT)
1790 if (tb[TCA_CBQ_RATE]) {
1791 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1796 /* Change class parameters */
1799 if (cl->next_alive != NULL)
1800 cbq_deactivate_class(cl);
1803 rtab = xchg(&cl->R_tab, rtab);
1804 qdisc_put_rtab(rtab);
1807 if (tb[TCA_CBQ_LSSOPT])
1808 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1810 if (tb[TCA_CBQ_WRROPT]) {
1812 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1815 if (tb[TCA_CBQ_OVL_STRATEGY])
1816 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1818 #ifdef CONFIG_NET_CLS_ACT
1819 if (tb[TCA_CBQ_POLICE])
1820 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1823 if (tb[TCA_CBQ_FOPT])
1824 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1827 cbq_activate_class(cl);
1829 sch_tree_unlock(sch);
1832 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1833 qdisc_root_lock(sch),
1838 if (parentid == TC_H_ROOT)
1841 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1842 tb[TCA_CBQ_LSSOPT] == NULL)
1845 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1851 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1855 classid = TC_H_MAKE(sch->handle,0x8000);
1857 for (i=0; i<0x8000; i++) {
1858 if (++q->hgenerator >= 0x8000)
1860 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1866 classid = classid|q->hgenerator;
1871 parent = cbq_class_lookup(q, parentid);
1878 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1884 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
1885 &pfifo_qdisc_ops, classid)))
1886 cl->q = &noop_qdisc;
1887 cl->common.classid = classid;
1888 cl->tparent = parent;
1890 cl->allot = parent->allot;
1891 cl->quantum = cl->allot;
1892 cl->weight = cl->R_tab->rate.rate;
1896 cl->borrow = cl->tparent;
1897 if (cl->tparent != &q->link)
1898 cl->share = cl->tparent;
1899 cbq_adjust_levels(parent);
1900 cl->minidle = -0x7FFFFFFF;
1901 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1902 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1903 if (cl->ewma_log==0)
1904 cl->ewma_log = q->link.ewma_log;
1906 cl->maxidle = q->link.maxidle;
1908 cl->avpkt = q->link.avpkt;
1909 cl->overlimit = cbq_ovl_classic;
1910 if (tb[TCA_CBQ_OVL_STRATEGY])
1911 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1912 #ifdef CONFIG_NET_CLS_ACT
1913 if (tb[TCA_CBQ_POLICE])
1914 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1916 if (tb[TCA_CBQ_FOPT])
1917 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1918 sch_tree_unlock(sch);
1920 qdisc_class_hash_grow(sch, &q->clhash);
1923 gen_new_estimator(&cl->bstats, &cl->rate_est,
1924 qdisc_root_lock(sch), tca[TCA_RATE]);
1926 *arg = (unsigned long)cl;
1930 qdisc_put_rtab(rtab);
1934 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1936 struct cbq_sched_data *q = qdisc_priv(sch);
1937 struct cbq_class *cl = (struct cbq_class*)arg;
1940 if (cl->filters || cl->children || cl == &q->link)
1945 qlen = cl->q->q.qlen;
1947 qdisc_tree_decrease_qlen(cl->q, qlen);
1950 cbq_deactivate_class(cl);
1952 if (q->tx_borrowed == cl)
1953 q->tx_borrowed = q->tx_class;
1954 if (q->tx_class == cl) {
1956 q->tx_borrowed = NULL;
1958 #ifdef CONFIG_NET_CLS_ACT
1959 if (q->rx_class == cl)
1963 cbq_unlink_class(cl);
1964 cbq_adjust_levels(cl->tparent);
1966 cbq_sync_defmap(cl);
1969 sch_tree_unlock(sch);
1971 if (--cl->refcnt == 0)
1972 cbq_destroy_class(sch, cl);
1977 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1979 struct cbq_sched_data *q = qdisc_priv(sch);
1980 struct cbq_class *cl = (struct cbq_class *)arg;
1985 return &cl->filter_list;
1988 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1991 struct cbq_sched_data *q = qdisc_priv(sch);
1992 struct cbq_class *p = (struct cbq_class*)parent;
1993 struct cbq_class *cl = cbq_class_lookup(q, classid);
1996 if (p && p->level <= cl->level)
1999 return (unsigned long)cl;
2004 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2006 struct cbq_class *cl = (struct cbq_class*)arg;
2011 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2013 struct cbq_sched_data *q = qdisc_priv(sch);
2014 struct cbq_class *cl;
2015 struct hlist_node *n;
2021 for (h = 0; h < q->clhash.hashsize; h++) {
2022 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2023 if (arg->count < arg->skip) {
2027 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2036 static const struct Qdisc_class_ops cbq_class_ops = {
2039 .qlen_notify = cbq_qlen_notify,
2042 .change = cbq_change_class,
2043 .delete = cbq_delete,
2045 .tcf_chain = cbq_find_tcf,
2046 .bind_tcf = cbq_bind_filter,
2047 .unbind_tcf = cbq_unbind_filter,
2048 .dump = cbq_dump_class,
2049 .dump_stats = cbq_dump_class_stats,
2052 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2054 .cl_ops = &cbq_class_ops,
2056 .priv_size = sizeof(struct cbq_sched_data),
2057 .enqueue = cbq_enqueue,
2058 .dequeue = cbq_dequeue,
2059 .requeue = cbq_requeue,
2063 .destroy = cbq_destroy,
2066 .dump_stats = cbq_dump_stats,
2067 .owner = THIS_MODULE,
2070 static int __init cbq_module_init(void)
2072 return register_qdisc(&cbq_qdisc_ops);
2074 static void __exit cbq_module_exit(void)
2076 unregister_qdisc(&cbq_qdisc_ops);
2078 module_init(cbq_module_init)
2079 module_exit(cbq_module_exit)
2080 MODULE_LICENSE("GPL");