2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 Anyone checking this code against the LBL version should
42 take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than the MTU, which is always the case when real-time classes
48 have small rates. Note that the statement in [3] is
49 incomplete: delay may actually be estimated even if the class
50 per-round allotment is less than the MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places that look like wrong translations
61 from NS. Readers are encouraged to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate true class
65 idle time. The workaround is to treat the next dequeue event
66 as a sign that the previous packet has finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it holds.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
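/* For orientation, a minimal sketch (illustration only, not kernel code) of
   the per-class link-sharing bookkeeping that cbq_update() below performs
   after each transmission; locking, statistics and several corrections are
   omitted:

	idle     = now - cl->last - len/rate;          // real idle minus "virtual" busy time
	avgidle += idle - (avgidle >> cl->ewma_log);   // scaled EWMA of idle time
	if (avgidle <= 0) {                            // class is at or over its rate
		delay = (-avgidle) - ((-avgidle) >> cl->ewma_log);
		cl->undertime = now + delay;           // earliest unregulated send time
	} else {
		cl->undertime = PSCHED_PASTPERFECT;    // under limit, no regulation
	}
 */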
71 struct cbq_sched_data;
76 struct Qdisc_class_common common;
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
80 unsigned char priority; /* class priority */
81 unsigned char priority2; /* priority to be used after overlimit */
82 unsigned char ewma_log; /* time constant for idle time calculation */
83 unsigned char ovl_strategy;
84 #ifdef CONFIG_NET_CLS_ACT
90 /* Link-sharing scheduler parameters */
91 long maxidle; /* Class parameters: see below. */
95 struct qdisc_rate_table *R_tab;
97 /* Overlimit strategy parameters */
98 void (*overlimit)(struct cbq_class *cl);
99 psched_tdiff_t penalty;
101 /* General scheduler (WRR) parameters */
103 long quantum; /* Allotment per WRR round */
104 long weight; /* Relative allotment: see below */
106 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
107 struct cbq_class *split; /* Ptr to split node */
108 struct cbq_class *share; /* Ptr to LS parent in the class tree */
109 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
110 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 struct cbq_class *sibling; /* Sibling chain */
113 struct cbq_class *children; /* Pointer to children chain */
115 struct Qdisc *q; /* Elementary queueing discipline */
119 unsigned char cpriority; /* Effective priority */
120 unsigned char delayed;
121 unsigned char level; /* level of the class in hierarchy:
122 0 for leaf classes, and maximal
123 level of children + 1 for nodes.
126 psched_time_t last; /* Last end of service */
127 psched_time_t undertime;
129 long deficit; /* Saved deficit for WRR */
130 psched_time_t penalized;
131 struct gnet_stats_basic bstats;
132 struct gnet_stats_queue qstats;
133 struct gnet_stats_rate_est rate_est;
134 struct tc_cbq_xstats xstats;
136 struct tcf_proto *filter_list;
141 struct cbq_class *defaults[TC_PRIO_MAX+1];
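/* defaults[]: default class per TC_PRIO value, used when this class is a
   split node; kept in sync by cbq_sync_defmap() */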
144 struct cbq_sched_data
146 struct Qdisc_class_hash clhash; /* Hash table of all classes */
147 int nclasses[TC_CBQ_MAXPRIO+1];
148 unsigned quanta[TC_CBQ_MAXPRIO+1];
150 struct cbq_class link;
153 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
156 #ifdef CONFIG_NET_CLS_ACT
157 struct cbq_class *rx_class;
159 struct cbq_class *tx_class;
160 struct cbq_class *tx_borrowed;
162 psched_time_t now; /* Cached timestamp */
163 psched_time_t now_rt; /* Cached real time */
166 struct hrtimer delay_timer;
167 struct qdisc_watchdog watchdog; /* Watchdog timer,
171 psched_tdiff_t wd_expires;
177 #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len)
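/* L2T ("length to time"): transmission time of a len-byte packet at this
   class's configured rate, looked up in its rate table. */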
179 static __inline__ struct cbq_class *
180 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182 struct Qdisc_class_common *clc;
184 clc = qdisc_class_find(&q->clhash, classid);
187 return container_of(clc, struct cbq_class, common);
190 #ifdef CONFIG_NET_CLS_ACT
192 static struct cbq_class *
193 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195 struct cbq_class *cl, *new;
197 for (cl = this->tparent; cl; cl = cl->tparent)
198 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
206 /* Classify packet. The procedure is pretty complicated, but
207 it allows us to combine link sharing and priority scheduling
210 Namely, you can put link-sharing rules (e.g. route based) at the root of CBQ,
211 so that it resolves to split nodes. Then packets are classified
212 by logical priority, or a more specific classifier may be attached
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
219 struct cbq_sched_data *q = qdisc_priv(sch);
220 struct cbq_class *head = &q->link;
221 struct cbq_class **defmap;
222 struct cbq_class *cl = NULL;
223 u32 prio = skb->priority;
224 struct tcf_result res;
227 * Step 1. If skb->priority points to one of our classes, use it.
229 if (TC_H_MAJ(prio^sch->handle) == 0 &&
230 (cl = cbq_class_lookup(q, prio)) != NULL)
233 *qerr = NET_XMIT_BYPASS;
236 defmap = head->defaults;
239 * Step 2+n. Apply classifier.
241 if (!head->filter_list ||
242 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
245 if ((cl = (void*)res.class) == NULL) {
246 if (TC_H_MAJ(res.classid))
247 cl = cbq_class_lookup(q, res.classid);
248 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
249 cl = defmap[TC_PRIO_BESTEFFORT];
251 if (cl == NULL || cl->level >= head->level)
255 #ifdef CONFIG_NET_CLS_ACT
259 *qerr = NET_XMIT_SUCCESS;
262 case TC_ACT_RECLASSIFY:
263 return cbq_reclassify(skb, cl);
270 * Step 3+n. If the classifier selected a link-sharing class,
271 * apply the agency-specific classifier.
272 * Repeat this procedure until we hit a leaf node.
281 * Step 4. No success...
283 if (TC_H_MAJ(prio) == 0 &&
284 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
285 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
292 A packet has just been enqueued on an empty class.
293 cbq_activate_class adds the class to the tail of the active class list
294 of its priority band.
297 static __inline__ void cbq_activate_class(struct cbq_class *cl)
299 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
300 int prio = cl->cpriority;
301 struct cbq_class *cl_tail;
303 cl_tail = q->active[prio];
304 q->active[prio] = cl;
306 if (cl_tail != NULL) {
307 cl->next_alive = cl_tail->next_alive;
308 cl_tail->next_alive = cl;
311 q->activemask |= (1<<prio);
316 Unlink the class from the active chain.
317 Note that the same procedure is done directly in cbq_dequeue*
318 during the round-robin pass.
321 static void cbq_deactivate_class(struct cbq_class *this)
323 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
324 int prio = this->cpriority;
325 struct cbq_class *cl;
326 struct cbq_class *cl_prev = q->active[prio];
329 cl = cl_prev->next_alive;
331 cl_prev->next_alive = cl->next_alive;
332 cl->next_alive = NULL;
334 if (cl == q->active[prio]) {
335 q->active[prio] = cl_prev;
336 if (cl == q->active[prio]) {
337 q->active[prio] = NULL;
338 q->activemask &= ~(1<<prio);
344 } while ((cl_prev = cl) != q->active[prio]);
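/* Top-level link-sharing support (see [1]): record in q->toplevel the level
   of the lowest under-limit class along this class's borrow chain, so that
   cbq_under_limit() will not let overlimit classes borrow from ancestors
   above that level. */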
348 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350 int toplevel = q->toplevel;
352 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
356 now = psched_get_time();
357 incr = now - q->now_rt;
361 if (cl->undertime < now) {
362 q->toplevel = cl->level;
365 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
370 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
372 struct cbq_sched_data *q = qdisc_priv(sch);
374 int uninitialized_var(ret);
375 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
377 #ifdef CONFIG_NET_CLS_ACT
381 if (ret == NET_XMIT_BYPASS)
387 #ifdef CONFIG_NET_CLS_ACT
388 cl->q->__parent = sch;
390 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
392 sch->bstats.packets++;
393 sch->bstats.bytes+=len;
394 cbq_mark_toplevel(q, cl);
396 cbq_activate_class(cl);
401 cbq_mark_toplevel(q, cl);
407 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
409 struct cbq_sched_data *q = qdisc_priv(sch);
410 struct cbq_class *cl;
413 if ((cl = q->tx_class) == NULL) {
420 cbq_mark_toplevel(q, cl);
422 #ifdef CONFIG_NET_CLS_ACT
424 cl->q->__parent = sch;
426 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
428 sch->qstats.requeues++;
430 cbq_activate_class(cl);
438 /* Overlimit actions */
440 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
442 static void cbq_ovl_classic(struct cbq_class *cl)
444 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
445 psched_tdiff_t delay = cl->undertime - q->now;
448 delay += cl->offtime;
451 The class goes to sleep, so it will have no
452 chance to work off avgidle. Let's forgive it 8)
454 BTW cbq-2.0 has a bug in this
455 place: apparently they forgot to shift it by cl->ewma_log.
458 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
459 if (cl->avgidle < cl->minidle)
460 cl->avgidle = cl->minidle;
463 cl->undertime = q->now + delay;
465 cl->xstats.overactions++;
468 if (q->wd_expires == 0 || q->wd_expires > delay)
469 q->wd_expires = delay;
471 /* Dirty work! We must schedule wakeups based on
472 real available rate, rather than leaf rate,
473 which may be tiny (even zero).
475 if (q->toplevel == TC_CBQ_MAXLEVEL) {
477 psched_tdiff_t base_delay = q->wd_expires;
479 for (b = cl->borrow; b; b = b->borrow) {
480 delay = b->undertime - q->now;
481 if (delay < base_delay) {
488 q->wd_expires = base_delay;
492 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime, when
496 static void cbq_ovl_rclassic(struct cbq_class *cl)
498 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
499 struct cbq_class *this = cl;
502 if (cl->level > q->toplevel) {
506 } while ((cl = cl->borrow) != NULL);
513 /* TC_CBQ_OVL_DELAY: delay until the class goes under limit */
515 static void cbq_ovl_delay(struct cbq_class *cl)
517 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
518 psched_tdiff_t delay = cl->undertime - q->now;
521 psched_time_t sched = q->now;
524 delay += cl->offtime;
526 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
527 if (cl->avgidle < cl->minidle)
528 cl->avgidle = cl->minidle;
529 cl->undertime = q->now + delay;
532 sched += delay + cl->penalty;
533 cl->penalized = sched;
534 cl->cpriority = TC_CBQ_MAXPRIO;
535 q->pmask |= (1<<TC_CBQ_MAXPRIO);
537 expires = ktime_set(0, 0);
538 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
539 if (hrtimer_try_to_cancel(&q->delay_timer) &&
540 ktime_to_ns(ktime_sub(q->delay_timer.expires,
542 q->delay_timer.expires = expires;
543 hrtimer_restart(&q->delay_timer);
545 cl->xstats.overactions++;
550 if (q->wd_expires == 0 || q->wd_expires > delay)
551 q->wd_expires = delay;
554 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
556 static void cbq_ovl_lowprio(struct cbq_class *cl)
558 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
560 cl->penalized = q->now + cl->penalty;
562 if (cl->cpriority != cl->priority2) {
563 cl->cpriority = cl->priority2;
564 q->pmask |= (1<<cl->cpriority);
565 cl->xstats.overactions++;
570 /* TC_CBQ_OVL_DROP: penalize class by dropping */
572 static void cbq_ovl_drop(struct cbq_class *cl)
574 if (cl->q->ops->drop)
575 if (cl->q->ops->drop(cl->q))
577 cl->xstats.overactions++;
581 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
584 struct cbq_class *cl;
585 struct cbq_class *cl_prev = q->active[prio];
586 psched_time_t sched = now;
592 cl = cl_prev->next_alive;
593 if (now - cl->penalized > 0) {
594 cl_prev->next_alive = cl->next_alive;
595 cl->next_alive = NULL;
596 cl->cpriority = cl->priority;
598 cbq_activate_class(cl);
600 if (cl == q->active[prio]) {
601 q->active[prio] = cl_prev;
602 if (cl == q->active[prio]) {
603 q->active[prio] = NULL;
608 cl = cl_prev->next_alive;
609 } else if (sched - cl->penalized > 0)
610 sched = cl->penalized;
611 } while ((cl_prev = cl) != q->active[prio]);
616 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
618 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
620 struct Qdisc *sch = q->watchdog.qdisc;
622 psched_tdiff_t delay = 0;
625 now = psched_get_time();
631 int prio = ffz(~pmask);
636 tmp = cbq_undelay_prio(q, prio, now);
639 if (tmp < delay || delay == 0)
647 time = ktime_set(0, 0);
648 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
649 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
652 sch->flags &= ~TCQ_F_THROTTLED;
653 netif_schedule(sch->dev);
654 return HRTIMER_NORESTART;
657 #ifdef CONFIG_NET_CLS_ACT
658 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
661 struct Qdisc *sch = child->__parent;
662 struct cbq_sched_data *q = qdisc_priv(sch);
663 struct cbq_class *cl = q->rx_class;
667 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
669 cbq_mark_toplevel(q, cl);
672 cl->q->__parent = sch;
674 if (cl->q->enqueue(skb, cl->q) == 0) {
676 sch->bstats.packets++;
677 sch->bstats.bytes+=len;
679 cbq_activate_class(cl);
692 This is a mission-critical procedure.
694 We "regenerate" the toplevel cutoff if the transmitting class
695 has backlog and is not regulated. This is not part of the
696 original CBQ description, but looks more reasonable.
697 It may well be wrong; this question needs further investigation.
700 static __inline__ void
701 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
702 struct cbq_class *borrowed)
704 if (cl && q->toplevel >= borrowed->level) {
705 if (cl->q->q.qlen > 1) {
707 if (borrowed->undertime == PSCHED_PASTPERFECT) {
708 q->toplevel = borrowed->level;
711 } while ((borrowed=borrowed->borrow) != NULL);
714 /* It is not necessary now. Uncommenting it
715 will save CPU cycles, but decrease fairness.
717 q->toplevel = TC_CBQ_MAXLEVEL;
723 cbq_update(struct cbq_sched_data *q)
725 struct cbq_class *this = q->tx_class;
726 struct cbq_class *cl = this;
731 for ( ; cl; cl = cl->share) {
732 long avgidle = cl->avgidle;
735 cl->bstats.packets++;
736 cl->bstats.bytes += len;
739 (now - last) is the total time between packet right edges.
740 (last_pktlen/rate) is the "virtual" busy time, so that
742 idle = (now - last) - last_pktlen/rate
745 idle = q->now - cl->last;
746 if ((unsigned long)idle > 128*1024*1024) {
747 avgidle = cl->maxidle;
749 idle -= L2T(cl, len);
751 /* true_avgidle := (1-W)*true_avgidle + W*idle,
752 where W=2^{-ewma_log}. But cl->avgidle is scaled:
753 cl->avgidle == true_avgidle/W,
756 avgidle += idle - (avgidle>>cl->ewma_log);
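/* Example: with ewma_log = 5 (W = 1/32), true_avgidle' =
   (31/32)*true_avgidle + (1/32)*idle.  Dividing through by W gives
   avgidle' = avgidle + idle - (avgidle >> 5), i.e. exactly the update
   above, with no per-packet multiplication needed. */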
760 /* Overlimit or at-limit */
762 if (avgidle < cl->minidle)
763 avgidle = cl->minidle;
765 cl->avgidle = avgidle;
767 /* Calculate the expected time when this class
768 will be allowed to send.
770 (1-W)*true_avgidle + W*delay = 0, i.e.
771 idle = (1/W - 1)*(-true_avgidle)
773 idle = (1 - W)*(-cl->avgidle);
775 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
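/* i.e. (1 - W) * (-cl->avgidle), computed with a shift and a subtraction
   since W = 2^{-ewma_log} */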
779 To maintain the rate allocated to the class,
780 we add to undertime the virtual clock time
781 necessary to complete the transmitted packet.
782 (len/phys_bandwidth has already elapsed
783 by the time cbq_update is called.)
786 idle -= L2T(&q->link, len);
787 idle += L2T(cl, len);
789 cl->undertime = q->now + idle;
793 cl->undertime = PSCHED_PASTPERFECT;
794 if (avgidle > cl->maxidle)
795 cl->avgidle = cl->maxidle;
797 cl->avgidle = avgidle;
802 cbq_update_toplevel(q, this, q->tx_borrowed);
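/* Return cl itself if it may send now, otherwise the nearest under-limit
   ancestor it may borrow from.  Return NULL if the borrow chain is exhausted
   (in which case the overlimit action is invoked) or the top-level cutoff
   forbids borrowing. */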
805 static __inline__ struct cbq_class *
806 cbq_under_limit(struct cbq_class *cl)
808 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
809 struct cbq_class *this_cl = cl;
811 if (cl->tparent == NULL)
814 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
820 /* This is a very suspicious place. Currently the overlimit
821 action is generated for non-bounded classes
822 only if the link is completely congested.
823 Though this agrees with the ancestor-only paradigm,
824 it looks very questionable. In particular,
825 it means that this chunk of code will either
826 never be called or will strongly amplify
827 burstiness. Dangerous, silly, and yet
828 no other solution exists.
830 if ((cl = cl->borrow) == NULL) {
831 this_cl->qstats.overlimits++;
832 this_cl->overlimit(this_cl);
835 if (cl->level > q->toplevel)
837 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
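/* One weighted round-robin pass over the active classes of one priority
   band; each class may send up to its accumulated deficit, which is
   refilled by cl->quantum per round. */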
843 static __inline__ struct sk_buff *
844 cbq_dequeue_prio(struct Qdisc *sch, int prio)
846 struct cbq_sched_data *q = qdisc_priv(sch);
847 struct cbq_class *cl_tail, *cl_prev, *cl;
851 cl_tail = cl_prev = q->active[prio];
852 cl = cl_prev->next_alive;
859 struct cbq_class *borrow = cl;
862 (borrow = cbq_under_limit(cl)) == NULL)
865 if (cl->deficit <= 0) {
866 /* Class exhausted its allotment per
867 this round. Switch to the next one.
870 cl->deficit += cl->quantum;
874 skb = cl->q->dequeue(cl->q);
876 /* Class did not give us any skb :-(
877 It can occur even if cl->q->q.qlen != 0,
878 e.g. if cl->q == "tbf"
883 cl->deficit -= skb->len;
885 q->tx_borrowed = borrow;
887 #ifndef CBQ_XSTATS_BORROWS_BYTES
888 borrow->xstats.borrows++;
889 cl->xstats.borrows++;
891 borrow->xstats.borrows += skb->len;
892 cl->xstats.borrows += skb->len;
895 q->tx_len = skb->len;
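/* remembered for the EOS time integrator in cbq_dequeue() */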
897 if (cl->deficit <= 0) {
898 q->active[prio] = cl;
900 cl->deficit += cl->quantum;
905 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
906 /* Class is empty or penalized.
907 Unlink it from active chain.
909 cl_prev->next_alive = cl->next_alive;
910 cl->next_alive = NULL;
912 /* Did cl_tail point to it? */
917 /* Was it the last class in this band? */
920 q->active[prio] = NULL;
921 q->activemask &= ~(1<<prio);
923 cbq_activate_class(cl);
927 q->active[prio] = cl_tail;
930 cbq_activate_class(cl);
938 } while (cl_prev != cl_tail);
941 q->active[prio] = cl_prev;
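/* Serve priority bands in order, highest priority (lowest prio number)
   first, taking one WRR pass over each active band. */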
946 static __inline__ struct sk_buff *
947 cbq_dequeue_1(struct Qdisc *sch)
949 struct cbq_sched_data *q = qdisc_priv(sch);
953 activemask = q->activemask&0xFF;
955 int prio = ffz(~activemask);
956 activemask &= ~(1<<prio);
957 skb = cbq_dequeue_prio(sch, prio);
964 static struct sk_buff *
965 cbq_dequeue(struct Qdisc *sch)
968 struct cbq_sched_data *q = qdisc_priv(sch);
972 now = psched_get_time();
973 incr = now - q->now_rt;
976 psched_tdiff_t incr2;
977 /* Time integrator. We calculate the EOS (end of service) time
978 by adding the expected packet transmission time.
979 If real time is greater, we warp the artificial clock,
982 cbq_time = max(real_time, work);
984 incr2 = L2T(&q->link, q->tx_len);
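/* incr2: expected transmission time of the last dequeued packet on the
   link; q->now is advanced by at least this much. */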
987 if ((incr -= incr2) < 0)
996 skb = cbq_dequeue_1(sch);
999 sch->flags &= ~TCQ_F_THROTTLED;
1003 /* All the classes are overlimit.
1007 1. Scheduler is empty.
1008 2. Toplevel cutoff inhibited borrowing.
1009 3. Root class is overlimit.
1011 Reset conditions 2 and 3 and retry.
1013 Note that NS and cbq-2.0 are buggy: peeking at
1014 an arbitrary class is appropriate for ancestor-only
1015 sharing, but not for the toplevel algorithm.
1017 Our version is better but slower, because it requires
1018 two passes; that is unavoidable with top-level sharing.
1021 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1022 q->link.undertime == PSCHED_PASTPERFECT)
1025 q->toplevel = TC_CBQ_MAXLEVEL;
1026 q->link.undertime = PSCHED_PASTPERFECT;
1029 /* No packets in scheduler or nobody wants to give them to us :-(
1030 Sigh... start the watchdog timer in the latter case. */
1033 sch->qstats.overlimits++;
1035 qdisc_watchdog_schedule(&q->watchdog,
1036 now + q->wd_expires);
1041 /* CBQ class maintenance routines */
1043 static void cbq_adjust_levels(struct cbq_class *this)
1050 struct cbq_class *cl;
1052 if ((cl = this->children) != NULL) {
1054 if (cl->level > level)
1056 } while ((cl = cl->sibling) != this->children);
1058 this->level = level+1;
1059 } while ((this = this->tparent) != NULL);
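/* Rescale the per-round quanta of all classes in one priority band so that
   quantum = weight * allot * nclasses[prio] / (sum of weights in the band),
   i.e. proportional to the class weight relative to the band average. */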
1062 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1064 struct cbq_class *cl;
1065 struct hlist_node *n;
1068 if (q->quanta[prio] == 0)
1071 for (h = 0; h < q->clhash.hashsize; h++) {
1072 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1073 /* BUGGGG... Beware! This expression suffers from
1074 arithmetic overflow!
1076 if (cl->priority == prio) {
1077 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1080 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1081 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum);
1082 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1088 static void cbq_sync_defmap(struct cbq_class *cl)
1090 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1091 struct cbq_class *split = cl->split;
1098 for (i=0; i<=TC_PRIO_MAX; i++) {
1099 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1100 split->defaults[i] = NULL;
1103 for (i=0; i<=TC_PRIO_MAX; i++) {
1104 int level = split->level;
1106 if (split->defaults[i])
1109 for (h = 0; h < q->clhash.hashsize; h++) {
1110 struct hlist_node *n;
1111 struct cbq_class *c;
1113 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1115 if (c->split == split && c->level < level &&
1117 split->defaults[i] = c;
1125 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1127 struct cbq_class *split = NULL;
1130 if ((split = cl->split) == NULL)
1132 splitid = split->common.classid;
1135 if (split == NULL || split->common.classid != splitid) {
1136 for (split = cl->tparent; split; split = split->tparent)
1137 if (split->common.classid == splitid)
1144 if (cl->split != split) {
1146 cbq_sync_defmap(cl);
1148 cl->defmap = def&mask;
1150 cl->defmap = (cl->defmap&~mask)|(def&mask);
1152 cbq_sync_defmap(cl);
1155 static void cbq_unlink_class(struct cbq_class *this)
1157 struct cbq_class *cl, **clp;
1158 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1160 qdisc_class_hash_remove(&q->clhash, &this->common);
1162 if (this->tparent) {
1171 } while ((cl = *clp) != this->sibling);
1173 if (this->tparent->children == this) {
1174 this->tparent->children = this->sibling;
1175 if (this->sibling == this)
1176 this->tparent->children = NULL;
1179 BUG_TRAP(this->sibling == this);
1183 static void cbq_link_class(struct cbq_class *this)
1185 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1186 struct cbq_class *parent = this->tparent;
1188 this->sibling = this;
1189 qdisc_class_hash_insert(&q->clhash, &this->common);
1194 if (parent->children == NULL) {
1195 parent->children = this;
1197 this->sibling = parent->children->sibling;
1198 parent->children->sibling = this;
1202 static unsigned int cbq_drop(struct Qdisc* sch)
1204 struct cbq_sched_data *q = qdisc_priv(sch);
1205 struct cbq_class *cl, *cl_head;
1209 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1210 if ((cl_head = q->active[prio]) == NULL)
1215 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1218 cbq_deactivate_class(cl);
1221 } while ((cl = cl->next_alive) != cl_head);
1227 cbq_reset(struct Qdisc* sch)
1229 struct cbq_sched_data *q = qdisc_priv(sch);
1230 struct cbq_class *cl;
1231 struct hlist_node *n;
1238 q->tx_borrowed = NULL;
1239 qdisc_watchdog_cancel(&q->watchdog);
1240 hrtimer_cancel(&q->delay_timer);
1241 q->toplevel = TC_CBQ_MAXLEVEL;
1242 q->now = psched_get_time();
1245 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1246 q->active[prio] = NULL;
1248 for (h = 0; h < q->clhash.hashsize; h++) {
1249 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1252 cl->next_alive = NULL;
1253 cl->undertime = PSCHED_PASTPERFECT;
1254 cl->avgidle = cl->maxidle;
1255 cl->deficit = cl->quantum;
1256 cl->cpriority = cl->priority;
1263 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1265 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1266 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1267 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1269 if (lss->change&TCF_CBQ_LSS_EWMA)
1270 cl->ewma_log = lss->ewma_log;
1271 if (lss->change&TCF_CBQ_LSS_AVPKT)
1272 cl->avpkt = lss->avpkt;
1273 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1274 cl->minidle = -(long)lss->minidle;
1275 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1276 cl->maxidle = lss->maxidle;
1277 cl->avgidle = lss->maxidle;
1279 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1280 cl->offtime = lss->offtime;
1284 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1286 q->nclasses[cl->priority]--;
1287 q->quanta[cl->priority] -= cl->weight;
1288 cbq_normalize_quanta(q, cl->priority);
1291 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1293 q->nclasses[cl->priority]++;
1294 q->quanta[cl->priority] += cl->weight;
1295 cbq_normalize_quanta(q, cl->priority);
1298 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1300 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1303 cl->allot = wrr->allot;
1305 cl->weight = wrr->weight;
1306 if (wrr->priority) {
1307 cl->priority = wrr->priority-1;
1308 cl->cpriority = cl->priority;
1309 if (cl->priority >= cl->priority2)
1310 cl->priority2 = TC_CBQ_MAXPRIO-1;
1317 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1319 switch (ovl->strategy) {
1320 case TC_CBQ_OVL_CLASSIC:
1321 cl->overlimit = cbq_ovl_classic;
1323 case TC_CBQ_OVL_DELAY:
1324 cl->overlimit = cbq_ovl_delay;
1326 case TC_CBQ_OVL_LOWPRIO:
1327 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1328 ovl->priority2-1 <= cl->priority)
1330 cl->priority2 = ovl->priority2-1;
1331 cl->overlimit = cbq_ovl_lowprio;
1333 case TC_CBQ_OVL_DROP:
1334 cl->overlimit = cbq_ovl_drop;
1336 case TC_CBQ_OVL_RCLASSIC:
1337 cl->overlimit = cbq_ovl_rclassic;
1342 cl->penalty = ovl->penalty;
1346 #ifdef CONFIG_NET_CLS_ACT
1347 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1349 cl->police = p->police;
1351 if (cl->q->handle) {
1352 if (p->police == TC_POLICE_RECLASSIFY)
1353 cl->q->reshape_fail = cbq_reshape_fail;
1355 cl->q->reshape_fail = NULL;
1361 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1363 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1367 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1368 [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
1369 [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
1370 [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
1371 [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
1372 [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
1373 [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1374 [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
1377 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1379 struct cbq_sched_data *q = qdisc_priv(sch);
1380 struct nlattr *tb[TCA_CBQ_MAX + 1];
1381 struct tc_ratespec *r;
1384 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1388 if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1391 r = nla_data(tb[TCA_CBQ_RATE]);
1393 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1396 err = qdisc_class_hash_init(&q->clhash);
1401 q->link.sibling = &q->link;
1402 q->link.common.classid = sch->handle;
1403 q->link.qdisc = sch;
1404 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1406 q->link.q = &noop_qdisc;
1408 q->link.priority = TC_CBQ_MAXPRIO-1;
1409 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1410 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1411 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1412 q->link.overlimit = cbq_ovl_classic;
1413 q->link.allot = psched_mtu(sch->dev);
1414 q->link.quantum = q->link.allot;
1415 q->link.weight = q->link.R_tab->rate.rate;
1417 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1418 q->link.avpkt = q->link.allot/2;
1419 q->link.minidle = -0x7FFFFFFF;
1421 qdisc_watchdog_init(&q->watchdog, sch);
1422 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1423 q->delay_timer.function = cbq_undelay;
1424 q->toplevel = TC_CBQ_MAXLEVEL;
1425 q->now = psched_get_time();
1428 cbq_link_class(&q->link);
1430 if (tb[TCA_CBQ_LSSOPT])
1431 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1433 cbq_addprio(q, &q->link);
1437 qdisc_put_rtab(q->link.R_tab);
1441 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1443 unsigned char *b = skb_tail_pointer(skb);
1445 NLA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1453 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1455 unsigned char *b = skb_tail_pointer(skb);
1456 struct tc_cbq_lssopt opt;
1459 if (cl->borrow == NULL)
1460 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1461 if (cl->share == NULL)
1462 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1463 opt.ewma_log = cl->ewma_log;
1464 opt.level = cl->level;
1465 opt.avpkt = cl->avpkt;
1466 opt.maxidle = cl->maxidle;
1467 opt.minidle = (u32)(-cl->minidle);
1468 opt.offtime = cl->offtime;
1470 NLA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1478 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1480 unsigned char *b = skb_tail_pointer(skb);
1481 struct tc_cbq_wrropt opt;
1484 opt.allot = cl->allot;
1485 opt.priority = cl->priority+1;
1486 opt.cpriority = cl->cpriority+1;
1487 opt.weight = cl->weight;
1488 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1496 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1498 unsigned char *b = skb_tail_pointer(skb);
1499 struct tc_cbq_ovl opt;
1501 opt.strategy = cl->ovl_strategy;
1502 opt.priority2 = cl->priority2+1;
1504 opt.penalty = cl->penalty;
1505 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1513 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1515 unsigned char *b = skb_tail_pointer(skb);
1516 struct tc_cbq_fopt opt;
1518 if (cl->split || cl->defmap) {
1519 opt.split = cl->split ? cl->split->common.classid : 0;
1520 opt.defmap = cl->defmap;
1522 NLA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1531 #ifdef CONFIG_NET_CLS_ACT
1532 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1534 unsigned char *b = skb_tail_pointer(skb);
1535 struct tc_cbq_police opt;
1538 opt.police = cl->police;
1541 NLA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1551 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1553 if (cbq_dump_lss(skb, cl) < 0 ||
1554 cbq_dump_rate(skb, cl) < 0 ||
1555 cbq_dump_wrr(skb, cl) < 0 ||
1556 cbq_dump_ovl(skb, cl) < 0 ||
1557 #ifdef CONFIG_NET_CLS_ACT
1558 cbq_dump_police(skb, cl) < 0 ||
1560 cbq_dump_fopt(skb, cl) < 0)
1565 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1567 struct cbq_sched_data *q = qdisc_priv(sch);
1568 struct nlattr *nest;
1570 nest = nla_nest_start(skb, TCA_OPTIONS);
1572 goto nla_put_failure;
1573 if (cbq_dump_attr(skb, &q->link) < 0)
1574 goto nla_put_failure;
1575 nla_nest_end(skb, nest);
1579 nla_nest_cancel(skb, nest);
1584 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1586 struct cbq_sched_data *q = qdisc_priv(sch);
1588 q->link.xstats.avgidle = q->link.avgidle;
1589 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1593 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1594 struct sk_buff *skb, struct tcmsg *tcm)
1596 struct cbq_class *cl = (struct cbq_class*)arg;
1597 struct nlattr *nest;
1600 tcm->tcm_parent = cl->tparent->common.classid;
1602 tcm->tcm_parent = TC_H_ROOT;
1603 tcm->tcm_handle = cl->common.classid;
1604 tcm->tcm_info = cl->q->handle;
1606 nest = nla_nest_start(skb, TCA_OPTIONS);
1608 goto nla_put_failure;
1609 if (cbq_dump_attr(skb, cl) < 0)
1610 goto nla_put_failure;
1611 nla_nest_end(skb, nest);
1615 nla_nest_cancel(skb, nest);
1620 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1621 struct gnet_dump *d)
1623 struct cbq_sched_data *q = qdisc_priv(sch);
1624 struct cbq_class *cl = (struct cbq_class*)arg;
1626 cl->qstats.qlen = cl->q->q.qlen;
1627 cl->xstats.avgidle = cl->avgidle;
1628 cl->xstats.undertime = 0;
1630 if (cl->undertime != PSCHED_PASTPERFECT)
1631 cl->xstats.undertime = cl->undertime - q->now;
1633 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1634 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1635 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1638 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1641 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1644 struct cbq_class *cl = (struct cbq_class*)arg;
1648 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1649 cl->common.classid);
1653 #ifdef CONFIG_NET_CLS_ACT
1654 if (cl->police == TC_POLICE_RECLASSIFY)
1655 new->reshape_fail = cbq_reshape_fail;
1659 *old = xchg(&cl->q, new);
1660 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1662 sch_tree_unlock(sch);
1669 static struct Qdisc *
1670 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1672 struct cbq_class *cl = (struct cbq_class*)arg;
1674 return cl ? cl->q : NULL;
1677 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1679 struct cbq_class *cl = (struct cbq_class *)arg;
1681 if (cl->q->q.qlen == 0)
1682 cbq_deactivate_class(cl);
1685 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1687 struct cbq_sched_data *q = qdisc_priv(sch);
1688 struct cbq_class *cl = cbq_class_lookup(q, classid);
1692 return (unsigned long)cl;
1697 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1699 struct cbq_sched_data *q = qdisc_priv(sch);
1701 BUG_TRAP(!cl->filters);
1703 tcf_destroy_chain(&cl->filter_list);
1704 qdisc_destroy(cl->q);
1705 qdisc_put_rtab(cl->R_tab);
1706 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1712 cbq_destroy(struct Qdisc* sch)
1714 struct cbq_sched_data *q = qdisc_priv(sch);
1715 struct hlist_node *n, *next;
1716 struct cbq_class *cl;
1719 #ifdef CONFIG_NET_CLS_ACT
1723 * Filters must be destroyed first because we don't destroy the
1724 * classes from root to leaves, which means that filters can still
1725 * be bound to classes which have been destroyed already. --TGR '04
1727 for (h = 0; h < q->clhash.hashsize; h++) {
1728 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode)
1729 tcf_destroy_chain(&cl->filter_list);
1731 for (h = 0; h < q->clhash.hashsize; h++) {
1732 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[h],
1734 cbq_destroy_class(sch, cl);
1736 qdisc_class_hash_destroy(&q->clhash);
1739 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1741 struct cbq_class *cl = (struct cbq_class*)arg;
1743 if (--cl->refcnt == 0) {
1744 #ifdef CONFIG_NET_CLS_ACT
1745 struct cbq_sched_data *q = qdisc_priv(sch);
1747 spin_lock_bh(&sch->dev->queue_lock);
1748 if (q->rx_class == cl)
1750 spin_unlock_bh(&sch->dev->queue_lock);
1753 cbq_destroy_class(sch, cl);
1758 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1762 struct cbq_sched_data *q = qdisc_priv(sch);
1763 struct cbq_class *cl = (struct cbq_class*)*arg;
1764 struct nlattr *opt = tca[TCA_OPTIONS];
1765 struct nlattr *tb[TCA_CBQ_MAX + 1];
1766 struct cbq_class *parent;
1767 struct qdisc_rate_table *rtab = NULL;
1772 err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1780 cl->tparent->common.classid != parentid)
1782 if (!cl->tparent && parentid != TC_H_ROOT)
1786 if (tb[TCA_CBQ_RATE]) {
1787 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1792 /* Change class parameters */
1795 if (cl->next_alive != NULL)
1796 cbq_deactivate_class(cl);
1799 rtab = xchg(&cl->R_tab, rtab);
1800 qdisc_put_rtab(rtab);
1803 if (tb[TCA_CBQ_LSSOPT])
1804 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1806 if (tb[TCA_CBQ_WRROPT]) {
1808 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1811 if (tb[TCA_CBQ_OVL_STRATEGY])
1812 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1814 #ifdef CONFIG_NET_CLS_ACT
1815 if (tb[TCA_CBQ_POLICE])
1816 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1819 if (tb[TCA_CBQ_FOPT])
1820 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1823 cbq_activate_class(cl);
1825 sch_tree_unlock(sch);
1828 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1829 &sch->dev->queue_lock,
1834 if (parentid == TC_H_ROOT)
1837 if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1838 tb[TCA_CBQ_LSSOPT] == NULL)
1841 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1847 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1851 classid = TC_H_MAKE(sch->handle,0x8000);
1853 for (i=0; i<0x8000; i++) {
1854 if (++q->hgenerator >= 0x8000)
1856 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1862 classid = classid|q->hgenerator;
1867 parent = cbq_class_lookup(q, parentid);
1874 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1880 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1881 cl->q = &noop_qdisc;
1882 cl->common.classid = classid;
1883 cl->tparent = parent;
1885 cl->allot = parent->allot;
1886 cl->quantum = cl->allot;
1887 cl->weight = cl->R_tab->rate.rate;
1891 cl->borrow = cl->tparent;
1892 if (cl->tparent != &q->link)
1893 cl->share = cl->tparent;
1894 cbq_adjust_levels(parent);
1895 cl->minidle = -0x7FFFFFFF;
1896 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1897 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1898 if (cl->ewma_log==0)
1899 cl->ewma_log = q->link.ewma_log;
1901 cl->maxidle = q->link.maxidle;
1903 cl->avpkt = q->link.avpkt;
1904 cl->overlimit = cbq_ovl_classic;
1905 if (tb[TCA_CBQ_OVL_STRATEGY])
1906 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1907 #ifdef CONFIG_NET_CLS_ACT
1908 if (tb[TCA_CBQ_POLICE])
1909 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1911 if (tb[TCA_CBQ_FOPT])
1912 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1913 sch_tree_unlock(sch);
1915 qdisc_class_hash_grow(sch, &q->clhash);
1918 gen_new_estimator(&cl->bstats, &cl->rate_est,
1919 &sch->dev->queue_lock, tca[TCA_RATE]);
1921 *arg = (unsigned long)cl;
1925 qdisc_put_rtab(rtab);
1929 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1931 struct cbq_sched_data *q = qdisc_priv(sch);
1932 struct cbq_class *cl = (struct cbq_class*)arg;
1935 if (cl->filters || cl->children || cl == &q->link)
1940 qlen = cl->q->q.qlen;
1942 qdisc_tree_decrease_qlen(cl->q, qlen);
1945 cbq_deactivate_class(cl);
1947 if (q->tx_borrowed == cl)
1948 q->tx_borrowed = q->tx_class;
1949 if (q->tx_class == cl) {
1951 q->tx_borrowed = NULL;
1953 #ifdef CONFIG_NET_CLS_ACT
1954 if (q->rx_class == cl)
1958 cbq_unlink_class(cl);
1959 cbq_adjust_levels(cl->tparent);
1961 cbq_sync_defmap(cl);
1964 sch_tree_unlock(sch);
1966 if (--cl->refcnt == 0)
1967 cbq_destroy_class(sch, cl);
1972 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1974 struct cbq_sched_data *q = qdisc_priv(sch);
1975 struct cbq_class *cl = (struct cbq_class *)arg;
1980 return &cl->filter_list;
1983 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1986 struct cbq_sched_data *q = qdisc_priv(sch);
1987 struct cbq_class *p = (struct cbq_class*)parent;
1988 struct cbq_class *cl = cbq_class_lookup(q, classid);
1991 if (p && p->level <= cl->level)
1994 return (unsigned long)cl;
1999 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2001 struct cbq_class *cl = (struct cbq_class*)arg;
2006 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2008 struct cbq_sched_data *q = qdisc_priv(sch);
2009 struct cbq_class *cl;
2010 struct hlist_node *n;
2016 for (h = 0; h < q->clhash.hashsize; h++) {
2017 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
2018 if (arg->count < arg->skip) {
2022 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2031 static const struct Qdisc_class_ops cbq_class_ops = {
2034 .qlen_notify = cbq_qlen_notify,
2037 .change = cbq_change_class,
2038 .delete = cbq_delete,
2040 .tcf_chain = cbq_find_tcf,
2041 .bind_tcf = cbq_bind_filter,
2042 .unbind_tcf = cbq_unbind_filter,
2043 .dump = cbq_dump_class,
2044 .dump_stats = cbq_dump_class_stats,
2047 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2049 .cl_ops = &cbq_class_ops,
2051 .priv_size = sizeof(struct cbq_sched_data),
2052 .enqueue = cbq_enqueue,
2053 .dequeue = cbq_dequeue,
2054 .requeue = cbq_requeue,
2058 .destroy = cbq_destroy,
2061 .dump_stats = cbq_dump_stats,
2062 .owner = THIS_MODULE,
2065 static int __init cbq_module_init(void)
2067 return register_qdisc(&cbq_qdisc_ops);
2069 static void __exit cbq_module_exit(void)
2071 unregister_qdisc(&cbq_qdisc_ops);
2073 module_init(cbq_module_init)
2074 module_exit(cbq_module_exit)
2075 MODULE_LICENSE("GPL");