RDMA/cxgb4: Remove kfifo usage
drivers/infiniband/hw/cxgb4/device.c (firefly-linux-kernel-4.4.55.git)
1 /*
2  * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/module.h>
33 #include <linux/moduleparam.h>
34 #include <linux/debugfs.h>
35
36 #include <rdma/ib_verbs.h>
37
38 #include "iw_cxgb4.h"
39
40 #define DRV_VERSION "0.1"
41
42 MODULE_AUTHOR("Steve Wise");
43 MODULE_DESCRIPTION("Chelsio T4 RDMA Driver");
44 MODULE_LICENSE("Dual BSD/GPL");
45 MODULE_VERSION(DRV_VERSION);
46
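/*
 * Per-adapter context handed to us by the cxgb4 lower-level driver (LLD).
 * One uld_ctx is allocated in c4iw_uld_add() for each T4 adapter; the RDMA
 * device itself (ctx->dev) is only created via c4iw_alloc() once the LLD
 * reports CXGB4_STATE_UP.
 */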
47 struct uld_ctx {
48         struct list_head entry;
49         struct cxgb4_lld_info lldi;
50         struct c4iw_dev *dev;
51 };
52
53 static LIST_HEAD(uld_ctx_list);
54 static DEFINE_MUTEX(dev_mutex);
55
56 static struct dentry *c4iw_debugfs_root;
57
58 struct c4iw_debugfs_data {
59         struct c4iw_dev *devp;
60         char *buf;
61         int bufsize;
62         int pos;
63 };
64
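/*
 * Common pattern for the "qps" and "stags" debugfs files: open() walks the
 * relevant idr twice under devp->lock, first with count_idrs() to size the
 * dump buffer, then with a dump callback that snprintf()s one line per
 * object into buf at offset pos.  read() then simply copies the
 * pre-rendered buffer out via simple_read_from_buffer().
 */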
65 static int count_idrs(int id, void *p, void *data)
66 {
67         int *countp = data;
68
69         *countp = *countp + 1;
70         return 0;
71 }
72
73 static ssize_t debugfs_read(struct file *file, char __user *buf, size_t count,
74                             loff_t *ppos)
75 {
76         struct c4iw_debugfs_data *d = file->private_data;
77
78         return simple_read_from_buffer(buf, count, ppos, d->buf, d->pos);
79 }
80
81 static int dump_qp(int id, void *p, void *data)
82 {
83         struct c4iw_qp *qp = p;
84         struct c4iw_debugfs_data *qpd = data;
85         int space;
86         int cc;
87
88         if (id != qp->wq.sq.qid)
89                 return 0;
90
91         space = qpd->bufsize - qpd->pos - 1;
92         if (space == 0)
93                 return 1;
94
95         if (qp->ep)
96                 cc = snprintf(qpd->buf + qpd->pos, space,
97                              "qp sq id %u rq id %u state %u onchip %u "
98                              "ep tid %u state %u %pI4:%u->%pI4:%u\n",
99                              qp->wq.sq.qid, qp->wq.rq.qid, (int)qp->attr.state,
100                              qp->wq.sq.flags & T4_SQ_ONCHIP,
101                              qp->ep->hwtid, (int)qp->ep->com.state,
102                              &qp->ep->com.local_addr.sin_addr.s_addr,
103                              ntohs(qp->ep->com.local_addr.sin_port),
104                              &qp->ep->com.remote_addr.sin_addr.s_addr,
105                              ntohs(qp->ep->com.remote_addr.sin_port));
106         else
107                 cc = snprintf(qpd->buf + qpd->pos, space,
108                              "qp sq id %u rq id %u state %u onchip %u\n",
109                               qp->wq.sq.qid, qp->wq.rq.qid,
110                               (int)qp->attr.state,
111                               qp->wq.sq.flags & T4_SQ_ONCHIP);
112         if (cc < space)
113                 qpd->pos += cc;
114         return 0;
115 }
116
117 static int qp_release(struct inode *inode, struct file *file)
118 {
119         struct c4iw_debugfs_data *qpd = file->private_data;
120         if (!qpd) {
121                 printk(KERN_INFO "%s null qpd?\n", __func__);
122                 return 0;
123         }
124         vfree(qpd->buf);
125         kfree(qpd);
126         return 0;
127 }
128
129 static int qp_open(struct inode *inode, struct file *file)
130 {
131         struct c4iw_debugfs_data *qpd;
132         int ret = 0;
133         int count = 1;
134
135         qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
136         if (!qpd) {
137                 ret = -ENOMEM;
138                 goto out;
139         }
140         qpd->devp = inode->i_private;
141         qpd->pos = 0;
142
143         spin_lock_irq(&qpd->devp->lock);
144         idr_for_each(&qpd->devp->qpidr, count_idrs, &count);
145         spin_unlock_irq(&qpd->devp->lock);
146
147         qpd->bufsize = count * 128;
148         qpd->buf = vmalloc(qpd->bufsize);
149         if (!qpd->buf) {
150                 ret = -ENOMEM;
151                 goto err1;
152         }
153
154         spin_lock_irq(&qpd->devp->lock);
155         idr_for_each(&qpd->devp->qpidr, dump_qp, qpd);
156         spin_unlock_irq(&qpd->devp->lock);
157
158         qpd->buf[qpd->pos++] = 0;
159         file->private_data = qpd;
160         goto out;
161 err1:
162         kfree(qpd);
163 out:
164         return ret;
165 }
166
167 static const struct file_operations qp_debugfs_fops = {
168         .owner   = THIS_MODULE,
169         .open    = qp_open,
170         .release = qp_release,
171         .read    = debugfs_read,
172         .llseek  = default_llseek,
173 };
174
175 static int dump_stag(int id, void *p, void *data)
176 {
177         struct c4iw_debugfs_data *stagd = data;
178         int space;
179         int cc;
180
181         space = stagd->bufsize - stagd->pos - 1;
182         if (space == 0)
183                 return 1;
184
185         cc = snprintf(stagd->buf + stagd->pos, space, "0x%x\n", id<<8);
186         if (cc < space)
187                 stagd->pos += cc;
188         return 0;
189 }
190
191 static int stag_release(struct inode *inode, struct file *file)
192 {
193         struct c4iw_debugfs_data *stagd = file->private_data;
194         if (!stagd) {
195                 printk(KERN_INFO "%s null stagd?\n", __func__);
196                 return 0;
197         }
198         kfree(stagd->buf);
199         kfree(stagd);
200         return 0;
201 }
202
203 static int stag_open(struct inode *inode, struct file *file)
204 {
205         struct c4iw_debugfs_data *stagd;
206         int ret = 0;
207         int count = 1;
208
209         stagd = kmalloc(sizeof *stagd, GFP_KERNEL);
210         if (!stagd) {
211                 ret = -ENOMEM;
212                 goto out;
213         }
214         stagd->devp = inode->i_private;
215         stagd->pos = 0;
216
217         spin_lock_irq(&stagd->devp->lock);
218         idr_for_each(&stagd->devp->mmidr, count_idrs, &count);
219         spin_unlock_irq(&stagd->devp->lock);
220
221         stagd->bufsize = count * sizeof("0x12345678\n");
222         stagd->buf = kmalloc(stagd->bufsize, GFP_KERNEL);
223         if (!stagd->buf) {
224                 ret = -ENOMEM;
225                 goto err1;
226         }
227
228         spin_lock_irq(&stagd->devp->lock);
229         idr_for_each(&stagd->devp->mmidr, dump_stag, stagd);
230         spin_unlock_irq(&stagd->devp->lock);
231
232         stagd->buf[stagd->pos++] = 0;
233         file->private_data = stagd;
234         goto out;
235 err1:
236         kfree(stagd);
237 out:
238         return ret;
239 }
240
241 static const struct file_operations stag_debugfs_fops = {
242         .owner   = THIS_MODULE,
243         .open    = stag_open,
244         .release = stag_release,
245         .read    = debugfs_read,
246         .llseek  = default_llseek,
247 };
248
249 static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};
250
251 static int stats_show(struct seq_file *seq, void *v)
252 {
253         struct c4iw_dev *dev = seq->private;
254
255         seq_printf(seq, "   Object: %10s %10s %10s %10s\n", "Total", "Current",
256                    "Max", "Fail");
257         seq_printf(seq, "     PDID: %10llu %10llu %10llu %10llu\n",
258                         dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
259                         dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
260         seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
261                         dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
262                         dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
263         seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
264                         dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
265                         dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
266         seq_printf(seq, "   PBLMEM: %10llu %10llu %10llu %10llu\n",
267                         dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
268                         dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
269         seq_printf(seq, "   RQTMEM: %10llu %10llu %10llu %10llu\n",
270                         dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
271                         dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
272         seq_printf(seq, "  OCQPMEM: %10llu %10llu %10llu %10llu\n",
273                         dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
274                         dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
275         seq_printf(seq, "  DB FULL: %10llu\n", dev->rdev.stats.db_full);
276         seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
277         seq_printf(seq, "  DB DROP: %10llu\n", dev->rdev.stats.db_drop);
278         seq_printf(seq, " DB State: %s Transitions %llu\n",
279                    db_state_str[dev->db_state],
280                    dev->rdev.stats.db_state_transitions);
281         return 0;
282 }
283
284 static int stats_open(struct inode *inode, struct file *file)
285 {
286         return single_open(file, stats_show, inode->i_private);
287 }
288
289 static ssize_t stats_clear(struct file *file, const char __user *buf,
290                 size_t count, loff_t *pos)
291 {
292         struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
293
294         mutex_lock(&dev->rdev.stats.lock);
295         dev->rdev.stats.pd.max = 0;
296         dev->rdev.stats.pd.fail = 0;
297         dev->rdev.stats.qid.max = 0;
298         dev->rdev.stats.qid.fail = 0;
299         dev->rdev.stats.stag.max = 0;
300         dev->rdev.stats.stag.fail = 0;
301         dev->rdev.stats.pbl.max = 0;
302         dev->rdev.stats.pbl.fail = 0;
303         dev->rdev.stats.rqt.max = 0;
304         dev->rdev.stats.rqt.fail = 0;
305         dev->rdev.stats.ocqp.max = 0;
306         dev->rdev.stats.ocqp.fail = 0;
307         dev->rdev.stats.db_full = 0;
308         dev->rdev.stats.db_empty = 0;
309         dev->rdev.stats.db_drop = 0;
310         dev->rdev.stats.db_state_transitions = 0;
311         mutex_unlock(&dev->rdev.stats.lock);
312         return count;
313 }
314
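/*
 * The "stats" file is both readable and writable: reading it renders the
 * resource accounting table via stats_show(), while writing anything to it
 * resets the Max/Fail columns and the doorbell counters via stats_clear().
 * Illustrative usage (the exact path is an assumption, built from the
 * DRV_NAME directory created in c4iw_init_module() and the per-device
 * PCI-name directory created in c4iw_alloc()):
 *
 *   cat /sys/kernel/debug/iw_cxgb4/0000:02:00.4/stats
 *   echo 1 > /sys/kernel/debug/iw_cxgb4/0000:02:00.4/stats
 */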
315 static const struct file_operations stats_debugfs_fops = {
316         .owner   = THIS_MODULE,
317         .open    = stats_open,
318         .release = single_release,
319         .read    = seq_read,
320         .llseek  = seq_lseek,
321         .write   = stats_clear,
322 };
323
324 static int setup_debugfs(struct c4iw_dev *devp)
325 {
326         struct dentry *de;
327
328         if (!devp->debugfs_root)
329                 return -1;
330
331         de = debugfs_create_file("qps", S_IWUSR, devp->debugfs_root,
332                                  (void *)devp, &qp_debugfs_fops);
333         if (de && de->d_inode)
334                 de->d_inode->i_size = 4096;
335
336         de = debugfs_create_file("stags", S_IWUSR, devp->debugfs_root,
337                                  (void *)devp, &stag_debugfs_fops);
338         if (de && de->d_inode)
339                 de->d_inode->i_size = 4096;
340
341         de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root,
342                         (void *)devp, &stats_debugfs_fops);
343         if (de && de->d_inode)
344                 de->d_inode->i_size = 4096;
345
346         return 0;
347 }
348
349 void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
350                                struct c4iw_dev_ucontext *uctx)
351 {
352         struct list_head *pos, *nxt;
353         struct c4iw_qid_list *entry;
354
355         mutex_lock(&uctx->lock);
356         list_for_each_safe(pos, nxt, &uctx->qpids) {
357                 entry = list_entry(pos, struct c4iw_qid_list, entry);
358                 list_del_init(&entry->entry);
359                 if (!(entry->qid & rdev->qpmask)) {
360                         c4iw_put_resource(&rdev->resource.qid_table,
361                                           entry->qid);
362                         mutex_lock(&rdev->stats.lock);
363                         rdev->stats.qid.cur -= rdev->qpmask + 1;
364                         mutex_unlock(&rdev->stats.lock);
365                 }
366                 kfree(entry);
367         }
368
369         list_for_each_safe(pos, nxt, &uctx->cqids) {
370                 entry = list_entry(pos, struct c4iw_qid_list, entry);
371                 list_del_init(&entry->entry);
372                 kfree(entry);
373         }
374         mutex_unlock(&uctx->lock);
375 }
376
377 void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
378                             struct c4iw_dev_ucontext *uctx)
379 {
380         INIT_LIST_HEAD(&uctx->qpids);
381         INIT_LIST_HEAD(&uctx->cqids);
382         mutex_init(&uctx->lock);
383 }
384
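/*
 * Bring up the shared rdev state: derive the doorbell shifts and masks from
 * the LLD info, seed the resource accounting totals, and create the
 * qid/stag/pd resource tables and the PBL, RQT and on-chip queue memory
 * pools.
 */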
385 /* Caller takes care of locking if needed */
386 static int c4iw_rdev_open(struct c4iw_rdev *rdev)
387 {
388         int err;
389
390         c4iw_init_dev_ucontext(rdev, &rdev->uctx);
391
392         /*
393          * qpshift is the number of bits to shift the qpid left in order
394          * to get the correct address of the doorbell for that qp.
395          */
396         rdev->qpshift = PAGE_SHIFT - ilog2(rdev->lldi.udb_density);
397         rdev->qpmask = rdev->lldi.udb_density - 1;
398         rdev->cqshift = PAGE_SHIFT - ilog2(rdev->lldi.ucq_density);
399         rdev->cqmask = rdev->lldi.ucq_density - 1;
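        /*
         * Illustrative example (actual values depend on the adapter
         * configuration): with 4KB pages (PAGE_SHIFT == 12) and
         * udb_density == 16, qpshift == 8 and qpmask == 0xf, so each qp
         * owns a 256-byte slice of a shared user doorbell page and the
         * low 4 bits of the qid select the slice within that page.
         */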
400         PDBG("%s dev %s stag start 0x%0x size 0x%0x num stags %d "
401              "pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x "
402              "qp qid start %u size %u cq qid start %u size %u\n",
403              __func__, pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
404              rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
405              rdev->lldi.vr->pbl.start,
406              rdev->lldi.vr->pbl.size, rdev->lldi.vr->rq.start,
407              rdev->lldi.vr->rq.size,
408              rdev->lldi.vr->qp.start,
409              rdev->lldi.vr->qp.size,
410              rdev->lldi.vr->cq.start,
411              rdev->lldi.vr->cq.size);
412         PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu "
413              "qpmask 0x%x cqshift %lu cqmask 0x%x\n",
414              (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
415              (void *)pci_resource_start(rdev->lldi.pdev, 2),
416              rdev->lldi.db_reg,
417              rdev->lldi.gts_reg,
418              rdev->qpshift, rdev->qpmask,
419              rdev->cqshift, rdev->cqmask);
420
421         if (c4iw_num_stags(rdev) == 0) {
422                 err = -EINVAL;
423                 goto err1;
424         }
425
426         rdev->stats.pd.total = T4_MAX_NUM_PD;
427         rdev->stats.stag.total = rdev->lldi.vr->stag.size;
428         rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
429         rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
430         rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
431         rdev->stats.qid.total = rdev->lldi.vr->qp.size;
432
433         err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
434         if (err) {
435                 printk(KERN_ERR MOD "error %d initializing resources\n", err);
436                 goto err1;
437         }
438         err = c4iw_pblpool_create(rdev);
439         if (err) {
440                 printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
441                 goto err2;
442         }
443         err = c4iw_rqtpool_create(rdev);
444         if (err) {
445                 printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
446                 goto err3;
447         }
448         err = c4iw_ocqp_pool_create(rdev);
449         if (err) {
450                 printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
451                 goto err4;
452         }
453         return 0;
454 err4:
455         c4iw_rqtpool_destroy(rdev);
456 err3:
457         c4iw_pblpool_destroy(rdev);
458 err2:
459         c4iw_destroy_resource(&rdev->resource);
460 err1:
461         return err;
462 }
463
464 static void c4iw_rdev_close(struct c4iw_rdev *rdev)
465 {
466         c4iw_pblpool_destroy(rdev);
467         c4iw_rqtpool_destroy(rdev);
468         c4iw_destroy_resource(&rdev->resource);
469 }
470
471 static void c4iw_dealloc(struct uld_ctx *ctx)
472 {
473         c4iw_rdev_close(&ctx->dev->rdev);
474         idr_destroy(&ctx->dev->cqidr);
475         idr_destroy(&ctx->dev->qpidr);
476         idr_destroy(&ctx->dev->mmidr);
477         iounmap(ctx->dev->rdev.oc_mw_kva);
478         ib_dealloc_device(&ctx->dev->ibdev);
479         ctx->dev = NULL;
480 }
481
482 static void c4iw_remove(struct uld_ctx *ctx)
483 {
484         PDBG("%s c4iw_dev %p\n", __func__,  ctx->dev);
485         c4iw_unregister_device(ctx->dev);
486         c4iw_dealloc(ctx);
487 }
488
489 static int rdma_supported(const struct cxgb4_lld_info *infop)
490 {
491         return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
492                infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
493                infop->vr->cq.size > 0 && infop->vr->ocq.size > 0;
494 }
495
496 static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
497 {
498         struct c4iw_dev *devp;
499         int ret;
500
501         if (!rdma_supported(infop)) {
502                 printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
503                        pci_name(infop->pdev));
504                 return ERR_PTR(-ENOSYS);
505         }
506         devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
507         if (!devp) {
508                 printk(KERN_ERR MOD "Cannot allocate ib device\n");
509                 return ERR_PTR(-ENOMEM);
510         }
511         devp->rdev.lldi = *infop;
512
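        /*
         * The on-chip queue (OCQP) memory sits at the top of PCI BAR2:
         * oc_mw_pa is the physical address of that window and oc_mw_kva a
         * write-combining kernel mapping of it, sized to the ocq
         * virtual-resource region reported by the LLD.
         */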
513         devp->rdev.oc_mw_pa = pci_resource_start(devp->rdev.lldi.pdev, 2) +
514                 (pci_resource_len(devp->rdev.lldi.pdev, 2) -
515                  roundup_pow_of_two(devp->rdev.lldi.vr->ocq.size));
516         devp->rdev.oc_mw_kva = ioremap_wc(devp->rdev.oc_mw_pa,
517                                                devp->rdev.lldi.vr->ocq.size);
518
519         PDBG(KERN_INFO MOD "ocq memory: "
520                "hw_start 0x%x size %u mw_pa 0x%lx mw_kva %p\n",
521                devp->rdev.lldi.vr->ocq.start, devp->rdev.lldi.vr->ocq.size,
522                devp->rdev.oc_mw_pa, devp->rdev.oc_mw_kva);
523
524         ret = c4iw_rdev_open(&devp->rdev);
525         if (ret) {
526                 printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
527                 ib_dealloc_device(&devp->ibdev);
528                 return ERR_PTR(ret);
529         }
530
531         idr_init(&devp->cqidr);
532         idr_init(&devp->qpidr);
533         idr_init(&devp->mmidr);
534         spin_lock_init(&devp->lock);
535         mutex_init(&devp->rdev.stats.lock);
536         mutex_init(&devp->db_mutex);
537
538         if (c4iw_debugfs_root) {
539                 devp->debugfs_root = debugfs_create_dir(
540                                         pci_name(devp->rdev.lldi.pdev),
541                                         c4iw_debugfs_root);
542                 setup_debugfs(devp);
543         }
544         return devp;
545 }
546
547 static void *c4iw_uld_add(const struct cxgb4_lld_info *infop)
548 {
549         struct uld_ctx *ctx;
550         static int vers_printed;
551         int i;
552
553         if (!vers_printed++)
554                 printk(KERN_INFO MOD "Chelsio T4 RDMA Driver - version %s\n",
555                        DRV_VERSION);
556
557         ctx = kzalloc(sizeof *ctx, GFP_KERNEL);
558         if (!ctx) {
559                 ctx = ERR_PTR(-ENOMEM);
560                 goto out;
561         }
562         ctx->lldi = *infop;
563
564         PDBG("%s found device %s nchan %u nrxq %u ntxq %u nports %u\n",
565              __func__, pci_name(ctx->lldi.pdev),
566              ctx->lldi.nchan, ctx->lldi.nrxq,
567              ctx->lldi.ntxq, ctx->lldi.nports);
568
569         mutex_lock(&dev_mutex);
570         list_add_tail(&ctx->entry, &uld_ctx_list);
571         mutex_unlock(&dev_mutex);
572
573         for (i = 0; i < ctx->lldi.nrxq; i++)
574                 PDBG("rxqid[%u] %u\n", i, ctx->lldi.rxq_ids[i]);
575 out:
576         return ctx;
577 }
578
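/*
 * Ingress path from the cxgb4 LLD.  Three cases: a NULL gather list means
 * the CPL message fits in the 64-byte response descriptor and is copied
 * into a freshly allocated skb; CXGB4_MSG_AN is an asynchronous
 * notification carrying only a CQ qid, handed to c4iw_ev_handler(); any
 * other gather list is a real ingress packet converted with
 * cxgb4_pktgl_to_skb().  The CPL opcode then indexes the c4iw_handlers[]
 * dispatch table.
 */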
579 static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
580                         const struct pkt_gl *gl)
581 {
582         struct uld_ctx *ctx = handle;
583         struct c4iw_dev *dev = ctx->dev;
584         struct sk_buff *skb;
585         const struct cpl_act_establish *rpl;
586         unsigned int opcode;
587
588         if (gl == NULL) {
589                 /* omit RSS and rsp_ctrl at end of descriptor */
590                 unsigned int len = 64 - sizeof(struct rsp_ctrl) - 8;
591
592                 skb = alloc_skb(256, GFP_ATOMIC);
593                 if (!skb)
594                         goto nomem;
595                 __skb_put(skb, len);
596                 skb_copy_to_linear_data(skb, &rsp[1], len);
597         } else if (gl == CXGB4_MSG_AN) {
598                 const struct rsp_ctrl *rc = (void *)rsp;
599
600                 u32 qid = be32_to_cpu(rc->pldbuflen_qid);
601                 c4iw_ev_handler(dev, qid);
602                 return 0;
603         } else {
604                 skb = cxgb4_pktgl_to_skb(gl, 128, 128);
605                 if (unlikely(!skb))
606                         goto nomem;
607         }
608
609         rpl = cplhdr(skb);
610         opcode = rpl->ot.opcode;
611
612         if (c4iw_handlers[opcode])
613                 c4iw_handlers[opcode](dev, skb);
614         else
615                 printk(KERN_INFO "%s no handler opcode 0x%x...\n", __func__,
616                        opcode);
617
618         return 0;
619 nomem:
620         return -1;
621 }
622
623 static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
624 {
625         struct uld_ctx *ctx = handle;
626
627         PDBG("%s new_state %u\n", __func__, new_state);
628         switch (new_state) {
629         case CXGB4_STATE_UP:
630                 printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
631                 if (!ctx->dev) {
632                         int ret;
633
634                         ctx->dev = c4iw_alloc(&ctx->lldi);
635                         if (IS_ERR(ctx->dev)) {
636                                 printk(KERN_ERR MOD
637                                        "%s: initialization failed: %ld\n",
638                                        pci_name(ctx->lldi.pdev),
639                                        PTR_ERR(ctx->dev));
640                                 ctx->dev = NULL;
641                                 break;
642                         }
643                         ret = c4iw_register_device(ctx->dev);
644                         if (ret) {
645                                 printk(KERN_ERR MOD
646                                        "%s: RDMA registration failed: %d\n",
647                                        pci_name(ctx->lldi.pdev), ret);
648                                 c4iw_dealloc(ctx);
649                         }
650                 }
651                 break;
652         case CXGB4_STATE_DOWN:
653                 printk(KERN_INFO MOD "%s: Down\n",
654                        pci_name(ctx->lldi.pdev));
655                 if (ctx->dev)
656                         c4iw_remove(ctx);
657                 break;
658         case CXGB4_STATE_START_RECOVERY:
659                 printk(KERN_INFO MOD "%s: Fatal Error\n",
660                        pci_name(ctx->lldi.pdev));
661                 if (ctx->dev) {
662                         struct ib_event event;
663
664                         ctx->dev->rdev.flags |= T4_FATAL_ERROR;
665                         memset(&event, 0, sizeof event);
666                         event.event  = IB_EVENT_DEVICE_FATAL;
667                         event.device = &ctx->dev->ibdev;
668                         ib_dispatch_event(&event);
669                         c4iw_remove(ctx);
670                 }
671                 break;
672         case CXGB4_STATE_DETACH:
673                 printk(KERN_INFO MOD "%s: Detach\n",
674                        pci_name(ctx->lldi.pdev));
675                 if (ctx->dev)
676                         c4iw_remove(ctx);
677                 break;
678         }
679         return 0;
680 }
681
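/*
 * Doorbell flow control: when the LLD signals CXGB4_CONTROL_DB_FULL,
 * stop_queues() disables the user doorbells of every qp and moves the
 * device into FLOW_CONTROL; on CXGB4_CONTROL_DB_EMPTY, resume_queues()
 * re-enables them, but only once the qp count has dropped to
 * db_fc_threshold or below.
 */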
682 static int disable_qp_db(int id, void *p, void *data)
683 {
684         struct c4iw_qp *qp = p;
685
686         t4_disable_wq_db(&qp->wq);
687         return 0;
688 }
689
690 static void stop_queues(struct uld_ctx *ctx)
691 {
692         spin_lock_irq(&ctx->dev->lock);
693         if (ctx->dev->db_state == NORMAL) {
694                 ctx->dev->rdev.stats.db_state_transitions++;
695                 ctx->dev->db_state = FLOW_CONTROL;
696                 idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
697         }
698         spin_unlock_irq(&ctx->dev->lock);
699 }
700
701 static int enable_qp_db(int id, void *p, void *data)
702 {
703         struct c4iw_qp *qp = p;
704
705         t4_enable_wq_db(&qp->wq);
706         return 0;
707 }
708
709 static void resume_queues(struct uld_ctx *ctx)
710 {
711         spin_lock_irq(&ctx->dev->lock);
712         if (ctx->dev->qpcnt <= db_fc_threshold &&
713             ctx->dev->db_state == FLOW_CONTROL) {
714                 ctx->dev->db_state = NORMAL;
715                 ctx->dev->rdev.stats.db_state_transitions++;
716                 idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
717         }
718         spin_unlock_irq(&ctx->dev->lock);
719 }
720
721 struct qp_list {
722         unsigned idx;
723         struct c4iw_qp **qps;
724 };
725
726 static int add_and_ref_qp(int id, void *p, void *data)
727 {
728         struct qp_list *qp_listp = data;
729         struct c4iw_qp *qp = p;
730
731         c4iw_qp_add_ref(&qp->ibqp);
732         qp_listp->qps[qp_listp->idx++] = qp;
733         return 0;
734 }
735
736 static int count_qps(int id, void *p, void *data)
737 {
738         unsigned *countp = data;
739         (*countp)++;
740         return 0;
741 }
742
743 static void deref_qps(struct qp_list qp_list)
744 {
745         int idx;
746
747         for (idx = 0; idx < qp_list.idx; idx++)
748                 c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
749 }
750
751 static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
752 {
753         int idx;
754         int ret;
755
756         for (idx = 0; idx < qp_list->idx; idx++) {
757                 struct c4iw_qp *qp = qp_list->qps[idx];
758
759                 ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
760                                           qp->wq.sq.qid,
761                                           t4_sq_host_wq_pidx(&qp->wq),
762                                           t4_sq_wq_size(&qp->wq));
763                 if (ret) {
764                         printk(KERN_ERR MOD "%s: Fatal error - "
765                                "DB overflow recovery failed - "
766                                "error syncing SQ qid %u\n",
767                                pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
768                         return;
769                 }
770
771                 ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
772                                           qp->wq.rq.qid,
773                                           t4_rq_host_wq_pidx(&qp->wq),
774                                           t4_rq_wq_size(&qp->wq));
775
776                 if (ret) {
777                         printk(KERN_ERR MOD "%s: Fatal error - "
778                                "DB overflow recovery failed - "
779                                "error syncing RQ qid %u\n",
780                                pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
781                         return;
782                 }
783
784                 /* Wait for the dbfifo to drain */
785                 while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
786                         set_current_state(TASK_UNINTERRUPTIBLE);
787                         schedule_timeout(usecs_to_jiffies(10));
788                 }
789         }
790 }
791
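/*
 * Doorbell drop recovery, in outline: block kernel doorbell ringers via
 * db_mutex, move every qp into RECOVERY with user doorbells disabled, wait
 * for the hardware doorbell FIFO to drain, flush the SGE egress context
 * cache, then replay the host producer index of each SQ and RQ with
 * cxgb4_sync_txq_pidx() before re-enabling doorbells (or staying in
 * FLOW_CONTROL if too many qps are still active).
 */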
792 static void recover_queues(struct uld_ctx *ctx)
793 {
794         int count = 0;
795         struct qp_list qp_list;
796         int ret;
797
798         /* lock out kernel db ringers */
799         mutex_lock(&ctx->dev->db_mutex);
800
801         /* put all queues into recovery mode */
802         spin_lock_irq(&ctx->dev->lock);
803         ctx->dev->db_state = RECOVERY;
804         ctx->dev->rdev.stats.db_state_transitions++;
805         idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
806         spin_unlock_irq(&ctx->dev->lock);
807
808         /* slow everybody down */
809         set_current_state(TASK_UNINTERRUPTIBLE);
810         schedule_timeout(usecs_to_jiffies(1000));
811
812         /* Wait for the dbfifo to completely drain. */
813         while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
814                 set_current_state(TASK_UNINTERRUPTIBLE);
815                 schedule_timeout(usecs_to_jiffies(10));
816         }
817
818         /* flush the SGE contexts */
819         ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
820         if (ret) {
821                 printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
822                        pci_name(ctx->lldi.pdev));
823                 goto out;
824         }
825
826         /* Count active queues so we can build a list of queues to recover */
827         spin_lock_irq(&ctx->dev->lock);
828         idr_for_each(&ctx->dev->qpidr, count_qps, &count);
829
830         qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
831         if (!qp_list.qps) {
832                 printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
833                        pci_name(ctx->lldi.pdev));
834                 spin_unlock_irq(&ctx->dev->lock);
835                 goto out;
836         }
837         qp_list.idx = 0;
838
839         /* add and ref each qp so it doesn't get freed */
840         idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
841
842         spin_unlock_irq(&ctx->dev->lock);
843
844         /* now traverse the list in a safe context to recover the db state */
845         recover_lost_dbs(ctx, &qp_list);
846
847         /* we're almost done!  deref the qps and clean up */
848         deref_qps(qp_list);
849         kfree(qp_list.qps);
850
851         /* Wait for the dbfifo to completely drain again */
852         while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
853                 set_current_state(TASK_UNINTERRUPTIBLE);
854                 schedule_timeout(usecs_to_jiffies(10));
855         }
856
857         /* resume the queues */
858         spin_lock_irq(&ctx->dev->lock);
859         if (ctx->dev->qpcnt > db_fc_threshold)
860                 ctx->dev->db_state = FLOW_CONTROL;
861         else {
862                 ctx->dev->db_state = NORMAL;
863                 idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
864         }
865         ctx->dev->rdev.stats.db_state_transitions++;
866         spin_unlock_irq(&ctx->dev->lock);
867
868 out:
869         /* start up kernel db ringers again */
870         mutex_unlock(&ctx->dev->db_mutex);
871 }
872
873 static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
874 {
875         struct uld_ctx *ctx = handle;
876
877         switch (control) {
878         case CXGB4_CONTROL_DB_FULL:
879                 stop_queues(ctx);
880                 mutex_lock(&ctx->dev->rdev.stats.lock);
881                 ctx->dev->rdev.stats.db_full++;
882                 mutex_unlock(&ctx->dev->rdev.stats.lock);
883                 break;
884         case CXGB4_CONTROL_DB_EMPTY:
885                 resume_queues(ctx);
886                 mutex_lock(&ctx->dev->rdev.stats.lock);
887                 ctx->dev->rdev.stats.db_empty++;
888                 mutex_unlock(&ctx->dev->rdev.stats.lock);
889                 break;
890         case CXGB4_CONTROL_DB_DROP:
891                 recover_queues(ctx);
892                 mutex_lock(&ctx->dev->rdev.stats.lock);
893                 ctx->dev->rdev.stats.db_drop++;
894                 mutex_unlock(&ctx->dev->rdev.stats.lock);
895                 break;
896         default:
897                 printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
898                        pci_name(ctx->lldi.pdev), control);
899                 break;
900         }
901         return 0;
902 }
903
904 static struct cxgb4_uld_info c4iw_uld_info = {
905         .name = DRV_NAME,
906         .add = c4iw_uld_add,
907         .rx_handler = c4iw_uld_rx_handler,
908         .state_change = c4iw_uld_state_change,
909         .control = c4iw_uld_control,
910 };
911
912 static int __init c4iw_init_module(void)
913 {
914         int err;
915
916         err = c4iw_cm_init();
917         if (err)
918                 return err;
919
920         c4iw_debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
921         if (!c4iw_debugfs_root)
922                 printk(KERN_WARNING MOD
923                        "could not create debugfs entry, continuing\n");
924
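        /*
         * Registering as a ULD makes cxgb4 call c4iw_uld_add() for every
         * adapter it has already probed, as well as for adapters probed
         * later.
         */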
925         cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
926
927         return 0;
928 }
929
930 static void __exit c4iw_exit_module(void)
931 {
932         struct uld_ctx *ctx, *tmp;
933
934         mutex_lock(&dev_mutex);
935         list_for_each_entry_safe(ctx, tmp, &uld_ctx_list, entry) {
936                 if (ctx->dev)
937                         c4iw_remove(ctx);
938                 kfree(ctx);
939         }
940         mutex_unlock(&dev_mutex);
941         cxgb4_unregister_uld(CXGB4_ULD_RDMA);
942         c4iw_cm_term();
943         debugfs_remove_recursive(c4iw_debugfs_root);
944 }
945
946 module_init(c4iw_init_module);
947 module_exit(c4iw_exit_module);