From 87116159517ecf6b9cf62a136f2935a63833c485 Mon Sep 17 00:00:00 2001 From: Andy Walls Date: Mon, 13 Apr 2009 22:42:43 -0300 Subject: [PATCH] V4L/DVB (11616): cx18: Add a work queue for deferring empty buffer handoffs to the firmware This change defers sending all CX18_CPU_DE_SET_MDL commands, for a stream with an ongoing capture, by adding a work queue to handle sending such commands when needed. This prevents any sleeps, caused by notifying the firmware of new usable buffers, when a V4L2 application read() is being satisfied or when an incoming buffer is processed by the cx18-NN-in work queue thread. Signed-off-by: Andy Walls Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/cx18/cx18-driver.c | 92 ++++++++++++++++++----- drivers/media/video/cx18/cx18-driver.h | 32 ++++++++ drivers/media/video/cx18/cx18-streams.c | 99 +++++++++++++++++++++++-- drivers/media/video/cx18/cx18-streams.h | 5 +- 4 files changed, 202 insertions(+), 26 deletions(-) diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c index 79750208e042..658cfbb1b97e 100644 --- a/drivers/media/video/cx18/cx18-driver.c +++ b/drivers/media/video/cx18/cx18-driver.c @@ -546,6 +546,47 @@ done: cx->card_i2c = cx->card->i2c; } +static int __devinit cx18_create_in_workq(struct cx18 *cx) +{ + snprintf(cx->in_workq_name, sizeof(cx->in_workq_name), "%s-in", + cx->v4l2_dev.name); + cx->in_work_queue = create_singlethread_workqueue(cx->in_workq_name); + if (cx->in_work_queue == NULL) { + CX18_ERR("Unable to create incoming mailbox handler thread\n"); + return -ENOMEM; + } + return 0; +} + +static int __devinit cx18_create_out_workq(struct cx18 *cx) +{ + snprintf(cx->out_workq_name, sizeof(cx->out_workq_name), "%s-out", + cx->v4l2_dev.name); + cx->out_work_queue = create_workqueue(cx->out_workq_name); + if (cx->out_work_queue == NULL) { + CX18_ERR("Unable to create outgoing mailbox handler threads\n"); + return -ENOMEM; + } + return 0; +} + +static void __devinit cx18_init_in_work_orders(struct cx18 *cx) +{ + int i; + for (i = 0; i < CX18_MAX_IN_WORK_ORDERS; i++) { + cx->in_work_order[i].cx = cx; + cx->in_work_order[i].str = cx->epu_debug_str; + INIT_WORK(&cx->in_work_order[i].work, cx18_in_work_handler); + } +} + +static void __devinit cx18_init_out_work_orders(struct cx18 *cx) +{ + int i; + for (i = 0; i < CX18_MAX_OUT_WORK_ORDERS; i++) + INIT_WORK(&cx->out_work_order[i].work, cx18_out_work_handler); +} + /* Precondition: the cx18 structure has been memset to 0. Only the dev and instance fields have been filled in. No assumptions on the card type may be made here (see cx18_init_struct2 @@ -553,7 +594,7 @@ done: */ static int __devinit cx18_init_struct1(struct cx18 *cx) { - int i; + int ret; cx->base_addr = pci_resource_start(cx->pci_dev, 0); @@ -562,20 +603,19 @@ static int __devinit cx18_init_struct1(struct cx18 *cx) mutex_init(&cx->epu2apu_mb_lock); mutex_init(&cx->epu2cpu_mb_lock); - snprintf(cx->in_workq_name, sizeof(cx->in_workq_name), "%s-in", - cx->v4l2_dev.name); - cx->in_work_queue = create_singlethread_workqueue(cx->in_workq_name); - if (cx->in_work_queue == NULL) { - CX18_ERR("Unable to create incoming mailbox handler thread\n"); - return -ENOMEM; - } + ret = cx18_create_out_workq(cx); + if (ret) + return ret; - for (i = 0; i < CX18_MAX_IN_WORK_ORDERS; i++) { - cx->in_work_order[i].cx = cx; - cx->in_work_order[i].str = cx->epu_debug_str; - INIT_WORK(&cx->in_work_order[i].work, cx18_in_work_handler); + ret = cx18_create_in_workq(cx); + if (ret) { + destroy_workqueue(cx->out_work_queue); + return ret; } + cx18_init_out_work_orders(cx); + cx18_init_in_work_orders(cx); + /* start counting open_id at 1 */ cx->open_id = 1; @@ -761,17 +801,17 @@ static int __devinit cx18_probe(struct pci_dev *pci_dev, retval = -ENODEV; goto err; } - if (cx18_init_struct1(cx)) { - retval = -ENOMEM; + + retval = cx18_init_struct1(cx); + if (retval) goto err; - } CX18_DEBUG_INFO("base addr: 0x%08x\n", cx->base_addr); /* PCI Device Setup */ retval = cx18_setup_pci(cx, pci_dev, pci_id); if (retval != 0) - goto free_workqueue; + goto free_workqueues; /* map io memory */ CX18_DEBUG_INFO("attempting ioremap at 0x%08x len 0x%08x\n", @@ -945,8 +985,9 @@ free_map: cx18_iounmap(cx); free_mem: release_mem_region(cx->base_addr, CX18_MEM_SIZE); -free_workqueue: +free_workqueues: destroy_workqueue(cx->in_work_queue); + destroy_workqueue(cx->out_work_queue); err: if (retval == 0) retval = -ENODEV; @@ -1075,15 +1116,26 @@ static void cx18_remove(struct pci_dev *pci_dev) if (atomic_read(&cx->tot_capturing) > 0) cx18_stop_all_captures(cx); - /* Interrupts */ + /* Stop interrupts that cause incoming work to be queued */ cx18_sw1_irq_disable(cx, IRQ_CPU_TO_EPU | IRQ_APU_TO_EPU); + + /* Incoming work can cause outgoing work, so clean up incoming first */ + cx18_cancel_in_work_orders(cx); + + /* + * An outgoing work order can have the only pointer to a dynamically + * allocated buffer, so we need to flush outgoing work and not just + * cancel it, so we don't lose the pointer and leak memory. + */ + flush_workqueue(cx->out_work_queue); + + /* Stop ack interrupts that may have been needed for work to finish */ cx18_sw2_irq_disable(cx, IRQ_CPU_TO_EPU_ACK | IRQ_APU_TO_EPU_ACK); cx18_halt_firmware(cx); - cx18_cancel_in_work_orders(cx); - destroy_workqueue(cx->in_work_queue); + destroy_workqueue(cx->out_work_queue); cx18_streams_cleanup(cx, 1); diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h index e6f42d0cb2b3..62dca432fdbb 100644 --- a/drivers/media/video/cx18/cx18-driver.h +++ b/drivers/media/video/cx18/cx18-driver.h @@ -254,6 +254,7 @@ struct cx18_options { #define CX18_F_S_INTERNAL_USE 5 /* this stream is used internally (sliced VBI processing) */ #define CX18_F_S_STREAMOFF 7 /* signal end of stream EOS */ #define CX18_F_S_APPL_IO 8 /* this stream is used read/written by an application */ +#define CX18_F_S_STOPPING 9 /* telling the fw to stop capturing */ /* per-cx18, i_flags */ #define CX18_F_I_LOADED_FW 0 /* Loaded firmware 1st time */ @@ -324,6 +325,33 @@ struct cx18_in_work_order { char *str; }; +/* + * There are 2 types of deferrable tasks that send messages out to the firmware: + * 1. Sending individual buffers back to the firmware + * 2. Sending as many free buffers for a stream from q_free as we can to the fw + * + * The worst case scenario for multiple simultaneous streams is + * TS, YUV, PCM, VBI, MPEG, and IDX all going at once. + * + * We try to load the firmware queue with as many free buffers as possible, + * whenever we get a buffer back for a stream. For the TS we return the single + * buffer to the firmware at that time as well. For all other streams, we + * return single buffers to the firmware as the application drains them. + * + * 6 streams * 2 sets of orders * (1 single buf + 1 load fw from q_free) + * = 24 work orders should cover our needs, provided the applications read + * at a fairly steady rate. If apps don't, we fall back to non-deferred + * operation, when no cx18_out_work_orders are available for use. + */ +#define CX18_MAX_OUT_WORK_ORDERS (24) + +struct cx18_out_work_order { + struct work_struct work; + atomic_t pending; + struct cx18_stream *s; + struct cx18_buffer *buf; /* buf == NULL, means load fw from q_free */ +}; + #define CX18_INVALID_TASK_HANDLE 0xffffffff struct cx18_stream { @@ -573,6 +601,10 @@ struct cx18 { struct cx18_in_work_order in_work_order[CX18_MAX_IN_WORK_ORDERS]; char epu_debug_str[256]; /* CX18_EPU_DEBUG is rare: use shared space */ + struct workqueue_struct *out_work_queue; + char out_workq_name[12]; /* "cx18-NN-out" */ + struct cx18_out_work_order out_work_order[CX18_MAX_OUT_WORK_ORDERS]; + /* i2c */ struct i2c_adapter i2c_adap[2]; struct i2c_algo_bit_data i2c_algo[2]; diff --git a/drivers/media/video/cx18/cx18-streams.c b/drivers/media/video/cx18/cx18-streams.c index 0932b76b2373..bbeb01c5cf32 100644 --- a/drivers/media/video/cx18/cx18-streams.c +++ b/drivers/media/video/cx18/cx18-streams.c @@ -431,14 +431,16 @@ static void cx18_vbi_setup(struct cx18_stream *s) cx18_api(cx, CX18_CPU_SET_RAW_VBI_PARAM, 6, data); } -struct cx18_queue *cx18_stream_put_buf_fw(struct cx18_stream *s, - struct cx18_buffer *buf) +static +struct cx18_queue *_cx18_stream_put_buf_fw(struct cx18_stream *s, + struct cx18_buffer *buf) { struct cx18 *cx = s->cx; struct cx18_queue *q; /* Don't give it to the firmware, if we're not running a capture */ if (s->handle == CX18_INVALID_TASK_HANDLE || + test_bit(CX18_F_S_STOPPING, &s->s_flags) || !test_bit(CX18_F_S_STREAMING, &s->s_flags)) return cx18_enqueue(s, buf, &s->q_free); @@ -453,7 +455,8 @@ struct cx18_queue *cx18_stream_put_buf_fw(struct cx18_stream *s, return q; } -void cx18_stream_load_fw_queue(struct cx18_stream *s) +static +void _cx18_stream_load_fw_queue(struct cx18_stream *s) { struct cx18_queue *q; struct cx18_buffer *buf; @@ -467,11 +470,93 @@ void cx18_stream_load_fw_queue(struct cx18_stream *s) buf = cx18_dequeue(s, &s->q_free); if (buf == NULL) break; - q = cx18_stream_put_buf_fw(s, buf); + q = _cx18_stream_put_buf_fw(s, buf); } while (atomic_read(&s->q_busy.buffers) < CX18_MAX_FW_MDLS_PER_STREAM && q == &s->q_busy); } +static inline +void free_out_work_order(struct cx18_out_work_order *order) +{ + atomic_set(&order->pending, 0); +} + +void cx18_out_work_handler(struct work_struct *work) +{ + struct cx18_out_work_order *order = + container_of(work, struct cx18_out_work_order, work); + struct cx18_stream *s = order->s; + struct cx18_buffer *buf = order->buf; + + free_out_work_order(order); + + if (buf == NULL) + _cx18_stream_load_fw_queue(s); + else + _cx18_stream_put_buf_fw(s, buf); +} + +static +struct cx18_out_work_order *alloc_out_work_order(struct cx18 *cx) +{ + int i; + struct cx18_out_work_order *order = NULL; + + for (i = 0; i < CX18_MAX_OUT_WORK_ORDERS; i++) { + /* + * We need "pending" to be atomic to inspect & set its contents + * 1. "pending" is only set to 1 here, but needs multiple access + * protection + * 2. work handler threads only clear "pending" and only + * on one, particular work order at a time, per handler thread. + */ + if (atomic_add_unless(&cx->out_work_order[i].pending, 1, 1)) { + order = &cx->out_work_order[i]; + break; + } + } + return order; +} + +struct cx18_queue *cx18_stream_put_buf_fw(struct cx18_stream *s, + struct cx18_buffer *buf) +{ + struct cx18 *cx = s->cx; + struct cx18_out_work_order *order; + + order = alloc_out_work_order(cx); + if (order == NULL) { + CX18_DEBUG_WARN("No blank, outgoing-mailbox, deferred-work, " + "order forms available; sending buffer %u back " + "to the firmware immediately for stream %s\n", + buf->id, s->name); + return _cx18_stream_put_buf_fw(s, buf); + } + order->s = s; + order->buf = buf; + queue_work(cx->out_work_queue, &order->work); + return NULL; +} + +void cx18_stream_load_fw_queue(struct cx18_stream *s) +{ + struct cx18 *cx = s->cx; + struct cx18_out_work_order *order; + + order = alloc_out_work_order(cx); + if (order == NULL) { + CX18_DEBUG_WARN("No blank, outgoing-mailbox, deferred-work, " + "order forms available; filling the firmware " + "buffer queue immediately for stream %s\n", + s->name); + _cx18_stream_load_fw_queue(s); + return; + } + order->s = s; + order->buf = NULL; /* Indicates to load the fw queue */ + queue_work(cx->out_work_queue, &order->work); +} + int cx18_start_v4l2_encode_stream(struct cx18_stream *s) { u32 data[MAX_MB_ARGUMENTS]; @@ -607,12 +692,13 @@ int cx18_start_v4l2_encode_stream(struct cx18_stream *s) cx18_writel(cx, s->buf_size, &cx->scb->cpu_mdl[buf->id].length); } mutex_unlock(&s->qlock); - cx18_stream_load_fw_queue(s); + _cx18_stream_load_fw_queue(s); /* begin_capture */ if (cx18_vapi(cx, CX18_CPU_CAPTURE_START, 1, s->handle)) { CX18_DEBUG_WARN("Error starting capture!\n"); /* Ensure we're really not capturing before releasing MDLs */ + set_bit(CX18_F_S_STOPPING, &s->s_flags); if (s->type == CX18_ENC_STREAM_TYPE_MPG) cx18_vapi(cx, CX18_CPU_CAPTURE_STOP, 2, s->handle, 1); else @@ -622,6 +708,7 @@ int cx18_start_v4l2_encode_stream(struct cx18_stream *s) cx18_vapi(cx, CX18_CPU_DE_RELEASE_MDL, 1, s->handle); cx18_vapi(cx, CX18_DESTROY_TASK, 1, s->handle); s->handle = CX18_INVALID_TASK_HANDLE; + clear_bit(CX18_F_S_STOPPING, &s->s_flags); if (atomic_read(&cx->tot_capturing) == 0) { set_bit(CX18_F_I_EOS, &cx->i_flags); cx18_write_reg(cx, 5, CX18_DSP0_INTERRUPT_MASK); @@ -666,6 +753,7 @@ int cx18_stop_v4l2_encode_stream(struct cx18_stream *s, int gop_end) if (atomic_read(&cx->tot_capturing) == 0) return 0; + set_bit(CX18_F_S_STOPPING, &s->s_flags); if (s->type == CX18_ENC_STREAM_TYPE_MPG) cx18_vapi(cx, CX18_CPU_CAPTURE_STOP, 2, s->handle, !gop_end); else @@ -689,6 +777,7 @@ int cx18_stop_v4l2_encode_stream(struct cx18_stream *s, int gop_end) cx18_vapi(cx, CX18_DESTROY_TASK, 1, s->handle); s->handle = CX18_INVALID_TASK_HANDLE; + clear_bit(CX18_F_S_STOPPING, &s->s_flags); if (atomic_read(&cx->tot_capturing) > 0) return 0; diff --git a/drivers/media/video/cx18/cx18-streams.h b/drivers/media/video/cx18/cx18-streams.h index 420e0a172945..1fdcfffb07ed 100644 --- a/drivers/media/video/cx18/cx18-streams.h +++ b/drivers/media/video/cx18/cx18-streams.h @@ -28,10 +28,13 @@ int cx18_streams_setup(struct cx18 *cx); int cx18_streams_register(struct cx18 *cx); void cx18_streams_cleanup(struct cx18 *cx, int unregister); -/* Capture related */ +/* Related to submission of buffers to firmware */ void cx18_stream_load_fw_queue(struct cx18_stream *s); struct cx18_queue *cx18_stream_put_buf_fw(struct cx18_stream *s, struct cx18_buffer *buf); +void cx18_out_work_handler(struct work_struct *work); + +/* Capture related */ int cx18_start_v4l2_encode_stream(struct cx18_stream *s); int cx18_stop_v4l2_encode_stream(struct cx18_stream *s, int gop_end); -- 2.34.1