Skip to content

Commit 6c15d7a

Browse files
Ira Snyderozbenh
authored andcommitted
carma-fpga: fix race between data dumping and DMA callback
When the system is under heavy load, we occasionally saw a problem where the system would get a legitimate interrupt when they should be disabled. This was caused by the data_dma_cb() DMA callback unconditionally re-enabling FPGA interrupts even when data dumping is disabled. When data dumping was re-enabled, the irq handler would fire while a DMA was in progress. The "BUG_ON(priv->inflight != NULL);" during the second invocation of the DMA callback caused the system to crash. To fix the issue, the priv->enabled boolean is moved under the protection of the priv->lock spinlock. The DMA callback checks the boolean to know whether to re-enable FPGA interrupts before it returns. Now that it is fixed, the driver keeps FPGA interrupts disabled when it expects that they are disabled, fixing the bug. Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
1 parent 75ff85a commit 6c15d7a

File tree

1 file changed

+61
-40
lines changed

1 file changed

+61
-40
lines changed

drivers/misc/carma/carma-fpga.c

Lines changed: 61 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,9 @@ static void data_enable_interrupts(struct fpga_device *priv)
560560

561561
/* flush the writes */
562562
fpga_read_reg(priv, 0, MMAP_REG_STATUS);
563+
fpga_read_reg(priv, 1, MMAP_REG_STATUS);
564+
fpga_read_reg(priv, 2, MMAP_REG_STATUS);
565+
fpga_read_reg(priv, 3, MMAP_REG_STATUS);
563566

564567
/* switch back to the external interrupt source */
565568
iowrite32be(0x3F, priv->regs + SYS_IRQ_SOURCE_CTL);
@@ -591,8 +594,12 @@ static void data_dma_cb(void *data)
591594
list_move_tail(&priv->inflight->entry, &priv->used);
592595
priv->inflight = NULL;
593596

594-
/* clear the FPGA status and re-enable interrupts */
595-
data_enable_interrupts(priv);
597+
/*
598+
* If data dumping is still enabled, then clear the FPGA
599+
* status registers and re-enable FPGA interrupts
600+
*/
601+
if (priv->enabled)
602+
data_enable_interrupts(priv);
596603

597604
spin_unlock_irqrestore(&priv->lock, flags);
598605

@@ -708,6 +715,15 @@ static irqreturn_t data_irq(int irq, void *dev_id)
708715

709716
spin_lock(&priv->lock);
710717

718+
/*
719+
* This is an error case that should never happen.
720+
*
721+
* If this driver has a bug and manages to re-enable interrupts while
722+
* a DMA is in progress, then we will hit this statement and should
723+
* start paying attention immediately.
724+
*/
725+
BUG_ON(priv->inflight != NULL);
726+
711727
/* hide the interrupt by switching the IRQ driver to GPIO */
712728
data_disable_interrupts(priv);
713729

@@ -762,11 +778,15 @@ static irqreturn_t data_irq(int irq, void *dev_id)
762778
*/
763779
static int data_device_enable(struct fpga_device *priv)
764780
{
781+
bool enabled;
765782
u32 val;
766783
int ret;
767784

768785
/* multiple enables are safe: they do nothing */
769-
if (priv->enabled)
786+
spin_lock_irq(&priv->lock);
787+
enabled = priv->enabled;
788+
spin_unlock_irq(&priv->lock);
789+
if (enabled)
770790
return 0;
771791

772792
/* check that the FPGAs are programmed */
@@ -797,18 +817,23 @@ static int data_device_enable(struct fpga_device *priv)
797817
goto out_error;
798818
}
799819

820+
/* prevent the FPGAs from generating interrupts */
821+
data_disable_interrupts(priv);
822+
800823
/* hookup the irq handler */
801824
ret = request_irq(priv->irq, data_irq, IRQF_SHARED, drv_name, priv);
802825
if (ret) {
803826
dev_err(priv->dev, "unable to request IRQ handler\n");
804827
goto out_error;
805828
}
806829

807-
/* switch to the external FPGA IRQ line */
808-
data_enable_interrupts(priv);
809-
810-
/* success, we're enabled */
830+
/* allow the DMA callback to re-enable FPGA interrupts */
831+
spin_lock_irq(&priv->lock);
811832
priv->enabled = true;
833+
spin_unlock_irq(&priv->lock);
834+
835+
/* allow the FPGAs to generate interrupts */
836+
data_enable_interrupts(priv);
812837
return 0;
813838

814839
out_error:
@@ -834,41 +859,40 @@ static int data_device_enable(struct fpga_device *priv)
834859
*/
835860
static int data_device_disable(struct fpga_device *priv)
836861
{
837-
int ret;
862+
spin_lock_irq(&priv->lock);
838863

839864
/* allow multiple disable */
840-
if (!priv->enabled)
865+
if (!priv->enabled) {
866+
spin_unlock_irq(&priv->lock);
841867
return 0;
868+
}
869+
870+
/*
871+
* Mark the device disabled
872+
*
873+
* This stops DMA callbacks from re-enabling interrupts
874+
*/
875+
priv->enabled = false;
842876

843-
/* switch to the internal GPIO IRQ line */
877+
/* prevent the FPGAs from generating interrupts */
844878
data_disable_interrupts(priv);
845879

880+
/* wait until all ongoing DMA has finished */
881+
while (priv->inflight != NULL) {
882+
spin_unlock_irq(&priv->lock);
883+
wait_event(priv->wait, priv->inflight == NULL);
884+
spin_lock_irq(&priv->lock);
885+
}
886+
887+
spin_unlock_irq(&priv->lock);
888+
846889
/* unhook the irq handler */
847890
free_irq(priv->irq, priv);
848891

849-
/*
850-
* wait for all outstanding DMA to complete
851-
*
852-
* Device interrupts are disabled, therefore another buffer cannot
853-
* be marked inflight.
854-
*/
855-
ret = wait_event_interruptible(priv->wait, priv->inflight == NULL);
856-
if (ret)
857-
return ret;
858-
859892
/* free the correlation table */
860893
sg_free_table(&priv->corl_table);
861894
priv->corl_nents = 0;
862895

863-
/*
864-
* We are taking the spinlock not to protect priv->enabled, but instead
865-
* to make sure that there are no readers in the process of altering
866-
* the free or used lists while we are setting this flag.
867-
*/
868-
spin_lock_irq(&priv->lock);
869-
priv->enabled = false;
870-
spin_unlock_irq(&priv->lock);
871-
872896
/* free all buffers: the free and used lists are not being changed */
873897
data_free_buffers(priv);
874898
return 0;
@@ -896,15 +920,6 @@ static unsigned int list_num_entries(struct list_head *list)
896920
static int data_debug_show(struct seq_file *f, void *offset)
897921
{
898922
struct fpga_device *priv = f->private;
899-
int ret;
900-
901-
/*
902-
* Lock the mutex first, so that we get an accurate value for enable
903-
* Lock the spinlock next, to get accurate list counts
904-
*/
905-
ret = mutex_lock_interruptible(&priv->mutex);
906-
if (ret)
907-
return ret;
908923

909924
spin_lock_irq(&priv->lock);
910925

@@ -917,7 +932,6 @@ static int data_debug_show(struct seq_file *f, void *offset)
917932
seq_printf(f, "num_dropped: %d\n", priv->num_dropped);
918933

919934
spin_unlock_irq(&priv->lock);
920-
mutex_unlock(&priv->mutex);
921935
return 0;
922936
}
923937

@@ -970,7 +984,13 @@ static ssize_t data_en_show(struct device *dev, struct device_attribute *attr,
970984
char *buf)
971985
{
972986
struct fpga_device *priv = dev_get_drvdata(dev);
973-
return snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled);
987+
int ret;
988+
989+
spin_lock_irq(&priv->lock);
990+
ret = snprintf(buf, PAGE_SIZE, "%u\n", priv->enabled);
991+
spin_unlock_irq(&priv->lock);
992+
993+
return ret;
974994
}
975995

976996
static ssize_t data_en_set(struct device *dev, struct device_attribute *attr,
@@ -986,6 +1006,7 @@ static ssize_t data_en_set(struct device *dev, struct device_attribute *attr,
9861006
return -EINVAL;
9871007
}
9881008

1009+
/* protect against concurrent enable/disable */
9891010
ret = mutex_lock_interruptible(&priv->mutex);
9901011
if (ret)
9911012
return ret;

0 commit comments

Comments
 (0)