1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
|
From 298c1744068079c5174e0c6023a89cb5ad3576c4 Mon Sep 17 00:00:00 2001
From: Luke Diamand <luked@broadcom.com>
Date: Wed, 1 Jan 2014 00:45:29 +0000
Subject: [PATCH 149/196] bcm2708_fb: use IRQ for DMA copies
The copyarea ioctl() uses DMA to speed things along. This
was busy-waiting for completion. This change supports using
an interrupt instead for larger transfers. For small
transfers, busy-waiting is still likely to be faster.
Signed-off-by: Luke Diamand <luke@diamand.org>
---
arch/arm/mach-bcm2708/dma.c | 8 ++++
arch/arm/mach-bcm2708/include/mach/dma.h | 2 +
drivers/video/bcm2708_fb.c | 64 ++++++++++++++++++++++++++++++--
3 files changed, 70 insertions(+), 4 deletions(-)
diff --git a/arch/arm/mach-bcm2708/dma.c b/arch/arm/mach-bcm2708/dma.c
index 51d147a..1da2413 100644
--- a/arch/arm/mach-bcm2708/dma.c
+++ b/arch/arm/mach-bcm2708/dma.c
@@ -83,6 +83,14 @@ extern void bcm_dma_wait_idle(void __iomem *dma_chan_base)
EXPORT_SYMBOL_GPL(bcm_dma_start);
+extern bool bcm_dma_is_busy(void __iomem *dma_chan_base)
+{
+ dsb();
+
+ return readl(dma_chan_base + BCM2708_DMA_CS) & BCM2708_DMA_ACTIVE;
+}
+EXPORT_SYMBOL_GPL(bcm_dma_is_busy);
+
/* Complete an ongoing DMA (assuming its results are to be ignored)
Does nothing if there is no DMA in progress.
This routine waits for the current AXI transfer to complete before
diff --git a/arch/arm/mach-bcm2708/include/mach/dma.h b/arch/arm/mach-bcm2708/include/mach/dma.h
index f2568d4..a4aac4c 100644
--- a/arch/arm/mach-bcm2708/include/mach/dma.h
+++ b/arch/arm/mach-bcm2708/include/mach/dma.h
@@ -64,11 +64,13 @@ struct bcm2708_dma_cb {
unsigned long next;
unsigned long pad[2];
};
+struct scatterlist;
extern int bcm_sg_suitable_for_dma(struct scatterlist *sg_ptr, int sg_len);
extern void bcm_dma_start(void __iomem *dma_chan_base,
dma_addr_t control_block);
extern void bcm_dma_wait_idle(void __iomem *dma_chan_base);
+extern bool bcm_dma_is_busy(void __iomem *dma_chan_base);
extern int /*rc*/ bcm_dma_abort(void __iomem *dma_chan_base);
/* When listing features we can ask for when allocating DMA channels give
diff --git a/drivers/video/bcm2708_fb.c b/drivers/video/bcm2708_fb.c
index 4d7d963..5758146 100644
--- a/drivers/video/bcm2708_fb.c
+++ b/drivers/video/bcm2708_fb.c
@@ -21,6 +21,7 @@
#include <linux/mm.h>
#include <linux/fb.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/list.h>
#include <linux/platform_device.h>
@@ -48,6 +49,11 @@ static const char *bcm2708_name = "BCM2708 FB";
#define DRIVER_NAME "bcm2708_fb"
+static u32 dma_busy_wait_threshold = 1<<15;
+module_param(dma_busy_wait_threshold, int, 0644);
+MODULE_PARM_DESC(dma_busy_wait_threshold, "Busy-wait for DMA completion below this area");
+
+
/* this data structure describes each frame buffer device we find */
struct fbinfo_s {
@@ -77,6 +83,7 @@ struct bcm2708_fb {
void *cb_base; /* DMA control blocks */
dma_addr_t cb_handle;
struct dentry *debugfs_dir;
+ wait_queue_head_t dma_waitq;
struct bcm2708_fb_stats stats;
};
@@ -95,6 +102,10 @@ static int bcm2708_fb_debugfs_init(struct bcm2708_fb *fb)
"dma_copies",
offsetof(struct bcm2708_fb_stats, dma_copies)
},
+ {
+ "dma_irqs",
+ offsetof(struct bcm2708_fb_stats, dma_irqs)
+ },
};
fb->debugfs_dir = debugfs_create_dir(DRIVER_NAME, NULL);
@@ -400,6 +411,7 @@ static void bcm2708_fb_copyarea(struct fb_info *info,
int bytes_per_pixel = (info->var.bits_per_pixel + 7) >> 3;
/* Channel 0 supports larger bursts and is a bit faster */
int burst_size = (fb->dma_chan == 0) ? 8 : 2;
+ int pixels = region->width * region->height;
/* Fallback to cfb_copyarea() if we don't like something */
if (bytes_per_pixel > 4 ||
@@ -492,8 +504,20 @@ static void bcm2708_fb_copyarea(struct fb_info *info,
cb->next = 0;
- bcm_dma_start(fb->dma_chan_base, fb->cb_handle);
- bcm_dma_wait_idle(fb->dma_chan_base);
+ if (pixels < dma_busy_wait_threshold) {
+ bcm_dma_start(fb->dma_chan_base, fb->cb_handle);
+ bcm_dma_wait_idle(fb->dma_chan_base);
+ } else {
+ void __iomem *dma_chan = fb->dma_chan_base;
+ cb->info |= BCM2708_DMA_INT_EN;
+ bcm_dma_start(fb->dma_chan_base, fb->cb_handle);
+ while (bcm_dma_is_busy(dma_chan)) {
+ wait_event_interruptible(
+ fb->dma_waitq,
+ !bcm_dma_is_busy(dma_chan));
+ }
+ fb->stats.dma_irqs++;
+ }
fb->stats.dma_copies++;
}
@@ -504,6 +528,24 @@ static void bcm2708_fb_imageblit(struct fb_info *info,
cfb_imageblit(info, image);
}
+static irqreturn_t bcm2708_fb_dma_irq(int irq, void *cxt)
+{
+ struct bcm2708_fb *fb = cxt;
+
+ /* FIXME: should read status register to check if this is
+ * actually interrupting us or not, in case this interrupt
+ * ever becomes shared amongst several DMA channels
+ *
+ * readl(dma_chan_base + BCM2708_DMA_CS) & BCM2708_DMA_IRQ;
+ */
+
+ /* acknowledge the interrupt */
+ writel(BCM2708_DMA_INT, fb->dma_chan_base + BCM2708_DMA_CS);
+
+ wake_up(&fb->dma_waitq);
+ return IRQ_HANDLED;
+}
+
static struct fb_ops bcm2708_fb_ops = {
.owner = THIS_MODULE,
.fb_check_var = bcm2708_fb_check_var,
@@ -568,6 +610,7 @@ static int bcm2708_fb_register(struct bcm2708_fb *fb)
fb->fb.monspecs.dclkmax = 100000000;
bcm2708_fb_set_bitfields(&fb->fb.var);
+ init_waitqueue_head(&fb->dma_waitq);
/*
* Allocate colourmap.
@@ -593,14 +636,15 @@ static int bcm2708_fb_probe(struct platform_device *dev)
struct bcm2708_fb *fb;
int ret;
- fb = kmalloc(sizeof(struct bcm2708_fb), GFP_KERNEL);
+ fb = kzalloc(sizeof(struct bcm2708_fb), GFP_KERNEL);
if (!fb) {
dev_err(&dev->dev,
"could not allocate new bcm2708_fb struct\n");
ret = -ENOMEM;
goto free_region;
}
- memset(fb, 0, sizeof(struct bcm2708_fb));
+
+ bcm2708_fb_debugfs_init(fb);
bcm2708_fb_debugfs_init(fb);
@@ -624,6 +668,14 @@ static int bcm2708_fb_probe(struct platform_device *dev)
}
fb->dma_chan = ret;
+ ret = request_irq(fb->dma_irq, bcm2708_fb_dma_irq,
+ 0, "bcm2708_fb dma", fb);
+ if (ret) {
+ pr_err("%s: failed to request DMA irq\n", __func__);
+ goto free_dma_chan;
+ }
+
+
pr_info("BCM2708FB: allocated DMA channel %d @ %p\n",
fb->dma_chan, fb->dma_chan_base);
@@ -635,6 +687,8 @@ static int bcm2708_fb_probe(struct platform_device *dev)
goto out;
}
+free_dma_chan:
+ bcm_dma_chan_free(fb->dma_chan);
free_cb:
dma_free_writecombine(&dev->dev, SZ_64K, fb->cb_base, fb->cb_handle);
free_fb:
@@ -662,6 +716,8 @@ static int bcm2708_fb_remove(struct platform_device *dev)
fb->dma);
bcm2708_fb_debugfs_deinit(fb);
+ free_irq(fb->dma_irq, fb);
+
kfree(fb);
return 0;
--
1.9.1
|