summaryrefslogtreecommitdiff
path: root/target/linux/brcm2708/patches-4.9/950-0180-drm-vc4-Fulfill-user-BO-creation-requests-from-the-k.patch
blob: c0967b4639321c7f9d3210fcfb2861818062d970 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
From cb8a5a1cf8cfd5b88fe641ada9a2dfb318d43c3f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 8 Feb 2017 15:00:54 -0800
Subject: [PATCH] drm/vc4: Fulfill user BO creation requests from the kernel BO
 cache.

The from_cache flag was actually "the BO is invisible to userspace",
so we can repurpose to just zero out a cached BO and return it to
userspace.

Improves wall time for a loop of 5 glsl-algebraic-add-add-1 by
-1.44989% +/- 0.862891% (n=28, 1 outlier removed from each that
appeared to be other system noise)

Note that there's an intel-gpu-tools test to check for the proper
zeroing behavior here, which we continue to pass.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_bo.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -208,21 +208,22 @@ struct drm_gem_object *vc4_create_object
 }
 
 struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
-			     bool from_cache)
+			     bool allow_unzeroed)
 {
 	size_t size = roundup(unaligned_size, PAGE_SIZE);
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_gem_cma_object *cma_obj;
+	struct vc4_bo *bo;
 
 	if (size == 0)
 		return ERR_PTR(-EINVAL);
 
 	/* First, try to get a vc4_bo from the kernel BO cache. */
-	if (from_cache) {
-		struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size);
-
-		if (bo)
-			return bo;
+	bo = vc4_bo_get_from_cache(dev, size);
+	if (bo) {
+		if (!allow_unzeroed)
+			memset(bo->base.vaddr, 0, bo->base.base.size);
+		return bo;
 	}
 
 	cma_obj = drm_gem_cma_create(dev, size);