summaryrefslogtreecommitdiff
path: root/target/linux/brcm2708/patches-4.4/0115-drm-vc4-Update-a-bunch-of-code-to-match-upstream-sub.patch
diff options
context:
space:
mode:
authorFelix Fietkau <nbd@openwrt.org>2016-01-17 10:42:23 +0000
committerFelix Fietkau <nbd@openwrt.org>2016-01-17 10:42:23 +0000
commit4224b52c3acc7203e7c2535d6806f30432dae5e3 (patch)
tree7a847d3e84dfc3608555587c3b9958474fc17d72 /target/linux/brcm2708/patches-4.4/0115-drm-vc4-Update-a-bunch-of-code-to-match-upstream-sub.patch
parentcad399c87b7739c4b8eeb4119706860818f7d84f (diff)
downloadmtk-20170518-4224b52c3acc7203e7c2535d6806f30432dae5e3.zip
mtk-20170518-4224b52c3acc7203e7c2535d6806f30432dae5e3.tar.gz
mtk-20170518-4224b52c3acc7203e7c2535d6806f30432dae5e3.tar.bz2
brcm2708: add linux 4.4 support
- random-bcm2708 and spi-bcm2708 have been removed. - sound-soc-bcm2708-i2s has been upstreamed as sound-soc-bcm2835-i2s. Let's keep linux 4.1 for a while, since linux 4.4 appears to have some issues with multicast traffic on RPi ethernet: https://gist.github.com/Noltari/5b1cfdecce5ed4bc08fd Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com> SVN-Revision: 48266
Diffstat (limited to 'target/linux/brcm2708/patches-4.4/0115-drm-vc4-Update-a-bunch-of-code-to-match-upstream-sub.patch')
-rw-r--r--target/linux/brcm2708/patches-4.4/0115-drm-vc4-Update-a-bunch-of-code-to-match-upstream-sub.patch1894
1 files changed, 1894 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.4/0115-drm-vc4-Update-a-bunch-of-code-to-match-upstream-sub.patch b/target/linux/brcm2708/patches-4.4/0115-drm-vc4-Update-a-bunch-of-code-to-match-upstream-sub.patch
new file mode 100644
index 0000000..54e6698
--- /dev/null
+++ b/target/linux/brcm2708/patches-4.4/0115-drm-vc4-Update-a-bunch-of-code-to-match-upstream-sub.patch
@@ -0,0 +1,1894 @@
+From 06dbf5f7d41615b40de35ddab611d92c2a9dd1c1 Mon Sep 17 00:00:00 2001
+From: Eric Anholt <eric@anholt.net>
+Date: Fri, 4 Dec 2015 11:35:34 -0800
+Subject: [PATCH 115/127] drm/vc4: Update a bunch of code to match upstream
+ submission.
+
+This gets almost everything matching, except for the MSAA support and
+using generic PM domains.
+
+Signed-off-by: Eric Anholt <eric@anholt.net>
+---
+ drivers/gpu/drm/drm_gem_cma_helper.c | 13 +-
+ drivers/gpu/drm/vc4/vc4_bo.c | 322 +++++++++++++++++------------
+ drivers/gpu/drm/vc4/vc4_crtc.c | 7 +-
+ drivers/gpu/drm/vc4/vc4_drv.c | 6 +-
+ drivers/gpu/drm/vc4/vc4_drv.h | 20 +-
+ drivers/gpu/drm/vc4/vc4_gem.c | 24 ++-
+ drivers/gpu/drm/vc4/vc4_irq.c | 5 +-
+ drivers/gpu/drm/vc4/vc4_kms.c | 1 +
+ drivers/gpu/drm/vc4/vc4_packet.h | 210 +++++++++----------
+ drivers/gpu/drm/vc4/vc4_qpu_defines.h | 308 ++++++++++++++-------------
+ drivers/gpu/drm/vc4/vc4_render_cl.c | 4 +-
+ drivers/gpu/drm/vc4/vc4_v3d.c | 10 +-
+ drivers/gpu/drm/vc4/vc4_validate.c | 130 ++++++------
+ drivers/gpu/drm/vc4/vc4_validate_shaders.c | 66 +++---
+ include/drm/drmP.h | 8 +-
+ 15 files changed, 598 insertions(+), 536 deletions(-)
+
+--- a/drivers/gpu/drm/drm_gem_cma_helper.c
++++ b/drivers/gpu/drm/drm_gem_cma_helper.c
+@@ -58,15 +58,14 @@ __drm_gem_cma_create(struct drm_device *
+ struct drm_gem_cma_object *cma_obj;
+ struct drm_gem_object *gem_obj;
+ int ret;
+- size_t obj_size = (drm->driver->gem_obj_size ?
+- drm->driver->gem_obj_size :
+- sizeof(*cma_obj));
+
+- cma_obj = kzalloc(obj_size, GFP_KERNEL);
+- if (!cma_obj)
++ if (drm->driver->gem_create_object)
++ gem_obj = drm->driver->gem_create_object(drm, size);
++ else
++ gem_obj = kzalloc(sizeof(*cma_obj), GFP_KERNEL);
++ if (!gem_obj)
+ return ERR_PTR(-ENOMEM);
+-
+- gem_obj = &cma_obj->base;
++ cma_obj = container_of(gem_obj, struct drm_gem_cma_object, base);
+
+ ret = drm_gem_object_init(drm, gem_obj, size);
+ if (ret)
+--- a/drivers/gpu/drm/vc4/vc4_bo.c
++++ b/drivers/gpu/drm/vc4/vc4_bo.c
+@@ -12,6 +12,10 @@
+ * access to system memory with no MMU in between. To support it, we
+ * use the GEM CMA helper functions to allocate contiguous ranges of
+ * physical memory for our BOs.
++ *
++ * Since the CMA allocator is very slow, we keep a cache of recently
++ * freed BOs around so that the kernel's allocation of objects for 3D
++ * rendering can return quickly.
+ */
+
+ #include "vc4_drv.h"
+@@ -34,6 +38,36 @@ static void vc4_bo_stats_dump(struct vc4
+ vc4->bo_stats.size_cached / 1024);
+ }
+
++#ifdef CONFIG_DEBUG_FS
++int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
++{
++ struct drm_info_node *node = (struct drm_info_node *)m->private;
++ struct drm_device *dev = node->minor->dev;
++ struct vc4_dev *vc4 = to_vc4_dev(dev);
++ struct vc4_bo_stats stats;
++
++ /* Take a snapshot of the current stats with the lock held. */
++ mutex_lock(&vc4->bo_lock);
++ stats = vc4->bo_stats;
++ mutex_unlock(&vc4->bo_lock);
++
++ seq_printf(m, "num bos allocated: %d\n",
++ stats.num_allocated);
++ seq_printf(m, "size bos allocated: %dkb\n",
++ stats.size_allocated / 1024);
++ seq_printf(m, "num bos used: %d\n",
++ stats.num_allocated - stats.num_cached);
++ seq_printf(m, "size bos used: %dkb\n",
++ (stats.size_allocated - stats.size_cached) / 1024);
++ seq_printf(m, "num bos cached: %d\n",
++ stats.num_cached);
++ seq_printf(m, "size bos cached: %dkb\n",
++ stats.size_cached / 1024);
++
++ return 0;
++}
++#endif
++
+ static uint32_t bo_page_index(size_t size)
+ {
+ return (size / PAGE_SIZE) - 1;
+@@ -81,8 +115,8 @@ static struct list_head *vc4_get_cache_l
+ struct list_head *new_list;
+ uint32_t i;
+
+- new_list = kmalloc(new_size * sizeof(struct list_head),
+- GFP_KERNEL);
++ new_list = kmalloc_array(new_size, sizeof(struct list_head),
++ GFP_KERNEL);
+ if (!new_list)
+ return NULL;
+
+@@ -90,7 +124,9 @@ static struct list_head *vc4_get_cache_l
+ * head locations.
+ */
+ for (i = 0; i < vc4->bo_cache.size_list_size; i++) {
+- struct list_head *old_list = &vc4->bo_cache.size_list[i];
++ struct list_head *old_list =
++ &vc4->bo_cache.size_list[i];
++
+ if (list_empty(old_list))
+ INIT_LIST_HEAD(&new_list[i]);
+ else
+@@ -122,11 +158,60 @@ void vc4_bo_cache_purge(struct drm_devic
+ mutex_unlock(&vc4->bo_lock);
+ }
+
+-struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size)
++static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
++ uint32_t size)
+ {
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+- uint32_t size = roundup(unaligned_size, PAGE_SIZE);
+ uint32_t page_index = bo_page_index(size);
++ struct vc4_bo *bo = NULL;
++
++ size = roundup(size, PAGE_SIZE);
++
++ mutex_lock(&vc4->bo_lock);
++ if (page_index >= vc4->bo_cache.size_list_size)
++ goto out;
++
++ if (list_empty(&vc4->bo_cache.size_list[page_index]))
++ goto out;
++
++ bo = list_first_entry(&vc4->bo_cache.size_list[page_index],
++ struct vc4_bo, size_head);
++ vc4_bo_remove_from_cache(bo);
++ kref_init(&bo->base.base.refcount);
++
++out:
++ mutex_unlock(&vc4->bo_lock);
++ return bo;
++}
++
++/**
++ * vc4_gem_create_object - Implementation of driver->gem_create_object.
++ *
++ * This lets the CMA helpers allocate object structs for us, and keep
++ * our BO stats correct.
++ */
++struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size)
++{
++ struct vc4_dev *vc4 = to_vc4_dev(dev);
++ struct vc4_bo *bo;
++
++ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
++ if (!bo)
++ return ERR_PTR(-ENOMEM);
++
++ mutex_lock(&vc4->bo_lock);
++ vc4->bo_stats.num_allocated++;
++ vc4->bo_stats.size_allocated += size;
++ mutex_unlock(&vc4->bo_lock);
++
++ return &bo->base.base;
++}
++
++struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t unaligned_size,
++ bool from_cache)
++{
++ size_t size = roundup(unaligned_size, PAGE_SIZE);
++ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_gem_cma_object *cma_obj;
+ int pass;
+
+@@ -134,18 +219,12 @@ struct vc4_bo *vc4_bo_create(struct drm_
+ return NULL;
+
+ /* First, try to get a vc4_bo from the kernel BO cache. */
+- mutex_lock(&vc4->bo_lock);
+- if (page_index < vc4->bo_cache.size_list_size &&
+- !list_empty(&vc4->bo_cache.size_list[page_index])) {
+- struct vc4_bo *bo =
+- list_first_entry(&vc4->bo_cache.size_list[page_index],
+- struct vc4_bo, size_head);
+- vc4_bo_remove_from_cache(bo);
+- mutex_unlock(&vc4->bo_lock);
+- kref_init(&bo->base.base.refcount);
+- return bo;
++ if (from_cache) {
++ struct vc4_bo *bo = vc4_bo_get_from_cache(dev, size);
++
++ if (bo)
++ return bo;
+ }
+- mutex_unlock(&vc4->bo_lock);
+
+ /* Otherwise, make a new BO. */
+ for (pass = 0; ; pass++) {
+@@ -179,9 +258,6 @@ struct vc4_bo *vc4_bo_create(struct drm_
+ }
+ }
+
+- vc4->bo_stats.num_allocated++;
+- vc4->bo_stats.size_allocated += size;
+-
+ return to_vc4_bo(&cma_obj->base);
+ }
+
+@@ -199,7 +275,7 @@ int vc4_dumb_create(struct drm_file *fil
+ if (args->size < args->pitch * args->height)
+ args->size = args->pitch * args->height;
+
+- bo = vc4_bo_create(dev, args->size);
++ bo = vc4_bo_create(dev, args->size, false);
+ if (!bo)
+ return -ENOMEM;
+
+@@ -209,8 +285,8 @@ int vc4_dumb_create(struct drm_file *fil
+ return ret;
+ }
+
+-static void
+-vc4_bo_cache_free_old(struct drm_device *dev)
++/* Must be called with bo_lock held. */
++static void vc4_bo_cache_free_old(struct drm_device *dev)
+ {
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ unsigned long expire_time = jiffies - msecs_to_jiffies(1000);
+@@ -313,15 +389,77 @@ vc4_prime_export(struct drm_device *dev,
+ return drm_gem_prime_export(dev, obj, flags);
+ }
+
+-int
+-vc4_create_bo_ioctl(struct drm_device *dev, void *data,
+- struct drm_file *file_priv)
++int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
++{
++ struct drm_gem_object *gem_obj;
++ struct vc4_bo *bo;
++ int ret;
++
++ ret = drm_gem_mmap(filp, vma);
++ if (ret)
++ return ret;
++
++ gem_obj = vma->vm_private_data;
++ bo = to_vc4_bo(gem_obj);
++
++ if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
++ DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
++ return -EINVAL;
++ }
++
++ /*
++ * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
++ * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
++ * the whole buffer.
++ */
++ vma->vm_flags &= ~VM_PFNMAP;
++ vma->vm_pgoff = 0;
++
++ ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
++ bo->base.vaddr, bo->base.paddr,
++ vma->vm_end - vma->vm_start);
++ if (ret)
++ drm_gem_vm_close(vma);
++
++ return ret;
++}
++
++int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
++{
++ struct vc4_bo *bo = to_vc4_bo(obj);
++
++ if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
++ DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
++ return -EINVAL;
++ }
++
++ return drm_gem_cma_prime_mmap(obj, vma);
++}
++
++void *vc4_prime_vmap(struct drm_gem_object *obj)
++{
++ struct vc4_bo *bo = to_vc4_bo(obj);
++
++ if (bo->validated_shader) {
++ DRM_ERROR("mmaping of shader BOs not allowed.\n");
++ return ERR_PTR(-EINVAL);
++ }
++
++ return drm_gem_cma_prime_vmap(obj);
++}
++
++int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *file_priv)
+ {
+ struct drm_vc4_create_bo *args = data;
+ struct vc4_bo *bo = NULL;
+ int ret;
+
+- bo = vc4_bo_create(dev, args->size);
++ /*
++ * We can't allocate from the BO cache, because the BOs don't
++ * get zeroed, and that might leak data between users.
++ */
++ bo = vc4_bo_create(dev, args->size, false);
+ if (!bo)
+ return -ENOMEM;
+
+@@ -331,6 +469,25 @@ vc4_create_bo_ioctl(struct drm_device *d
+ return ret;
+ }
+
++int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *file_priv)
++{
++ struct drm_vc4_mmap_bo *args = data;
++ struct drm_gem_object *gem_obj;
++
++ gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
++ if (!gem_obj) {
++ DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
++ return -EINVAL;
++ }
++
++ /* The mmap offset was set up at BO allocation time. */
++ args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
++
++ drm_gem_object_unreference_unlocked(gem_obj);
++ return 0;
++}
++
+ int
+ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+@@ -355,7 +512,7 @@ vc4_create_shader_bo_ioctl(struct drm_de
+ return -EINVAL;
+ }
+
+- bo = vc4_bo_create(dev, args->size);
++ bo = vc4_bo_create(dev, args->size, true);
+ if (!bo)
+ return -ENOMEM;
+
+@@ -364,6 +521,11 @@ vc4_create_shader_bo_ioctl(struct drm_de
+ args->size);
+ if (ret != 0)
+ goto fail;
+ /* Clear the rest of the memory, since BOs allocated from the BO
+ * cache are not zeroed.
+ */
++ memset(bo->base.vaddr + args->size, 0,
++ bo->base.base.size - args->size);
+
+ bo->validated_shader = vc4_validate_shader(&bo->base);
+ if (!bo->validated_shader) {
+@@ -382,85 +544,6 @@ vc4_create_shader_bo_ioctl(struct drm_de
+ return ret;
+ }
+
+-int
+-vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+- struct drm_file *file_priv)
+-{
+- struct drm_vc4_mmap_bo *args = data;
+- struct drm_gem_object *gem_obj;
+-
+- gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+- if (!gem_obj) {
+- DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
+- return -EINVAL;
+- }
+-
+- /* The mmap offset was set up at BO allocation time. */
+- args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+-
+- drm_gem_object_unreference(gem_obj);
+- return 0;
+-}
+-
+-int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
+-{
+- struct drm_gem_object *gem_obj;
+- struct vc4_bo *bo;
+- int ret;
+-
+- ret = drm_gem_mmap(filp, vma);
+- if (ret)
+- return ret;
+-
+- gem_obj = vma->vm_private_data;
+- bo = to_vc4_bo(gem_obj);
+-
+- if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) {
+- DRM_ERROR("mmaping of shader BOs for writing not allowed.\n");
+- return -EINVAL;
+- }
+-
+- /*
+- * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
+- * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
+- * the whole buffer.
+- */
+- vma->vm_flags &= ~VM_PFNMAP;
+- vma->vm_pgoff = 0;
+-
+- ret = dma_mmap_writecombine(bo->base.base.dev->dev, vma,
+- bo->base.vaddr, bo->base.paddr,
+- vma->vm_end - vma->vm_start);
+- if (ret)
+- drm_gem_vm_close(vma);
+-
+- return ret;
+-}
+-
+-int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+-{
+- struct vc4_bo *bo = to_vc4_bo(obj);
+-
+- if (bo->validated_shader) {
+- DRM_ERROR("mmaping of shader BOs not allowed.\n");
+- return -EINVAL;
+- }
+-
+- return drm_gem_cma_prime_mmap(obj, vma);
+-}
+-
+-void *vc4_prime_vmap(struct drm_gem_object *obj)
+-{
+- struct vc4_bo *bo = to_vc4_bo(obj);
+-
+- if (bo->validated_shader) {
+- DRM_ERROR("mmaping of shader BOs not allowed.\n");
+- return ERR_PTR(-EINVAL);
+- }
+-
+- return drm_gem_cma_prime_vmap(obj);
+-}
+-
+ void vc4_bo_cache_init(struct drm_device *dev)
+ {
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+@@ -472,7 +555,7 @@ void vc4_bo_cache_init(struct drm_device
+ INIT_WORK(&vc4->bo_cache.time_work, vc4_bo_cache_time_work);
+ setup_timer(&vc4->bo_cache.time_timer,
+ vc4_bo_cache_time_timer,
+- (unsigned long) dev);
++ (unsigned long)dev);
+ }
+
+ void vc4_bo_cache_destroy(struct drm_device *dev)
+@@ -489,28 +572,3 @@ void vc4_bo_cache_destroy(struct drm_dev
+ vc4_bo_stats_dump(vc4);
+ }
+ }
+-
+-#ifdef CONFIG_DEBUG_FS
+-int vc4_bo_stats_debugfs(struct seq_file *m, void *unused)
+-{
+- struct drm_info_node *node = (struct drm_info_node *) m->private;
+- struct drm_device *dev = node->minor->dev;
+- struct vc4_dev *vc4 = to_vc4_dev(dev);
+- struct vc4_bo_stats stats;
+-
+- mutex_lock(&vc4->bo_lock);
+- stats = vc4->bo_stats;
+- mutex_unlock(&vc4->bo_lock);
+-
+- seq_printf(m, "num bos allocated: %d\n", stats.num_allocated);
+- seq_printf(m, "size bos allocated: %dkb\n", stats.size_allocated / 1024);
+- seq_printf(m, "num bos used: %d\n", (stats.num_allocated -
+- stats.num_cached));
+- seq_printf(m, "size bos used: %dkb\n", (stats.size_allocated -
+- stats.size_cached) / 1024);
+- seq_printf(m, "num bos cached: %d\n", stats.num_cached);
+- seq_printf(m, "size bos cached: %dkb\n", stats.size_cached / 1024);
+-
+- return 0;
+-}
+-#endif
+--- a/drivers/gpu/drm/vc4/vc4_crtc.c
++++ b/drivers/gpu/drm/vc4/vc4_crtc.c
+@@ -501,6 +501,7 @@ vc4_async_page_flip_complete(struct vc4_
+ vc4_plane_async_set_fb(plane, flip_state->fb);
+ if (flip_state->event) {
+ unsigned long flags;
++
+ spin_lock_irqsave(&dev->event_lock, flags);
+ drm_crtc_send_vblank_event(crtc, flip_state->event);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+@@ -562,9 +563,9 @@ static int vc4_async_page_flip(struct dr
+ }
+
+ static int vc4_page_flip(struct drm_crtc *crtc,
+- struct drm_framebuffer *fb,
+- struct drm_pending_vblank_event *event,
+- uint32_t flags)
++ struct drm_framebuffer *fb,
++ struct drm_pending_vblank_event *event,
++ uint32_t flags)
+ {
+ if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
+ return vc4_async_page_flip(crtc, fb, event, flags);
+--- a/drivers/gpu/drm/vc4/vc4_drv.c
++++ b/drivers/gpu/drm/vc4/vc4_drv.c
+@@ -81,7 +81,8 @@ static const struct drm_ioctl_desc vc4_d
+ DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+- DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY),
++ DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
++ DRM_ROOT_ONLY),
+ };
+
+ static struct drm_driver vc4_drm_driver = {
+@@ -107,6 +108,7 @@ static struct drm_driver vc4_drm_driver
+ .debugfs_cleanup = vc4_debugfs_cleanup,
+ #endif
+
++ .gem_create_object = vc4_create_object,
+ .gem_free_object = vc4_free_object,
+ .gem_vm_ops = &drm_gem_cma_vm_ops,
+
+@@ -128,8 +130,6 @@ static struct drm_driver vc4_drm_driver
+ .num_ioctls = ARRAY_SIZE(vc4_drm_ioctls),
+ .fops = &vc4_drm_fops,
+
+- //.gem_obj_size = sizeof(struct vc4_bo),
+-
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -72,6 +72,9 @@ struct vc4_dev {
+ * job_done_work.
+ */
+ struct list_head job_done_list;
++ /* Spinlock used to synchronize the job_list and seqno
++ * accesses between the IRQ handler and GEM ioctls.
++ */
+ spinlock_t job_lock;
+ wait_queue_head_t job_wait_queue;
+ struct work_struct job_done_work;
+@@ -318,8 +321,7 @@ struct vc4_texture_sample_info {
+ * and validate the shader state record's uniforms that define the texture
+ * samples.
+ */
+-struct vc4_validated_shader_info
+-{
++struct vc4_validated_shader_info {
+ uint32_t uniforms_size;
+ uint32_t uniforms_src_size;
+ uint32_t num_texture_samples;
+@@ -355,8 +357,10 @@ struct vc4_validated_shader_info
+ #define wait_for(COND, MS) _wait_for(COND, MS, 1)
+
+ /* vc4_bo.c */
++struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
+ void vc4_free_object(struct drm_gem_object *gem_obj);
+-struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size);
++struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
++ bool from_cache);
+ int vc4_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+@@ -432,7 +436,8 @@ struct drm_plane *vc4_plane_init(struct
+ enum drm_plane_type type);
+ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist);
+ u32 vc4_plane_dlist_size(struct drm_plane_state *state);
+-void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb);
++void vc4_plane_async_set_fb(struct drm_plane *plane,
++ struct drm_framebuffer *fb);
+
+ /* vc4_v3d.c */
+ extern struct platform_driver vc4_v3d_driver;
+@@ -450,9 +455,6 @@ vc4_validate_bin_cl(struct drm_device *d
+ int
+ vc4_validate_shader_recs(struct drm_device *dev, struct vc4_exec_info *exec);
+
+-struct vc4_validated_shader_info *
+-vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
+-
+ bool vc4_use_bo(struct vc4_exec_info *exec,
+ uint32_t hindex,
+ enum vc4_bo_mode mode,
+@@ -464,3 +466,7 @@ bool vc4_check_tex_size(struct vc4_exec_
+ struct drm_gem_cma_object *fbo,
+ uint32_t offset, uint8_t tiling_format,
+ uint32_t width, uint32_t height, uint8_t cpp);
++
+/* vc4_validate_shaders.c */
++struct vc4_validated_shader_info *
++vc4_validate_shader(struct drm_gem_cma_object *shader_obj);
+--- a/drivers/gpu/drm/vc4/vc4_gem.c
++++ b/drivers/gpu/drm/vc4/vc4_gem.c
+@@ -53,9 +53,8 @@ vc4_free_hang_state(struct drm_device *d
+ unsigned int i;
+
+ mutex_lock(&dev->struct_mutex);
+- for (i = 0; i < state->user_state.bo_count; i++) {
++ for (i = 0; i < state->user_state.bo_count; i++)
+ drm_gem_object_unreference(state->bo[i]);
+- }
+ mutex_unlock(&dev->struct_mutex);
+
+ kfree(state);
+@@ -65,10 +64,10 @@ int
+ vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+ {
+- struct drm_vc4_get_hang_state *get_state = data;
++ struct drm_vc4_get_hang_state *get_state = data;
+ struct drm_vc4_get_hang_state_bo *bo_state;
+ struct vc4_hang_state *kernel_state;
+- struct drm_vc4_get_hang_state *state;
++ struct drm_vc4_get_hang_state *state;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ unsigned long irqflags;
+ u32 i;
+@@ -107,6 +106,7 @@ vc4_get_hang_state_ioctl(struct drm_devi
+ for (i = 0; i < state->bo_count; i++) {
+ struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
+ u32 handle;
++
+ ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
+ &handle);
+
+@@ -124,7 +124,7 @@ vc4_get_hang_state_ioctl(struct drm_devi
+ state->bo_count * sizeof(*bo_state));
+ kfree(bo_state);
+
+- err_free:
++err_free:
+
+ vc4_free_hang_state(dev, kernel_state);
+
+@@ -578,7 +578,7 @@ vc4_get_bcl(struct drm_device *dev, stru
+ goto fail;
+ }
+
+- bo = vc4_bo_create(dev, exec_size);
++ bo = vc4_bo_create(dev, exec_size, true);
+ if (!bo) {
+ DRM_ERROR("Couldn't allocate BO for binning\n");
+ ret = PTR_ERR(exec->exec_bo);
+@@ -668,6 +668,7 @@ vc4_job_handle_completed(struct vc4_dev
+ static void vc4_seqno_cb_work(struct work_struct *work)
+ {
+ struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work);
++
+ cb->func(cb);
+ }
+
+@@ -717,6 +718,7 @@ vc4_wait_for_seqno_ioctl_helper(struct d
+
+ if ((ret == -EINTR || ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
+ uint64_t delta = jiffies_to_nsecs(jiffies - start);
++
+ if (*timeout_ns >= delta)
+ *timeout_ns -= delta;
+ }
+@@ -750,9 +752,10 @@ vc4_wait_bo_ioctl(struct drm_device *dev
+ }
+ bo = to_vc4_bo(gem_obj);
+
+- ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno, &args->timeout_ns);
++ ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
++ &args->timeout_ns);
+
+- drm_gem_object_unreference(gem_obj);
++ drm_gem_object_unreference_unlocked(gem_obj);
+ return ret;
+ }
+
+@@ -793,7 +796,8 @@ vc4_submit_cl_ioctl(struct drm_device *d
+ if (ret)
+ goto fail;
+ } else {
+- exec->ct0ca = exec->ct0ea = 0;
++ exec->ct0ca = 0;
++ exec->ct0ea = 0;
+ }
+
+ ret = vc4_get_rcl(dev, exec);
+@@ -831,7 +835,7 @@ vc4_gem_init(struct drm_device *dev)
+ INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
+ setup_timer(&vc4->hangcheck.timer,
+ vc4_hangcheck_elapsed,
+- (unsigned long) dev);
++ (unsigned long)dev);
+
+ INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
+ }
+--- a/drivers/gpu/drm/vc4/vc4_irq.c
++++ b/drivers/gpu/drm/vc4/vc4_irq.c
+@@ -56,7 +56,7 @@ vc4_overflow_mem_work(struct work_struct
+ struct drm_device *dev = vc4->dev;
+ struct vc4_bo *bo;
+
+- bo = vc4_bo_create(dev, 256 * 1024);
++ bo = vc4_bo_create(dev, 256 * 1024, true);
+ if (!bo) {
+ DRM_ERROR("Couldn't allocate binner overflow mem\n");
+ return;
+@@ -87,9 +87,8 @@ vc4_overflow_mem_work(struct work_struct
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+ }
+
+- if (vc4->overflow_mem) {
++ if (vc4->overflow_mem)
+ drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
+- }
+ vc4->overflow_mem = bo;
+
+ V3D_WRITE(V3D_BPOA, bo->base.paddr);
+--- a/drivers/gpu/drm/vc4/vc4_kms.c
++++ b/drivers/gpu/drm/vc4/vc4_kms.c
+@@ -132,6 +132,7 @@ static int vc4_atomic_commit(struct drm_
+ struct drm_gem_cma_object *cma_bo =
+ drm_fb_cma_get_gem_obj(new_state->fb, 0);
+ struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
++
+ wait_seqno = max(bo->seqno, wait_seqno);
+ }
+ }
+--- a/drivers/gpu/drm/vc4/vc4_packet.h
++++ b/drivers/gpu/drm/vc4/vc4_packet.h
+@@ -27,60 +27,60 @@
+ #include "vc4_regs.h" /* for VC4_MASK, VC4_GET_FIELD, VC4_SET_FIELD */
+
+ enum vc4_packet {
+- VC4_PACKET_HALT = 0,
+- VC4_PACKET_NOP = 1,
++ VC4_PACKET_HALT = 0,
++ VC4_PACKET_NOP = 1,
+
+- VC4_PACKET_FLUSH = 4,
+- VC4_PACKET_FLUSH_ALL = 5,
+- VC4_PACKET_START_TILE_BINNING = 6,
+- VC4_PACKET_INCREMENT_SEMAPHORE = 7,
+- VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
+-
+- VC4_PACKET_BRANCH = 16,
+- VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
+-
+- VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
+- VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
+- VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
+- VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
+- VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
+- VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
+-
+- VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
+- VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
+-
+- VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
+- VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
+-
+- VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
+-
+- VC4_PACKET_GL_SHADER_STATE = 64,
+- VC4_PACKET_NV_SHADER_STATE = 65,
+- VC4_PACKET_VG_SHADER_STATE = 66,
+-
+- VC4_PACKET_CONFIGURATION_BITS = 96,
+- VC4_PACKET_FLAT_SHADE_FLAGS = 97,
+- VC4_PACKET_POINT_SIZE = 98,
+- VC4_PACKET_LINE_WIDTH = 99,
+- VC4_PACKET_RHT_X_BOUNDARY = 100,
+- VC4_PACKET_DEPTH_OFFSET = 101,
+- VC4_PACKET_CLIP_WINDOW = 102,
+- VC4_PACKET_VIEWPORT_OFFSET = 103,
+- VC4_PACKET_Z_CLIPPING = 104,
+- VC4_PACKET_CLIPPER_XY_SCALING = 105,
+- VC4_PACKET_CLIPPER_Z_SCALING = 106,
+-
+- VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
+- VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
+- VC4_PACKET_CLEAR_COLORS = 114,
+- VC4_PACKET_TILE_COORDINATES = 115,
+-
+- /* Not an actual hardware packet -- this is what we use to put
+- * references to GEM bos in the command stream, since we need the u32
+- * int the actual address packet in order to store the offset from the
+- * start of the BO.
+- */
+- VC4_PACKET_GEM_HANDLES = 254,
++ VC4_PACKET_FLUSH = 4,
++ VC4_PACKET_FLUSH_ALL = 5,
++ VC4_PACKET_START_TILE_BINNING = 6,
++ VC4_PACKET_INCREMENT_SEMAPHORE = 7,
++ VC4_PACKET_WAIT_ON_SEMAPHORE = 8,
++
++ VC4_PACKET_BRANCH = 16,
++ VC4_PACKET_BRANCH_TO_SUB_LIST = 17,
++
++ VC4_PACKET_STORE_MS_TILE_BUFFER = 24,
++ VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25,
++ VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26,
++ VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27,
++ VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28,
++ VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29,
++
++ VC4_PACKET_GL_INDEXED_PRIMITIVE = 32,
++ VC4_PACKET_GL_ARRAY_PRIMITIVE = 33,
++
++ VC4_PACKET_COMPRESSED_PRIMITIVE = 48,
++ VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49,
++
++ VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56,
++
++ VC4_PACKET_GL_SHADER_STATE = 64,
++ VC4_PACKET_NV_SHADER_STATE = 65,
++ VC4_PACKET_VG_SHADER_STATE = 66,
++
++ VC4_PACKET_CONFIGURATION_BITS = 96,
++ VC4_PACKET_FLAT_SHADE_FLAGS = 97,
++ VC4_PACKET_POINT_SIZE = 98,
++ VC4_PACKET_LINE_WIDTH = 99,
++ VC4_PACKET_RHT_X_BOUNDARY = 100,
++ VC4_PACKET_DEPTH_OFFSET = 101,
++ VC4_PACKET_CLIP_WINDOW = 102,
++ VC4_PACKET_VIEWPORT_OFFSET = 103,
++ VC4_PACKET_Z_CLIPPING = 104,
++ VC4_PACKET_CLIPPER_XY_SCALING = 105,
++ VC4_PACKET_CLIPPER_Z_SCALING = 106,
++
++ VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112,
++ VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113,
++ VC4_PACKET_CLEAR_COLORS = 114,
++ VC4_PACKET_TILE_COORDINATES = 115,
++
++ /* Not an actual hardware packet -- this is what we use to put
++ * references to GEM bos in the command stream, since we need the u32
+ * in the actual address packet in order to store the offset from the
++ * start of the BO.
++ */
++ VC4_PACKET_GEM_HANDLES = 254,
+ } __attribute__ ((__packed__));
+
+ #define VC4_PACKET_HALT_SIZE 1
+@@ -148,10 +148,10 @@ enum vc4_packet {
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address)
+ */
+
+-#define VC4_LOADSTORE_TILE_BUFFER_EOF (1 << 3)
+-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2)
+-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS (1 << 1)
+-#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR (1 << 0)
++#define VC4_LOADSTORE_TILE_BUFFER_EOF BIT(3)
++#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK BIT(2)
++#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS BIT(1)
++#define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR BIT(0)
+
+ /** @} */
+
+@@ -160,10 +160,10 @@ enum vc4_packet {
+ * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
+ * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL
+ */
+-#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15)
+-#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14)
+-#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13)
+-#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12)
++#define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR BIT(15)
++#define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR BIT(14)
++#define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR BIT(13)
++#define VC4_STORE_TILE_BUFFER_DISABLE_SWAP BIT(12)
+
+ #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8)
+ #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8
+@@ -201,28 +201,28 @@ enum vc4_packet {
+ #define VC4_INDEX_BUFFER_U16 (1 << 4)
+
+ /* This flag is only present in NV shader state. */
+-#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS (1 << 3)
+-#define VC4_SHADER_FLAG_ENABLE_CLIPPING (1 << 2)
+-#define VC4_SHADER_FLAG_VS_POINT_SIZE (1 << 1)
+-#define VC4_SHADER_FLAG_FS_SINGLE_THREAD (1 << 0)
++#define VC4_SHADER_FLAG_SHADED_CLIP_COORDS BIT(3)
++#define VC4_SHADER_FLAG_ENABLE_CLIPPING BIT(2)
++#define VC4_SHADER_FLAG_VS_POINT_SIZE BIT(1)
++#define VC4_SHADER_FLAG_FS_SINGLE_THREAD BIT(0)
+
+ /** @{ byte 2 of config bits. */
+-#define VC4_CONFIG_BITS_EARLY_Z_UPDATE (1 << 1)
+-#define VC4_CONFIG_BITS_EARLY_Z (1 << 0)
++#define VC4_CONFIG_BITS_EARLY_Z_UPDATE BIT(1)
++#define VC4_CONFIG_BITS_EARLY_Z BIT(0)
+ /** @} */
+
+ /** @{ byte 1 of config bits. */
+-#define VC4_CONFIG_BITS_Z_UPDATE (1 << 7)
++#define VC4_CONFIG_BITS_Z_UPDATE BIT(7)
+ /** same values in this 3-bit field as PIPE_FUNC_* */
+ #define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4
+-#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE (1 << 3)
++#define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE BIT(3)
+
+ #define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1)
+ #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1)
+ #define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1)
+ #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1)
+
+-#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT (1 << 0)
++#define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT BIT(0)
+ /** @} */
+
+ /** @{ byte 0 of config bits. */
+@@ -230,15 +230,15 @@ enum vc4_packet {
+ #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6)
+ #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6)
+
+-#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES (1 << 4)
+-#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET (1 << 3)
+-#define VC4_CONFIG_BITS_CW_PRIMITIVES (1 << 2)
+-#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK (1 << 1)
+-#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT (1 << 0)
++#define VC4_CONFIG_BITS_AA_POINTS_AND_LINES BIT(4)
++#define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET BIT(3)
++#define VC4_CONFIG_BITS_CW_PRIMITIVES BIT(2)
++#define VC4_CONFIG_BITS_ENABLE_PRIM_BACK BIT(1)
++#define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT BIT(0)
+ /** @} */
+
+ /** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */
+-#define VC4_BIN_CONFIG_DB_NON_MS (1 << 7)
++#define VC4_BIN_CONFIG_DB_NON_MS BIT(7)
+
+ #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5)
+ #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5
+@@ -254,17 +254,17 @@ enum vc4_packet {
+ #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2
+ #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3
+
+-#define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2)
+-#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1)
+-#define VC4_BIN_CONFIG_MS_MODE_4X (1 << 0)
++#define VC4_BIN_CONFIG_AUTO_INIT_TSDA BIT(2)
++#define VC4_BIN_CONFIG_TILE_BUFFER_64BIT BIT(1)
++#define VC4_BIN_CONFIG_MS_MODE_4X BIT(0)
+ /** @} */
+
+ /** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */
+-#define VC4_RENDER_CONFIG_DB_NON_MS (1 << 12)
+-#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11)
+-#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G (1 << 10)
+-#define VC4_RENDER_CONFIG_COVERAGE_MODE (1 << 9)
+-#define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8)
++#define VC4_RENDER_CONFIG_DB_NON_MS BIT(12)
++#define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE BIT(11)
++#define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G BIT(10)
++#define VC4_RENDER_CONFIG_COVERAGE_MODE BIT(9)
++#define VC4_RENDER_CONFIG_ENABLE_VG_MASK BIT(8)
+
+ /** The values of the field are VC4_TILING_FORMAT_* */
+ #define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6)
+@@ -280,8 +280,8 @@ enum vc4_packet {
+ #define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1
+ #define VC4_RENDER_CONFIG_FORMAT_BGR565 2
+
+-#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1)
+-#define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0)
++#define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT BIT(1)
++#define VC4_RENDER_CONFIG_MS_MODE_4X BIT(0)
+
+ #define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4)
+ #define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4)
+@@ -291,24 +291,24 @@ enum vc4_packet {
+ #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0)
+
+ enum vc4_texture_data_type {
+- VC4_TEXTURE_TYPE_RGBA8888 = 0,
+- VC4_TEXTURE_TYPE_RGBX8888 = 1,
+- VC4_TEXTURE_TYPE_RGBA4444 = 2,
+- VC4_TEXTURE_TYPE_RGBA5551 = 3,
+- VC4_TEXTURE_TYPE_RGB565 = 4,
+- VC4_TEXTURE_TYPE_LUMINANCE = 5,
+- VC4_TEXTURE_TYPE_ALPHA = 6,
+- VC4_TEXTURE_TYPE_LUMALPHA = 7,
+- VC4_TEXTURE_TYPE_ETC1 = 8,
+- VC4_TEXTURE_TYPE_S16F = 9,
+- VC4_TEXTURE_TYPE_S8 = 10,
+- VC4_TEXTURE_TYPE_S16 = 11,
+- VC4_TEXTURE_TYPE_BW1 = 12,
+- VC4_TEXTURE_TYPE_A4 = 13,
+- VC4_TEXTURE_TYPE_A1 = 14,
+- VC4_TEXTURE_TYPE_RGBA64 = 15,
+- VC4_TEXTURE_TYPE_RGBA32R = 16,
+- VC4_TEXTURE_TYPE_YUV422R = 17,
++ VC4_TEXTURE_TYPE_RGBA8888 = 0,
++ VC4_TEXTURE_TYPE_RGBX8888 = 1,
++ VC4_TEXTURE_TYPE_RGBA4444 = 2,
++ VC4_TEXTURE_TYPE_RGBA5551 = 3,
++ VC4_TEXTURE_TYPE_RGB565 = 4,
++ VC4_TEXTURE_TYPE_LUMINANCE = 5,
++ VC4_TEXTURE_TYPE_ALPHA = 6,
++ VC4_TEXTURE_TYPE_LUMALPHA = 7,
++ VC4_TEXTURE_TYPE_ETC1 = 8,
++ VC4_TEXTURE_TYPE_S16F = 9,
++ VC4_TEXTURE_TYPE_S8 = 10,
++ VC4_TEXTURE_TYPE_S16 = 11,
++ VC4_TEXTURE_TYPE_BW1 = 12,
++ VC4_TEXTURE_TYPE_A4 = 13,
++ VC4_TEXTURE_TYPE_A1 = 14,
++ VC4_TEXTURE_TYPE_RGBA64 = 15,
++ VC4_TEXTURE_TYPE_RGBA32R = 16,
++ VC4_TEXTURE_TYPE_YUV422R = 17,
+ };
+
+ #define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12)
+--- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
++++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
+@@ -25,194 +25,190 @@
+ #define VC4_QPU_DEFINES_H
+
+ enum qpu_op_add {
+- QPU_A_NOP,
+- QPU_A_FADD,
+- QPU_A_FSUB,
+- QPU_A_FMIN,
+- QPU_A_FMAX,
+- QPU_A_FMINABS,
+- QPU_A_FMAXABS,
+- QPU_A_FTOI,
+- QPU_A_ITOF,
+- QPU_A_ADD = 12,
+- QPU_A_SUB,
+- QPU_A_SHR,
+- QPU_A_ASR,
+- QPU_A_ROR,
+- QPU_A_SHL,
+- QPU_A_MIN,
+- QPU_A_MAX,
+- QPU_A_AND,
+- QPU_A_OR,
+- QPU_A_XOR,
+- QPU_A_NOT,
+- QPU_A_CLZ,
+- QPU_A_V8ADDS = 30,
+- QPU_A_V8SUBS = 31,
++ QPU_A_NOP,
++ QPU_A_FADD,
++ QPU_A_FSUB,
++ QPU_A_FMIN,
++ QPU_A_FMAX,
++ QPU_A_FMINABS,
++ QPU_A_FMAXABS,
++ QPU_A_FTOI,
++ QPU_A_ITOF,
++ QPU_A_ADD = 12,
++ QPU_A_SUB,
++ QPU_A_SHR,
++ QPU_A_ASR,
++ QPU_A_ROR,
++ QPU_A_SHL,
++ QPU_A_MIN,
++ QPU_A_MAX,
++ QPU_A_AND,
++ QPU_A_OR,
++ QPU_A_XOR,
++ QPU_A_NOT,
++ QPU_A_CLZ,
++ QPU_A_V8ADDS = 30,
++ QPU_A_V8SUBS = 31,
+ };
+
+ enum qpu_op_mul {
+- QPU_M_NOP,
+- QPU_M_FMUL,
+- QPU_M_MUL24,
+- QPU_M_V8MULD,
+- QPU_M_V8MIN,
+- QPU_M_V8MAX,
+- QPU_M_V8ADDS,
+- QPU_M_V8SUBS,
++ QPU_M_NOP,
++ QPU_M_FMUL,
++ QPU_M_MUL24,
++ QPU_M_V8MULD,
++ QPU_M_V8MIN,
++ QPU_M_V8MAX,
++ QPU_M_V8ADDS,
++ QPU_M_V8SUBS,
+ };
+
+ enum qpu_raddr {
+- QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
+- /* 0-31 are the plain regfile a or b fields */
+- QPU_R_UNIF = 32,
+- QPU_R_VARY = 35,
+- QPU_R_ELEM_QPU = 38,
+- QPU_R_NOP,
+- QPU_R_XY_PIXEL_COORD = 41,
+- QPU_R_MS_REV_FLAGS = 41,
+- QPU_R_VPM = 48,
+- QPU_R_VPM_LD_BUSY,
+- QPU_R_VPM_LD_WAIT,
+- QPU_R_MUTEX_ACQUIRE,
++ QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */
++ /* 0-31 are the plain regfile a or b fields */
++ QPU_R_UNIF = 32,
++ QPU_R_VARY = 35,
++ QPU_R_ELEM_QPU = 38,
++ QPU_R_NOP,
++ QPU_R_XY_PIXEL_COORD = 41,
++ QPU_R_MS_REV_FLAGS = 41,
++ QPU_R_VPM = 48,
++ QPU_R_VPM_LD_BUSY,
++ QPU_R_VPM_LD_WAIT,
++ QPU_R_MUTEX_ACQUIRE,
+ };
+
+ enum qpu_waddr {
+- /* 0-31 are the plain regfile a or b fields */
+- QPU_W_ACC0 = 32, /* aka r0 */
+- QPU_W_ACC1,
+- QPU_W_ACC2,
+- QPU_W_ACC3,
+- QPU_W_TMU_NOSWAP,
+- QPU_W_ACC5,
+- QPU_W_HOST_INT,
+- QPU_W_NOP,
+- QPU_W_UNIFORMS_ADDRESS,
+- QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
+- QPU_W_MS_FLAGS = 42,
+- QPU_W_REV_FLAG = 42,
+- QPU_W_TLB_STENCIL_SETUP = 43,
+- QPU_W_TLB_Z,
+- QPU_W_TLB_COLOR_MS,
+- QPU_W_TLB_COLOR_ALL,
+- QPU_W_TLB_ALPHA_MASK,
+- QPU_W_VPM,
+- QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
+- QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
+- QPU_W_MUTEX_RELEASE,
+- QPU_W_SFU_RECIP,
+- QPU_W_SFU_RECIPSQRT,
+- QPU_W_SFU_EXP,
+- QPU_W_SFU_LOG,
+- QPU_W_TMU0_S,
+- QPU_W_TMU0_T,
+- QPU_W_TMU0_R,
+- QPU_W_TMU0_B,
+- QPU_W_TMU1_S,
+- QPU_W_TMU1_T,
+- QPU_W_TMU1_R,
+- QPU_W_TMU1_B,
++ /* 0-31 are the plain regfile a or b fields */
++ QPU_W_ACC0 = 32, /* aka r0 */
++ QPU_W_ACC1,
++ QPU_W_ACC2,
++ QPU_W_ACC3,
++ QPU_W_TMU_NOSWAP,
++ QPU_W_ACC5,
++ QPU_W_HOST_INT,
++ QPU_W_NOP,
++ QPU_W_UNIFORMS_ADDRESS,
++ QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */
++ QPU_W_MS_FLAGS = 42,
++ QPU_W_REV_FLAG = 42,
++ QPU_W_TLB_STENCIL_SETUP = 43,
++ QPU_W_TLB_Z,
++ QPU_W_TLB_COLOR_MS,
++ QPU_W_TLB_COLOR_ALL,
++ QPU_W_TLB_ALPHA_MASK,
++ QPU_W_VPM,
++ QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */
++ QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */
++ QPU_W_MUTEX_RELEASE,
++ QPU_W_SFU_RECIP,
++ QPU_W_SFU_RECIPSQRT,
++ QPU_W_SFU_EXP,
++ QPU_W_SFU_LOG,
++ QPU_W_TMU0_S,
++ QPU_W_TMU0_T,
++ QPU_W_TMU0_R,
++ QPU_W_TMU0_B,
++ QPU_W_TMU1_S,
++ QPU_W_TMU1_T,
++ QPU_W_TMU1_R,
++ QPU_W_TMU1_B,
+ };
+
+ enum qpu_sig_bits {
+- QPU_SIG_SW_BREAKPOINT,
+- QPU_SIG_NONE,
+- QPU_SIG_THREAD_SWITCH,
+- QPU_SIG_PROG_END,
+- QPU_SIG_WAIT_FOR_SCOREBOARD,
+- QPU_SIG_SCOREBOARD_UNLOCK,
+- QPU_SIG_LAST_THREAD_SWITCH,
+- QPU_SIG_COVERAGE_LOAD,
+- QPU_SIG_COLOR_LOAD,
+- QPU_SIG_COLOR_LOAD_END,
+- QPU_SIG_LOAD_TMU0,
+- QPU_SIG_LOAD_TMU1,
+- QPU_SIG_ALPHA_MASK_LOAD,
+- QPU_SIG_SMALL_IMM,
+- QPU_SIG_LOAD_IMM,
+- QPU_SIG_BRANCH
++ QPU_SIG_SW_BREAKPOINT,
++ QPU_SIG_NONE,
++ QPU_SIG_THREAD_SWITCH,
++ QPU_SIG_PROG_END,
++ QPU_SIG_WAIT_FOR_SCOREBOARD,
++ QPU_SIG_SCOREBOARD_UNLOCK,
++ QPU_SIG_LAST_THREAD_SWITCH,
++ QPU_SIG_COVERAGE_LOAD,
++ QPU_SIG_COLOR_LOAD,
++ QPU_SIG_COLOR_LOAD_END,
++ QPU_SIG_LOAD_TMU0,
++ QPU_SIG_LOAD_TMU1,
++ QPU_SIG_ALPHA_MASK_LOAD,
++ QPU_SIG_SMALL_IMM,
++ QPU_SIG_LOAD_IMM,
++ QPU_SIG_BRANCH
+ };
+
+ enum qpu_mux {
+- /* hardware mux values */
+- QPU_MUX_R0,
+- QPU_MUX_R1,
+- QPU_MUX_R2,
+- QPU_MUX_R3,
+- QPU_MUX_R4,
+- QPU_MUX_R5,
+- QPU_MUX_A,
+- QPU_MUX_B,
++ /* hardware mux values */
++ QPU_MUX_R0,
++ QPU_MUX_R1,
++ QPU_MUX_R2,
++ QPU_MUX_R3,
++ QPU_MUX_R4,
++ QPU_MUX_R5,
++ QPU_MUX_A,
++ QPU_MUX_B,
+
+- /* non-hardware mux values */
+- QPU_MUX_IMM,
++ /* non-hardware mux values */
++ QPU_MUX_IMM,
+ };
+
+ enum qpu_cond {
+- QPU_COND_NEVER,
+- QPU_COND_ALWAYS,
+- QPU_COND_ZS,
+- QPU_COND_ZC,
+- QPU_COND_NS,
+- QPU_COND_NC,
+- QPU_COND_CS,
+- QPU_COND_CC,
++ QPU_COND_NEVER,
++ QPU_COND_ALWAYS,
++ QPU_COND_ZS,
++ QPU_COND_ZC,
++ QPU_COND_NS,
++ QPU_COND_NC,
++ QPU_COND_CS,
++ QPU_COND_CC,
+ };
+
+ enum qpu_pack_mul {
+- QPU_PACK_MUL_NOP,
+- QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */
+- QPU_PACK_MUL_8A,
+- QPU_PACK_MUL_8B,
+- QPU_PACK_MUL_8C,
+- QPU_PACK_MUL_8D,
++ QPU_PACK_MUL_NOP,
++ /* replicated to each 8 bits of the 32-bit dst. */
++ QPU_PACK_MUL_8888 = 3,
++ QPU_PACK_MUL_8A,
++ QPU_PACK_MUL_8B,
++ QPU_PACK_MUL_8C,
++ QPU_PACK_MUL_8D,
+ };
+
+ enum qpu_pack_a {
+- QPU_PACK_A_NOP,
+- /* convert to 16 bit float if float input, or to int16. */
+- QPU_PACK_A_16A,
+- QPU_PACK_A_16B,
+- /* replicated to each 8 bits of the 32-bit dst. */
+- QPU_PACK_A_8888,
+- /* Convert to 8-bit unsigned int. */
+- QPU_PACK_A_8A,
+- QPU_PACK_A_8B,
+- QPU_PACK_A_8C,
+- QPU_PACK_A_8D,
+-
+- /* Saturating variants of the previous instructions. */
+- QPU_PACK_A_32_SAT, /* int-only */
+- QPU_PACK_A_16A_SAT, /* int or float */
+- QPU_PACK_A_16B_SAT,
+- QPU_PACK_A_8888_SAT,
+- QPU_PACK_A_8A_SAT,
+- QPU_PACK_A_8B_SAT,
+- QPU_PACK_A_8C_SAT,
+- QPU_PACK_A_8D_SAT,
++ QPU_PACK_A_NOP,
++ /* convert to 16 bit float if float input, or to int16. */
++ QPU_PACK_A_16A,
++ QPU_PACK_A_16B,
++ /* replicated to each 8 bits of the 32-bit dst. */
++ QPU_PACK_A_8888,
++ /* Convert to 8-bit unsigned int. */
++ QPU_PACK_A_8A,
++ QPU_PACK_A_8B,
++ QPU_PACK_A_8C,
++ QPU_PACK_A_8D,
++
++ /* Saturating variants of the previous instructions. */
++ QPU_PACK_A_32_SAT, /* int-only */
++ QPU_PACK_A_16A_SAT, /* int or float */
++ QPU_PACK_A_16B_SAT,
++ QPU_PACK_A_8888_SAT,
++ QPU_PACK_A_8A_SAT,
++ QPU_PACK_A_8B_SAT,
++ QPU_PACK_A_8C_SAT,
++ QPU_PACK_A_8D_SAT,
+ };
+
+ enum qpu_unpack_r4 {
+- QPU_UNPACK_R4_NOP,
+- QPU_UNPACK_R4_F16A_TO_F32,
+- QPU_UNPACK_R4_F16B_TO_F32,
+- QPU_UNPACK_R4_8D_REP,
+- QPU_UNPACK_R4_8A,
+- QPU_UNPACK_R4_8B,
+- QPU_UNPACK_R4_8C,
+- QPU_UNPACK_R4_8D,
+-};
+-
+-#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
+-/* Using the GNU statement expression extension */
+-#define QPU_SET_FIELD(value, field) \
+- ({ \
+- uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
+- assert((fieldval & ~ field ## _MASK) == 0); \
+- fieldval & field ## _MASK; \
+- })
++ QPU_UNPACK_R4_NOP,
++ QPU_UNPACK_R4_F16A_TO_F32,
++ QPU_UNPACK_R4_F16B_TO_F32,
++ QPU_UNPACK_R4_8D_REP,
++ QPU_UNPACK_R4_8A,
++ QPU_UNPACK_R4_8B,
++ QPU_UNPACK_R4_8C,
++ QPU_UNPACK_R4_8D,
++};
++
++#define QPU_MASK(high, low) \
++ ((((uint64_t)1 << ((high) - (low) + 1)) - 1) << (low))
+
+-#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
++#define QPU_GET_FIELD(word, field) \
++ ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
+
+ #define QPU_SIG_SHIFT 60
+ #define QPU_SIG_MASK QPU_MASK(63, 60)
+--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
++++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
+@@ -63,7 +63,6 @@ static inline void rcl_u32(struct vc4_rc
+ setup->next_offset += 4;
+ }
+
+-
+ /*
+ * Emits a no-op STORE_TILE_BUFFER_GENERAL.
+ *
+@@ -217,7 +216,7 @@ static int vc4_create_rcl_bo(struct drm_
+ }
+ size += xtiles * ytiles * loop_body_size;
+
+- setup->rcl = &vc4_bo_create(dev, size)->base;
++ setup->rcl = &vc4_bo_create(dev, size, true)->base;
+ if (!setup->rcl)
+ return -ENOMEM;
+ list_add_tail(&to_vc4_bo(&setup->rcl->base)->unref_head,
+@@ -256,6 +255,7 @@ static int vc4_create_rcl_bo(struct drm_
+ for (x = min_x_tile; x <= max_x_tile; x++) {
+ bool first = (x == min_x_tile && y == min_y_tile);
+ bool last = (x == max_x_tile && y == max_y_tile);
++
+ emit_tile(exec, setup, x, y, first, last);
+ }
+ }
+--- a/drivers/gpu/drm/vc4/vc4_v3d.c
++++ b/drivers/gpu/drm/vc4/vc4_v3d.c
+@@ -125,7 +125,7 @@ int vc4_v3d_debugfs_regs(struct seq_file
+
+ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
+ {
+- struct drm_info_node *node = (struct drm_info_node *) m->private;
++ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ uint32_t ident1 = V3D_READ(V3D_IDENT1);
+@@ -133,11 +133,13 @@ int vc4_v3d_debugfs_ident(struct seq_fil
+ uint32_t tups = VC4_GET_FIELD(ident1, V3D_IDENT1_TUPS);
+ uint32_t qups = VC4_GET_FIELD(ident1, V3D_IDENT1_QUPS);
+
+- seq_printf(m, "Revision: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
++ seq_printf(m, "Revision: %d\n",
++ VC4_GET_FIELD(ident1, V3D_IDENT1_REV));
+ seq_printf(m, "Slices: %d\n", nslc);
+ seq_printf(m, "TMUs: %d\n", nslc * tups);
+ seq_printf(m, "QPUs: %d\n", nslc * qups);
+- seq_printf(m, "Semaphores: %d\n", VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
++ seq_printf(m, "Semaphores: %d\n",
++ VC4_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+
+ return 0;
+ }
+@@ -218,7 +220,7 @@ static int vc4_v3d_bind(struct device *d
+ }
+
+ static void vc4_v3d_unbind(struct device *dev, struct device *master,
+- void *data)
++ void *data)
+ {
+ struct drm_device *drm = dev_get_drvdata(master);
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
+--- a/drivers/gpu/drm/vc4/vc4_validate.c
++++ b/drivers/gpu/drm/vc4/vc4_validate.c
+@@ -48,7 +48,6 @@
+ void *validated, \
+ void *untrusted
+
+-
+ /** Return the width in pixels of a 64-byte microtile. */
+ static uint32_t
+ utile_width(int cpp)
+@@ -192,7 +191,7 @@ vc4_check_tex_size(struct vc4_exec_info
+
+ if (size + offset < size ||
+ size + offset > fbo->base.size) {
+- DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %d)\n",
++ DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
+ width, height,
+ aligned_width, aligned_height,
+ size, offset, fbo->base.size);
+@@ -278,7 +277,7 @@ validate_indexed_prim_list(VALIDATE_ARGS
+
+ if (offset > ib->base.size ||
+ (ib->base.size - offset) / index_size < length) {
+- DRM_ERROR("IB access overflow (%d + %d*%d > %d)\n",
++ DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
+ offset, length, index_size, ib->base.size);
+ return -EINVAL;
+ }
+@@ -377,6 +376,7 @@ static int
+ validate_tile_binning_config(VALIDATE_ARGS)
+ {
+ struct drm_device *dev = exec->exec_bo->base.dev;
++ struct vc4_bo *tile_bo;
+ uint8_t flags;
+ uint32_t tile_state_size, tile_alloc_size;
+ uint32_t tile_count;
+@@ -438,12 +438,12 @@ validate_tile_binning_config(VALIDATE_AR
+ */
+ tile_alloc_size += 1024 * 1024;
+
+- exec->tile_bo = &vc4_bo_create(dev, exec->tile_alloc_offset +
+- tile_alloc_size)->base;
++ tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
++ true);
++ exec->tile_bo = &tile_bo->base;
+ if (!exec->tile_bo)
+ return -ENOMEM;
+- list_add_tail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,
+- &exec->unref_list);
++ list_add_tail(&tile_bo->unref_head, &exec->unref_list);
+
+ /* tile alloc address. */
+ *(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
+@@ -463,8 +463,8 @@ validate_gem_handles(VALIDATE_ARGS)
+ return 0;
+ }
+
+-#define VC4_DEFINE_PACKET(packet, name, func) \
+- [packet] = { packet ## _SIZE, name, func }
++#define VC4_DEFINE_PACKET(packet, func) \
++ [packet] = { packet ## _SIZE, #packet, func }
+
+ static const struct cmd_info {
+ uint16_t len;
+@@ -472,42 +472,43 @@ static const struct cmd_info {
+ int (*func)(struct vc4_exec_info *exec, void *validated,
+ void *untrusted);
+ } cmd_info[] = {
+- VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", validate_flush_all),
+- VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
+- VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
+-
+- VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list),
+-
+- VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive),
+-
+- /* This is only used by clipped primitives (packets 48 and 49), which
+- * we don't support parsing yet.
+- */
+- VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
+-
+- VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
+- VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, "NV Shader State", validate_nv_shader_state),
+-
+- VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL),
+- VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, validate_flush_all),
++ VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
++ validate_start_tile_binning),
++ VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
++ validate_increment_semaphore),
++
++ VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
++ validate_indexed_prim_list),
++ VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
++ validate_gl_array_primitive),
++
++ VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
++
++ VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
++ VC4_DEFINE_PACKET(VC4_PACKET_NV_SHADER_STATE, validate_nv_shader_state),
++
++ VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
+ /* Note: The docs say this was also 105, but it was 106 in the
+ * initial userland code drop.
+ */
+- VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL),
++ VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
+
+- VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config),
++ VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
++ validate_tile_binning_config),
+
+- VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles),
++ VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
+ };
+
+ int
+@@ -526,7 +527,7 @@ vc4_validate_bin_cl(struct drm_device *d
+ u8 cmd = *(uint8_t *)src_pkt;
+ const struct cmd_info *info;
+
+- if (cmd > ARRAY_SIZE(cmd_info)) {
++ if (cmd >= ARRAY_SIZE(cmd_info)) {
+ DRM_ERROR("0x%08x: packet %d out of bounds\n",
+ src_offset, cmd);
+ return -EINVAL;
+@@ -539,11 +540,6 @@ vc4_validate_bin_cl(struct drm_device *d
+ return -EINVAL;
+ }
+
+-#if 0
+- DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n",
+- src_offset, cmd, info->name, info->len);
+-#endif
+-
+ if (src_offset + info->len > len) {
+ DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
+ "exceeds bounds (0x%08x)\n",
+@@ -558,8 +554,7 @@ vc4_validate_bin_cl(struct drm_device *d
+ if (info->func && info->func(exec,
+ dst_pkt + 1,
+ src_pkt + 1)) {
+- DRM_ERROR("0x%08x: packet %d (%s) failed to "
+- "validate\n",
++ DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
+ src_offset, cmd, info->name);
+ return -EINVAL;
+ }
+@@ -618,12 +613,14 @@ reloc_tex(struct vc4_exec_info *exec,
+
+ if (sample->is_direct) {
+ uint32_t remaining_size = tex->base.size - p0;
++
+ if (p0 > tex->base.size - 4) {
+ DRM_ERROR("UBO offset greater than UBO size\n");
+ goto fail;
+ }
+ if (p1 > remaining_size - 4) {
+- DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
++ DRM_ERROR("UBO clamp would allow reads "
++ "outside of UBO\n");
+ goto fail;
+ }
+ *validated_p0 = tex->paddr + p0;
+@@ -786,7 +783,7 @@ validate_shader_rec(struct drm_device *d
+ struct drm_gem_cma_object *bo[ARRAY_SIZE(gl_relocs) + 8];
+ uint32_t nr_attributes = 0, nr_fixed_relocs, nr_relocs, packet_size;
+ int i;
+- struct vc4_validated_shader_info *validated_shader;
++ struct vc4_validated_shader_info *shader;
+
+ if (state->packet == VC4_PACKET_NV_SHADER_STATE) {
+ relocs = nv_relocs;
+@@ -841,12 +838,12 @@ validate_shader_rec(struct drm_device *d
+ else
+ mode = VC4_MODE_RENDER;
+
+- if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i])) {
++ if (!vc4_use_bo(exec, src_handles[i], mode, &bo[i]))
+ return false;
+- }
+ }
+
+ for (i = 0; i < nr_fixed_relocs; i++) {
++ struct vc4_bo *vc4_bo;
+ uint32_t o = relocs[i].offset;
+ uint32_t src_offset = *(uint32_t *)(pkt_u + o);
+ uint32_t *texture_handles_u;
+@@ -858,34 +855,34 @@ validate_shader_rec(struct drm_device *d
+ switch (relocs[i].type) {
+ case RELOC_CODE:
+ if (src_offset != 0) {
+- DRM_ERROR("Shaders must be at offset 0 of "
+- "the BO.\n");
++ DRM_ERROR("Shaders must be at offset 0 "
++ "of the BO.\n");
+ goto fail;
+ }
+
+- validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
+- if (!validated_shader)
++ vc4_bo = to_vc4_bo(&bo[i]->base);
++ shader = vc4_bo->validated_shader;
++ if (!shader)
+ goto fail;
+
+- if (validated_shader->uniforms_src_size >
+- exec->uniforms_size) {
++ if (shader->uniforms_src_size > exec->uniforms_size) {
+ DRM_ERROR("Uniforms src buffer overflow\n");
+ goto fail;
+ }
+
+ texture_handles_u = exec->uniforms_u;
+ uniform_data_u = (texture_handles_u +
+- validated_shader->num_texture_samples);
++ shader->num_texture_samples);
+
+ memcpy(exec->uniforms_v, uniform_data_u,
+- validated_shader->uniforms_size);
++ shader->uniforms_size);
+
+ for (tex = 0;
+- tex < validated_shader->num_texture_samples;
++ tex < shader->num_texture_samples;
+ tex++) {
+ if (!reloc_tex(exec,
+ uniform_data_u,
+- &validated_shader->texture_samples[tex],
++ &shader->texture_samples[tex],
+ texture_handles_u[tex])) {
+ goto fail;
+ }
+@@ -893,9 +890,9 @@ validate_shader_rec(struct drm_device *d
+
+ *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
+
+- exec->uniforms_u += validated_shader->uniforms_src_size;
+- exec->uniforms_v += validated_shader->uniforms_size;
+- exec->uniforms_p += validated_shader->uniforms_size;
++ exec->uniforms_u += shader->uniforms_src_size;
++ exec->uniforms_v += shader->uniforms_size;
++ exec->uniforms_p += shader->uniforms_size;
+
+ break;
+
+@@ -926,7 +923,8 @@ validate_shader_rec(struct drm_device *d
+ max_index = ((vbo->base.size - offset - attr_size) /
+ stride);
+ if (state->max_index > max_index) {
+- DRM_ERROR("primitives use index %d out of supplied %d\n",
++ DRM_ERROR("primitives use index %d out of "
++ "supplied %d\n",
+ state->max_index, max_index);
+ return -EINVAL;
+ }
+--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
++++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+@@ -24,24 +24,16 @@
+ /**
+ * DOC: Shader validator for VC4.
+ *
+- * The VC4 has no IOMMU between it and system memory. So, a user with access
+- * to execute shaders could escalate privilege by overwriting system memory
+- * (using the VPM write address register in the general-purpose DMA mode) or
+- * reading system memory it shouldn't (reading it as a texture, or uniform
+- * data, or vertex data).
++ * The VC4 has no IOMMU between it and system memory, so a user with
++ * access to execute shaders could escalate privilege by overwriting
++ * system memory (using the VPM write address register in the
++ * general-purpose DMA mode) or reading system memory it shouldn't
++ * (reading it as a texture, or uniform data, or vertex data).
+ *
+- * This walks over a shader starting from some offset within a BO, ensuring
+- * that its accesses are appropriately bounded, and recording how many texture
+- * accesses are made and where so that we can do relocations for them in the
++ * This walks over a shader BO, ensuring that its accesses are
++ * appropriately bounded, and recording how many texture accesses are
++ * made and where so that we can do relocations for them in the
+ * uniform stream.
+- *
+- * The kernel API has shaders stored in user-mapped BOs. The BOs will be
+- * forcibly unmapped from the process before validation, and any cache of
+- * validated state will be flushed if the mapping is faulted back in.
+- *
+- * Storing the shaders in BOs means that the validation process will be slow
+- * due to uncached reads, but since shaders are long-lived and shader BOs are
+- * never actually modified, this shouldn't be a problem.
+ */
+
+ #include "vc4_drv.h"
+@@ -70,7 +62,6 @@ waddr_to_live_reg_index(uint32_t waddr,
+ else
+ return waddr;
+ } else if (waddr <= QPU_W_ACC3) {
+-
+ return 64 + waddr - QPU_W_ACC0;
+ } else {
+ return ~0;
+@@ -85,15 +76,14 @@ raddr_add_a_to_live_reg_index(uint64_t i
+ uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+ uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+
+- if (add_a == QPU_MUX_A) {
++ if (add_a == QPU_MUX_A)
+ return raddr_a;
+- } else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) {
++ else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
+ return 32 + raddr_b;
+- } else if (add_a <= QPU_MUX_R3) {
++ else if (add_a <= QPU_MUX_R3)
+ return 64 + add_a;
+- } else {
++ else
+ return ~0;
+- }
+ }
+
+ static bool
+@@ -111,9 +101,9 @@ is_tmu_write(uint32_t waddr)
+ }
+
+ static bool
+-record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
+- struct vc4_shader_validation_state *validation_state,
+- int tmu)
++record_texture_sample(struct vc4_validated_shader_info *validated_shader,
++ struct vc4_shader_validation_state *validation_state,
++ int tmu)
+ {
+ uint32_t s = validated_shader->num_texture_samples;
+ int i;
+@@ -226,8 +216,8 @@ check_tmu_write(uint64_t inst,
+ validated_shader->uniforms_size += 4;
+
+ if (submit) {
+- if (!record_validated_texture_sample(validated_shader,
+- validation_state, tmu)) {
++ if (!record_texture_sample(validated_shader,
++ validation_state, tmu)) {
+ return false;
+ }
+
+@@ -238,10 +228,10 @@ check_tmu_write(uint64_t inst,
+ }
+
+ static bool
+-check_register_write(uint64_t inst,
+- struct vc4_validated_shader_info *validated_shader,
+- struct vc4_shader_validation_state *validation_state,
+- bool is_mul)
++check_reg_write(uint64_t inst,
++ struct vc4_validated_shader_info *validated_shader,
++ struct vc4_shader_validation_state *validation_state,
++ bool is_mul)
+ {
+ uint32_t waddr = (is_mul ?
+ QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
+@@ -297,7 +287,7 @@ check_register_write(uint64_t inst,
+ return true;
+
+ case QPU_W_TLB_STENCIL_SETUP:
+- return true;
++ return true;
+ }
+
+ return true;
+@@ -360,7 +350,7 @@ track_live_clamps(uint64_t inst,
+ }
+
+ validation_state->live_max_clamp_regs[lri_add] = true;
+- } if (op_add == QPU_A_MIN) {
++ } else if (op_add == QPU_A_MIN) {
+ /* Track live clamps of a value clamped to a minimum of 0 and
+ * a maximum of some uniform's offset.
+ */
+@@ -392,8 +382,10 @@ check_instruction_writes(uint64_t inst,
+ return false;
+ }
+
+- ok = (check_register_write(inst, validated_shader, validation_state, false) &&
+- check_register_write(inst, validated_shader, validation_state, true));
++ ok = (check_reg_write(inst, validated_shader, validation_state,
++ false) &&
++ check_reg_write(inst, validated_shader, validation_state,
++ true));
+
+ track_live_clamps(inst, validated_shader, validation_state);
+
+@@ -441,7 +433,7 @@ vc4_validate_shader(struct drm_gem_cma_o
+ shader = shader_obj->vaddr;
+ max_ip = shader_obj->base.size / sizeof(uint64_t);
+
+- validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL);
++ validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
+ if (!validated_shader)
+ return NULL;
+
+@@ -497,7 +489,7 @@ vc4_validate_shader(struct drm_gem_cma_o
+
+ if (ip == max_ip) {
+ DRM_ERROR("shader failed to terminate before "
+- "shader BO end at %d\n",
++ "shader BO end at %zd\n",
+ shader_obj->base.size);
+ goto fail;
+ }
+--- a/include/drm/drmP.h
++++ b/include/drm/drmP.h
+@@ -585,6 +585,13 @@ struct drm_driver {
+ int (*gem_open_object) (struct drm_gem_object *, struct drm_file *);
+ void (*gem_close_object) (struct drm_gem_object *, struct drm_file *);
+
++ /**
++ * Hook for allocating the GEM object struct, for use by core
++ * helpers.
++ */
++ struct drm_gem_object *(*gem_create_object)(struct drm_device *dev,
++ size_t size);
++
+ /* prime: */
+ /* export handle -> fd (see drm_gem_prime_handle_to_fd() helper) */
+ int (*prime_handle_to_fd)(struct drm_device *dev, struct drm_file *file_priv,
+@@ -639,7 +646,6 @@ struct drm_driver {
+
+ u32 driver_features;
+ int dev_priv_size;
+- size_t gem_obj_size;
+ const struct drm_ioctl_desc *ioctls;
+ int num_ioctls;
+ const struct file_operations *fops;