diff options
Diffstat (limited to 'target/linux/brcm2708/patches-4.4/0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch')
-rw-r--r-- | target/linux/brcm2708/patches-4.4/0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch | 333 |
1 files changed, 333 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.4/0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch b/target/linux/brcm2708/patches-4.4/0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch new file mode 100644 index 0000000..e71b82b --- /dev/null +++ b/target/linux/brcm2708/patches-4.4/0113-drm-vc4-Add-an-interface-for-capturing-the-GPU-state.patch @@ -0,0 +1,333 @@ +From b9e5697fbec13e6203b63649ee5d7c6819a5fb6b Mon Sep 17 00:00:00 2001 +From: Eric Anholt <eric@anholt.net> +Date: Fri, 30 Oct 2015 10:09:02 -0700 +Subject: [PATCH 113/232] drm/vc4: Add an interface for capturing the GPU state + after a hang. + +This can be parsed with vc4-gpu-tools tools for trying to figure out +what was going on. + +Signed-off-by: Eric Anholt <eric@anholt.net> +--- + drivers/gpu/drm/vc4/vc4_bo.c | 4 +- + drivers/gpu/drm/vc4/vc4_drv.c | 1 + + drivers/gpu/drm/vc4/vc4_drv.h | 4 + + drivers/gpu/drm/vc4/vc4_gem.c | 185 ++++++++++++++++++++++++++++++++++++++++++ + include/uapi/drm/vc4_drm.h | 45 ++++++++++ + 5 files changed, 237 insertions(+), 2 deletions(-) + +--- a/drivers/gpu/drm/vc4/vc4_bo.c ++++ b/drivers/gpu/drm/vc4/vc4_bo.c +@@ -415,8 +415,8 @@ int vc4_mmap(struct file *filp, struct v + gem_obj = vma->vm_private_data; + bo = to_vc4_bo(gem_obj); + +- if (bo->validated_shader) { +- DRM_ERROR("mmaping of shader BOs not allowed.\n"); ++ if (bo->validated_shader && (vma->vm_flags & VM_WRITE)) { ++ DRM_ERROR("mmaping of shader BOs for writing not allowed.\n"); + return -EINVAL; + } + +--- a/drivers/gpu/drm/vc4/vc4_drv.c ++++ b/drivers/gpu/drm/vc4/vc4_drv.c +@@ -81,6 +81,7 @@ static const struct drm_ioctl_desc vc4_d + DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0), + DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0), ++ DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl, DRM_ROOT_ONLY), + }; + + static struct drm_driver vc4_drm_driver = { +--- 
a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -20,6 +20,8 @@ struct vc4_dev {
+ 	struct drm_fbdev_cma *fbdev;
+ 	struct rpi_firmware *firmware;
+ 
++	struct vc4_hang_state *hang_state;
++
+ 	/* The kernel-space BO cache. Tracks buffers that have been
+ 	 * unreferenced by all other users (refcounts of 0!) but not
+ 	 * yet freed, so we can do cheap allocations.
+@@ -366,6 +368,8 @@ int vc4_create_shader_bo_ioctl(struct dr
+ 			       struct drm_file *file_priv);
+ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+ 		      struct drm_file *file_priv);
++int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
++			     struct drm_file *file_priv);
+ int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
+ int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+ void *vc4_prime_vmap(struct drm_gem_object *obj);
+--- a/drivers/gpu/drm/vc4/vc4_gem.c
++++ b/drivers/gpu/drm/vc4/vc4_gem.c
+@@ -40,6 +40,186 @@ vc4_queue_hangcheck(struct drm_device *d
+ 		  round_jiffies_up(jiffies + msecs_to_jiffies(100)));
+ }
+ 
++/* Snapshot made by vc4_save_hang_state(); holds one reference per BO. */
++struct vc4_hang_state {
++	struct drm_vc4_get_hang_state user_state;
++	struct drm_gem_object **bo;
++};
++
++/* Drops the BO references held by @state, then frees the state itself. */
++static void
++vc4_free_hang_state(struct drm_device *dev, struct vc4_hang_state *state)
++{
++	unsigned int i;
++
++	mutex_lock(&dev->struct_mutex);
++	for (i = 0; i < state->user_state.bo_count; i++)
++		drm_gem_object_unreference(state->bo[i]);
++	mutex_unlock(&dev->struct_mutex);
++
++	kfree(state->bo);
++	kfree(state);
++}
++
++/* Copies out the saved hang state, consuming it once the BO array fits. */
++int
++vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
++			 struct drm_file *file_priv)
++{
++	struct drm_vc4_get_hang_state *get_state = data;
++	struct drm_vc4_get_hang_state_bo *bo_state;
++	struct vc4_hang_state *kernel_state;
++	struct drm_vc4_get_hang_state *state;
++	struct vc4_dev *vc4 = to_vc4_dev(dev);
++	unsigned long irqflags;
++	u32 i, handle_count = 0;
++	int ret = 0;
++
++	spin_lock_irqsave(&vc4->job_lock, irqflags);
++	kernel_state = vc4->hang_state;
++	if (!kernel_state) {
++		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++		return -ENOENT;
++	}
++	state = &kernel_state->user_state;
++
++	/* If the user's array isn't big enough, just return the
++	 * required array size.
++	 */
++	if (get_state->bo_count < state->bo_count) {
++		get_state->bo_count = state->bo_count;
++		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++		return 0;
++	}
++
++	vc4->hang_state = NULL;
++	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++
++	/* Save the user's BO pointer, so we don't stomp it with the memcpy. */
++	state->bo = get_state->bo;
++	memcpy(get_state, state, sizeof(*state));
++
++	bo_state = kcalloc(state->bo_count, sizeof(*bo_state), GFP_KERNEL);
++	if (!bo_state) {
++		ret = -ENOMEM;
++		goto err_free;
++	}
++
++	for (i = 0; i < state->bo_count; i++) {
++		struct vc4_bo *vc4_bo = to_vc4_bo(kernel_state->bo[i]);
++		u32 handle;
++
++		ret = drm_gem_handle_create(file_priv, kernel_state->bo[i],
++					    &handle);
++		if (ret)
++			goto err_delete_handles;
++
++		bo_state[i].handle = handle;
++		bo_state[i].paddr = vc4_bo->base.paddr;
++		bo_state[i].size = vc4_bo->base.base.size;
++		handle_count++;
++	}
++
++	/* copy_to_user() returns the bytes left uncopied, not an errno. */
++	if (copy_to_user((void __user *)(uintptr_t)get_state->bo, bo_state,
++			 state->bo_count * sizeof(*bo_state)))
++		ret = -EFAULT;
++err_delete_handles:
++	/* On failure, take back the handles created so far. */
++	for (i = 0; ret && i < handle_count; i++)
++		drm_gem_handle_delete(file_priv, bo_state[i].handle);
++	kfree(bo_state);
++err_free:
++	vc4_free_hang_state(dev, kernel_state);
++	return ret;
++}
++
++/* Captures V3D registers and the BOs of the vc4_first_job() job, if any. */
++static void
++vc4_save_hang_state(struct drm_device *dev)
++{
++	struct vc4_dev *vc4 = to_vc4_dev(dev);
++	struct drm_vc4_get_hang_state *state;
++	struct vc4_hang_state *kernel_state;
++	struct vc4_exec_info *exec;
++	struct vc4_bo *bo;
++	unsigned long irqflags;
++	unsigned int i, unref_list_count;
++
++	kernel_state = kcalloc(1, sizeof(*kernel_state), GFP_KERNEL);
++	if (!kernel_state)
++		return;
++
++	state = &kernel_state->user_state;
++
++	spin_lock_irqsave(&vc4->job_lock, irqflags);
++	exec = vc4_first_job(vc4);
++	if (!exec)
++		goto err_unlock;
++
++	unref_list_count = 0;
++	list_for_each_entry(bo, &exec->unref_list, unref_head)
++		unref_list_count++;
++
++	state->bo_count = exec->bo_count + unref_list_count;
++	kernel_state->bo = kcalloc(state->bo_count, sizeof(*kernel_state->bo),
++				   GFP_ATOMIC);
++	if (!kernel_state->bo)
++		goto err_unlock;
++
++	for (i = 0; i < exec->bo_count; i++) {
++		drm_gem_object_reference(&exec->bo[i].bo->base);
++		kernel_state->bo[i] = &exec->bo[i].bo->base;
++	}
++
++	list_for_each_entry(bo, &exec->unref_list, unref_head) {
++		drm_gem_object_reference(&bo->base.base);
++		kernel_state->bo[i] = &bo->base.base;
++		i++;
++	}
++
++	state->start_bin = exec->ct0ca;
++	state->start_render = exec->ct1ca;
++
++	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++
++	state->ct0ca = V3D_READ(V3D_CTNCA(0));
++	state->ct0ea = V3D_READ(V3D_CTNEA(0));
++	state->ct1ca = V3D_READ(V3D_CTNCA(1));
++	state->ct1ea = V3D_READ(V3D_CTNEA(1));
++
++	state->ct0cs = V3D_READ(V3D_CTNCS(0));
++	state->ct1cs = V3D_READ(V3D_CTNCS(1));
++	state->ct0ra0 = V3D_READ(V3D_CT00RA0);
++	state->ct1ra0 = V3D_READ(V3D_CT01RA0);
++
++	state->bpca = V3D_READ(V3D_BPCA);
++	state->bpcs = V3D_READ(V3D_BPCS);
++	state->bpoa = V3D_READ(V3D_BPOA);
++	state->bpos = V3D_READ(V3D_BPOS);
++	state->vpmbase = V3D_READ(V3D_VPMBASE);
++
++	state->dbge = V3D_READ(V3D_DBGE);
++	state->fdbgo = V3D_READ(V3D_FDBGO);
++	state->fdbgb = V3D_READ(V3D_FDBGB);
++	state->fdbgr = V3D_READ(V3D_FDBGR);
++	state->fdbgs = V3D_READ(V3D_FDBGS);
++	state->errstat = V3D_READ(V3D_ERRSTAT);
++
++	spin_lock_irqsave(&vc4->job_lock, irqflags);
++	if (vc4->hang_state) {
++		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++		vc4_free_hang_state(dev, kernel_state);
++	} else {
++		vc4->hang_state = kernel_state;
++		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++	}
++	return;
++err_unlock:
++	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
++	kfree(kernel_state);
++}
++
+ static void
+ vc4_reset(struct drm_device *dev)
+ {
+@@ -64,6 +244,8 @@ vc4_reset_work(struct work_struct *work)
+ 	struct vc4_dev *vc4 =
+ 		container_of(work, 
struct vc4_dev, hangcheck.reset_work); + ++ vc4_save_hang_state(vc4->dev); ++ + vc4_reset(vc4->dev); + } + +@@ -673,4 +855,7 @@ vc4_gem_destroy(struct drm_device *dev) + } + + vc4_bo_cache_destroy(dev); ++ ++ if (vc4->hang_state) ++ vc4_free_hang_state(dev, vc4->hang_state); + } +--- a/include/uapi/drm/vc4_drm.h ++++ b/include/uapi/drm/vc4_drm.h +@@ -32,6 +32,7 @@ + #define DRM_VC4_CREATE_BO 0x03 + #define DRM_VC4_MMAP_BO 0x04 + #define DRM_VC4_CREATE_SHADER_BO 0x05 ++#define DRM_VC4_GET_HANG_STATE 0x06 + + #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) + #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) +@@ -39,6 +40,7 @@ + #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) + #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) + #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) ++#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) + + struct drm_vc4_submit_rcl_surface { + uint32_t hindex; /* Handle index, or ~0 if not present. */ +@@ -226,4 +228,47 @@ struct drm_vc4_mmap_bo { + uint64_t offset; + }; + ++struct drm_vc4_get_hang_state_bo { ++ uint32_t handle; ++ uint32_t paddr; ++ uint32_t size; ++ uint32_t pad; ++}; ++ ++/** ++ * struct drm_vc4_get_hang_state - ioctl argument for collecting state ++ * from a GPU hang for analysis. ++ */ ++struct drm_vc4_get_hang_state { ++ /** Pointer to array of struct drm_vc4_get_hang_state_bo. */ ++ uint64_t bo; ++ /** ++ * On input, the size of the bo array. Output is the number ++ * of bos to be returned. 
++ */ ++ uint32_t bo_count; ++ ++ uint32_t start_bin, start_render; ++ ++ uint32_t ct0ca, ct0ea; ++ uint32_t ct1ca, ct1ea; ++ uint32_t ct0cs, ct1cs; ++ uint32_t ct0ra0, ct1ra0; ++ ++ uint32_t bpca, bpcs; ++ uint32_t bpoa, bpos; ++ ++ uint32_t vpmbase; ++ ++ uint32_t dbge; ++ uint32_t fdbgo; ++ uint32_t fdbgb; ++ uint32_t fdbgr; ++ uint32_t fdbgs; ++ uint32_t errstat; ++ ++ /* Pad that we may save more registers into in the future. */ ++ uint32_t pad[16]; ++}; ++ + #endif /* _UAPI_VC4_DRM_H_ */ |