diff options
Diffstat (limited to 'target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch')
-rw-r--r-- | target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch | 268 |
1 files changed, 268 insertions, 0 deletions
diff --git a/target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch b/target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch new file mode 100644 index 0000000..25b3db3 --- /dev/null +++ b/target/linux/brcm2708/patches-4.1/0106-vchiq_arm-Two-cacheing-fixes.patch @@ -0,0 +1,268 @@ +From edb21286ac7e246dfe7c9ee05101880f719e00e8 Mon Sep 17 00:00:00 2001 +From: Phil Elwell <phil@raspberrypi.org> +Date: Wed, 8 Jul 2015 14:48:57 +0100 +Subject: [PATCH 106/121] vchiq_arm: Two cacheing fixes + +1) Make fragment size vary with cache line size +Without this patch, non-cache-line-aligned transfers may corrupt +(or be corrupted by) adjacent data structures. + +Both ARM and VC need to be updated to enable this feature. This is +ensured by having the loader apply a new DT parameter - +cache-line-size. The existence of this parameter guarantees that the +kernel is capable, and the parameter will only be modified from the +safe default if the loader is capable. + +2) Flush/invalidate vmalloc'd memory, and invalidate after reads +--- + arch/arm/boot/dts/bcm2708_common.dtsi | 5 + + .../interface/vchiq_arm/vchiq_2835_arm.c | 112 +++++++++++++-------- + 2 files changed, 77 insertions(+), 40 deletions(-) + +--- a/arch/arm/boot/dts/bcm2708_common.dtsi ++++ b/arch/arm/boot/dts/bcm2708_common.dtsi +@@ -218,6 +218,7 @@ + compatible = "brcm,bcm2835-vchiq"; + reg = <0x7e00b840 0xf>; + interrupts = <0 2>; ++ cache-line-size = <32>; + }; + + thermal: thermal { +@@ -270,4 +271,8 @@ + clock-frequency = <126000000>; + }; + }; ++ ++ __overrides__ { ++ cache_line_size = <&vchiq>, "cache-line-size:0"; ++ }; + }; +--- a/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c ++++ b/drivers/misc/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +@@ -42,6 +42,7 @@ + #include <linux/platform_data/mailbox-bcm2708.h> + #include <linux/platform_device.h> + #include <linux/uaccess.h> ++#include <linux/of.h> + #include <asm/pgtable.h> + + #define TOTAL_SLOTS (VCHIQ_SLOT_ZERO_SLOTS + 2 * 32) +@@ -64,8 +65,10 @@ typedef struct vchiq_2835_state_struct { + } VCHIQ_2835_ARM_STATE_T; + + static void __iomem *g_regs; +-static FRAGMENTS_T *g_fragments_base; +-static FRAGMENTS_T *g_free_fragments; ++static unsigned int g_cache_line_size = sizeof(CACHE_LINE_SIZE); ++static unsigned int g_fragments_size; ++static char *g_fragments_base; ++static char *g_free_fragments; + static struct semaphore g_free_fragments_sema; + static unsigned long g_virt_to_bus_offset; + +@@ -95,9 +98,13 @@ int vchiq_platform_init(struct platform_ + + g_virt_to_bus_offset = virt_to_dma(dev, (void *)0); + ++ (void)of_property_read_u32(dev->of_node, "cache-line-size", ++ &g_cache_line_size); ++ g_fragments_size = 2 * g_cache_line_size; ++ + /* Allocate space for the channels in coherent memory */ + slot_mem_size = PAGE_ALIGN(TOTAL_SLOTS * VCHIQ_SLOT_SIZE); +- frag_mem_size = PAGE_ALIGN(sizeof(FRAGMENTS_T) * MAX_FRAGMENTS); ++ frag_mem_size = PAGE_ALIGN(g_fragments_size * MAX_FRAGMENTS); + + slot_mem = dmam_alloc_coherent(dev, slot_mem_size + frag_mem_size, + &slot_phys, GFP_KERNEL); +@@ -117,15 +124,15 @@ int vchiq_platform_init(struct platform_ + vchiq_slot_zero->platform_data[VCHIQ_PLATFORM_FRAGMENTS_COUNT_IDX] = + MAX_FRAGMENTS; + +- g_fragments_base = (FRAGMENTS_T *)(slot_mem + slot_mem_size); ++ g_fragments_base = (char *)slot_mem + slot_mem_size; + slot_mem_size += frag_mem_size; + + g_free_fragments = g_fragments_base; + for (i = 0; i < (MAX_FRAGMENTS - 1); i++) { +- *(FRAGMENTS_T **)&g_fragments_base[i] = +- &g_fragments_base[i + 1]; ++ *(char **)&g_fragments_base[i*g_fragments_size] = ++ &g_fragments_base[(i + 1)*g_fragments_size]; + } +- *(FRAGMENTS_T **)&g_fragments_base[i] = NULL; ++ *(char **)&g_fragments_base[i * g_fragments_size] = NULL; + sema_init(&g_free_fragments_sema, MAX_FRAGMENTS); + + if (vchiq_init_state(state, vchiq_slot_zero, 0) != VCHIQ_SUCCESS) +@@ -344,7 +351,7 @@ vchiq_doorbell_irq(int irq, void *dev_id + ** cached area. + + ** N.B. This implementation plays slightly fast and loose with the Linux +-** driver programming rules, e.g. its use of __virt_to_bus instead of ++** driver programming rules, e.g. its use of dmac_map_area instead of + ** dma_map_single, but it isn't a multi-platform driver and it benefits + ** from increased speed as a result. + */ +@@ -355,7 +362,6 @@ create_pagelist(char __user *buf, size_t + { + PAGELIST_T *pagelist; + struct page **pages; +- struct page *page; + unsigned long *addrs; + unsigned int num_pages, offset, i; + char *addr, *base_addr, *next_addr; +@@ -386,10 +392,25 @@ create_pagelist(char __user *buf, size_t + pages = (struct page **)(addrs + num_pages + 1); + + if (is_vmalloc_addr(buf)) { +- for (actual_pages = 0; actual_pages < num_pages; actual_pages++) { +- pages[actual_pages] = vmalloc_to_page(buf + (actual_pages * PAGE_SIZE)); ++ int dir = (type == PAGELIST_WRITE) ? ++ DMA_TO_DEVICE : DMA_FROM_DEVICE; ++ unsigned long length = pagelist->length; ++ unsigned int offset = pagelist->offset; ++ ++ for (actual_pages = 0; actual_pages < num_pages; ++ actual_pages++) { ++ struct page *pg = vmalloc_to_page(buf + (actual_pages * ++ PAGE_SIZE)); ++ size_t bytes = PAGE_SIZE - offset; ++ ++ if (bytes > length) ++ bytes = length; ++ pages[actual_pages] = pg; ++ dmac_map_area(page_address(pg) + offset, bytes, dir); ++ length -= bytes; ++ offset = 0; + } +- *need_release = 0; /* do not try and release vmalloc pages */ ++ *need_release = 0; /* do not try and release vmalloc pages */ + } else { + down_read(&task->mm->mmap_sem); + actual_pages = get_user_pages(task, task->mm, +@@ -418,7 +439,7 @@ create_pagelist(char __user *buf, size_t + actual_pages = -ENOMEM; + return actual_pages; + } +- *need_release = 1; /* release user pages */ ++ *need_release = 1; /* release user pages */ + } + + pagelist->length = count; +@@ -451,10 +472,10 @@ create_pagelist(char __user *buf, size_t + + /* Partial cache lines (fragments) require special measures */ + if ((type == PAGELIST_READ) && +- ((pagelist->offset & (CACHE_LINE_SIZE - 1)) || ++ ((pagelist->offset & (g_cache_line_size - 1)) || + ((pagelist->offset + pagelist->length) & +- (CACHE_LINE_SIZE - 1)))) { +- FRAGMENTS_T *fragments; ++ (g_cache_line_size - 1)))) { ++ char *fragments; + + if (down_interruptible(&g_free_fragments_sema) != 0) { + kfree(pagelist); +@@ -464,19 +485,15 @@ create_pagelist(char __user *buf, size_t + WARN_ON(g_free_fragments == NULL); + + down(&g_free_fragments_mutex); +- fragments = (FRAGMENTS_T *) g_free_fragments; ++ fragments = g_free_fragments; + WARN_ON(fragments == NULL); +- g_free_fragments = *(FRAGMENTS_T **) g_free_fragments; ++ g_free_fragments = *(char **) g_free_fragments; + up(&g_free_fragments_mutex); +- pagelist->type = +- PAGELIST_READ_WITH_FRAGMENTS + (fragments - +- g_fragments_base); ++ pagelist->type = PAGELIST_READ_WITH_FRAGMENTS + ++ (fragments - g_fragments_base) / g_fragments_size; + } + +- for (page = virt_to_page(pagelist); +- page <= virt_to_page(addrs + num_pages - 1); page++) { +- flush_dcache_page(page); +- } ++ dmac_flush_range(pagelist, addrs + num_pages); + + *ppagelist = pagelist; + +@@ -502,13 +519,14 @@ free_pagelist(PAGELIST_T *pagelist, int + + /* Deal with any partial cache lines (fragments) */ + if (pagelist->type >= PAGELIST_READ_WITH_FRAGMENTS) { +- FRAGMENTS_T *fragments = g_fragments_base + +- (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS); ++ char *fragments = g_fragments_base + ++ (pagelist->type - PAGELIST_READ_WITH_FRAGMENTS) * ++ g_fragments_size; + int head_bytes, tail_bytes; +- head_bytes = (CACHE_LINE_SIZE - pagelist->offset) & +- (CACHE_LINE_SIZE - 1); ++ head_bytes = (g_cache_line_size - pagelist->offset) & ++ (g_cache_line_size - 1); + tail_bytes = (pagelist->offset + actual) & +- (CACHE_LINE_SIZE - 1); ++ (g_cache_line_size - 1); + + if ((actual >= 0) && (head_bytes != 0)) { + if (head_bytes > actual) +@@ -516,32 +534,46 @@ free_pagelist(PAGELIST_T *pagelist, int + + memcpy((char *)page_address(pages[0]) + + pagelist->offset, +- fragments->headbuf, ++ fragments, + head_bytes); + } + if ((actual >= 0) && (head_bytes < actual) && + (tail_bytes != 0)) { + memcpy((char *)page_address(pages[num_pages - 1]) + + ((pagelist->offset + actual) & +- (PAGE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1)), +- fragments->tailbuf, tail_bytes); ++ (PAGE_SIZE - 1) & ~(g_cache_line_size - 1)), ++ fragments + g_cache_line_size, ++ tail_bytes); + } + + down(&g_free_fragments_mutex); +- *(FRAGMENTS_T **) fragments = g_free_fragments; ++ *(char **)fragments = g_free_fragments; + g_free_fragments = fragments; + up(&g_free_fragments_mutex); + up(&g_free_fragments_sema); + } + +- if (*need_release) { ++ if (*need_release) { ++ unsigned int length = pagelist->length; ++ unsigned int offset = pagelist->offset; ++ + for (i = 0; i < num_pages; i++) { +- if (pagelist->type != PAGELIST_WRITE) +- set_page_dirty(pages[i]); ++ struct page *pg = pages[i]; + +- page_cache_release(pages[i]); ++ if (pagelist->type != PAGELIST_WRITE) { ++ unsigned int bytes = PAGE_SIZE - offset; ++ ++ if (bytes > length) ++ bytes = length; ++ dmac_unmap_area(page_address(pg) + offset, ++ bytes, DMA_FROM_DEVICE); ++ length -= bytes; ++ offset = 0; ++ set_page_dirty(pg); ++ } ++ page_cache_release(pg); + } +- } ++ } + + kfree(pagelist); + } |