diff options
author | Felix Fietkau <nbd@nbd.name> | 2017-12-04 22:44:33 +0100 |
---|---|---|
committer | Felix Fietkau <nbd@nbd.name> | 2018-03-10 11:58:19 +0100 |
commit | 4e8f1e9f4ca088542fd2b861ea2f1a9dca0d845f (patch) | |
tree | a7f52213318e9089aaae22556c9eaea5e5f1f812 /target/linux/generic/hack-4.9 | |
parent | 916277a033bd1e31a82eace1b512c2ed03590172 (diff) | |
download | mtk-20170518-4e8f1e9f4ca088542fd2b861ea2f1a9dca0d845f.zip mtk-20170518-4e8f1e9f4ca088542fd2b861ea2f1a9dca0d845f.tar.gz mtk-20170518-4e8f1e9f4ca088542fd2b861ea2f1a9dca0d845f.tar.bz2 |
kernel: unroll MIPS r4k cache blast function
Optimize the compiler output for larger cache blast cases that are
common for DMA-based networking.
On ar71xx, I measured a routing throughput increase of ~8%
Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com>
Signed-off-by: Rosen Penev <rosenp@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Diffstat (limited to 'target/linux/generic/hack-4.9')
-rw-r--r-- | target/linux/generic/hack-4.9/300-MIPS-r4k_cache-use-more-efficient-cache-blast.patch | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/target/linux/generic/hack-4.9/300-MIPS-r4k_cache-use-more-efficient-cache-blast.patch b/target/linux/generic/hack-4.9/300-MIPS-r4k_cache-use-more-efficient-cache-blast.patch new file mode 100644 index 0000000..ce7901a --- /dev/null +++ b/target/linux/generic/hack-4.9/300-MIPS-r4k_cache-use-more-efficient-cache-blast.patch @@ -0,0 +1,66 @@ +From: Ben Menchaca <ben.menchaca@qca.qualcomm.com> +Date: Fri, 7 Jun 2013 18:35:22 -0500 +Subject: MIPS: r4k_cache: use more efficient cache blast + +Optimize the compiler output for larger cache blast cases that are +common for DMA-based networking. + +Signed-off-by: Ben Menchaca <ben.menchaca@qca.qualcomm.com> +Signed-off-by: Felix Fietkau <nbd@nbd.name> +--- +--- a/arch/mips/include/asm/r4kcache.h ++++ b/arch/mips/include/asm/r4kcache.h +@@ -665,16 +665,48 @@ static inline void prot##extra##blast_##pfx##cache##_range(unsigned long start, + unsigned long end) \ + { \ + unsigned long lsize = cpu_##desc##_line_size(); \ ++ unsigned long lsize_2 = lsize * 2; \ ++ unsigned long lsize_3 = lsize * 3; \ ++ unsigned long lsize_4 = lsize * 4; \ ++ unsigned long lsize_5 = lsize * 5; \ ++ unsigned long lsize_6 = lsize * 6; \ ++ unsigned long lsize_7 = lsize * 7; \ ++ unsigned long lsize_8 = lsize * 8; \ + unsigned long addr = start & ~(lsize - 1); \ +- unsigned long aend = (end - 1) & ~(lsize - 1); \ ++ unsigned long aend = (end + lsize - 1) & ~(lsize - 1); \ ++ int lines = (aend - addr) / lsize; \ + \ + __##pfx##flush_prologue \ + \ +- while (1) { \ ++ while (lines >= 8) { \ ++ prot##cache_op(hitop, addr); \ ++ prot##cache_op(hitop, addr + lsize); \ ++ prot##cache_op(hitop, addr + lsize_2); \ ++ prot##cache_op(hitop, addr + lsize_3); \ ++ prot##cache_op(hitop, addr + lsize_4); \ ++ prot##cache_op(hitop, addr + lsize_5); \ ++ prot##cache_op(hitop, addr + lsize_6); \ ++ prot##cache_op(hitop, addr + lsize_7); \ ++ addr += lsize_8; \ ++ lines -= 8; \ ++ } \ ++ \ ++ if (lines & 0x4) { \ ++ prot##cache_op(hitop, addr); \ ++ prot##cache_op(hitop, addr + lsize); \ ++ prot##cache_op(hitop, addr + lsize_2); \ ++ prot##cache_op(hitop, addr + lsize_3); \ ++ addr += lsize_4; \ ++ } \ ++ \ ++ if (lines & 0x2) { \ ++ prot##cache_op(hitop, addr); \ ++ prot##cache_op(hitop, addr + lsize); \ ++ addr += lsize_2; \ ++ } \ ++ \ ++ if (lines & 0x1) { \ + prot##cache_op(hitop, addr); \ +- if (addr == aend) \ +- break; \ +- addr += lsize; \ + } \ + \ + __##pfx##flush_epilogue \ |