summary refs log tree commit diff
path: root/target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch
diff options
context:
space:
mode:
Diffstat (limited to 'target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch')
-rw-r--r-- target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch 355
1 files changed, 355 insertions, 0 deletions
diff --git a/target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch b/target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch
new file mode 100644
index 0000000..92b456d
--- /dev/null
+++ b/target/linux/omap24xx/patches-2.6.37/100-optimized-arm-div.patch
@@ -0,0 +1,355 @@
+---
+ arch/arm/boot/compressed/lib1funcs.S | 348 +++++++++++++++++++++++++++++++++++
+ 1 file changed, 348 insertions(+)
+
+--- /dev/null
++++ linux-2.6.35/arch/arm/boot/compressed/lib1funcs.S
+@@ -0,0 +1,348 @@
++/*
++ * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
++ *
++ * Author: Nicolas Pitre <nico@fluxnic.net>
++ * - contributed to gcc-3.4 on Sep 30, 2003
++ * - adapted for the Linux kernel on Oct 2, 2003
++ */
++
++/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
++
++This file is free software; you can redistribute it and/or modify it
++under the terms of the GNU General Public License as published by the
++Free Software Foundation; either version 2, or (at your option) any
++later version.
++
++In addition to the permissions in the GNU General Public License, the
++Free Software Foundation gives you unlimited permission to link the
++compiled version of this file into combinations with other programs,
++and to distribute those combinations without any restriction coming
++from the use of this file. (The General Public License restrictions
++do apply in other respects; for example, they cover modification of
++the file, and distribution when not linked into a combine
++executable.)
++
++This file is distributed in the hope that it will be useful, but
++WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with this program; see the file COPYING. If not, write to
++the Free Software Foundation, 59 Temple Place - Suite 330,
++Boston, MA 02111-1307, USA. */
++
++
++#include <linux/linkage.h>
++#include <asm/assembler.h>
++
++
++.macro ARM_DIV_BODY dividend, divisor, result, curbit
++ @ Unsigned shift-and-subtract divide. Quotient ends up in result; dividend, divisor and curbit are overwritten.
++#if __LINUX_ARM_ARCH__ >= 5
++ @ clz path: line the divisor up with the dividend in one step.
++ clz \curbit, \divisor
++ clz \result, \dividend
++ sub \result, \curbit, \result @ shift that lines up the top set bits
++ mov \curbit, #1
++ mov \divisor, \divisor, lsl \result
++ mov \curbit, \curbit, lsl \result @ quotient bit matching the shifted divisor
++ mov \result, #0
++
++#else
++
++ @ Initially shift the divisor left 3 bits if possible,
++ @ set curbit accordingly. This allows for curbit to be located
++ @ at the left end of each 4-bit nibble in the division loop
++ @ to save one loop in most cases.
++ tst \divisor, #0xe0000000 @ are the top 3 bits of the divisor clear?
++ moveq \divisor, \divisor, lsl #3
++ moveq \curbit, #8
++ movne \curbit, #1
++
++ @ Unless the divisor is very big, shift it up in multiples of
++ @ four bits, since this is the amount of unwinding in the main
++ @ division loop. Continue shifting until the divisor is
++ @ larger than the dividend.
++1: cmp \divisor, #0x10000000
++ cmplo \divisor, \dividend
++ movlo \divisor, \divisor, lsl #4
++ movlo \curbit, \curbit, lsl #4 @ keep curbit in step with the divisor
++ blo 1b
++
++ @ For very big divisors, we must shift it a bit at a time, or
++ @ we will be in danger of overflowing.
++1: cmp \divisor, #0x80000000
++ cmplo \divisor, \dividend
++ movlo \divisor, \divisor, lsl #1
++ movlo \curbit, \curbit, lsl #1 @ keep curbit in step with the divisor
++ blo 1b
++
++ mov \result, #0
++
++#endif
++
++ @ Division loop
++1: cmp \dividend, \divisor @ step 1 of 4: try the divisor as it stands
++ subhs \dividend, \dividend, \divisor
++ orrhs \result, \result, \curbit @ it fitted: set this quotient bit
++ cmp \dividend, \divisor, lsr #1 @ step 2: divisor / 2
++ subhs \dividend, \dividend, \divisor, lsr #1
++ orrhs \result, \result, \curbit, lsr #1
++ cmp \dividend, \divisor, lsr #2 @ step 3: divisor / 4
++ subhs \dividend, \dividend, \divisor, lsr #2
++ orrhs \result, \result, \curbit, lsr #2
++ cmp \dividend, \divisor, lsr #3 @ step 4: divisor / 8
++ subhs \dividend, \dividend, \divisor, lsr #3
++ orrhs \result, \result, \curbit, lsr #3
++ cmp \dividend, #0 @ Early termination?
++ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
++ movne \divisor, \divisor, lsr #4 @ move the divisor down by the same 4 bits
++ bne 1b
++
++.endm
++
++
++.macro ARM_DIV2_ORDER divisor, order
++ @ Sets order to the bit index of the divisor. Callers in this file only pass a power of 2.
++#if __LINUX_ARM_ARCH__ >= 5
++
++ clz \order, \divisor
++ rsb \order, \order, #31 @ order = 31 - leading zero count
++
++#else
++ @ No clz here: halve the search range in steps of 16, 8 and 4 bits. The divisor is overwritten.
++ cmp \divisor, #(1 << 16)
++ movhs \divisor, \divisor, lsr #16
++ movhs \order, #16
++ movlo \order, #0
++
++ cmp \divisor, #(1 << 8)
++ movhs \divisor, \divisor, lsr #8
++ addhs \order, \order, #8
++
++ cmp \divisor, #(1 << 4)
++ movhs \divisor, \divisor, lsr #4
++ addhs \order, \order, #4
++ @ The divisor now fits in 4 bits. For a power of 2 that leaves 1, 2, 4 or 8.
++ cmp \divisor, #(1 << 2)
++ addhi \order, \order, #3 @ above 4, i.e. 8: three more bits
++ addls \order, \order, \divisor, lsr #1 @ 1, 2 or 4: add 0, 1 or 2
++
++#endif
++
++.endm
++
++
++.macro ARM_MOD_BODY dividend, divisor, order, spare
++ @ Unsigned remainder by shift and subtract. The dividend is reduced in place; divisor and order are overwritten.
++#if __LINUX_ARM_ARCH__ >= 5
++ @ clz path: spare is used as scratch here only.
++ clz \order, \divisor
++ clz \spare, \dividend
++ sub \order, \order, \spare @ order = how far the divisor is shifted up
++ mov \divisor, \divisor, lsl \order
++
++#else
++
++ mov \order, #0
++
++ @ Unless the divisor is very big, shift it up in multiples of
++ @ four bits, since this is the amount of unwinding in the main
++ @ division loop. Continue shifting until the divisor is
++ @ larger than the dividend.
++1: cmp \divisor, #0x10000000
++ cmplo \divisor, \dividend
++ movlo \divisor, \divisor, lsl #4
++ addlo \order, \order, #4 @ count the bits shifted
++ blo 1b
++
++ @ For very big divisors, we must shift it a bit at a time, or
++ @ we will be in danger of overflowing.
++1: cmp \divisor, #0x80000000
++ cmplo \divisor, \dividend
++ movlo \divisor, \divisor, lsl #1
++ addlo \order, \order, #1 @ count the bits shifted
++ blo 1b
++
++#endif
++
++ @ Perform all needed subtractions to keep only the remainder.
++ @ Do comparisons in batches of 4 first.
++ subs \order, \order, #3 @ yes, 3 is intended here
++ blt 2f @ fewer than 4 steps needed: skip the unrolled loop
++
++1: cmp \dividend, \divisor @ step 1 of 4: try the divisor as it stands
++ subhs \dividend, \dividend, \divisor
++ cmp \dividend, \divisor, lsr #1 @ step 2: divisor / 2
++ subhs \dividend, \dividend, \divisor, lsr #1
++ cmp \dividend, \divisor, lsr #2 @ step 3: divisor / 4
++ subhs \dividend, \dividend, \divisor, lsr #2
++ cmp \dividend, \divisor, lsr #3 @ step 4: divisor / 8
++ subhs \dividend, \dividend, \divisor, lsr #3
++ cmp \dividend, #1 @ stop early once the dividend is 0
++ mov \divisor, \divisor, lsr #4 @ plain mov: flags from the cmp survive
++ subges \order, \order, #4 @ room for another batch of 4?
++ bge 1b
++
++ tst \order, #3 @ low two bits clear: no leftover steps
++ teqne \dividend, #0 @ otherwise done only if the dividend is 0
++ beq 5f
++
++ @ Either 1, 2 or 3 comparison/subtractions are left.
++2: cmn \order, #2 @ compare order with -2
++ blt 4f @ below -2: one step left
++ beq 3f @ exactly -2: two steps left
++ cmp \dividend, \divisor @ above -2: three steps left
++ subhs \dividend, \dividend, \divisor
++ mov \divisor, \divisor, lsr #1
++3: cmp \dividend, \divisor
++ subhs \dividend, \dividend, \divisor
++ mov \divisor, \divisor, lsr #1
++4: cmp \dividend, \divisor
++ subhs \dividend, \dividend, \divisor
++5:
++.endm
++
++
++ENTRY(__udivsi3)
++ENTRY(__aeabi_uidiv)
++ @ Unsigned divide: r0 = r0 / r1. May overwrite r1, r2, r3 and the flags.
++ subs r2, r1, #1 @ r2 = divisor - 1
++ moveq pc, lr @ divisor is 1: the quotient is already in r0
++ bcc Ldiv0 @ borrow: the divisor was 0
++ cmp r0, r1
++ bls 11f @ dividend <= divisor: quotient is 0 or 1
++ tst r1, r2 @ divisor & (divisor - 1) is 0 for a power of 2
++ beq 12f
++ @ General case: dividend > divisor and the divisor is not a power of 2.
++ ARM_DIV_BODY r0, r1, r2, r3
++
++ mov r0, r2 @ the macro left the quotient in r2
++ mov pc, lr
++
++11: moveq r0, #1 @ dividend == divisor
++ movne r0, #0 @ dividend < divisor
++ mov pc, lr
++
++12: ARM_DIV2_ORDER r1, r2
++ @ r2 now holds the bit index of the divisor, so a shift gives the quotient.
++ mov r0, r0, lsr r2
++ mov pc, lr
++
++ENDPROC(__udivsi3)
++ENDPROC(__aeabi_uidiv)
++
++ENTRY(__umodsi3)
++ @ Unsigned remainder: r0 = r0 % r1. May overwrite r1, r2, r3 and the flags.
++ subs r2, r1, #1 @ compare divisor with 1
++ bcc Ldiv0 @ borrow: the divisor was 0
++ cmpne r0, r1 @ compare dividend with divisor
++ moveq r0, #0 @ divisor is 1 or equals the dividend: remainder 0
++ tsthi r1, r2 @ see if divisor is power of 2
++ andeq r0, r0, r2 @ power of 2: mask with divisor - 1 (harmless on the 0 set above)
++ movls pc, lr @ done unless dividend > divisor and not a power of 2
++ @ General case: reduce r0 in place.
++ ARM_MOD_BODY r0, r1, r2, r3
++
++ mov pc, lr
++
++ENDPROC(__umodsi3)
++
++ENTRY(__divsi3)
++ENTRY(__aeabi_idiv)
++ @ Signed divide: r0 = r0 / r1. Works on magnitudes and fixes the sign last. Uses r1-r3, ip and the flags.
++ cmp r1, #0
++ eor ip, r0, r1 @ save the sign of the result.
++ beq Ldiv0 @ flags still come from the cmp: eor has no s suffix
++ rsbmi r1, r1, #0 @ loops below use unsigned.
++ subs r2, r1, #1 @ division by 1 or -1 ?
++ beq 10f
++ movs r3, r0
++ rsbmi r3, r0, #0 @ positive dividend value
++ cmp r3, r1
++ bls 11f @ magnitude of dividend <= divisor: result is 0, 1 or -1
++ tst r1, r2 @ divisor is power of 2 ?
++ beq 12f
++ @ General case: unsigned divide of the magnitudes, quotient into r0.
++ ARM_DIV_BODY r3, r1, r0, r2
++
++ cmp ip, #0
++ rsbmi r0, r0, #0 @ operand signs differed: negate the quotient
++ mov pc, lr
++
++10: teq ip, r0 @ same sign ?
++ rsbmi r0, r0, #0 @ ip eor r0 is negative only when the divisor was: negate
++ mov pc, lr
++
++11: movlo r0, #0 @ magnitude of dividend below divisor
++ moveq r0, ip, asr #31 @ equal magnitudes: 0 or -1 from the result sign
++ orreq r0, r0, #1 @ setting bit 0 turns that into +1 or -1
++ mov pc, lr
++
++12: ARM_DIV2_ORDER r1, r2
++ @ r2 now holds the bit index of the divisor: shift the magnitude, then fix the sign.
++ cmp ip, #0
++ mov r0, r3, lsr r2 @ plain mov: flags from the cmp survive
++ rsbmi r0, r0, #0
++ mov pc, lr
++
++ENDPROC(__divsi3)
++ENDPROC(__aeabi_idiv)
++
++ENTRY(__modsi3)
++ @ Signed remainder: r0 = r0 % r1, carrying the sign of the original dividend. Uses r1-r3, ip and the flags.
++ cmp r1, #0
++ beq Ldiv0
++ rsbmi r1, r1, #0 @ loops below use unsigned.
++ movs ip, r0 @ preserve sign of dividend
++ rsbmi r0, r0, #0 @ if negative make positive
++ subs r2, r1, #1 @ compare divisor with 1
++ cmpne r0, r1 @ compare dividend with divisor
++ moveq r0, #0 @ divisor magnitude is 1 or equals the dividend magnitude
++ tsthi r1, r2 @ see if divisor is power of 2
++ andeq r0, r0, r2 @ power of 2: mask with divisor - 1 (harmless on the 0 set above)
++ bls 10f @ done unless dividend > divisor and not a power of 2
++ @ General case: reduce r0 in place.
++ ARM_MOD_BODY r0, r1, r2, r3
++ @ Put back the sign saved in ip.
++10: cmp ip, #0
++ rsbmi r0, r0, #0
++ mov pc, lr
++
++ENDPROC(__modsi3)
++
++#ifdef CONFIG_AEABI
++
++ENTRY(__aeabi_uidivmod)
++ @ Unsigned divide with remainder: quotient returned in r0, remainder in r1.
++ stmfd sp!, {r0, r1, ip, lr} @ keep dividend, divisor, ip and lr across the call
++ bl __aeabi_uidiv
++ ldmfd sp!, {r1, r2, ip, lr} @ r1 = saved dividend, r2 = saved divisor
++ mul r3, r0, r2 @ r3 = quotient * divisor
++ sub r1, r1, r3 @ remainder = dividend - quotient * divisor
++ mov pc, lr
++
++ENDPROC(__aeabi_uidivmod)
++
++ENTRY(__aeabi_idivmod)
++ @ Signed divide with remainder: quotient returned in r0, remainder in r1.
++ stmfd sp!, {r0, r1, ip, lr} @ keep dividend, divisor, ip and lr across the call
++ bl __aeabi_idiv
++ ldmfd sp!, {r1, r2, ip, lr} @ r1 = saved dividend, r2 = saved divisor
++ mul r3, r0, r2 @ r3 = quotient * divisor
++ sub r1, r1, r3 @ remainder = dividend - quotient * divisor
++ mov pc, lr
++
++ENDPROC(__aeabi_idivmod)
++
++#endif
++
++Ldiv0:
++ @ Shared divide-by-zero exit: call __div0, then return 0 to the original caller.
++ str lr, [sp, #-8]! @ push the return address, moving sp down by 8
++ bl __div0 @ not defined in this file; presumably reports the error (confirm)
++ mov r0, #0 @ About as wrong as it could be.
++ ldr pc, [sp], #8 @ pop the return address into pc and restore sp
++
++