Diffstat (limited to 'target/linux/etrax-2.6/patches/cris/006-gcc-4.patch')
-rw-r--r-- | target/linux/etrax-2.6/patches/cris/006-gcc-4.patch | 752 |
1 files changed, 752 insertions, 0 deletions
diff --git a/target/linux/etrax-2.6/patches/cris/006-gcc-4.patch b/target/linux/etrax-2.6/patches/cris/006-gcc-4.patch new file mode 100644 index 0000000..31a4107 --- /dev/null +++ b/target/linux/etrax-2.6/patches/cris/006-gcc-4.patch @@ -0,0 +1,752 @@ +diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c +--- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/memset.c 2007-05-20 01:46:35.000000000 +0200 ++++ linux-2.6.19.2/arch/cris/arch-v10/lib/memset.c 2007-05-20 01:51:47.000000000 +0200 +@@ -29,224 +29,21 @@ + + #include <linux/types.h> + +-/* No, there's no macro saying 12*4, since it is "hard" to get it into +- the asm in a good way. Thus better to expose the problem everywhere. +- */ + +-/* Assuming 1 cycle per dword written or read (ok, not really true), and +- one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1) +- so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */ +- +-#define ZERO_BLOCK_SIZE (1*12*4) +- +-void *memset(void *pdst, +- int c, +- size_t plen) ++/** ++ * memset - Fill a region of memory with the given value ++ * @s: Pointer to the start of the area. ++ * @c: The byte to fill the area with ++ * @count: The size of the area. ++ * ++ * Do not use memset() to access IO space, use memset_io() instead. ++ */ ++void *memset(void *s, int c, size_t count) + { +- /* Ok. Now we want the parameters put in special registers. +- Make sure the compiler is able to make something useful of this. */ +- +- register char *return_dst __asm__ ("r10") = pdst; +- register int n __asm__ ("r12") = plen; +- register int lc __asm__ ("r11") = c; +- +- /* Most apps use memset sanely. Only those memsetting about 3..4 +- bytes or less get penalized compared to the generic implementation +- - and that's not really sane use. */ +- +- /* Ugh. This is fragile at best. Check with newer GCC releases, if +- they compile cascaded "x |= x << 8" sanely! */ +- __asm__("movu.b %0,$r13\n\t" +- "lslq 8,$r13\n\t" +- "move.b %0,$r13\n\t" +- "move.d $r13,%0\n\t" +- "lslq 16,$r13\n\t" +- "or.d $r13,%0" +- : "=r" (lc) : "0" (lc) : "r13"); +- +- { +- register char *dst __asm__ ("r13") = pdst; +- +- /* This is NONPORTABLE, but since this whole routine is */ +- /* grossly nonportable that doesn't matter. */ +- +- if (((unsigned long) pdst & 3) != 0 +- /* Oops! n=0 must be a legal call, regardless of alignment. */ +- && n >= 3) +- { +- if ((unsigned long)dst & 1) +- { +- *dst = (char) lc; +- n--; +- dst++; +- } +- +- if ((unsigned long)dst & 2) +- { +- *(short *)dst = lc; +- n -= 2; +- dst += 2; +- } +- } +- +- /* Now the fun part. For the threshold value of this, check the equation +- above. */ +- /* Decide which copying method to use. */ +- if (n >= ZERO_BLOCK_SIZE) +- { +- /* For large copies we use 'movem' */ +- +- /* It is not optimal to tell the compiler about clobbering any +- registers; that will move the saving/restoring of those registers +- to the function prologue/epilogue, and make non-movem sizes +- suboptimal. +- +- This method is not foolproof; it assumes that the "asm reg" +- declarations at the beginning of the function really are used +- here (beware: they may be moved to temporary registers). +- This way, we do not have to save/move the registers around into +- temporaries; we can safely use them straight away. +- +- If you want to check that the allocation was right; then +- check the equalities in the first comment. 
It should say +- "r13=r13, r12=r12, r11=r11" */ +- __asm__ volatile (" +- ;; Check that the following is true (same register names on +- ;; both sides of equal sign, as in r8=r8): +- ;; %0=r13, %1=r12, %4=r11 +- ;; +- ;; Save the registers we'll clobber in the movem process +- ;; on the stack. Don't mention them to gcc, it will only be +- ;; upset. +- subq 11*4,$sp +- movem $r10,[$sp] +- +- move.d $r11,$r0 +- move.d $r11,$r1 +- move.d $r11,$r2 +- move.d $r11,$r3 +- move.d $r11,$r4 +- move.d $r11,$r5 +- move.d $r11,$r6 +- move.d $r11,$r7 +- move.d $r11,$r8 +- move.d $r11,$r9 +- move.d $r11,$r10 +- +- ;; Now we've got this: +- ;; r13 - dst +- ;; r12 - n +- +- ;; Update n for the first loop +- subq 12*4,$r12 +-0: +- subq 12*4,$r12 +- bge 0b +- movem $r11,[$r13+] +- +- addq 12*4,$r12 ;; compensate for last loop underflowing n +- +- ;; Restore registers from stack +- movem [$sp+],$r10" +- +- /* Outputs */ : "=r" (dst), "=r" (n) +- /* Inputs */ : "0" (dst), "1" (n), "r" (lc)); +- +- } +- +- /* Either we directly starts copying, using dword copying +- in a loop, or we copy as much as possible with 'movem' +- and then the last block (<44 bytes) is copied here. +- This will work since 'movem' will have updated src,dst,n. */ +- +- while ( n >= 16 ) +- { +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- n -= 16; +- } ++ char *xs = s; + +- /* A switch() is definitely the fastest although it takes a LOT of code. +- * Particularly if you inline code this. +- */ +- switch (n) +- { +- case 0: +- break; +- case 1: +- *(char*)dst = (char) lc; +- break; +- case 2: +- *(short*)dst = (short) lc; +- break; +- case 3: +- *((short*)dst)++ = (short) lc; +- *(char*)dst = (char) lc; +- break; +- case 4: +- *((long*)dst)++ = lc; +- break; +- case 5: +- *((long*)dst)++ = lc; +- *(char*)dst = (char) lc; +- break; +- case 6: +- *((long*)dst)++ = lc; +- *(short*)dst = (short) lc; +- break; +- case 7: +- *((long*)dst)++ = lc; +- *((short*)dst)++ = (short) lc; +- *(char*)dst = (char) lc; +- break; +- case 8: +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- break; +- case 9: +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *(char*)dst = (char) lc; +- break; +- case 10: +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *(short*)dst = (short) lc; +- break; +- case 11: +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *((short*)dst)++ = (short) lc; +- *(char*)dst = (char) lc; +- break; +- case 12: +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- break; +- case 13: +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *(char*)dst = (char) lc; +- break; +- case 14: +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *(short*)dst = (short) lc; +- break; +- case 15: +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *((long*)dst)++ = lc; +- *((short*)dst)++ = (short) lc; +- *(char*)dst = (char) lc; +- break; +- } +- } ++ while (count--) ++ *xs++ = c; ++ return s; ++} + +- return return_dst; /* destination pointer. 
*/ +-} /* memset() */ +diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c linux-2.6.19.2/arch/cris/arch-v10/lib/string.c +--- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/string.c 2007-05-20 01:46:35.000000000 +0200 ++++ linux-2.6.19.2/arch/cris/arch-v10/lib/string.c 2007-05-20 01:51:19.000000000 +0200 +@@ -33,193 +33,21 @@ + + #include <linux/types.h> + +-void *memcpy(void *pdst, +- const void *psrc, +- size_t pn) ++ /** ++ * memcpy - Copy one area of memory to another ++ * @dest: Where to copy to ++ * @src: Where to copy from ++ * @count: The size of the area. ++ * ++ * You should not use this function to access IO space, use memcpy_toio() ++ * or memcpy_fromio() instead. ++ */ ++void *memcpy(void *dest, const void *src, size_t count) + { +- /* Ok. Now we want the parameters put in special registers. +- Make sure the compiler is able to make something useful of this. +- As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop). ++ char *tmp = dest; ++ const char *s = src; + +- If gcc was allright, it really would need no temporaries, and no +- stack space to save stuff on. */ +- +- register void *return_dst __asm__ ("r10") = pdst; +- register char *dst __asm__ ("r13") = pdst; +- register const char *src __asm__ ("r11") = psrc; +- register int n __asm__ ("r12") = pn; +- +- +- /* When src is aligned but not dst, this makes a few extra needless +- cycles. I believe it would take as many to check that the +- re-alignment was unnecessary. */ +- if (((unsigned long) dst & 3) != 0 +- /* Don't align if we wouldn't copy more than a few bytes; so we +- don't have to check further for overflows. */ +- && n >= 3) +- { +- if ((unsigned long) dst & 1) +- { +- n--; +- *(char*)dst = *(char*)src; +- src++; +- dst++; +- } +- +- if ((unsigned long) dst & 2) +- { +- n -= 2; +- *(short*)dst = *(short*)src; +- src += 2; +- dst += 2; +- } +- } +- +- /* Decide which copying method to use. */ +- if (n >= 44*2) /* Break even between movem and +- move16 is at 38.7*2, but modulo 44. */ +- { +- /* For large copies we use 'movem' */ +- +- /* It is not optimal to tell the compiler about clobbering any +- registers; that will move the saving/restoring of those registers +- to the function prologue/epilogue, and make non-movem sizes +- suboptimal. +- +- This method is not foolproof; it assumes that the "asm reg" +- declarations at the beginning of the function really are used +- here (beware: they may be moved to temporary registers). +- This way, we do not have to save/move the registers around into +- temporaries; we can safely use them straight away. +- +- If you want to check that the allocation was right; then +- check the equalities in the first comment. It should say +- "r13=r13, r11=r11, r12=r12" */ +- __asm__ volatile (" +- ;; Check that the following is true (same register names on +- ;; both sides of equal sign, as in r8=r8): +- ;; %0=r13, %1=r11, %2=r12 +- ;; +- ;; Save the registers we'll use in the movem process +- ;; on the stack. 
+- subq 11*4,$sp +- movem $r10,[$sp] +- +- ;; Now we've got this: +- ;; r11 - src +- ;; r13 - dst +- ;; r12 - n +- +- ;; Update n for the first loop +- subq 44,$r12 +-0: +- movem [$r11+],$r10 +- subq 44,$r12 +- bge 0b +- movem $r10,[$r13+] +- +- addq 44,$r12 ;; compensate for last loop underflowing n +- +- ;; Restore registers from stack +- movem [$sp+],$r10" +- +- /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n) +- /* Inputs */ : "0" (dst), "1" (src), "2" (n)); +- +- } +- +- /* Either we directly starts copying, using dword copying +- in a loop, or we copy as much as possible with 'movem' +- and then the last block (<44 bytes) is copied here. +- This will work since 'movem' will have updated src,dst,n. */ +- +- while ( n >= 16 ) +- { +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- n -= 16; +- } +- +- /* A switch() is definitely the fastest although it takes a LOT of code. +- * Particularly if you inline code this. +- */ +- switch (n) +- { +- case 0: +- break; +- case 1: +- *(char*)dst = *(char*)src; +- break; +- case 2: +- *(short*)dst = *(short*)src; +- break; +- case 3: +- *((short*)dst)++ = *((short*)src)++; +- *(char*)dst = *(char*)src; +- break; +- case 4: +- *((long*)dst)++ = *((long*)src)++; +- break; +- case 5: +- *((long*)dst)++ = *((long*)src)++; +- *(char*)dst = *(char*)src; +- break; +- case 6: +- *((long*)dst)++ = *((long*)src)++; +- *(short*)dst = *(short*)src; +- break; +- case 7: +- *((long*)dst)++ = *((long*)src)++; +- *((short*)dst)++ = *((short*)src)++; +- *(char*)dst = *(char*)src; +- break; +- case 8: +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- break; +- case 9: +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *(char*)dst = *(char*)src; +- break; +- case 10: +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *(short*)dst = *(short*)src; +- break; +- case 11: +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *((short*)dst)++ = *((short*)src)++; +- *(char*)dst = *(char*)src; +- break; +- case 12: +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- break; +- case 13: +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *(char*)dst = *(char*)src; +- break; +- case 14: +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *(short*)dst = *(short*)src; +- break; +- case 15: +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *((long*)dst)++ = *((long*)src)++; +- *((short*)dst)++ = *((short*)src)++; +- *(char*)dst = *(char*)src; +- break; +- } +- +- return return_dst; /* destination pointer. */ +-} /* memcpy() */ ++ while (count--) ++ *tmp++ = *s++; ++ return dest; ++} +diff -urN linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c +--- linux-2.6.19.2.orig/arch/cris/arch-v10/lib/usercopy.c 2007-05-16 22:11:26.000000000 +0200 ++++ linux-2.6.19.2/arch/cris/arch-v10/lib/usercopy.c 2007-05-16 23:17:41.000000000 +0200 +@@ -88,63 +88,38 @@ + If you want to check that the allocation was right; then + check the equalities in the first comment. It should say + "r13=r13, r11=r11, r12=r12". 
*/ +- __asm__ volatile ("\ +- .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ +- .err \n\ +- .endif \n\ +- +- ;; Save the registers we'll use in the movem process +- ;; on the stack. +- subq 11*4,$sp +- movem $r10,[$sp] +- +- ;; Now we've got this: +- ;; r11 - src +- ;; r13 - dst +- ;; r12 - n +- +- ;; Update n for the first loop +- subq 44,$r12 +- +-; Since the noted PC of a faulting instruction in a delay-slot of a taken +-; branch, is that of the branch target, we actually point at the from-movem +-; for this case. There is no ambiguity here; if there was a fault in that +-; instruction (meaning a kernel oops), the faulted PC would be the address +-; after *that* movem. +- +-0: +- movem [$r11+],$r10 +- subq 44,$r12 +- bge 0b +- movem $r10,[$r13+] +-1: +- addq 44,$r12 ;; compensate for last loop underflowing n +- +- ;; Restore registers from stack +- movem [$sp+],$r10 +-2: +- .section .fixup,\"ax\" +- +-; To provide a correct count in r10 of bytes that failed to be copied, +-; we jump back into the loop if the loop-branch was taken. There is no +-; performance penalty for sany use; the program will segfault soon enough. +- +-3: +- move.d [$sp],$r10 +- addq 44,$r10 +- move.d $r10,[$sp] +- jump 0b +-4: +- movem [$sp+],$r10 +- addq 44,$r10 +- addq 44,$r12 +- jump 2b +- +- .previous +- .section __ex_table,\"a\" +- .dword 0b,3b +- .dword 1b,4b +- .previous" ++ __asm__ volatile ( ++ ".ifnc %0%1%2%3,$r13$r11$r12$r10 \n\t" ++ ".err \n\t" ++ ".endif \n\t" ++ "subq 11*4,$sp\n\t" ++ "movem $r10,[$sp]\n\t" ++ "subq 44,$r12\n\t" ++ "0:\n\t" ++ "movem [$r11+],$r10\n\t" ++ "subq 44,$r12\n\t" ++ "bge 0b\n\t" ++ "movem $r10,[$r13+]\n\t" ++ "1:\n\t" ++ "addq 44,$r12 \n\t" ++ "movem [$sp+],$r10\n\t" ++ "2:\n\t" ++ ".section .fixup,\"ax\"\n\t" ++ "3:\n\t" ++ "move.d [$sp],$r10\n\t" ++ "addq 44,$r10\n\t" ++ "move.d $r10,[$sp]\n\t" ++ "jump 0b\n\t" ++ "4:\n\t" ++ "movem [$sp+],$r10\n\t" ++ "addq 44,$r10\n\t" ++ "addq 44,$r12\n\t" ++ "jump 2b\n\t" ++ ".previous\n\t" ++ ".section __ex_table,\"a\"\n\t" ++ ".dword 0b,3b\n\t" ++ ".dword 1b,4b\n\t" ++ ".previous\n\t" + + /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) + /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); +@@ -253,60 +228,32 @@ + If you want to check that the allocation was right; then + check the equalities in the first comment. It should say + "r13=r13, r11=r11, r12=r12" */ +- __asm__ volatile (" +- .ifnc %0%1%2%3,$r13$r11$r12$r10 \n\ +- .err \n\ +- .endif \n\ +- +- ;; Save the registers we'll use in the movem process +- ;; on the stack. +- subq 11*4,$sp +- movem $r10,[$sp] +- +- ;; Now we've got this: +- ;; r11 - src +- ;; r13 - dst +- ;; r12 - n +- +- ;; Update n for the first loop +- subq 44,$r12 +-0: +- movem [$r11+],$r10 +-1: +- subq 44,$r12 +- bge 0b +- movem $r10,[$r13+] +- +- addq 44,$r12 ;; compensate for last loop underflowing n +- +- ;; Restore registers from stack +- movem [$sp+],$r10 +-4: +- .section .fixup,\"ax\" +- +-;; Do not jump back into the loop if we fail. For some uses, we get a +-;; page fault somewhere on the line. Without checking for page limits, +-;; we don't know where, but we need to copy accurately and keep an +-;; accurate count; not just clear the whole line. To do that, we fall +-;; down in the code below, proceeding with smaller amounts. 
It should +-;; be kept in mind that we have to cater to code like what at one time +-;; was in fs/super.c: +-;; i = size - copy_from_user((void *)page, data, size); +-;; which would cause repeated faults while clearing the remainder of +-;; the SIZE bytes at PAGE after the first fault. +-;; A caveat here is that we must not fall through from a failing page +-;; to a valid page. +- +-3: +- movem [$sp+],$r10 +- addq 44,$r12 ;; Get back count before faulting point. +- subq 44,$r11 ;; Get back pointer to faulting movem-line. +- jump 4b ;; Fall through, pretending the fault didn't happen. +- +- .previous +- .section __ex_table,\"a\" +- .dword 1b,3b +- .previous" ++ __asm__ volatile ( ++ ".ifnc %0%1%2%3,$r13$r11$r12$r10 \n\t" ++ ".err \n\t" ++ ".endif \n\t" ++ "subq 11*4,$sp\n\t" ++ "movem $r10,[$sp]\n\t" ++ "subq 44,$r12\n\t" ++ "0:\n\t" ++ "movem [$r11+],$r10\n\t" ++ "1:\n\t" ++ "subq 44,$r12\n\t" ++ "bge 0b\n\t" ++ "movem $r10,[$r13+]\n\t" ++ "addq 44,$r12 \n\t" ++ "movem [$sp+],$r10\n\t" ++ "4:\n\t" ++ ".section .fixup,\"ax\"\n\t" ++ "3:\n\t" ++ "movem [$sp+],$r10\n\t" ++ "addq 44,$r12\n\t" ++ "subq 44,$r11\n\t" ++ "jump 4b \n\t" ++ ".previous\n\t" ++ ".section __ex_table,\"a\"\n\t" ++ ".dword 1b,3b\n\t" ++ ".previous\n\t" + + /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn) + /* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn)); +@@ -425,66 +372,50 @@ + If you want to check that the allocation was right; then + check the equalities in the first comment. It should say + something like "r13=r13, r11=r11, r12=r12". */ +- __asm__ volatile (" +- .ifnc %0%1%2,$r13$r12$r10 \n\ +- .err \n\ +- .endif \n\ +- +- ;; Save the registers we'll clobber in the movem process +- ;; on the stack. Don't mention them to gcc, it will only be +- ;; upset. 
+- subq 11*4,$sp +- movem $r10,[$sp] +- +- clear.d $r0 +- clear.d $r1 +- clear.d $r2 +- clear.d $r3 +- clear.d $r4 +- clear.d $r5 +- clear.d $r6 +- clear.d $r7 +- clear.d $r8 +- clear.d $r9 +- clear.d $r10 +- clear.d $r11 +- +- ;; Now we've got this: +- ;; r13 - dst +- ;; r12 - n +- +- ;; Update n for the first loop +- subq 12*4,$r12 +-0: +- subq 12*4,$r12 +- bge 0b +- movem $r11,[$r13+] +-1: +- addq 12*4,$r12 ;; compensate for last loop underflowing n +- +- ;; Restore registers from stack +- movem [$sp+],$r10 +-2: +- .section .fixup,\"ax\" +-3: +- move.d [$sp],$r10 +- addq 12*4,$r10 +- move.d $r10,[$sp] +- clear.d $r10 +- jump 0b +- +-4: +- movem [$sp+],$r10 +- addq 12*4,$r10 +- addq 12*4,$r12 +- jump 2b +- +- .previous +- .section __ex_table,\"a\" +- .dword 0b,3b +- .dword 1b,4b +- .previous" +- ++ __asm__ volatile ( ++ ".ifnc %0%1%2,$r13$r12$r10\n\t" ++ ".err \n\t" ++ ".endif\n\t" ++ "subq 11*4,$sp\n\t" ++ "movem $r10,[$sp]\n\t" ++ "clear.d $r0\n\t" ++ "clear.d $r1\n\t" ++ "clear.d $r2\n\t" ++ "clear.d $r3\n\t" ++ "clear.d $r4\n\t" ++ "clear.d $r5\n\t" ++ "clear.d $r6\n\t" ++ "clear.d $r7\n\t" ++ "clear.d $r8\n\t" ++ "clear.d $r9\n\t" ++ "clear.d $r10\n\t" ++ "clear.d $r11\n\t" ++ "subq 12*4,$r12\n\t" ++ "0:\n\t" ++ "subq 12*4,$r12\n\t" ++ "bge 0b\n\t" ++ "movem $r11,[$r13+]\n\t" ++ "1: \n\t" ++ "addq 12*4,$r12 \n\t" ++ "movem [$sp+],$r10\n\t" ++ "2:\n\t" ++ ".section .fixup,\"ax\"\n\t" ++ "3:\n\t" ++ "move.d [$sp],$r10\n\t" ++ "addq 12*4,$r10\n\t" ++ "move.d $r10,[$sp]\n\t" ++ "clear.d $r10\n\t" ++ "jump 0b\n\t" ++ "4:\n\t" ++ "movem [$sp+],$r10\n\t" ++ "addq 12*4,$r10\n\t" ++ "addq 12*4,$r12\n\t" ++ "jump 2b\n\t" ++ ".previous\n\t" ++ ".section __ex_table,\"a\"\n\t" ++ ".dword 0b,3b\n\t" ++ ".dword 1b,4b\n\t" ++ ".previous\n\t" + /* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn) + /* Inputs */ : "0" (dst), "1" (n), "2" (retn) + /* Clobber */ : "r11"); |
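Background note (not part of the patch itself): the reason this patch exists is that GCC 4 stopped accepting string literals containing raw newlines, which the original CRIS inline-assembly blocks in memset.c, string.c and usercopy.c relied on. The patch either rewrites each asm block as one quoted, "\n\t"-terminated string per instruction (adjacent literals are concatenated by the compiler, so the assembler still sees the same text) or drops the assembly entirely in favour of plain C byte loops. The following stand-alone sketch uses made-up names and ordinary C strings to show the string-literal rule in isolation; it is an illustration under those assumptions, not code taken from the kernel or the patch.

/* sketch.c -- minimal, self-contained illustration of the GCC 4 change
 * that motivates 006-gcc-4.patch.  Everything here is example code;
 * only the string-concatenation technique mirrors the patch. */
#include <stdio.h>

int main(void)
{
	/* Old style, as found in the pre-patch CRIS sources: a single
	 * string literal spanning several source lines.  GCC 3.x tolerated
	 * this inside __asm__(); GCC 4 rejects it as an unterminated
	 * string constant.
	 *
	 *   __asm__ volatile ("
	 *       subq 11*4,$sp
	 *       movem $r10,[$sp]");
	 */

	/* New style, as used throughout the patch: one quoted line per
	 * instruction.  Adjacent string literals are concatenated at
	 * compile time into a single multi-line string. */
	const char *asm_text =
		"subq 11*4,$sp\n\t"
		"movem $r10,[$sp]\n\t";

	fputs(asm_text, stdout);
	return 0;
}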