[PATCH 1/3] Fix csum_partial_copy_from_user (take 2)

To: linux-mips@linux-mips.org
Subject: [PATCH 1/3] Fix csum_partial_copy_from_user (take 2)
From: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
Date: Wed, 27 Dec 2006 00:54:07 +0900 (JST)
Cc: ralf@linux-mips.org
Original-recipient: rfc822;linux-mips@linux-mips.org
Sender: linux-mips-bounce@linux-mips.org
I found that the asm version of csum_partial_copy_from_user() introduced
in e9e016815f264227b6260f77ca84f1c43cf8b9bd was less efficient than it
should be.

In the csum_partial_copy_from_user() case, the "both_aligned" 8-word
copy/sum loop is skipped so that a LOAD failure can be handled properly,
and the 4-word copy/sum block is not a loop, so we end up looping in the
inefficient "less_than_4units" block.

This patch rearranges register usage so that t0-t7 can be used in the
"both_aligned" loop, which makes that loop usable for the
copy_from_user case too.  It also cleans up the code around the entry
points.
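
The key to freeing those registers is the loop-termination change in
the first hunk of the diff below: instead of precomputing the remainder
into a dedicated register (rem, formerly t8) and testing
"bne len, rem", len is biased by 8*NBYTES before the loop so a plain
sign test (bgez) can terminate it.  A C sketch of that idiom
(illustrative only; the caller has already checked len >= 8*NBYTES):

	#define NBYTES sizeof(long)	/* one LOAD unit, as in the asm */

	static long unrolled_loop(long len)
	{
		len -= 8 * NBYTES;	/* bias: the loop tests the sign bit */
		do {
			/* ...load, store and sum 8 words, advance src/dst... */
			len -= 8 * NBYTES;
		} while (len >= 0);	/* one bgez, no rem register needed */
		return len + 8 * NBYTES; /* undo the bias for the cleanup code */
	}

With rem gone from the loop, odd and errptr can move up to t8/t9 and
the eight data words can live in t0-t7.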

Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
---
 arch/mips/lib/csum_partial.S |   74 ++++++++++++++++---------------------------
 include/asm-mips/checksum.h  |   12 +++---
 2 files changed, 35 insertions(+), 51 deletions(-)

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index ec0744d..26ac0f8 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -291,8 +291,8 @@ #define dst a1
 #define len a2
 #define psum a3
 #define sum v0
-#define odd t5
-#define errptr t6
+#define odd t8
+#define errptr t9
 
 /*
  * The exception handler for loads requires that:
@@ -376,30 +376,20 @@ #define ADDRMASK (NBYTES-1)
 
        .set    noat
 
-LEAF(csum_partial_copy_nocheck)
-       move    AT, zero
-       b       __csum_partial_copy
-        move   errptr, zero
-FEXPORT(__csum_partial_copy_from_user)
-       b       __csum_partial_copy_user
-        PTR_ADDU       AT, src, len    /* See (1) above. */
-FEXPORT(__csum_and_copy_to_user)
-       move    AT, zero
-__csum_partial_copy_user:
+LEAF(__csum_partial_copy_user)
+       PTR_ADDU        AT, src, len    /* See (1) above. */
 #ifdef CONFIG_64BIT
        move    errptr, a4
 #else
        lw      errptr, 16(sp)
 #endif
-__csum_partial_copy:
+FEXPORT(csum_partial_copy_nocheck)
        move    sum, zero
        move    odd, zero
        /*
         * Note: dst & src may be unaligned, len may be 0
         * Temps
         */
-#define rem t8
-
        /*
         * The "issue break"s below are very approximate.
         * Issue delays for dcache fills will perturb the schedule, as will
@@ -422,51 +412,45 @@ #define rem t8
 both_aligned:
         SRL    t0, len, LOG_NBYTES+3    # +3 for 8 units/iter
        beqz    t0, cleanup_both_aligned # len < 8*NBYTES
-        and    rem, len, (8*NBYTES-1)   # rem = len % (8*NBYTES)
-       /*
-        * We can not do this loop if LOAD might fail, otherwize
-        * l_exc_copy can not calclate sum correctly.
-        * AT==0 means LOAD should not fail.
-        */
-       bnez    AT, cleanup_both_aligned
         nop
+       SUB     len, 8*NBYTES           # subtract here for bgez loop
        .align  4
 1:
-       LOAD    t0, UNIT(0)(src)
-       LOAD    t1, UNIT(1)(src)
-       LOAD    t2, UNIT(2)(src)
-       LOAD    t3, UNIT(3)(src)
+EXC(   LOAD    t0, UNIT(0)(src),       l_exc)
+EXC(   LOAD    t1, UNIT(1)(src),       l_exc_copy)
+EXC(   LOAD    t2, UNIT(2)(src),       l_exc_copy)
+EXC(   LOAD    t3, UNIT(3)(src),       l_exc_copy)
+EXC(   LOAD    t4, UNIT(4)(src),       l_exc_copy)
+EXC(   LOAD    t5, UNIT(5)(src),       l_exc_copy)
+EXC(   LOAD    t6, UNIT(6)(src),       l_exc_copy)
+EXC(   LOAD    t7, UNIT(7)(src),       l_exc_copy)
        SUB     len, len, 8*NBYTES
-       LOAD    t4, UNIT(4)(src)
-       LOAD    t7, UNIT(5)(src)
+       ADD     src, src, 8*NBYTES
 EXC(   STORE   t0, UNIT(0)(dst),       s_exc)
        ADDC(sum, t0)
 EXC(   STORE   t1, UNIT(1)(dst),       s_exc)
        ADDC(sum, t1)
-       LOAD    t0, UNIT(6)(src)
-       LOAD    t1, UNIT(7)(src)
-       ADD     src, src, 8*NBYTES
-       ADD     dst, dst, 8*NBYTES
-EXC(   STORE   t2, UNIT(-6)(dst),      s_exc)
+EXC(   STORE   t2, UNIT(2)(dst),       s_exc)
        ADDC(sum, t2)
-EXC(   STORE   t3, UNIT(-5)(dst),      s_exc)
+EXC(   STORE   t3, UNIT(3)(dst),       s_exc)
        ADDC(sum, t3)
-EXC(   STORE   t4, UNIT(-4)(dst),      s_exc)
+EXC(   STORE   t4, UNIT(4)(dst),       s_exc)
        ADDC(sum, t4)
-EXC(   STORE   t7, UNIT(-3)(dst),      s_exc)
+EXC(   STORE   t5, UNIT(5)(dst),       s_exc)
+       ADDC(sum, t5)
+EXC(   STORE   t6, UNIT(6)(dst),       s_exc)
+       ADDC(sum, t6)
+EXC(   STORE   t7, UNIT(7)(dst),       s_exc)
        ADDC(sum, t7)
-EXC(   STORE   t0, UNIT(-2)(dst),      s_exc)
-       ADDC(sum, t0)
-EXC(   STORE   t1, UNIT(-1)(dst),      s_exc)
-       .set reorder
-       ADDC(sum, t1)
-       bne     len, rem, 1b
-       .set noreorder
+       bgez    len, 1b
+        ADD    dst, dst, 8*NBYTES
+       ADD     len, 8*NBYTES           # revert len (see above)
 
        /*
-        * len == rem == the number of bytes left to copy < 8*NBYTES
+        * len == the number of bytes left to copy < 8*NBYTES
         */
 cleanup_both_aligned:
+#define rem t7
        beqz    len, done
         sltu   t0, len, 4*NBYTES
        bnez    t0, less_than_4units
@@ -729,4 +713,4 @@ s_exc:
        li      v1, -EFAULT
        jr      ra
         sw     v1, (errptr)
-       END(csum_partial_copy_nocheck)
+       END(__csum_partial_copy_user)
diff --git a/include/asm-mips/checksum.h b/include/asm-mips/checksum.h
index 6596fe6..84b0ace 100644
--- a/include/asm-mips/checksum.h
+++ b/include/asm-mips/checksum.h
@@ -29,10 +29,8 @@ #include <asm/uaccess.h>
  */
 __wsum csum_partial(const void *buff, int len, __wsum sum);
 
-__wsum __csum_partial_copy_from_user(const void __user *src, void *dst,
-                                    int len, __wsum sum, int *err_ptr);
-__wsum __csum_and_copy_to_user(const void *src, void __user *dst,
-                              int len, __wsum sum, int *err_ptr);
+__wsum __csum_partial_copy_user(const void __user *src, void __user *dst,
+                               int len, __wsum sum, int *err_ptr);
 
 /*
  * this is a new version of the above that records errors it finds in *errp,
@@ -43,7 +41,8 @@ __wsum csum_partial_copy_from_user(const
                                   __wsum sum, int *err_ptr)
 {
        might_sleep();
-       return __csum_partial_copy_from_user(src, dst, len, sum, err_ptr);
+       return __csum_partial_copy_user(src, (void __user *)dst,
+                                       len, sum, err_ptr);
 }
 
 /*
@@ -56,7 +55,8 @@ __wsum csum_and_copy_to_user(const void 
 {
        might_sleep();
        if (access_ok(VERIFY_WRITE, dst, len))
-               return __csum_and_copy_to_user(src, dst, len, sum, err_ptr);
+               return __csum_partial_copy_user((const void __user *)src, dst,
+                                               len, sum, err_ptr);
        if (len)
                *err_ptr = -EFAULT;
 

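For reference, both C wrappers above now funnel into the single asm
entry point; a typical caller looks something like this (a hypothetical
sketch using only the prototypes from checksum.h above, not code from
this patch; ubuf, kbuf, hdr and the lengths are made-up names):

	int err = 0;
	__wsum csum;

	/* copy a user buffer while checksumming it in one pass */
	csum = csum_partial_copy_from_user(ubuf, kbuf, len,
					   csum_partial(hdr, hdrlen, 0),
					   &err);
	if (err)
		return err;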