On Mon, 9 Apr 2012, Steven J. Hill wrote:
> diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S
> index 606c8a9..a0df003 100644
> --- a/arch/mips/lib/memset.S
> +++ b/arch/mips/lib/memset.S
> @@ -26,23 +35,36 @@
> .previous
>
> .macro f_fill64 dst, offset, val, fixup
> - EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup)
> - EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup)
> - EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup)
> - EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup)
> - EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup)
> - EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup)
> - EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup)
> - EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup)
> +#ifdef CONFIG_CPU_MICROMIPS
> + EX(swp, t8, (\offset + 0 * STORSIZE)(\dst), \fixup)
> + EX(swp, t8, (\offset + 1 * STORSIZE)(\dst), \fixup)
> + EX(swp, t8, (\offset + 2 * STORSIZE)(\dst), \fixup)
> + EX(swp, t8, (\offset + 3 * STORSIZE)(\dst), \fixup)
> +#if LONGSIZE == 4
> + EX(swp, t8, (\offset + 4 * STORSIZE)(\dst), \fixup)
> + EX(swp, t8, (\offset + 5 * STORSIZE)(\dst), \fixup)
> + EX(swp, t8, (\offset + 6 * STORSIZE)(\dst), \fixup)
> + EX(swp, t8, (\offset + 7 * STORSIZE)(\dst), \fixup)
> +#endif
This looks bogus to me, either define LONG_SP to SWP or SDP (in
<asm/asm.h>) based on _MIPS_SZLONG or #error on LONGSIZE != 4. Also it
would make sense to make this macro respect val in the microMIPS mode too,
then an appropriate #define to a1/t8 elsewhere in this file will do.
Otherwise it's pure obfuscation.
> diff --git a/arch/mips/lib/strlen_user.S b/arch/mips/lib/strlen_user.S
> index fdbb970..60fa23b 100644
> --- a/arch/mips/lib/strlen_user.S
> +++ b/arch/mips/lib/strlen_user.S
> @@ -28,9 +29,13 @@ LEAF(__strlen_user_asm)
>
> FEXPORT(__strlen_user_nocheck_asm)
> move v0, a0
> -1: EX(lb, t0, (v0), .Lfault)
> +#ifdef CONFIG_CPU_MICROMIPS
> +1: EX(lbu16, v1, (v0), .Lfault)
> +#else
> +1: EX(lb, v1, (v0), .Lfault)
> +#endif
> PTR_ADDIU v0, 1
> - bnez t0, 1b
> + bnez v1, 1b
> PTR_SUBU v0, a0
> jr ra
> END(__strlen_user_asm)
Perhaps just LBU in all cases? You'll avoid the ugly #ifdef.
> diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S
> index 7201b2f..bcbb9a0 100644
> --- a/arch/mips/lib/strncpy_user.S
> +++ b/arch/mips/lib/strncpy_user.S
> @@ -30,30 +31,32 @@
> LEAF(__strncpy_from_user_asm)
> LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok?
> and v0, a1
> +#ifdef CONFIG_CPU_MICROMIPS
> + bnezc v0, .Lfault
> +#else
> bnez v0, .Lfault
> +#endif
You don't need this, .Lfault is surely within 126 bytes from here so GAS
will relax the branch to its 16-bit encoding which will give an equivalent
result. Relaxing to compact branches is not supported by GAS at the
moment.
> FEXPORT(__strncpy_from_user_nocheck_asm)
> - move v0, zero
> - move v1, a1
> .set noreorder
> -1: EX(lbu, t0, (v1), .Lfault)
> + move t0, zero
> + move v1, a1
> +1: EX(lbu, v0, (v1), .Lfault)
> PTR_ADDIU v1, 1
> R10KCBARRIER(0(ra))
> - beqz t0, 2f
> - sb t0, (a0)
> - PTR_ADDIU v0, 1
> - .set reorder
> - PTR_ADDIU a0, 1
> - bne v0, a2, 1b
> -2: PTR_ADDU t0, a1, v0
> - xor t0, a1
> - bltz t0, .Lfault
> + beqz v0, 2f
> + sb v0, (a0)
> + PTR_ADDIU t0, 1
> + bne t0, a2, 1b
> + PTR_ADDIU a0, 1
> +2: PTR_ADDU v0, a1, t0
> + xor v0, a1
> + bltz v0, .Lfault
> + nop
> jr ra # return n
> + move v0, t0
> END(__strncpy_from_user_asm)
You don't need all of this, GAS will schedule these delay slots just fine
in the microMIPS mode.
> -.Lfault: li v0, -EFAULT
> +.Lfault:
> jr ra
> -
> - .section __ex_table,"a"
> - PTR 1b, .Lfault
> - .previous
> + li v0, -EFAULT
It looks like an independent bug fix -- submit separately?
> diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S
> index 6445716..9090ced 100644
> --- a/arch/mips/lib/strnlen_user.S
> +++ b/arch/mips/lib/strnlen_user.S
> @@ -26,21 +27,34 @@
> * the maximum is a tad hairier ...
> */
> LEAF(__strnlen_user_asm)
> + .set noreorder
> LONG_L v0, TI_ADDR_LIMIT($28) # pointer ok?
> and v0, a0
> +#ifdef CONFIG_CPU_MICROMIPS
> + bnezc v0, .Lfault
> +#else
> bnez v0, .Lfault
> +#endif
Again you don't need this, see above.
> FEXPORT(__strnlen_user_nocheck_asm)
> - move v0, a0
> PTR_ADDU a1, a0 # stop pointer
> + move v0, a0
> 1: beq v0, a1, 1f # limit reached?
> + nop
> EX(lb, t0, (v0), .Lfault)
> - PTR_ADDU v0, 1
> +#ifdef CONFIG_CPU_MICROMIPS
> + addius5 v0, 1
> + bnezc t0, 1b
> +1: jr ra
> + PTR_SUBU v0, a0
> +#else
> bnez t0, 1b
> -1: PTR_SUBU v0, a0
> - jr ra
> + PTR_ADDU v0, 1
> +1: jr ra
> + PTR_SUBU v0, a0
> +#endif
> END(__strnlen_user_asm)
>
> .Lfault:
> - move v0, zero
> jr ra
> + move v0, zero
You don't need all of this if you discard .set noreorder and let GAS
schedule the instructions and pick their optimal encodings itself.
> diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
> index cc0b626..be71d38 100644
> --- a/arch/mips/mm/page.c
> +++ b/arch/mips/mm/page.c
> @@ -368,6 +360,12 @@ void __cpuinit build_clear_page(void)
> for (i = 0; i < (buf - clear_page_array); i++)
> pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
> pr_debug("\t.set pop\n");
> +#ifdef CONFIG_CPU_MICROMIPS
> + memcpy(((u8 *)clear_page) - 1, clear_page_array,
> + ARRAY_SIZE(clear_page_array) * 4);
> +#else
> + memcpy(clear_page, clear_page_array, ARRAY_SIZE(clear_page_array) * 4);
> +#endif
> }
>
> static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
> @@ -607,6 +605,12 @@ void __cpuinit build_copy_page(void)
> for (i = 0; i < (buf - copy_page_array); i++)
> pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
> pr_debug("\t.set pop\n");
> +#ifdef CONFIG_CPU_MICROMIPS
> + memcpy(((u8 *)copy_page) - 1, copy_page_array,
> + ARRAY_SIZE(copy_page_array) * 4);
> +#else
> + memcpy(copy_page, copy_page_array, ARRAY_SIZE(copy_page_array) * 4);
> +#endif
> }
>
> #ifdef CONFIG_SIBYTE_DMA_PAGEOPS
I suggest that instead of the hacks above you arrange for an alias to
copy_page with the ISA bit clear. You won't need these changes then (and
will get slightly better machine code).
Maciej
|