linux-mips
[Top] [All Lists]

[PATCH] MIPS: Loongson: Introduce and use loongson_llsc_mb()

To: Ralf Baechle <ralf@linux-mips.org>, ambrosehua@gmail.com
Subject: [PATCH] MIPS: Loongson: Introduce and use loongson_llsc_mb()
From: Huang Pei <huangpei@loongson.cn>
Date: Tue, 15 Jan 2019 16:04:54 +0800
Cc: Paul Burton <paul.burton@mips.com>, "Steven J . Hill" <Steven.Hill@cavium.com>, linux-mips@linux-mips.org, Fuxin Zhang <zhangfx@lemote.com>, Zhangjin Wu <wuzhangjin@gmail.com>, Li Xuefeng <lixuefeng@loongson.cn>, Xu Chenghua <xuchenghua@loongson.cn>, Huacai Chen <chenhc@lemote.com>
List-archive: <http://www.linux-mips.org/archives/linux-mips/>
List-help: <mailto:ecartis@linux-mips.org?Subject=help>
List-id: linux-mips <linux-mips.eddie.linux-mips.org>
List-owner: <mailto:ralf@linux-mips.org>
List-post: <mailto:linux-mips@linux-mips.org>
List-software: Ecartis version 1.0.0
List-subscribe: <mailto:ecartis@linux-mips.org?subject=subscribe%20linux-mips>
List-unsubscribe: <mailto:ecartis@linux-mips.org?subject=unsubscribe%20linux-mips>
Original-recipient: rfc822;linux-mips@linux-mips.org
Sender: linux-mips-bounce@linux-mips.org
From: Huacai Chen <chenhc@lemote.com>

On the Loongson-2G/2H/3A/3B there is a hardware flaw that ll/sc and
lld/scd is very weak ordering. We should add sync instructions "before
each ll/lld" and "at the branch-target between ll/sc" to workaround.
Otherwise, this flaw will cause deadlock occasionally (e.g. when doing
heavy load test with LTP).

Below is the explaination of CPU designer:

"For Loongson 3 family, when a memory access instruction (load, store,
or prefetch)'s executing occurs between the execution of LL and SC, the
success or failure of SC is not predictable. Although programmer would
not insert memory access instructions between LL and SC, the memory
instructions before LL in program-order, may dynamically executed
between the execution of LL/SC, so a memory fence (SYNC) is needed
before LL/LLD to avoid this situation.

Since Loongson-3A R2 (3A2000), we have improved our hardware design to
handle this case. But we later deduce a rarely circumstance that some
speculatively executed memory instructions due to branch misprediction
between LL/SC still fall into the above case, so a memory fence (SYNC)
at branch-target (if its target is not between LL/SC) is needed for
Loongson 3A1000, 3B1500, 3A2000 and 3A3000.

Our processor is continually evolving and we aim to to remove all these
workaround-SYNCs around LL/SC for new-come processor."

Here is an example:

Both cpu1 and cpu2 simutaneously run atomic_add by 1 on same atomic var,
this bug cause both 'sc' run by two cpus (in atomic_add) succeed at same
time('sc' return 1), and the variable is only *added by 1*, sometimes,
which is wrong and unacceptable(it should be added by 2).

Why disable fix-loongson3-llsc in compiler?
Because compiler fix will cause problems in kernel's __ex_table section.

This patch fix all the cases in kernel, but:

+. the fix at the end of futex_atomic_cmpxchg_inatomic is for branch-target
of 'bne', there other cases which smp_mb__before_llsc() and smp_llsc_mb() fix
the ll and branch-target coincidently such as atomic_sub_if_positive/
cmpxchg/xchg, just like this one.

+. Loongson 3 does support CONFIG_EDAC_ATOMIC_SCRUB, so no need to touch
edac.h

+. local_ops and cmpxchg_local should not be affected by this bug since
only the owner can write.

+. mips_atomic_set for syscall.c is deprecated and rarely used, just let
it go

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Huang Pei <huangpei@loongson.cn>
---
 arch/mips/Kconfig               | 15 +++++++++++++++
 arch/mips/include/asm/atomic.h  |  6 ++++++
 arch/mips/include/asm/barrier.h |  6 ++++++
 arch/mips/include/asm/bitops.h  |  5 +++++
 arch/mips/include/asm/futex.h   |  3 +++
 arch/mips/include/asm/pgtable.h |  2 ++
 arch/mips/loongson64/Platform   |  3 +++
 arch/mips/mm/tlbex.c            | 10 ++++++++++
 8 files changed, 50 insertions(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 7872907..a52dc1e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1403,6 +1403,21 @@ config LOONGSON3_ENHANCEMENT
          please say 'N' here. If you want a high-performance kernel to run on
          new Loongson 3 machines only, please say 'Y' here.
 
+config CPU_LOONGSON3_WORKAROUNDS
+       bool "Old Loongson 3 LLSC Workarounds"
+       default y if SMP
+       depends on CPU_LOONGSON3
+       help
+         Loongson 3 processors have the llsc issues which require workarounds.
+         Without workarounds the system may hang unexpectedly.
+
+         Newer Loongson 3 will fix these issues and no workarounds are needed.
+         The workarounds have no significant side effect on them but may
+         decrease the performance of the system so this option should be
+         disabled unless the kernel is intended to be run on old systems.
+
+         If unsure, please say Y.
+
 config CPU_LOONGSON2E
        bool "Loongson 2E"
        depends on SYS_HAS_CPU_LOONGSON2E
diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 43fcd35..9409629 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)       
                      \
        if (kernel_uses_llsc) {                                               \
                int temp;                                                     \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -85,6 +86,7 @@ static __inline__ int atomic_##op##_return_relaxed(int i, 
atomic_t * v)             \
        if (kernel_uses_llsc) {                                               \
                int temp;                                                     \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -118,6 +120,7 @@ static __inline__ int atomic_fetch_##op##_relaxed(int i, 
atomic_t * v)            \
        if (kernel_uses_llsc) {                                               \
                int temp;                                                     \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -256,6 +259,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * 
v)                      \
        if (kernel_uses_llsc) {                                               \
                long temp;                                                    \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -283,6 +287,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long 
i, atomic64_t * v) \
        if (kernel_uses_llsc) {                                               \
                long temp;                                                    \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
@@ -316,6 +321,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long 
i, atomic64_t * v)  \
        if (kernel_uses_llsc) {                                               \
                long temp;                                                    \
                                                                              \
+               loongson_llsc_mb();                                           \
                __asm__ __volatile__(                                         \
                "       .set    push                                    \n"   \
                "       .set    "MIPS_ISA_LEVEL"                        \n"   \
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index a5eb1bb..df3ac31 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -222,6 +222,12 @@
 #define __smp_mb__before_atomic()      __smp_mb__before_llsc()
 #define __smp_mb__after_atomic()       smp_llsc_mb()
 
+#ifdef CONFIG_CPU_LOONGSON3_WORKAROUNDS /* Loongson-3's LLSC workaround */
+#define loongson_llsc_mb()     __asm__ __volatile__(__WEAK_LLSC_MB : : 
:"memory")
+#else
+#define loongson_llsc_mb()     __asm__ __volatile__("            " : : 
:"memory")
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index c467595..830c93a 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile 
unsigned long *addr)
                : "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       " __LL "%0, %1          # set_bit       \n"
@@ -79,6 +80,7 @@ static inline void set_bit(unsigned long nr, volatile 
unsigned long *addr)
                } while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
        } else if (kernel_uses_llsc) {
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
@@ -123,6 +125,7 @@ static inline void clear_bit(unsigned long nr, volatile 
unsigned long *addr)
                : "ir" (~(1UL << bit)));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
        } else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       " __LL "%0, %1          # clear_bit     \n"
@@ -133,6 +136,7 @@ static inline void clear_bit(unsigned long nr, volatile 
unsigned long *addr)
                } while (unlikely(!temp));
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
        } else if (kernel_uses_llsc) {
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
@@ -193,6 +197,7 @@ static inline void change_bit(unsigned long nr, volatile 
unsigned long *addr)
                unsigned long *m = ((unsigned long *) addr) + (nr >> 
SZLONG_LOG);
                unsigned long temp;
 
+               loongson_llsc_mb();
                do {
                        __asm__ __volatile__(
                        "       .set    push                            \n"
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index c14d798..b83b039 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -50,6 +50,7 @@
                  "i" (-EFAULT)                                         \
                : "memory");                                            \
        } else if (cpu_has_llsc) {                                      \
+               loongson_llsc_mb();                                     \
                __asm__ __volatile__(                                   \
                "       .set    push                            \n"     \
                "       .set    noat                            \n"     \
@@ -163,6 +164,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                  "i" (-EFAULT)
                : "memory");
        } else if (cpu_has_llsc) {
+               loongson_llsc_mb();
                __asm__ __volatile__(
                "# futex_atomic_cmpxchg_inatomic                        \n"
                "       .set    push                                    \n"
@@ -192,6 +194,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
                : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
                  "i" (-EFAULT)
                : "memory");
+               loongson_llsc_mb();
        } else
                return -ENOSYS;
 
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 57933fc..910851c 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
                        : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
                        : [global] "r" (page_global));
                } else if (kernel_uses_llsc) {
+                       loongson_llsc_mb();
                        __asm__ __volatile__ (
                        "       .set    push                            \n"
                        "       .set    "MIPS_ISA_ARCH_LEVEL"           \n"
@@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
                        "       .set    pop                             \n"
                        : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
                        : [global] "r" (page_global));
+                       loongson_llsc_mb();
                }
 #else /* !CONFIG_SMP */
                if (pte_none(*buddy))
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 0fce460..3700dcf 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -23,6 +23,9 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
 endif
 
 cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+ifneq ($(call as-option,-Wa$(comma)-mfix-loongson3-llsc,),)
+  cflags-$(CONFIG_CPU_LOONGSON3) += -Wa$(comma)-mno-fix-loongson3-llsc
+endif
 #
 # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
 # as MIPS64 R2; older versions as just R1.  This leaves the possibility open
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 37b1cb2..65b6e85 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, 
struct uasm_reloc **r,
                 * to mimic that here by taking a load/istream page
                 * fault.
                 */
+               if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+                       uasm_i_sync(p, 0);
                UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
                uasm_i_jr(p, ptr);
 
@@ -1646,6 +1648,8 @@ static void
 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
 {
 #ifdef CONFIG_SMP
+       if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+               uasm_i_sync(p, 0);
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
        if (cpu_has_64bits)
                uasm_i_lld(p, pte, 0, ptr);
@@ -2259,6 +2263,8 @@ static void build_r4000_tlb_load_handler(void)
 #endif
 
        uasm_l_nopage_tlbl(&l, p);
+       if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+               uasm_i_sync(&p, 0);
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_0 & 1) {
@@ -2313,6 +2319,8 @@ static void build_r4000_tlb_store_handler(void)
 #endif
 
        uasm_l_nopage_tlbs(&l, p);
+       if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+               uasm_i_sync(&p, 0);
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_1 & 1) {
@@ -2368,6 +2376,8 @@ static void build_r4000_tlb_modify_handler(void)
 #endif
 
        uasm_l_nopage_tlbm(&l, p);
+       if (IS_ENABLED(CONFIG_CPU_LOONGSON3_WORKAROUNDS))
+               uasm_i_sync(&p, 0);
        build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
        if ((unsigned long)tlb_do_page_fault_1 & 1) {
-- 
2.7.4



<Prev in Thread] Current Thread [Next in Thread>
  • [PATCH] MIPS: Loongson: Introduce and use loongson_llsc_mb(), Huang Pei <=