| To: | libc-ports@sources.redhat.com |
|---|---|
| Subject: | Re: [PATCH]: R10000 Needs LL/SC Workaround in Glibc |
| From: | Kumba <kumba@gentoo.org> |
| Date: | Sat, 22 Nov 2008 23:16:18 -0500 |
| Cc: | Daniel Jacobowitz <drow@false.org>, Linux MIPS List <linux-mips@linux-mips.org> |
| In-reply-to: | <490C907A.40005@loowit.net> |
| Original-recipient: | rfc822;linux-mips@linux-mips.org |
| References: | <490A912A.8030901@gentoo.org> <490C907A.40005@loowit.net> |
| Sender: | linux-mips-bounce@linux-mips.org |
| User-agent: | Thunderbird 2.0.0.18 (Windows/20081105) |
James Perkins wrote:

Here's try #2. The gcc-side is already sent in and accepted. If I'm still missing anything, please let me know!
Joshua Kinard
Gentoo/MIPS
kumba@gentoo.org
2008-11-22 Joshua Kinard <kumba@gentoo.org>
* ports/sysdeps/mips/bits/atomic.h
(R10K_BEQZ_INSN, R10K_NOPS_INSN): Define depending on ISA.
(__arch_compare_and_exchange_xxx_32_int): Replace 'beqz' insn with
R10K_BEQZ_INSN and add R10K_NOPS_INSN.
(__arch_compare_and_exchange_xxx_64_int): Likewise.
(__arch_exchange_xxx_32_int): Likewise.
(__arch_exchange_xxx_64_int): Likewise.
(__arch_exchange_and_add_32_int): Likewise.
(__arch_exchange_and_add_64_int): Likewise.
Index: ports/sysdeps/mips/bits/atomic.h
===================================================================
RCS file: /cvs/glibc/ports/sysdeps/mips/bits/atomic.h,v
retrieving revision 1.1
diff -u -p -r1.1 atomic.h
--- ports/sysdeps/mips/bits/atomic.h 28 Mar 2005 09:14:59 -0000 1.1
+++ ports/sysdeps/mips/bits/atomic.h 23 Nov 2008 03:22:53 -0000
@@ -49,6 +49,61 @@ typedef uintmax_t uatomic_max_t;
# define MIPS_SYNC sync
#endif
+/* Certain revisions of the R10000 Processor need an LL/SC Workaround
+ enabled. Revisions before 3.0 misbehave on atomic operations, and
+ Revs 2.6 and lower deadlock after several seconds due to other errata.
+
+ To quote the R10K Errata:
+ Workaround: The basic idea is to inhibit the four instructions
+ from simultaneously becoming active in R10000. Padding all
+ ll/sc sequences with nops or changing the looping branch in the
+ routines to a branch likely (which is always predicted taken
+ by R10000) will work. The nops should go after the loop, and the
+ number of them should be 28. This number could be decremented for
+ each additional instruction in the ll/sc loop such as the lock
+ modifier(s) between the ll and sc, the looping branch and its
+ delay slot. For typical short routines with one ll/sc loop, any
+ instructions after the loop could also count as a decrement. The
+ nop workaround pollutes the cache more but would be a few cycles
+ faster if all the code is in the cache and the looping branch
+ is predicted not taken. */
+
+#if (defined(_MIPS_ARCH_MIPS2) || defined(_MIPS_ARCH_MIPS3) || \
+ defined(_MIPS_ARCH_MIPS4))
+#define R10K_BEQZ_INSN "beqzl"
+#define R10K_NOPS_INSN ""
+#else
+#define R10K_BEQZ_INSN "beqz"
+#define R10K_NOPS_INSN "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n" \
+ "\tnop\n"
+#endif
+
#define MIPS_SYNC_STR_2(X) #X
#define MIPS_SYNC_STR_1(X) MIPS_SYNC_STR_2(X)
#define MIPS_SYNC_STR MIPS_SYNC_STR_1(MIPS_SYNC)
@@ -74,7 +129,8 @@ typedef uintmax_t uatomic_max_t;
"bne %0,%2,2f\n\t" \
"move %1,%3\n\t" \
"sc %1,%4\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN " %1,1b\n" \
+ R10K_NOPS_INSN \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -98,7 +154,8 @@ typedef uintmax_t uatomic_max_t;
"bne %0,%2,2f\n\t" \
"move %1,%3\n\t" \
"scd %1,%4\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN " %1,1b\n" \
+ R10K_NOPS_INSN \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -192,7 +249,8 @@ typedef uintmax_t uatomic_max_t;
"ll %0,%3\n\t" \
"move %1,%2\n\t" \
"sc %1,%3\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN " %1,1b\n" \
+ R10K_NOPS_INSN \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -216,7 +274,8 @@ typedef uintmax_t uatomic_max_t;
"lld %0,%3\n\t" \
"move %1,%2\n\t" \
"scd %1,%3\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN " %1,1b\n" \
+ R10K_NOPS_INSN \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -251,7 +310,8 @@ typedef uintmax_t uatomic_max_t;
"ll %0,%3\n\t" \
"addu %1,%0,%2\n\t" \
"sc %1,%3\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN " %1,1b\n" \
+ R10K_NOPS_INSN \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -275,7 +335,8 @@ typedef uintmax_t uatomic_max_t;
"lld %0,%3\n\t" \
"daddu %1,%0,%2\n\t" \
"scd %1,%3\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN " %1,1b\n" \
+ R10K_NOPS_INSN \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
|
| <Prev in Thread] | Current Thread | [Next in Thread> |
|---|---|---|
| ||
| Previous by Date: | Re: old binutils-2.13-msp.diff and binutils 2.19, Thiemo Seufer |
|---|---|
| Next by Date: | Re: [PATCH] MIPS: Make BUG() __noreturn., Ingo Molnar |
| Previous by Thread: | Re: [PATCH]: R10000 Needs LL/SC Workaround in Glibc, Kumba |
| Next by Thread: | Re: [PATCH] Fix include paths in malta-amon.c, Dmitri Vorobiev |
| Indexes: | [Date] [Thread] [Top] [All Lists] |