On Tue, 3 Aug 2004, Nigel Stephens wrote:
> Note that there is one slightly controversial aspect of these sequences,
> which is that they don't truncate the shift count, so a shift outside of
> the range 0 to 63 will generate an "unusual" result. This didn't cause
> any regression failures, and I believe that this is strictly speaking
> acceptable for C, since a shift is undefined outside of this range -- but
> it could cause some "buggy" code to break. It wouldn't be hard to add an
> extra mask with 0x3f if people were nervous about this -- it's just that
> I didn't have enough spare temp registers within the constraints of the
> existing DImode patterns.
Well, masking is trivial with no additional temporary :) and for ashrdi3
we can "cheat" and use $at to require only a single additional instruction
compared to the others.
Here are my proposals I've referred to previously. Instruction counts
are 9, 9 and 10, respectively, as I've missed an additional instruction
required to handle shifts by 0 (or actually any multiples of 64). The
semantics they implement correspond to those of dsllv, dsrlv and dsrav,
respectively. I've expressed them in terms of functions rather than RTL
patterns, but a conversion is trivial. This form was simply easier to
validate for me and they can be used as libgcc function replacements for
Linux for MIPS IV and higher ISAs.
/*
 * __ashldi3 - shift the 64-bit value v left by c bits using only 32-bit
 * MIPS ALU instructions on the two halves of the value.
 *
 * Operands: %L0/%M0 and %L2/%M2 select the two 32-bit halves of r and v
 * (the sequence treats %M as the more significant half and %L as the less
 * significant one), %1 is the scratch register r0 and %3 is the count c.
 *
 * Only bits 0..5 of c affect the result: the variable shifts use the low
 * five bits and bit 5 is tested separately with the 0x20 mask, so the
 * count is effectively reduced modulo 64.
 *
 * Uses movn, so it needs an ISA level that provides it.
 */
long long __ashldi3(long long v, int c)
{
long long r;
long r0;
asm(
"sllv %L0, %L2, %3\n\t" /* lo = v.lo << (c & 31) */
"sllv %M0, %M2, %3\n\t" /* hi = v.hi << (c & 31) */
"not %1, %3\n\t" /* r0 = ~c, i.e. 31 - (c & 31) in its low five bits */
"srlv %1, %L2, %1\n\t" /* r0 = v.lo >> (31 - (c & 31)) */
"srl %1, %1, 1\n\t" /* one more bit, 32 - (c & 31) in total; gives 0 when (c & 31) == 0 */
"or %M0, %M0, %1\n\t" /* merge the bits carried out of lo into hi */
"andi %1, %3, 0x20\n\t" /* r0 = c & 32: non-zero when the count is 32..63 */
"movn %M0, %L0, %1\n\t" /* count >= 32: hi takes the shifted low word */
"movn %L0, $0, %1" /* count >= 32: lo becomes zero */
/* Early-clobber outputs: both are written before the inputs are fully read. */
: "=&r" (r), "=&r" (r0)
: "r" (v), "r" (c));
return r;
}
/*
 * __lshrdi3 - logically shift the 64-bit value v right by c bits (zero
 * fill) using only 32-bit MIPS ALU instructions on the two halves.
 *
 * Operands: %L0/%M0 and %L2/%M2 select the two 32-bit halves of r and v
 * (the sequence treats %M as the more significant half and %L as the less
 * significant one), %1 is the scratch register r0 and %3 is the count c.
 *
 * Only bits 0..5 of c affect the result: the variable shifts use the low
 * five bits and bit 5 is tested separately with the 0x20 mask, so the
 * count is effectively reduced modulo 64.
 *
 * Uses movn, so it needs an ISA level that provides it.
 */
unsigned long long __lshrdi3(unsigned long long v, int c)
{
unsigned long long r;
long r0;
asm(
"srlv %M0, %M2, %3\n\t" /* hi = v.hi >> (c & 31), zero fill */
"srlv %L0, %L2, %3\n\t" /* lo = v.lo >> (c & 31), zero fill */
"not %1, %3\n\t" /* r0 = ~c, i.e. 31 - (c & 31) in its low five bits */
"sllv %1, %M2, %1\n\t" /* r0 = v.hi << (31 - (c & 31)) */
"sll %1, %1, 1\n\t" /* one more bit, 32 - (c & 31) in total; gives 0 when (c & 31) == 0 */
"or %L0, %L0, %1\n\t" /* merge the bits carried out of hi into lo */
"andi %1, %3, 0x20\n\t" /* r0 = c & 32: non-zero when the count is 32..63 */
"movn %L0, %M0, %1\n\t" /* count >= 32: lo takes the shifted high word */
"movn %M0, $0, %1" /* count >= 32: hi becomes zero */
/* Early-clobber outputs: both are written before the inputs are fully read. */
: "=&r" (r), "=&r" (r0)
: "r" (v), "r" (c));
return r;
}
/*
 * __ashrdi3 - arithmetically shift the 64-bit value v right by c bits
 * (sign fill) using only 32-bit MIPS ALU instructions on the two halves.
 *
 * Operands: %L0/%M0 and %L2/%M2 select the two 32-bit halves of r and v
 * (the sequence treats %M as the more significant half and %L as the less
 * significant one), %1 is the scratch register r0 and %3 is the count c.
 *
 * Only bits 0..5 of c affect the result: the variable shifts use the low
 * five bits and bit 5 is tested separately with the 0x20 mask, so the
 * count is effectively reduced modulo 64.
 *
 * The sign-fill word is held in $1 ($at), which acts as a second temporary.
 * Uses movn, so it needs an ISA level that provides it.
 *
 * NOTE(review): $1 is written but not listed as a clobber; presumably this
 * relies on the compiler never keeping a live value in $at -- confirm.
 */
long long __ashrdi3(long long v, int c)
{
long long r;
long r0;
asm(
"not %1, %3\n\t" /* r0 = ~c, i.e. 31 - (c & 31) in its low five bits */
"srav %M0, %M2, %3\n\t" /* hi = v.hi >> (c & 31), sign fill */
"srlv %L0, %L2, %3\n\t" /* lo = v.lo >> (c & 31), zero fill */
"sllv %1, %M2, %1\n\t" /* r0 = v.hi << (31 - (c & 31)) */
"sll %1, %1, 1\n\t" /* one more bit, 32 - (c & 31) in total; gives 0 when (c & 31) == 0 */
"or %L0, %L0, %1\n\t" /* merge the bits carried out of hi into lo */
"andi %1, %3, 0x20\n\t" /* r0 = c & 32: non-zero when the count is 32..63 */
".set push\n\t" /* save assembler settings */
".set noat\n\t" /* allow explicit use of $1 ($at) below */
"sra $1, %M2, 31\n\t" /* $1 = every bit a copy of v's sign bit */
"movn %L0, %M0, %1\n\t" /* count >= 32: lo takes the shifted high word */
"movn %M0, $1, %1\n\t" /* count >= 32: hi becomes the sign fill */
".set pop" /* restore assembler settings */
/* Early-clobber outputs: both are written before the inputs are fully read. */
: "=&r" (r), "=&r" (r0)
: "r" (v), "r" (c));
return r;
}
I don't know if the middle end is capable of expressing these operations,
but they are pure ALU, so I'd expect it to be.
Maciej
