linux-mips
[Top] [All Lists]

another gcc optimization bug?

To: "linux-mips@oss.sgi.com" <linux-mips@oss.sgi.com>
Subject: another gcc optimization bug?
From: Zhang Fuxin <fxzhang@ict.ac.cn>
Date: Tue, 5 Feb 2002 22:54:42 +0800
Sender: owner-linux-mips@oss.sgi.com
hi,
    It seems gcc at optimization level -O2 or higher produces incorrect code
for floating-point code.
I found this example while tracking down the libm-test failures
  ( expf(NaN) == NaN reports an extra "invalid operation" exception ).
The faulting code snippet is:
        /*
         * Reduced test case: a wrapper around __ieee754_expf().
         *
         * Computes z = __ieee754_expf(x) and returns it unchanged when
         * _LIB_VERSION equals _IEEE_. Otherwise, only if __finitef(x)
         * reports a nonzero result, x is compared against o_threshold and
         * u_threshold and a fixed marker value (1.0 or 2.0) is returned on
         * a hit. In every other case z is returned.
         *
         * The ordering matters for this report: both float comparisons are
         * written inside the __finitef(x) guard, so as written they are not
         * reached when __finitef(x) returns 0.
         */
        float __my_expf(float x)                /* wrapper expf */
{
        float z;
        unsigned int hx;        /* declared but never used in this snippet */
        z = __ieee754_expf(x);
        /* No range checks at all when _LIB_VERSION == _IEEE_. */
        if(_LIB_VERSION == _IEEE_) return z;
        /* The comparisons below are guarded by the __finitef(x) result. */
        if(__finitef(x)) {
            if(x>o_threshold)
                return 1.0;     /* marker value for x above o_threshold */
            else if(x<u_threshold)
                return 2.0;     /* marker value for x below u_threshold */
            /* Finite x within both thresholds: print, then fall through. */
            printf("haha\n");
        } 
        return z;
}

With -O2 (or higher), some statements inside "if (__finitef(x))" are moved ahead
of the test of the return value of __finitef(x). One of them is a c.lt.s, which
is a signaling comparison: when x is NaN it raises the extra "invalid operation"
exception, even though the guarded branch is never taken.

The object code is listed below:
1.  without -O
0000000000401290 <__my_expf>:
  401290:       3c1c0fc0        lui     gp,0xfc0
  401294:       279c6dd0        addiu   gp,gp,28112
  401298:       0399e021        addu    gp,gp,t9
  40129c:       27bdffd0        addiu   sp,sp,-48
  4012a0:       afbc0010        sw      gp,16(sp)
  4012a4:       afbf0028        sw      ra,40(sp)
  4012a8:       afbe0024        sw      s8,36(sp)
  4012ac:       afbc0020        sw      gp,32(sp)
  4012b0:       03a0f021        move    s8,sp
  4012b4:       e7cc0030        swc1    $f12,48(s8)
  4012b8:       c7cc0030        lwc1    $f12,48(s8)
  4012bc:       8f998088        lw      t9,-32632(gp)
  4012c0:       00000000        nop
  4012c4:       0320f809        jalr    t9
  4012c8:       00000000        nop
  4012cc:       8fdc0010        lw      gp,16(s8)
  4012d0:       e7c00018        swc1    $f0,24(s8)
  4012d4:       8f8380ac        lw      v1,-32596(gp)
  4012d8:       00000000        nop
  4012dc:       8c630000        lw      v1,0(v1)
  4012e0:       2402ffff        li      v0,-1
  4012e4:       14620004        bne     v1,v0,4012f8 <__my_expf+0x68>
  4012e8:       00000000        nop
  4012ec:       c7c00018        lwc1    $f0,24(s8)
  4012f0:       10000032        b       4013bc <__my_expf+0x12c>
  4012f4:       00000000        nop
  4012f8:       c7cc0030        lwc1    $f12,48(s8)
  4012fc:       8f998090        lw      t9,-32624(gp)
  401300:       00000000        nop
  401304:       0320f809        jalr    t9
  401308:       00000000        nop
  40130c:       8fdc0010        lw      gp,16(s8)
  401310:       10400029        beqz    v0,4013b8 <__my_expf+0x128>
  401314:       00000000        nop
  401318:       c7c20030        lwc1    $f2,48(s8)
  40131c:       8f818018        lw      at,-32744(gp)
  401320:       00000000        nop
  401324:       242142a0        addiu   at,at,17056
  401328:       c4200000        lwc1    $f0,0(at)
  40132c:       00000000        nop
  401330:       4602003c        c.lt.s  $f0,$f2
  401334:       00000000        nop
  401338:       45010003        bc1t    401348 <__my_expf+0xb8>
  40133c:       00000000        nop
  401340:       10000005        b       401358 <__my_expf+0xc8>
  401344:       00000000        nop
  401348:       3c013f80        lui     at,0x3f80
  40134c:       44810000        mtc1    at,$f0
  401350:       1000001a        b       4013bc <__my_expf+0x12c>
  401354:       00000000        nop
  401358:       c7c20030        lwc1    $f2,48(s8)
  40135c:       8f818018        lw      at,-32744(gp)
  401360:       00000000        nop
  401364:       242142a4        addiu   at,at,17060
  401368:       c4200000        lwc1    $f0,0(at)
  40136c:       00000000        nop
  401370:       4600103c        c.lt.s  $f2,$f0
  401374:       00000000        nop
  401378:       45010003        bc1t    401388 <__my_expf+0xf8>
  40137c:       00000000        nop
  401380:       10000005        b       401398 <__my_expf+0x108>
  401384:       00000000        nop
  401388:       3c014000        lui     at,0x4000
  40138c:       44810000        mtc1    at,$f0
  401390:       1000000a        b       4013bc <__my_expf+0x12c>
  401394:       00000000        nop
  401398:       8f848018        lw      a0,-32744(gp)
  40139c:       00000000        nop
  4013a0:       248442a8        addiu   a0,a0,17064
  4013a4:       8f998058        lw      t9,-32680(gp)
  4013a8:       00000000        nop
  4013ac:       0320f809        jalr    t9
  4013b0:       00000000        nop
  4013b4:       8fdc0010        lw      gp,16(s8)
  4013b8:       c7c00018        lwc1    $f0,24(s8)
  4013bc:       03c0e821        move    sp,s8
  4013c0:       8fbf0028        lw      ra,40(sp)
  4013c4:       8fbe0024        lw      s8,36(sp)
  4013c8:       03e00008        jr      ra
  4013cc:       27bd0030        addiu   sp,sp,48

2. with -O
 0000000000400fc0 <__my_expf>:
  400fc0:       3c1c0fc0        lui     gp,0xfc0
  400fc4:       279c70a0        addiu   gp,gp,28832
  400fc8:       0399e021        addu    gp,gp,t9
  400fcc:       27bdffd0        addiu   sp,sp,-48
  400fd0:       afbc0010        sw      gp,16(sp)
  400fd4:       e7b60028        swc1    $f22,40(sp)
  400fd8:       e7b7002c        swc1    $f23,44(sp)
  400fdc:       e7b40020        swc1    $f20,32(sp)
  400fe0:       e7b50024        swc1    $f21,36(sp)
  400fe4:       afbf001c        sw      ra,28(sp)
  400fe8:       46006506        mov.s   $f20,$f12
  400fec:       afbc0018        sw      gp,24(sp)
  400ff0:       8f998088        lw      t9,-32632(gp)
  400ff4:       00000000        nop
  400ff8:       0320f809        jalr    t9
  400ffc:       00000000        nop
  401000:       8fbc0010        lw      gp,16(sp)
  401004:       00000000        nop
  401008:       8f8380ac        lw      v1,-32596(gp)
  40100c:       00000000        nop
  401010:       8c630000        lw      v1,0(v1)
  401014:       2402ffff        li      v0,-1
  401018:       46000586        mov.s   $f22,$f0
  40101c:       1062000a        beq     v1,v0,401048 <__my_expf+0x88>
  401020:       4600a306        mov.s   $f12,$f20
  401024:       8f998090        lw      t9,-32624(gp)
  401028:       00000000        nop
  40102c:       0320f809        jalr    t9
  401030:       00000000        nop
  401034:       8fbc0010        lw      gp,16(sp)
  401038:       44800000        mtc1    zero,$f0
  40103c:       14400002        bnez    v0,401048 <__my_expf+0x88>
  401040:       00000000        nop
  401044:       4600b006        mov.s   $f0,$f22
  401048:       8fbf001c        lw      ra,28(sp)
  40104c:       c7b60028        lwc1    $f22,40(sp)
  401050:       c7b7002c        lwc1    $f23,44(sp)
  401054:       c7b40020        lwc1    $f20,32(sp)
  401058:       c7b50024        lwc1    $f21,36(sp)
  40105c:       03e00008        jr      ra
  401060:       27bd0030        addiu   sp,sp,48

3. with -O2
   0000000000400fc0 <__my_expf>:
  400fc0:       3c1c0fc0        lui     gp,0xfc0
  400fc4:       279c70a0        addiu   gp,gp,28832
  400fc8:       0399e021        addu    gp,gp,t9
  400fcc:       27bdffd0        addiu   sp,sp,-48
  400fd0:       afbc0010        sw      gp,16(sp)
  400fd4:       e7b60028        swc1    $f22,40(sp)
  400fd8:       e7b7002c        swc1    $f23,44(sp)
  400fdc:       e7b40020        swc1    $f20,32(sp)
  400fe0:       e7b50024        swc1    $f21,36(sp)
  400fe4:       afbf001c        sw      ra,28(sp)
  400fe8:       46006506        mov.s   $f20,$f12
  400fec:       afbc0018        sw      gp,24(sp)
  400ff0:       8f998088        lw      t9,-32632(gp)
  400ff4:       00000000        nop
  400ff8:       0320f809        jalr    t9
  400ffc:       00000000        nop
  401000:       8fbc0010        lw      gp,16(sp)
  401004:       00000000        nop
  401008:       8f8380ac        lw      v1,-32596(gp)
  40100c:       00000000        nop
  401010:       8c630000        lw      v1,0(v1)
  401014:       2402ffff        li      v0,-1
  401018:       46000586        mov.s   $f22,$f0
  40101c:       10620021        beq     v1,v0,4010a4 <__my_expf+0xe4>
  401020:       4600a306        mov.s   $f12,$f20
  401024:       8f998090        lw      t9,-32624(gp)
  401028:       00000000        nop
  40102c:       0320f809        jalr    t9
  401030:       00000000        nop
  401034:       8fbc0010        lw      gp,16(sp)
  401038:       3c0142b1        lui     at,0x42b1
  40103c:       34217180        ori     at,at,0x7180
  401040:       44811000        mtc1    at,$f2
  401044:       3c013f80        lui     at,0x3f80
  401048:       44810000        mtc1    at,$f0
  40104c:       4614103c        c.lt.s  $f2,$f20  // this is wrongly put ahead ?
  401050:       10400013        beqz    v0,4010a0 <__my_expf+0xe0>
  401054:       00000000        nop
  401058:       45010012        bc1t    4010a4 <__my_expf+0xe4>
  40105c:       00000000        nop
  401060:       3c01c2cf        lui     at,0xc2cf
  401064:       3421f1b5        ori     at,at,0xf1b5
  401068:       44810000        mtc1    at,$f0
  40106c:       8f848018        lw      a0,-32744(gp)
  401070:       00000000        nop
  401074:       24843fa8        addiu   a0,a0,16296
  401078:       4600a03c        c.lt.s  $f20,$f0
  40107c:       3c014000        lui     at,0x4000
  401080:       44810000        mtc1    at,$f0
  401084:       45010007        bc1t    4010a4 <__my_expf+0xe4>
  401088:       00000000        nop
  40108c:       8f998058        lw      t9,-32680(gp)
  401090:       00000000        nop
  401094:       0320f809        jalr    t9
  401098:       00000000        nop
  40109c:       8fbc0010        lw      gp,16(sp)
  4010a0:       4600b006        mov.s   $f0,$f22
  4010a4:       8fbf001c        lw      ra,28(sp)
  4010a8:       c7b60028        lwc1    $f22,40(sp)
  4010ac:       c7b7002c        lwc1    $f23,44(sp)
  4010b0:       c7b40020        lwc1    $f20,32(sp)
  4010b4:       c7b50024        lwc1    $f21,36(sp)
  4010b8:       03e00008        jr      ra
  4010bc:       27bd0030        addiu   sp,sp,48

 
 
  
Regards
            Zhang Fuxin
            fxzhang@ict.ac.cn


<Prev in Thread] Current Thread [Next in Thread>