I've been thinking about CFLAGS such as -mmmx... Does gcc actually have the
ability to produce mmx instructions?
for example:
mmx.c:
#include <stdio.h>
/*
 * Test case for the -mmmx question: adds 5 to every element of a
 * 100000-element array of shorts in a simple counted loop.
 * NOTE(review): x is never initialized and its contents are never used
 * afterwards; main is declared without a return type and has no return
 * statement.
 */
main()
{
short x[100000]; /* automatic array local to main */
int i;
for (i=0; i < 100000; i++)
{
x[i] += 5; /* the per-element add one would hope to see vectorized */
}
}
gcc -mmmx -O3 --save-temps -funroll-all-loops mmx.c
gives
# gcc output for main() in mmx.c (AT&T syntax, 32-bit x86).
# The loop body is unrolled ten times but uses only scalar 16-bit adds to
# memory; no MMX registers or instructions appear anywhere in the listing.
.file "mmx.c"
.def ___main; .scl 2; .type 32; .endef
.text
.align 2
.p2align 4,,15
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
pushl %ebp
# 200008 = 100000 elements * 2 bytes + 8. Presumably __alloca takes the
# number of bytes to reserve in %eax -- confirm against the target runtime.
movl $200008, %eax
movl %esp, %ebp
call __alloca
xorl %eax, %eax
# Round %esp down to a 16-byte boundary.
andl $-16, %esp
call __alloca
call ___main
# %eax = i = 0
xorl %eax, %eax
.p2align 4,,7
L6:
# One iteration handles ten consecutive 16-bit elements, x[i] .. x[i+9]:
# %eax is scaled by 2 and each displacement steps by 2 bytes, starting with
# x[0] at -200008(%ebp).
addw $5, -200008(%ebp,%eax,2)
addw $5, -200006(%ebp,%eax,2)
addw $5, -200004(%ebp,%eax,2)
addw $5, -200002(%ebp,%eax,2)
addw $5, -200000(%ebp,%eax,2)
addw $5, -199998(%ebp,%eax,2)
addw $5, -199996(%ebp,%eax,2)
addw $5, -199994(%ebp,%eax,2)
addw $5, -199992(%ebp,%eax,2)
addw $5, -199990(%ebp,%eax,2)
# i += 10, then loop while i <= 99999 (signed comparison).
addl $10, %eax
cmpl $99999, %eax
jle L6
# %eax is not written again before ret, so it still holds the loop counter.
leave
ret
Shouldn't take too much imagination to see how that could be improved with mmx...
Likewise:
/*
 * Stores 10 through x when the int it points to equals 5; otherwise the
 * pointed-to value is left unchanged.
 * NOTE(review): declared without a return type and returns no value.
 */
fu(int *x)
{
if (*x == 5)
*x = 10;
}
with gcc -march=athlon-xp -O3 --save-temps -funroll-all-loops mmx.c
gives
# gcc output for fu(): if (*x == 5) *x = 10;
# The condition is implemented with a compare and a conditional branch to an
# out-of-line store; no cmov instruction is emitted.
_fu:
pushl %ebp
movl %esp, %ebp
# %eax = x, the pointer argument read from 8(%ebp).
movl 8(%ebp), %eax
cmpl $5, (%eax)
# Taken when *x == 5.
je L33
L32:
# The only leave/ret sequence; both paths exit through here.
leave
ret
.p2align 6,,7
L33:
# *x = 10, then jump back to the epilogue at L32.
movl $10, (%eax)
jmp L32
The -march=athlon-xp parameter should imply the cmov instruction...(?)
(It doesn't even use multiple leave/ret sequences to eliminate an extra jmp...)
Do the gcc developers include these fancy parameters just to give us ricers a
false sense of speed?
any thoughts?
--
gentoo-performance@g.o mailing list
|