#include <stdio.h>
/* Timing test program: runs a counted loop whose body only stores the constant 1 into a. */
int main(){
int i, a; /* i: loop counter; a: assigned inside the loop and never read */
for(i = 0; i<100000000; i++){ /* i runs from 0 up to 99999999 */
a = 1; /* same store on every pass; nothing later depends on it */
}
} /* no return statement: control simply reaches the end of main */
He compiled it normally using gcc testopt.c -o testopt, executed it, and got a running-time estimate of 0.288 s on average. Then he recompiled it with gcc -O3 testopt.c -o testopt (i.e. level 3 optimization) and got a running-time estimate of 0.003 s on average. Surprised at this large reduction in time, he obtained the assembly language output for both versions of the translation:
# unoptimized
# main as emitted without optimization (32-bit x86, AT&T operand order: src, dst).
# Frame slots: -12(%ebp) holds the loop counter (i in the C source),
#              -8(%ebp) receives the constant 1 (a in the C source).
main:
# Prologue: realign the stack to 16 bytes while keeping a way back to the old %esp.
leal 4(%esp), %ecx      # %ecx = entry %esp + 4 (just above the return address)
andl $-16, %esp         # round %esp down to a multiple of 16
pushl -4(%ecx)          # re-push the word found at the entry %esp (the return address)
pushl %ebp              # save the caller's frame pointer
movl %esp, %ebp         # establish this function's frame pointer
pushl %ecx              # save %ecx; the epilogue uses it to restore %esp
subl $16, %esp          # reserve 16 bytes for the locals
#----------------------------------------------
movl $0, -12(%ebp)      # counter = 0
jmp .L2                 # check the condition before the first pass
.L3:
movl $1, -8(%ebp)       # loop body: store 1 into the second local
incl -12(%ebp)          # counter++
.L2:
cmpl $99999999, -12(%ebp)
jle .L3                 # signed test: repeat while counter <= 99999999
#----------------------------------------------
# Epilogue: undo the prologue in reverse order.
addl $16, %esp          # release the locals
popl %ecx               # recover the saved entry %esp + 4
popl %ebp               # restore the caller's frame pointer
leal -4(%ecx), %esp     # %esp = entry %esp, pointing at the return address again
ret
# optimized
# main as emitted at optimization level 3 (32-bit x86, AT&T operand order: src, dst).
# Only the stack-realignment prologue and its matching epilogue are present:
# no stack space is reserved for locals and there are no loop instructions.
main:
leal 4(%esp), %ecx      # %ecx = entry %esp + 4 (just above the return address)
andl $-16, %esp         # round %esp down to a multiple of 16
pushl -4(%ecx)          # re-push the word found at the entry %esp (the return address)
pushl %ebp              # save the caller's frame pointer
movl %esp, %ebp         # establish this function's frame pointer
pushl %ecx              # save %ecx; popped again immediately below
#-----------------------------------------------
popl %ecx               # recover the saved entry %esp + 4
popl %ebp               # restore the caller's frame pointer
leal -4(%ecx), %esp     # %esp = entry %esp, pointing at the return address again
ret
Comparing the two listings, he found that the loop-forming instructions, enclosed between the two horizontal lines in the unoptimized code, were completely missing from the optimized code. Also, the local variables i and a were not created. What was going on?
3.15.26.221