I want to learn more about how GCC optimizes C programs. I disassembled a random function, both optimized and unoptimized, and I want to look at some of the differences. Off the top of my head, the optimized assembly has fewer jumps and seems to keep most values in registers, while the unoptimized version accesses memory more often. What other differences are worth noting between these two?
C code
/*
 * Counts the positions i, for i = 0 .. index-1, at which intArray[i] equals
 * `first` and intArray[i+2] equals `second`, and returns that count.
 *
 * NOTE(review): `uint` is not declared in this snippet; presumably a typedef
 * for unsigned int -- confirm.
 * NOTE(review): `i` is unsigned while `index` is a signed int, so the loop
 * test is performed as an unsigned comparison; a negative `index` would be
 * treated as a very large bound.
 * NOTE(review): when intArray[i] matches `first`, intArray[i+2] is read as
 * well, so the function can read as far as intArray[index + 1].
 */
uint countPairsUpTo(int index, int* intArray, int first, int second)
{
uint i;
uint sum = 0;
for (i = 0; i < index; i++)
/* && short-circuits: intArray[i+2] is only read when intArray[i] == first */
if ((first == intArray[i]) && (second == intArray[i+2]))
sum++;
return sum ;
}
Unoptimized
# Unoptimized build of countPairsUpTo (AT&T operand order: source, destination).
# Stack slots as used below:
#   0x8(%ebp) = index, 0xc(%ebp) = intArray, 0x10(%ebp) = first, 0x14(%ebp) = second
#   -0x4(%ebp) = sum,  -0x8(%ebp) = i
# Every use of a variable goes through its stack slot; nothing is kept in a
# register from one statement to the next.
#
# Prologue: set up the frame pointer and reserve 16 bytes for locals.
0x080485b1 <countPairsUpTo+0>: push %ebp
0x080485b2 <countPairsUpTo+1>: mov %esp,%ebp
0x080485b4 <countPairsUpTo+3>: sub $0x10,%esp
# Call to mcount through the PLT.
# NOTE(review): presumably the profiling hook emitted for a -pg build -- confirm build flags.
0x080485b7 <countPairsUpTo+6>: call 0x8048418 <mcount@plt>
# sum = 0; i = 0; then jump straight to the loop test at +73.
0x080485bc <countPairsUpTo+11>: movl $0x0,-0x4(%ebp)
0x080485c3 <countPairsUpTo+18>: movl $0x0,-0x8(%ebp)
0x080485ca <countPairsUpTo+25>: jmp 0x80485fa <countPairsUpTo+73>
# Loop body: eax = intArray[i], computed by hand as *(intArray + (i << 2)).
0x080485cc <countPairsUpTo+27>: mov -0x8(%ebp),%eax
0x080485cf <countPairsUpTo+30>: shl $0x2,%eax
0x080485d2 <countPairsUpTo+33>: add 0xc(%ebp),%eax
0x080485d5 <countPairsUpTo+36>: mov (%eax),%eax
# if (intArray[i] != first) skip to i++ (short-circuit of the &&).
0x080485d7 <countPairsUpTo+38>: cmp 0x10(%ebp),%eax
0x080485da <countPairsUpTo+41>: jne 0x80485f6 <countPairsUpTo+69>
# eax = intArray[i+2]: intArray and i are reloaded from the stack and the
# address is rebuilt as (intArray + 8) + (i << 2).
0x080485dc <countPairsUpTo+43>: mov 0xc(%ebp),%edx
0x080485df <countPairsUpTo+46>: add $0x8,%edx
0x080485e2 <countPairsUpTo+49>: mov -0x8(%ebp),%eax
0x080485e5 <countPairsUpTo+52>: shl $0x2,%eax
0x080485e8 <countPairsUpTo+55>: lea (%edx,%eax,1),%eax
0x080485eb <countPairsUpTo+58>: mov (%eax),%eax
# if (intArray[i+2] != second) skip to i++.
0x080485ed <countPairsUpTo+60>: cmp 0x14(%ebp),%eax
0x080485f0 <countPairsUpTo+63>: jne 0x80485f6 <countPairsUpTo+69>
# sum++ (read-modify-write directly on the stack slot).
0x080485f2 <countPairsUpTo+65>: addl $0x1,-0x4(%ebp)
# i++
0x080485f6 <countPairsUpTo+69>: addl $0x1,-0x8(%ebp)
# Loop test: continue while index > i, compared as unsigned (ja).
0x080485fa <countPairsUpTo+73>: mov 0x8(%ebp),%eax
0x080485fd <countPairsUpTo+76>: cmp -0x8(%ebp),%eax
0x08048600 <countPairsUpTo+79>: ja 0x80485cc <countPairsUpTo+27>
# Return sum in %eax; leave tears down the frame.
0x08048602 <countPairsUpTo+81>: mov -0x4(%ebp),%eax
0x08048605 <countPairsUpTo+84>: leave
0x08048606 <countPairsUpTo+85>: ret
Optimized
# Optimized build of countPairsUpTo (AT&T operand order: source, destination).
# Register roles as used below:
#   %ebx = intArray, %esi = first, %ecx = index, %edx = i, %edi = sum
# No stack space is reserved for locals; i and sum live only in registers.
#
# Prologue: set up the frame pointer and save %edi/%esi/%ebx, which are
# overwritten below and restored at +68..+70.
0x08048570 <countPairsUpTo+0>: push %ebp
0x08048571 <countPairsUpTo+1>: mov %esp,%ebp
0x08048573 <countPairsUpTo+3>: push %edi
0x08048574 <countPairsUpTo+4>: push %esi
0x08048575 <countPairsUpTo+5>: push %ebx
# Call to mcount through the PLT.
# NOTE(review): presumably the profiling hook emitted for a -pg build -- confirm build flags.
0x08048576 <countPairsUpTo+6>: call 0x8048418 <mcount@plt>
# Load the arguments into registers once, before the loop.
0x0804857b <countPairsUpTo+11>: mov 0xc(%ebp),%ebx
0x0804857e <countPairsUpTo+14>: mov 0x10(%ebp),%esi
0x08048581 <countPairsUpTo+17>: mov 0x8(%ebp),%ecx
# sum = 0
0x08048584 <countPairsUpTo+20>: mov $0x0,%edi
# If index == 0 the loop is skipped entirely and 0 is returned. This up-front
# test is what lets the bottom-of-loop check at +62 use a plain != compare.
0x08048589 <countPairsUpTo+25>: test %ecx,%ecx
0x0804858b <countPairsUpTo+27>: je 0x80485b2 <countPairsUpTo+66>
# sum = 0 again (redundant with +20) and i = 0.
0x0804858d <countPairsUpTo+29>: mov $0x0,%edi
0x08048592 <countPairsUpTo+34>: mov $0x0,%edx
# Loop top: compare first with intArray[i] directly in memory; the
# base + index*4 addressing mode replaces the separate shl/add/load sequence.
0x08048597 <countPairsUpTo+39>: cmp %esi,(%ebx,%edx,4)
0x0804859a <countPairsUpTo+42>: jne 0x80485ab <countPairsUpTo+59>
# second is still read from its stack slot here, on each pass that reaches
# this point, rather than being kept in a register.
0x0804859c <countPairsUpTo+44>: mov 0x14(%ebp),%eax
# Compare second with intArray[i+2]; the +2 elements is folded into the
# 8-byte displacement.
0x0804859f <countPairsUpTo+47>: cmp %eax,0x8(%ebx,%edx,4)
# Branchless sum update: %al = (intArray[i+2] == second), zero-extended to
# 0 or 1 and added to sum, instead of a second conditional jump.
0x080485a3 <countPairsUpTo+51>: sete %al
0x080485a6 <countPairsUpTo+54>: movzbl %al,%eax
0x080485a9 <countPairsUpTo+57>: add %eax,%edi
# i++; loop again while i != index.
0x080485ab <countPairsUpTo+59>: add $0x1,%edx
0x080485ae <countPairsUpTo+62>: cmp %ecx,%edx
0x080485b0 <countPairsUpTo+64>: jne 0x8048597 <countPairsUpTo+39>
# Return sum in %eax, restore the saved registers in reverse push order.
0x080485b2 <countPairsUpTo+66>: mov %edi,%eax
0x080485b4 <countPairsUpTo+68>: pop %ebx
0x080485b5 <countPairsUpTo+69>: pop %esi
0x080485b6 <countPairsUpTo+70>: pop %edi
0x080485b7 <countPairsUpTo+71>: pop %ebp
0x080485b8 <countPairsUpTo+72>: ret