> does the loading sequence for an ELF file start (from the absolute beginning of the loading process) at the _start tag in .text?
The loading sequence starts with a syscall to execve with the file name and command line as args. execve reads the file, examines the ELF headers, loads program sections into the virtual address space, initializes the runtime stack and then passes control to the entry point specified in the ELF header.
The following is for Unix (FreeBSD/i386); I suspect the details on Linux could be somewhat different. If you have access to the target MIPS machine, copy the program to it and then check it out yourself - that is always the best way.
>ktrace ./hello && kdump
hello world
51605 ktrace RET ktrace 0
51605 ktrace CALL execve(0xbfbfe85b,0xbfbfe6b0,0xbfbfe6b8)
51605 ktrace NAMI "./hello"
51605 hello RET execve 0
51605 hello CALL fstat(0x1,0xbfbfe514)
...
The entry point is _start, so yes, the program begins executing at the _start tag in .text.
> an instruction near the beginning of _start loads deadbeef into the program
> which denotes that some previous instructions should have placed a real value where the deadbeef was.
Verify that there is no dynamic segment in your executable.
A standalone executable (everything that follows applies only to executables without a dynamic segment) should look something like this:
>readelf -d hello
There is no dynamic segment in this file.
>readelf -S hello
There are 14 section headers, starting at offset 0x2b700:
Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .init PROGBITS 080480ac 0000ac 000007 00 AX 0 0 4
[ 2] .text PROGBITS 080480c0 0000c0 024070 00 AX 0 0 16
[ 3] .fini PROGBITS 0806c130 024130 000007 00 AX 0 0 4
[ 4] .rodata PROGBITS 0806c140 024140 0039cd 00 A 0 0 32
[ 5] .data PROGBITS 08070000 028000 001ef4 00 WA 0 0 32
[ 6] .ctors PROGBITS 08071ef4 029ef4 000004 00 WA 0 0 4
[ 7] .dtors PROGBITS 08071ef8 029ef8 000004 00 WA 0 0 4
[ 8] .bss NOBITS 08071f00 029f00 008ee8 00 WA 0 0 32
[ 9] .comment PROGBITS 00000000 029f00 001799 00 0 0 1
[10] .note.ABI-tag NOTE 08048094 000094 000018 00 A 0 0 4
[11] .shstrtab STRTAB 00000000 02b699 000065 00 0 0 1
[12] .symtab SYMTAB 00000000 02b930 006fc0 10 13 4bc 4
[13] .strtab STRTAB 00000000 0328f0 004808 00 0 0 1
Note that while .ctors and .dtors are writeable, .text is not.
>readelf -h hello | grep Entry
Entry point address: 0x80480f0
(gdb) disas 0x80480f0
Dump of assembler code for function _start:
0x080480f0 <_start+0>: push %ebp
0x080480f1 <_start+1>: mov %esp,%ebp
0x080480f3 <_start+3>: push %esi
0x080480f4 <_start+4>: push %ebx
0x080480f5 <_start+5>: sub $0x10,%esp
0x080480f8 <_start+8>: and $0xfffffff0,%esp
0x080480fb <_start+11>: mov 0x4(%ebp),%ebx
0x080480fe <_start+14>: mov %edx,%ecx
0x08048100 <_start+16>: lea 0xc(%ebp,%ebx,4),%esi
0x08048104 <_start+20>: test %ebx,%ebx
0x08048106 <_start+22>: mov %esi,0x807adc0
0x0804810c <_start+28>: jle 0x8048144 <_start+84>
0x0804810e <_start+30>: mov 0x8(%ebp),%eax
0x08048111 <_start+33>: test %eax,%eax
0x08048113 <_start+35>: je 0x8048144 <_start+84>
0x08048115 <_start+37>: mov %eax,0x8070000
0x0804811a <_start+42>: movzbl (%eax),%edx
0x0804811d <_start+45>: test %dl,%dl
0x0804811f <_start+47>: je 0x8048144 <_start+84>
0x08048121 <_start+49>: add $0x1,%eax
0x08048124 <_start+52>: jmp 0x8048130 <_start+64>
0x08048126 <_start+54>: movzbl (%eax),%edx
0x08048129 <_start+57>: add $0x1,%eax
0x0804812c <_start+60>: test %dl,%dl
0x0804812e <_start+62>: je 0x8048144 <_start+84>
0x08048130 <_start+64>: cmp $0x2f,%dl
0x08048133 <_start+67>: jne 0x8048126 <_start+54>
0x08048135 <_start+69>: mov %eax,0x8070000
0x0804813a <_start+74>: movzbl (%eax),%edx
0x0804813d <_start+77>: add $0x1,%eax
0x08048140 <_start+80>: test %dl,%dl
0x08048142 <_start+82>: jne 0x8048130 <_start+64>
0x08048144 <_start+84>: mov $0x0,%eax
0x08048149 <_start+89>: test %eax,%eax
0x0804814b <_start+91>: je 0x8048181 <_start+145>
0x0804814d <_start+93>: mov %ecx,(%esp)
0x08048150 <_start+96>: call 0x80519b0 <atexit>
0x08048155 <_start+101>: movl $0x806c130,(%esp)
0x0804815c <_start+108>: call 0x80519b0 <atexit>
0x08048161 <_start+113>: call 0x80480ac <_init>
0x08048166 <_start+118>: lea 0x8(%ebp),%eax
0x08048169 <_start+121>: mov %esi,0x8(%esp)
0x0804816d <_start+125>: mov %eax,0x4(%esp)
0x08048171 <_start+129>: mov %ebx,(%esp)
0x08048174 <_start+132>: call 0x80480c0 <main>
0x08048179 <_start+137>: mov %eax,(%esp)
0x0804817c <_start+140>: call 0x8050a60 <exit>
0x08048181 <_start+145>: call 0x80481b0 <_init_tls>
0x08048186 <_start+150>: jmp 0x8048155 <_start+101>
0x08048188 <_start+152>: nop
0x08048189 <_start+153>: nop
0x0804818a <_start+154>: nop
0x0804818b <_start+155>: nop
0x0804818c <_start+156>: nop
0x0804818d <_start+157>: nop
0x0804818e <_start+158>: nop
0x0804818f <_start+159>: nop
End of assembler dump.
_start calls atexit, _init and then main; on return from main it calls exit.
For this trivial C program, _init is essentially a no-op.
(gdb) disas _init
Dump of assembler code for function _init:
0x080480ac <_init+0>: sub $0xc,%esp
0x080480af <_init+3>: add $0xc,%esp
0x080480b2 <_init+6>: ret
End of assembler dump.
exit, in turn, cleans up and then makes the _exit syscall to terminate the process.
(gdb) disas exit
Dump of assembler code for function exit:
0x08050a60 <exit+0>: push %ebp
0x08050a61 <exit+1>: mov %esp,%ebp
0x08050a63 <exit+3>: sub $0x8,%esp
0x08050a66 <exit+6>: movl $0x0,(%esp)
0x08050a6d <exit+13>: movl $0x1,0x8079ed4
0x08050a77 <exit+23>: call 0x80516c0 <__cxa_finalize>
0x08050a7c <exit+28>: mov 0x807adc8,%eax
0x08050a81 <exit+33>: test %eax,%eax
0x08050a83 <exit+35>: je 0x8050a87 <exit+39>
0x08050a85 <exit+37>: call *%eax
0x08050a87 <exit+39>: mov 0x8(%ebp),%eax
0x08050a8a <exit+42>: mov %eax,(%esp)
0x08050a8d <exit+45>: call 0x8050c1c <_exit>
0x08050a92 <exit+50>: nop
0x08050a93 <exit+51>: nop
0x08050a94 <exit+52>: nop
0x08050a95 <exit+53>: nop
The initialization and deinitialization is more elaborate for C++ programs.
For example, for a hello.cc:
#include <iostream>
int main()
{
std::cout << "hello world\n" ;
}
>g++ -Wall -std=c++98 -pedantic -Werror -static -o hello++ hello.cc && readelf -S hello++
There are 19 section headers, starting at offset 0xb0f60:
Section Headers:
[Nr] Name Type Addr Off Size ES Flg Lk Inf Al
[ 0] NULL 00000000 000000 000000 00 0 0 0
[ 1] .init PROGBITS 080480cc 0000cc 000011 00 AX 0 0 4
[ 2] .text PROGBITS 080480e0 0000e0 08fadf 00 AX 0 0 16
[ 3] .fini PROGBITS 080d7bc0 08fbc0 00000c 00 AX 0 0 4
[ 4] .rodata PROGBITS 080d7be0 08fbe0 008e97 00 A 0 0 32
[ 5] .data PROGBITS 080e1a80 098a80 002040 00 WA 0 0 32
[ 6] .tbss NOBITS 080e3ac0 09aac0 000008 00 WAT 0 0 4
[ 7] .eh_frame PROGBITS 080e3ac0 09aac0 00f58c 00 A 0 0 4
[ 8] .gcc_except_table PROGBITS 080f304c 0aa04c 004588 00 A 0 0 4
[ 9] .ctors PROGBITS 080f75d4 0ae5d4 000028 00 WA 0 0 4
[10] .dtors PROGBITS 080f75fc 0ae5fc 00000c 00 WA 0 0 4
[11] .jcr PROGBITS 080f7608 0ae608 000004 00 WA 0 0 4
[12] .got PROGBITS 080f760c 0ae60c 000010 04 WA 0 0 4
[13] .bss NOBITS 080f7620 0ae620 00f828 00 WA 0 0 32
[14] .comment PROGBITS 00000000 0ae620 0028ac 00 0 0 1
[15] .note.ABI-tag NOTE 080480b4 0000b4 000018 00 A 0 0 4
[16] .shstrtab STRTAB 00000000 0b0ecc 000091 00 0 0 1
[17] .symtab SYMTAB 00000000 0b1258 013690 10 18 6df 4
[18] .strtab STRTAB 00000000 0c48e8 024642 00 0 0 1
>readelf -d hello++
There is no dynamic segment in this file.
>readelf -h hello++ | grep Entry
Entry point address: 0x80480e0
_start is identical to that of the C program:
(gdb) disas 0x80480e0
Dump of assembler code for function _start:
0x080480e0 <_start+0>: push %ebp
0x080480e1 <_start+1>: mov %esp,%ebp
...
...
0x08048140 <_start+96>: call 0x80bfd50 <atexit>
0x08048145 <_start+101>: movl $0x80d7bc0,(%esp)
0x0804814c <_start+108>: call 0x80bfd50 <atexit>
0x08048151 <_start+113>: call 0x80480cc <_init>
0x08048156 <_start+118>: lea 0x8(%ebp),%eax
0x08048159 <_start+121>: mov %esi,0x8(%esp)
0x0804815d <_start+125>: mov %eax,0x4(%esp)
0x08048161 <_start+129>: mov %ebx,(%esp)
0x08048164 <_start+132>: call 0x8048330 <main>
0x08048169 <_start+137>: mov %eax,(%esp)
0x0804816c <_start+140>: call 0x80ba130 <exit>
0x08048171 <_start+145>: call 0x80ae0d0 <_init_tls>
0x08048176 <_start+150>: jmp 0x8048145 <_start+101>
0x08048178 <_start+152>: nop
0x08048179 <_start+153>: nop
_init calls frame_dummy (which sets up the exception frame) and then __do_global_ctors_aux (which performs the dynamic initialization of statics; .ctors contains a table of function pointers).
(gdb) disas _init
Dump of assembler code for function _init:
0x080480cc <_init+0>: sub $0xc,%esp
0x080480cf <_init+3>: call 0x80481d0 <frame_dummy>
0x080480d4 <_init+8>: call 0x80d7b50 <__do_global_ctors_aux>
0x080480d9 <_init+13>: add $0xc,%esp
0x080480dc <_init+16>: ret
End of assembler dump.
(gdb) disas frame_dummy
Dump of assembler code for function frame_dummy:
0x080481d0 <frame_dummy+0>: push %ebp
0x080481d1 <frame_dummy+1>: mov $0x80acb50,%eax
0x080481d6 <frame_dummy+6>: mov %esp,%ebp
0x080481d8 <frame_dummy+8>: sub $0x8,%esp
0x080481db <frame_dummy+11>: test %eax,%eax
0x080481dd <frame_dummy+13>: je 0x80481f3 <frame_dummy+35>
0x080481df <frame_dummy+15>: movl $0x80f7624,0x4(%esp)
0x080481e7 <frame_dummy+23>: movl $0x80e3ac0,(%esp)
0x080481ee <frame_dummy+30>: call 0x80acb50 <__register_frame_info>
0x080481f3 <frame_dummy+35>: mov 0x80f7608,%eax
0x080481f8 <frame_dummy+40>: test %eax,%eax
0x080481fa <frame_dummy+42>: je 0x804820e <frame_dummy+62>
0x080481fc <frame_dummy+44>: mov $0x0,%eax
0x08048201 <frame_dummy+49>: test %eax,%eax
0x08048203 <frame_dummy+51>: je 0x804820e <frame_dummy+62>
0x08048205 <frame_dummy+53>: movl $0x80f7608,(%esp)
0x0804820c <frame_dummy+60>: call *%eax
0x0804820e <frame_dummy+62>: leave
0x0804820f <frame_dummy+63>: ret
End of assembler dump.
(gdb) disas __do_global_ctors_aux
Dump of assembler code for function __do_global_ctors_aux:
0x080d7b50 <__do_global_ctors_aux+0>: push %ebp
0x080d7b51 <__do_global_ctors_aux+1>: mov %esp,%ebp
0x080d7b53 <__do_global_ctors_aux+3>: push %ebx
0x080d7b54 <__do_global_ctors_aux+4>: sub $0x4,%esp
0x080d7b57 <__do_global_ctors_aux+7>: mov 0x80f75f4,%eax
0x080d7b5c <__do_global_ctors_aux+12>: cmp $0xffffffff,%eax
0x080d7b5f <__do_global_ctors_aux+15>: je 0x80d7b73 <__do_global_ctors_aux+35>
0x080d7b61 <__do_global_ctors_aux+17>: xor %ebx,%ebx
0x080d7b63 <__do_global_ctors_aux+19>: call *%eax
0x080d7b65 <__do_global_ctors_aux+21>: mov 0x80f75f0(%ebx),%eax
0x080d7b6b <__do_global_ctors_aux+27>: sub $0x4,%ebx
0x080d7b6e <__do_global_ctors_aux+30>: cmp $0xffffffff,%eax
0x080d7b71 <__do_global_ctors_aux+33>: jne 0x80d7b63 <__do_global_ctors_aux+19>
0x080d7b73 <__do_global_ctors_aux+35>: add $0x4,%esp
0x080d7b76 <__do_global_ctors_aux+38>: pop %ebx
0x080d7b77 <__do_global_ctors_aux+39>: pop %ebp
0x080d7b78 <__do_global_ctors_aux+40>: ret
0x080d7b79 <__do_global_ctors_aux+41>: nop
0x080d7b7a <__do_global_ctors_aux+42>: nop
0x080d7b7b <__do_global_ctors_aux+43>: nop