/*
 * A global object deliberately named `main`. T's constructor stores nine
 * constants into nine consecutive int members; the comment beside each
 * initializer is the movl the compiler emits for that store.
 *
 * Laid out in memory as little-endian 32-bit words, the nine values form
 * this x86 (IA32) instruction sequence:
 *
 *   68 72 76 0a 00   push $0x000a7672   ; "rv\n\0"
 *   68 2d 6a 73 65   push $0x65736a2d   ; "-jse"
 *   89 e1            mov  %esp,%ecx     ; ecx -> "-jserv\n" on the stack
 *   b8 04 00 00 00   mov  $4,%eax       ; Linux i386 syscall 4 = write
 *   bb 01 00 00 00   mov  $1,%ebx       ; fd 1 (stdout)
 *   ba 07 00 00 00   mov  $7,%edx       ; length 7
 *   cd 80            int  $0x80
 *   83 c4 08         add  $8,%esp       ; discard the two pushed words
 *   c3               ret
 *   90 90 90         nop padding
 *
 * Member order and every constant are load-bearing: changing either one
 * changes the byte sequence above.
 * NOTE(review): this relies on 4-byte int, no padding between members, the
 * startup code resolving the symbol `main` to this object, and the object's
 * memory being executable -- confirm on the target toolchain/kernel.
 */
struct T {
    int _, __, ___, ____, _____, ______, _______, ________, _________;
    T() :
        _(0x0a767268), /* movl $0xa767268, (%eax) */
        __(0x6a2d6800), /* movl $0x6a2d6800, 0x4(%eax) */
        ___(0xe1896573), /* movl $0xe1896573, 0x8(%eax) */
        ____(0x4b8), /* movl $0x4b8, 0xc(%eax) */
        _____(0x1bb00), /* movl $0x1bb00, 0x10(%eax) */
        ______(0x07ba0000), /* movl $0x7ba0000, 0x14(%eax) */
        _______(0xcd000000), /* movl $0xcd000000,0x18(%eax) */
        ________(0x08c48380), /* movl $0x8c48380, 0x1c(%eax) */
        _________(0x909090c3) {} /* movl $0x909090c3,0x20(%eax) */
} main;編譯方式為:
gcc -g -Wall -x c++ -o hello hello.c
似乎沒有奇特之處,頂多就是 main 宣告的方式,與看似嚇人的註解 (對應的 x86 機械碼),一旦程式碼執行後,應該不會有任何輸出。不過,若在 x86/IA32 Linux 上執行,卻會有特別的反應,跑起來看看:
$ ./hello
-jserv
出現了小弟回信時常用的信尾簽名。但是,這到底怎麼一回事呢?用 objdump 來觀察:
objdump -S -C hello | grep -A 36 "<T::T()>:"
08048384 <T::T()>:
 8048384: 55 push %ebp
 8048385: 89 e5 mov %esp,%ebp
 8048387: 8b 45 08 mov 0x8(%ebp),%eax
 804838a: c7 00 68 72 76 0a movl $0xa767268,(%eax)
 8048390: 8b 45 08 mov 0x8(%ebp),%eax
 8048393: c7 40 04 00 68 2d 6a movl $0x6a2d6800,0x4(%eax)
 804839a: 8b 55 08 mov 0x8(%ebp),%edx
 804839d: b8 73 65 89 e1 mov $0xe1896573,%eax
 80483a2: 89 42 08 mov %eax,0x8(%edx)
 80483a5: 8b 45 08 mov 0x8(%ebp),%eax
 80483a8: c7 40 0c b8 04 00 00 movl $0x4b8,0xc(%eax)
 80483af: 8b 45 08 mov 0x8(%ebp),%eax
 80483b2: c7 40 10 00 bb 01 00 movl $0x1bb00,0x10(%eax)
 80483b9: 8b 45 08 mov 0x8(%ebp),%eax
 80483bc: c7 40 14 00 00 ba 07 movl $0x7ba0000,0x14(%eax)
 80483c3: 8b 55 08 mov 0x8(%ebp),%edx
 80483c6: b8 00 00 00 cd mov $0xcd000000,%eax
 80483cb: 89 42 18 mov %eax,0x18(%edx)
 80483ce: 8b 45 08 mov 0x8(%ebp),%eax
 80483d1: c7 40 1c 80 83 c4 08 movl $0x8c48380,0x1c(%eax)
 80483d8: 8b 55 08 mov 0x8(%ebp),%edx
 80483db: b8 c3 90 90 90 mov $0x909090c3,%eax
 80483e0: 89 42 20 mov %eax,0x20(%edx)
 80483e3: 5d pop %ebp
 80483e4: c3 ret
 80483e5: 90 nop
 80483e6: 90 nop
 80483e7: 90 nop
 80483e8: 90 nop
 80483e9: 90 nop
 80483ea: 90 nop
 80483eb: 90 nop
 80483ec: 90 nop
 80483ed: 90 nop
 80483ee: 90 nop
 80483ef: 90 nop
在解說之前,咱們看看剛剛的輸出:
$ echo "-jserv" | hexdump
0000000 6a2d 6573 7672 000a
所以上述的 C++ 程式碼就是利用 x86 stack 的特性,反序寫入 machine code 的 HEX 表示到變數 (member data) 中,後面預留的 0x90 即 x86 NOP 指令 (No Operation) 是用以 padding。C++ constructor 在 GCC 的布局下,透過 __static_initialization_and_destruction_0 這個 routine 被呼叫,以完成必要的初始化動作。由於這個全域物件的名稱就是 main,初始化完成後,啟動程式碼會把它所在的位址當作 main 函式來呼叫,於是這些 member data 就被當成指令執行。扣除 "6a2d 6573 7672 000a" 這個 HEX 序列後,可以發現,剩下的 machine code HEX 其實就是 OP code,而且巧妙地連續排列著。
Orz...........果然徹底!!!
由 aguai 發表於 September 30, 2006 12:11 AM