# Demo 1: inside a single section, `.previous` switches back to the
# subsection that was active before the most recent `.subsection`.
.section A
.subsection 1
# Now in section A subsection 1
.word 0x1234
.subsection 2
# Now in section A subsection 2
.word 0x5678
.previous
# Back in section A subsection 1: in the disassembly of A the bytes bc 9a
# (0x9abc) follow 34 12 (0x1234) and come before 78 56 (0x5678).
.word 0x9abc
.section .text
.global _start
_start:
nop
# eax = 1, ebx = 0, then int $0x80 -- presumably the i386 Linux exit(0)
# system call, so the program just terminates.
movl $1, %eax
movl $0, %ebx
int $0x80
objdump -D previous
previous: file format elf32-i386
Disassembly of section .text:
00000000 <_start>:
0: 90 nop
1: b8 01 00 00 00 mov $0x1,%eax
6: bb 00 00 00 00 mov $0x0,%ebx
b: cd 80 int $0x80
Disassembly of section A:
00000000 <A>:
0: 34 12 xor $0x12,%al
2: bc .byte 0xbc
3: 9a .byte 0x9a
4: 78 56 js 5c <_start+0x5c>
objdump -h previous
previous: file format elf32-i386
Sections:
Idx Name Size VMA LMA File off Algn
0 .text 0000000d 00000000 00000000 00000034 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
1 .data 00000000 00000000 00000000 00000044 2**2
CONTENTS, ALLOC, LOAD, DATA
2 .bss 00000000 00000000 00000000 00000044 2**2
ALLOC
3 A 00000006 00000000 00000000 00000044 2**0
CONTENTS, READONLY
# Demo 2: `.previous` only swaps with the most recently left
# section/subsection pair. After moving from A to B and then between
# subsections of B, it returns to B subsection 0, not to section A.
.section A
.subsection 1
# Now in section A subsection 1
.word 0x1234
.section B
.subsection 0
# Now in section B subsection 0
.word 0x5678
.subsection 1
# Now in section B subsection 1
.word 0x9abc
.previous
# Now in section B subsection 0
# (the disassembly of B reads 78 56 f0 de bc 9a: 0xdef0 sits directly after
# 0x5678 and ahead of subsection 1's 0x9abc; section A keeps only 34 12)
.word 0xdef0
.section .text
.global _start
_start:
nop
# eax = 1, ebx = 0, then int $0x80 -- presumably the i386 Linux exit(0)
# system call, so the program just terminates.
movl $1, %eax
movl $0, %ebx
int $0x80
> objdump -D previous4
previous4: file format elf32-i386
Disassembly of section .text:
00000000 <_start>:
0: 90 nop
1: b8 01 00 00 00 mov $0x1,%eax
6: bb 00 00 00 00 mov $0x0,%ebx
b: cd 80 int $0x80
Disassembly of section A:
00000000 <A>:
0: 34 12 xor $0x12,%al
Disassembly of section B:
00000000 <B>:
0: 78 56 js 58 <_start+0x58>
2: f0 lock
3: de .byte 0xde
4: bc .byte 0xbc
5: 9a .byte 0x9a
objdump -h previous4
previous4: file format elf32-i386
Sections:
Idx Name Size VMA LMA File off Algn
0 .text 0000000d 00000000 00000000 00000034 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
1 .data 00000000 00000000 00000000 00000044 2**2
CONTENTS, ALLOC, LOAD, DATA
2 .bss 00000000 00000000 00000000 00000044 2**2
ALLOC
3 A 00000002 00000000 00000000 00000044 2**0
CONTENTS, READONLY
4 B 00000006 00000000 00000000 00000046 2**0
CONTENTS, READONLY
# Demo 3: `.previous` issued after `.section .text` leaves .text again and
# returns to the section/subsection that was active before it.
.section A
.subsection 1
# Now in section A subsection 1
.word 0x1234
.subsection 2
# Now in section A subsection 2
.word 0x5678
.section .text
.global _start
_start:
# This nop is the only byte emitted into .text (objdump -h reports size 1).
nop
.previous
# Back in section A subsection 2: everything below is assembled into
# section A right after 78 56 (0x5678), which is why the disassembly of A
# shows 90 90 followed by the three instructions and A has size 0x17.
.word 0x9090
movl $3, %ecx
movl $1, %eax
movl $0, %ebx
int $0x80
objdump -D previous3
previous3: file format elf32-i386
Disassembly of section .text:
00000000 <_start>:
0: 90 nop
Disassembly of section A:
00000000 <A>:
0: 34 12 xor $0x12,%al
2: 78 56 js 5a <_start+0x5a>
4: 90 nop
5: 90 nop
6: b9 03 00 00 00 mov $0x3,%ecx
b: b8 01 00 00 00 mov $0x1,%eax
10: bb 00 00 00 00 mov $0x0,%ebx
15: cd 80 int $0x80
$ objdump -h previous3
previous3: file format elf32-i386
Sections:
Idx Name Size VMA LMA File off Algn
0 .text 00000001 00000000 00000000 00000034 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
1 .data 00000000 00000000 00000000 00000038 2**2
CONTENTS, ALLOC, LOAD, DATA
2 .bss 00000000 00000000 00000000 00000038 2**2
ALLOC
3 A 00000017 00000000 00000000 00000038 2**0
/*
 * Inline-assembly template used to acquire a spinlock (%0 is the lock byte).
 *
 * Fast path, assembled in place:
 *   1: lock ; decb %0   -- atomically decrement the lock byte
 *      js 2f            -- result negative: lock was already held
 *
 * Slow path, assembled out of line into section .text.lock so the rarely
 * executed spin loop does not sit between the hot instructions:
 *   2: cmpb $0,%0       -- poll the lock byte
 *      rep;nop          -- spin-wait hint (disassembles as `pause`)
 *      jle 2b           -- still held: keep polling
 *      jmp 1b           -- looks free: retry the atomic decrement
 *
 * The trailing `.previous` switches back to the section that was active
 * before `.section .text.lock`, so the code following the macro continues
 * in the caller's section.
 */
#define spin_lock_string \
	"\n1:\t" \
	"lock ; decb %0\n\t" \
	"js 2f\n" \
	".section .text.lock,\"ax\"\n" \
	"2:\t" \
	"cmpb $0,%0\n\t" \
	"rep;nop\n\t" \
	"jle 2b\n\t" \
	"jmp 1b\n" \
	".previous"
[/table] |
我们将这段代码模拟成纯汇编以方便阅读分析: |
# Stand-alone model of spin_lock_string (AT&T syntax, i386), with %al
# standing in for the lock byte. The fast path stays in .text; the spin
# loop is assembled out of line into .text.lock, and `.previous` then
# switches back to .text for the remaining instructions.
.section .text
.global _start
_start:
        movb    $0, %al                 # lock byte stand-in
1:
        decb    %al                     # try to take the lock
        js      2f                      # negative result: go spin out of line
.section .text.lock,"ax"
2:
        cmpb    $0, %al                 # poll the lock byte
        rep;nop                         # spin-wait hint (disassembles as pause)
        jle     2b                      # still held: keep polling
        jmp     1b                      # looks free: retry the decrement
.previous
        # Back in .text: these bytes follow the js in the .text disassembly.
        movl    $0, %ebx
        int     $0x80
[root@SLinux assembly]# objdump -D previous.o
previous.o: file format elf32-i386
Disassembly of section .text:
00000000 <_start>:
0: b0 00 mov $0x0,%al
2: fe c8 dec %al
4: 0f 88 fc ff ff ff js 6 <_start+0x6>
a: bb 00 00 00 00 mov $0x0,%ebx
f: cd 80 int $0x80
Disassembly of section .text.lock:
00000000 <.text.lock>:
0: 3c 00 cmp $0x0,%al
2: f3 90 pause
4: 7e fa jle 0 <.text.lock>
6: e9 fe ff ff ff jmp 9 <.text.lock+0x9>
之所以定义成一个单独的区,原因是在大多数情况下,spin lock是能获取成功的,从.section 到.previous的这一段代码并不经常被调用,如果把它跟别的常用指令混在一起,会浪费指令缓存的空间。从这里也可以看出,linux内核的实现,要时时注意效率。
rep;nop 是一条很有趣的指令:),乍一看,这只是一条空指令,但实际上这条指令可以降低CPU的运行频率,减少电的消耗量,但最重要的是,提高了整体的效率。因为这段指令执行太快的话,会生成很多读取内存变量的指令,另外的一个CPU可能也要写这个内存变量,现在的CPU经常需要重新排序指令来提高效率,如果读指令太多的话,为了保证指令之间的依赖性,CPU会牺牲流水线执行(pipeline)所带来的好处。从pentium 4以后,intel引进了一条pause指令(即上面反汇编中 rep;nop 对应的 f3 90),专门用于spin lock这种情况,据intel的文档说,加上pause可以提高25倍的效率!nop指令前加rep前缀的意思是:Spin-Wait and Idle Loops 。
欢迎光临 曲径通幽论坛 (http://www.groad.net/bbs/) | Powered by Discuz! X3.2 |