#include <stdio.h>
/*
 * nops(times): load `times` into the count register, execute one
 * "rep;nop" (the encoding of PAUSE), and copy the count register back
 * into `result`.  The instruction leaves the register alone, so `result`
 * ends up equal to `times`.
 *
 * A variable named `result` must be in scope at the call site.
 */
#define nops(times) \
    do { \
        __asm__ __volatile__("rep;nop" \
                             : "=c"(result) \
                             : "c"(times)); \
    } while (0)
/*
 * movstr(src, des): copy `times` bytes from `src` to `des` using
 * "cld; rep movsb" and store the final value of the count register
 * (0 after a complete copy) in `result`.
 *
 * Variables named `times` and `result` must be in scope at the call site.
 *
 * The string instruction advances both pointer registers and writes to
 * memory, so the pointers are passed through local temporaries bound as
 * read-write operands, and a "memory" clobber tells the compiler that
 * the destination buffer has changed.
 */
#define movstr(src, des) \
    do { \
        const void *movstr_src_ = (src); \
        void *movstr_des_ = (des); \
        __asm__ __volatile__("cld\n\t" \
                             "rep;movsb" \
                             : "=c"(result), \
                               "+S"(movstr_src_), \
                               "+D"(movstr_des_) \
                             : "c"(times) \
                             : "memory"); \
    } while (0)
/*
 * Demo driver for the movstr() and nops() macros.
 *
 * Copies five bytes from src to des with movstr(), prints the count
 * register value left in `result` and the copied bytes, then runs nops()
 * and prints `result` again.
 */
int main(void)
{
    unsigned int times, result;
    char src[5] = {'a', 'b', 'c', 'd', 'e'};
    char des[5];
    unsigned int i;

    times = 5;
    result = 5;
    movstr(src, des);
    /* result is unsigned, so it is printed with %u. */
    printf("result = %u\n", result);

    /* Print each copied byte; %c needs an element, not the array itself. */
    for (i = 0; i < times; i++) {
        printf("%c ", des[i]);
    }
    printf("\n");

    times = 5;
    nops(times);
    printf("result = %u\n", result);

    return 0;
}
[beyes@SLinux C]$ ./rep
result = 0
a b c d e
result = 5
PAUSE—Spin Loop Hint
Description
Improves the performance of spin-wait loops. When executing a “spin-wait loop,” a Pentium 4
processor suffers a severe performance penalty when exiting the loop because it detects a
possible memory order violation. The PAUSE instruction provides a hint to the processor that
the code sequence is a spin-wait loop. The processor uses this hint to bypass the memory order
violation in most situations, which greatly improves processor performance. For this reason, it
is recommended that a PAUSE instruction be placed in all spin-wait loops.
提升 spin-wait loop(自旋等待循环)的性能。在执行一个 spin-wait loop 时,Pentium 4 处理器会在退出循环时
遭受严重的性能损失,因为它检测到了可能的内存顺序冲突(memory order violation)。PAUSE 指令会向处理器提供一种提示:告诉处理器所执行的代码序列是一个 spin-wait loop。
处理器会根据这个提示在大多数情况下避开内存顺序冲突,也就是说对 spin-wait loop 不做缓存,不做指令
重新排序等动作。这样就可以大大地提高处理器的性能。正是基于此,才建议在所有 spin-wait loop 中使用 PAUSE 指令。
An additional function of the PAUSE instruction is to reduce the power consumed by a Pentium
4 processor while executing a spin loop. The Pentium 4 processor can execute a spin-wait loop
extremely quickly, causing the processor to consume a lot of power while it waits for the
resource it is spinning on to become available. Inserting a pause instruction in a spin-wait loop
greatly reduces the processor’s power consumption.
PAUSE指令的另外一个功能是让 Pentium4 处理器在执行 spin-wait loop 时可以减少电源的消耗。
Pentium 4 处理器能以极快的速度执行 spin-wait loop,因此在等待所自旋的资源变为可用期间会消耗很多电能;
在 spin-wait loop 中插入 PAUSE 指令则可以极大地减少处理器的电能消耗。
This instruction was introduced in the Pentium 4 processors, but is backward compatible with
all IA-32 processors. In earlier IA-32 processors, the PAUSE instruction operates like a NOP
instruction.
PAUSE 指令在 Pentium 4 处理器中引入,但它向后兼容所有 IA-32 处理器。在早先的 IA-32 处理器里,PAUSE 指令实际上就相当于 NOP 指令。
The Pentium 4 processor implements the PAUSE instruction as a pre-defined delay. The delay
is finite and can be zero for some processors. This instruction does not change the architectural
state of the processor (that is, it performs essentially a delaying no-op operation).
Pentium 4 处理器以一种预定义延迟(pre-defined delay)的方式来实现 PAUSE 指令。这种延迟是有限的,并且在一些处理器上可以是零延迟。该指令不会改变处理器的体系结构状态(也就是说,它本质上执行的是一个带延迟的空操作)。
/*
 * rep_nop - issue one "rep;nop" instruction, i.e. PAUSE.
 *
 * Meant to be dropped into busy-wait loops.  The "memory" clobber keeps
 * the compiler from caching memory values across the call.
 */
static inline void rep_nop(void)
{
	__asm__ __volatile__("rep;nop"
			     : /* no outputs */
			     : /* no inputs */
			     : "memory");
}
/*
 * delay_tsc - busy-wait until `loops` timestamp-counter ticks have passed.
 *
 * Reads a starting counter value with rdtscl(), then keeps issuing
 * rep_nop() and re-reading the counter until the difference from the
 * start is at least `loops`.  Preemption is disabled for the whole wait
 * because, per the original note, TSCs are per-CPU.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long start, current;

	preempt_disable();
	rdtscl(start);

	for (;;) {
		rep_nop();
		rdtscl(current);
		/* Stop once the requested number of ticks has elapsed. */
		if ((current - start) >= loops)
			break;
	}

	preempt_enable();
}
欢迎光临 曲径通幽论坛 (http://www.groad.net/bbs/) | Powered by Discuz! X3.2 |