From: Fenghua Yu The patch adds cpu_relax() in the body of some spin loops for 2.6.9. The patch also removes redundant barrier() code after cpu_relax() on ia32. In the PAUSE instruction section, the IA32 SDM states "it is recommended that a PAUSE instruction be placed in all spin-wait loops". And the x86_64 SDM says that the PAUSE instruction behaves the same in IA-32e mode as in legacy mode. This patch is against 2.6.9 (kernel.org). It was tested on ia32 and x86_64. Signed-off-by: Andrew Morton --- 25-akpm/arch/i386/kernel/cpu/mtrr/main.c | 24 +++++++++--------------- 25-akpm/arch/i386/kernel/smp.c | 4 ++-- 25-akpm/arch/x86_64/kernel/smp.c | 4 ++-- 25-akpm/include/asm-i386/apic.h | 3 ++- 4 files changed, 15 insertions(+), 20 deletions(-) diff -puN arch/i386/kernel/cpu/mtrr/main.c~add-cpu_relax-in-spin-loops-clean-up-barrier-for-269 arch/i386/kernel/cpu/mtrr/main.c --- 25/arch/i386/kernel/cpu/mtrr/main.c~add-cpu_relax-in-spin-loops-clean-up-barrier-for-269 2004-11-09 20:39:52.881225136 -0800 +++ 25-akpm/arch/i386/kernel/cpu/mtrr/main.c 2004-11-09 20:39:52.892223464 -0800 @@ -149,10 +149,8 @@ static void ipi_handler(void *info) local_irq_save(flags); atomic_dec(&data->count); - while(!atomic_read(&data->gate)) { + while(!atomic_read(&data->gate)) cpu_relax(); - barrier(); - } /* The master has cleared me to execute */ if (data->smp_reg != ~0U) @@ -162,10 +160,9 @@ static void ipi_handler(void *info) mtrr_if->set_all(); atomic_dec(&data->count); - while(atomic_read(&data->gate)) { + while(atomic_read(&data->gate)) cpu_relax(); - barrier(); - } + atomic_dec(&data->count); local_irq_restore(flags); } @@ -230,10 +227,9 @@ static void set_mtrr(unsigned int reg, u local_irq_save(flags); - while(atomic_read(&data.count)) { + while(atomic_read(&data.count)) cpu_relax(); - barrier(); - } + /* ok, reset count and toggle gate */ atomic_set(&data.count, num_booting_cpus() - 1); atomic_set(&data.gate,1); @@ -250,10 +246,9 @@ static void set_mtrr(unsigned int reg, u 
mtrr_if->set(reg,base,size,type); /* wait for the others */ - while(atomic_read(&data.count)) { + while(atomic_read(&data.count)) cpu_relax(); - barrier(); - } + atomic_set(&data.count, num_booting_cpus() - 1); atomic_set(&data.gate,0); @@ -261,10 +256,9 @@ static void set_mtrr(unsigned int reg, u * Wait here for everyone to have seen the gate change * So we're the last ones to touch 'data' */ - while(atomic_read(&data.count)) { + while(atomic_read(&data.count)) cpu_relax(); - barrier(); - } + local_irq_restore(flags); } diff -puN arch/i386/kernel/smp.c~add-cpu_relax-in-spin-loops-clean-up-barrier-for-269 arch/i386/kernel/smp.c --- 25/arch/i386/kernel/smp.c~add-cpu_relax-in-spin-loops-clean-up-barrier-for-269 2004-11-09 20:39:52.883224832 -0800 +++ 25-akpm/arch/i386/kernel/smp.c 2004-11-09 20:39:52.891223616 -0800 @@ -538,11 +538,11 @@ int smp_call_function (void (*func) (voi /* Wait for response */ while (atomic_read(&data.started) != cpus) - barrier(); + cpu_relax(); if (wait) while (atomic_read(&data.finished) != cpus) - barrier(); + cpu_relax(); spin_unlock(&call_lock); return 0; diff -puN arch/x86_64/kernel/smp.c~add-cpu_relax-in-spin-loops-clean-up-barrier-for-269 arch/x86_64/kernel/smp.c --- 25/arch/x86_64/kernel/smp.c~add-cpu_relax-in-spin-loops-clean-up-barrier-for-269 2004-11-09 20:39:52.884224680 -0800 +++ 25-akpm/arch/x86_64/kernel/smp.c 2004-11-09 20:40:22.224764240 -0800 @@ -310,10 +310,10 @@ static void __smp_call_function (void (* /* Wait for response */ while (atomic_read(&data.started) != cpus) - barrier(); + cpu_relax(); while (atomic_read(&data.finished) != cpus) - barrier(); + cpu_relax(); } /* diff -puN include/asm-i386/apic.h~add-cpu_relax-in-spin-loops-clean-up-barrier-for-269 include/asm-i386/apic.h --- 25/include/asm-i386/apic.h~add-cpu_relax-in-spin-loops-clean-up-barrier-for-269 2004-11-09 20:39:52.886224376 -0800 +++ 25-akpm/include/asm-i386/apic.h 2004-11-09 20:39:52.891223616 -0800 @@ -53,7 +53,8 @@ static __inline unsigned long 
apic_read( static __inline__ void apic_wait_icr_idle(void) { - do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ); + while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY ) + cpu_relax(); } int get_physical_broadcast(void); _