From: Anton Blanchard

Based on some profiles we noticed the first vmalloc region was being
continually cast out and replaced.  All modules end up there so it is one
of our hottest segments.  This patch bolts the vmalloc region into the
second segment.

SLB misses on an NFS benchmark were reduced by about 10% with this patch.

Signed-off-by: Anton Blanchard
Signed-off-by: Andrew Morton
---

 25-akpm/arch/ppc64/kernel/head.S        |    2 -
 25-akpm/arch/ppc64/kernel/stab.c        |   45 +++++++++++++++++++++++++++++---
 25-akpm/include/asm-ppc64/mmu_context.h |   10 +++++--
 3 files changed, 50 insertions(+), 7 deletions(-)

diff -puN arch/ppc64/kernel/head.S~ppc64-bolt-first-vmalloc-segment-into-slb arch/ppc64/kernel/head.S
--- 25/arch/ppc64/kernel/head.S~ppc64-bolt-first-vmalloc-segment-into-slb	2004-05-31 17:04:23.220741992 -0700
+++ 25-akpm/arch/ppc64/kernel/head.S	2004-05-31 17:05:44.109445040 -0700
@@ -1130,7 +1130,7 @@ SLB_NUM_ENTRIES = 64
 	addi	r21,r22,1
 	cmpdi	r21,SLB_NUM_ENTRIES
 	blt+	2f
-	li	r21,1			/* dont touch bolted slot 0 */
+	li	r21,2			/* dont touch slot 0 or 1 */
 2:	std	r21,PACASTABRR(r20)
 
 	/* r20 = paca, r22 = entry */
diff -puN arch/ppc64/kernel/stab.c~ppc64-bolt-first-vmalloc-segment-into-slb arch/ppc64/kernel/stab.c
--- 25/arch/ppc64/kernel/stab.c~ppc64-bolt-first-vmalloc-segment-into-slb	2004-05-31 17:04:23.222741688 -0700
+++ 25-akpm/arch/ppc64/kernel/stab.c	2004-05-31 17:05:44.113444432 -0700
@@ -44,6 +44,11 @@ void stab_initialize(unsigned long stab)
 		/* Invalidate the entire SLB & all the ERATS */
 #ifdef CONFIG_PPC_ISERIES
 		asm volatile("isync; slbia; isync":::"memory");
+		/*
+		 * The hypervisor loads SLB entry 0, but we need to increment
+		 * next_round_robin to avoid overwriting it
+		 */
+		get_paca()->xStab_data.next_round_robin = 1;
 #else
 		asm volatile("isync":::"memory");
 		asm volatile("slbmte	%0,%0"::"r" (0) : "memory");
@@ -51,6 +56,14 @@ void stab_initialize(unsigned long stab)
 		make_slbe(esid, vsid, seg0_largepages, 1);
 		asm volatile("isync":::"memory");
 #endif
+
+		/*
+		 * Bolt in the first vmalloc segment. Since modules end
+		 * up there it gets hit very heavily.
+		 */
+		esid = GET_ESID(VMALLOCBASE);
+		vsid = get_kernel_vsid(VMALLOCBASE);
+		make_slbe(esid, vsid, 0, 1);
 	} else {
 		asm volatile("isync; slbia; isync":::"memory");
 		make_ste(stab, esid, vsid);
@@ -317,6 +330,7 @@ static void make_slbe(unsigned long esid
 		unsigned long word0;
 		slb_dword1    data;
 	} vsid_data;
+	struct paca_struct *lpaca = get_paca();
 
 	/*
 	 * We take the next entry, round robin.  Previously we tried
@@ -330,18 +344,25 @@
 	 * for the kernel stack during the first part of exception exit
 	 * which gets invalidated due to a tlbie from another cpu at a
 	 * non recoverable point (after setting srr0/1) - Anton
+	 *
+	 * paca Ksave is always valid (even when on the interrupt stack)
+	 * so we use that.
 	 */
-	castout_entry = get_paca()->xStab_data.next_round_robin;
+	castout_entry = lpaca->xStab_data.next_round_robin;
 	do {
 		entry = castout_entry;
 		castout_entry++;
+		/*
+		 * We bolt in the first kernel segment and the first
+		 * vmalloc segment.
+		 */
 		if (castout_entry >= naca->slb_size)
-			castout_entry = 1;
+			castout_entry = 2;
 		asm volatile("slbmfee  %0,%1" : "=r" (esid_data) : "r" (entry));
 	} while (esid_data.data.v &&
-		 esid_data.data.esid == GET_ESID(__get_SP()));
+		 esid_data.data.esid == GET_ESID(lpaca->xKsave));
 
-	get_paca()->xStab_data.next_round_robin = castout_entry;
+	lpaca->xStab_data.next_round_robin = castout_entry;
 
 	/* slbie not needed as the previous mapping is still valid.
 	 */
@@ -422,6 +443,8 @@ int slb_allocate(unsigned long ea)
 	}
 
 	esid = GET_ESID(ea);
+
+	BUG_ON((esid << SID_SHIFT) == VMALLOCBASE);
 	__slb_allocate(esid, vsid, context);
 
 	return 0;
@@ -478,7 +501,9 @@ void flush_slb(struct task_struct *tsk,
 		unsigned long word0;
 		slb_dword0    data;
 	} esid_data;
+	unsigned long esid, vsid;
 
+	WARN_ON(!irqs_disabled());
 	if (offset <= NR_STAB_CACHE_ENTRIES) {
 		int i;
 
@@ -486,11 +511,23 @@ void flush_slb(struct task_struct *tsk,
 		for (i = 0; i < offset; i++) {
 			esid_data.word0 = 0;
 			esid_data.data.esid = __get_cpu_var(stab_cache[i]);
+			BUG_ON(esid_data.data.esid == GET_ESID(VMALLOCBASE));
 			asm volatile("slbie %0" : : "r" (esid_data));
 		}
 		asm volatile("isync" : : : "memory");
 	} else {
 		asm volatile("isync; slbia; isync" : : : "memory");
+
+		/*
+		 * Bolt in the first vmalloc segment. Since modules end
+		 * up there it gets hit very heavily. We must not touch
+		 * the vmalloc region between the slbia and here, thats
+		 * why we require interrupts off.
+		 */
+		esid = GET_ESID(VMALLOCBASE);
+		vsid = get_kernel_vsid(VMALLOCBASE);
+		get_paca()->xStab_data.next_round_robin = 1;
+		make_slbe(esid, vsid, 0, 1);
 	}
 
 	/* Workaround POWER5 < DD2.1 issue */
diff -puN include/asm-ppc64/mmu_context.h~ppc64-bolt-first-vmalloc-segment-into-slb include/asm-ppc64/mmu_context.h
--- 25/include/asm-ppc64/mmu_context.h~ppc64-bolt-first-vmalloc-segment-into-slb	2004-05-31 17:04:23.223741536 -0700
+++ 25-akpm/include/asm-ppc64/mmu_context.h	2004-05-31 17:04:23.231740320 -0700
@@ -172,8 +172,14 @@ static inline void switch_mm(struct mm_s
  * After we have set current->mm to a new value, this activates
  * the context for the new mm so we see the new mappings.
  */
-#define activate_mm(active_mm, mm) \
-	switch_mm(active_mm, mm, current);
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	switch_mm(prev, next, current);
+	local_irq_restore(flags);
+}
 
 #define VSID_RANDOMIZER	42470972311UL
 #define VSID_MASK	0xfffffffffUL
_
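
For reference, below is a minimal user-space C sketch (not part of the patch) of the
round-robin castout policy as changed in make_slbe() above: slots 0 and 1 are treated
as bolted and are never reused, and a slot that still maps the kernel stack segment is
skipped.  The names slb_slot, NUM_BOLTED, pick_castout and the example esid value are
illustrative stand-ins for the real paca and SLB structures.

#include <stdio.h>

#define SLB_NUM_ENTRIES	64
#define NUM_BOLTED	2	/* slot 0 = kernel segment, slot 1 = first vmalloc segment */

struct slb_slot {
	int valid;
	unsigned long esid;
};

static struct slb_slot slb[SLB_NUM_ENTRIES];
static unsigned long next_round_robin = NUM_BOLTED;

/*
 * Pick a castout victim round robin, never touching the bolted slots
 * and skipping any slot that still maps the kernel stack segment.
 */
static unsigned long pick_castout(unsigned long kstack_esid)
{
	unsigned long entry, castout = next_round_robin;

	do {
		entry = castout;
		if (++castout >= SLB_NUM_ENTRIES)
			castout = NUM_BOLTED;	/* wrap past the bolted slots */
	} while (slb[entry].valid && slb[entry].esid == kstack_esid);

	next_round_robin = castout;
	return entry;
}

int main(void)
{
	/* Pretend slot 2 currently maps the kernel stack segment. */
	slb[2].valid = 1;
	slb[2].esid = 0xc00;

	printf("victim: %lu\n", pick_castout(0xc00));	/* skips slot 2, picks 3 */
	printf("victim: %lu\n", pick_castout(0xc00));	/* picks 4 */
	return 0;
}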