From: Andi Kleen Copy ioperm bitmaps more efficiently at context switch time. Only copy up to the highest used port. Originally from Ingo Molnar Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton --- 25-akpm/arch/x86_64/kernel/ioport.c | 34 +++++++++++++++++++------ 25-akpm/arch/x86_64/kernel/process.c | 44 ++++++++++++++++++--------------- 25-akpm/arch/x86_64/kernel/setup64.c | 8 +++--- 25-akpm/include/asm-x86_64/processor.h | 1 4 files changed, 57 insertions(+), 30 deletions(-) diff -puN arch/x86_64/kernel/ioport.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch arch/x86_64/kernel/ioport.c --- 25/arch/x86_64/kernel/ioport.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch 2004-09-15 02:33:21.557569592 -0700 +++ 25-akpm/arch/x86_64/kernel/ioport.c 2004-09-15 02:33:21.567568072 -0700 @@ -28,12 +28,12 @@ static void set_bitmap(unsigned long *bi clear_bit(i, bitmap); } - /* * this changes the io permissions bitmap in the current task. */ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) { + unsigned int i, max_long, bytes, bytes_updated; struct thread_struct * t = &current->thread; struct tss_struct * tss; unsigned long *bitmap; @@ -59,16 +59,34 @@ asmlinkage long sys_ioperm(unsigned long /* * do it in the per-thread copy and in the TSS ... + * + * Disable preemption via get_cpu() - we must not switch away + * because the ->io_bitmap_max value must match the bitmap + * contents: */ - set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); tss = &per_cpu(init_tss, get_cpu()); - if (tss->io_bitmap_base == IO_BITMAP_OFFSET) { /* already active? */ - set_bitmap(tss->io_bitmap, from, num, !turn_on); - } else { - memcpy(tss->io_bitmap, t->io_bitmap_ptr, IO_BITMAP_BYTES); - tss->io_bitmap_base = IO_BITMAP_OFFSET; /* Activate it in the TSS */ - } + + set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); + + /* + * Search for a (possibly new) maximum. 
This is simple and stupid, + * to keep it obviously correct: + */ + max_long = 0; + for (i = 0; i < IO_BITMAP_LONGS; i++) + if (t->io_bitmap_ptr[i] != ~0UL) + max_long = i; + + bytes = (max_long + 1) * sizeof(long); + bytes_updated = max(bytes, t->io_bitmap_max); + + t->io_bitmap_max = bytes; + + /* Update the TSS: */ + memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + put_cpu(); + return 0; } diff -puN arch/x86_64/kernel/process.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch arch/x86_64/kernel/process.c --- 25/arch/x86_64/kernel/process.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch 2004-09-15 02:33:21.558569440 -0700 +++ 25-akpm/arch/x86_64/kernel/process.c 2004-09-15 02:33:21.566568224 -0700 @@ -250,11 +250,17 @@ void show_regs(struct pt_regs *regs) void exit_thread(void) { struct task_struct *me = current; + struct thread_struct *t = &me->thread; if (me->thread.io_bitmap_ptr) { - struct tss_struct *tss = &per_cpu(init_tss,get_cpu()); - kfree(me->thread.io_bitmap_ptr); - me->thread.io_bitmap_ptr = NULL; - tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; + struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); + + kfree(t->io_bitmap_ptr); + t->io_bitmap_ptr = NULL; + /* + * Careful, clear this in the TSS too: + */ + memset(tss->io_bitmap, 0xff, t->io_bitmap_max); + t->io_bitmap_max = 0; put_cpu(); } } @@ -362,8 +368,10 @@ int copy_thread(int nr, unsigned long cl if (unlikely(me->thread.io_bitmap_ptr != NULL)) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - if (!p->thread.io_bitmap_ptr) + if (!p->thread.io_bitmap_ptr) { + p->thread.io_bitmap_max = 0; return -ENOMEM; + } memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); } @@ -382,8 +390,10 @@ int copy_thread(int nr, unsigned long cl } err = 0; out: - if (err && p->thread.io_bitmap_ptr) + if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); + p->thread.io_bitmap_max = 0; + } return err; } @@ -490,22 +500,18 @@ 
struct task_struct *__switch_to(struct t * Handle the IO bitmap */ if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) { - if (next->io_bitmap_ptr) { + if (next->io_bitmap_ptr) /* - * 2 cachelines copy ... not good, but not that - * bad either. Anyone got something better? - * This only affects processes which use ioperm(). - */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, IO_BITMAP_BYTES); - tss->io_bitmap_base = IO_BITMAP_OFFSET; - } else { + * Copy the relevant range of the IO bitmap. + * Normally this is 128 bytes or less: + */ + memcpy(tss->io_bitmap, next->io_bitmap_ptr, + max(prev->io_bitmap_max, next->io_bitmap_max)); + else { /* - * a bitmap offset pointing outside of the TSS limit - * causes a nicely controllable SIGSEGV if a process - * tries to use a port IO instruction. The first - * sys_ioperm() call sets up the bitmap properly. + * Clear any possible leftover bits: */ - tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; + memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } } diff -puN arch/x86_64/kernel/setup64.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch arch/x86_64/kernel/setup64.c --- 25/arch/x86_64/kernel/setup64.c~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch 2004-09-15 02:33:21.560569136 -0700 +++ 25-akpm/arch/x86_64/kernel/setup64.c 2004-09-15 02:33:21.565568376 -0700 @@ -241,6 +241,7 @@ void __init cpu_init (void) unsigned long v; char *estacks = NULL; struct task_struct *me; + int i; /* CPU 0 is initialised in head64.c */ if (cpu != 0) { @@ -304,12 +305,13 @@ void __init cpu_init (void) t->ist[v] = (unsigned long)estacks; } - t->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; + t->io_bitmap_base = offsetof(struct tss_struct, io_bitmap); /* - * This is required because the CPU will access up to + * <= is required because the CPU will access up to * 8 bits beyond the end of the IO permission bitmap. 
*/ - t->io_bitmap[IO_BITMAP_LONGS] = ~0UL; + for (i = 0; i <= IO_BITMAP_LONGS; i++) + t->io_bitmap[i] = ~0UL; atomic_inc(&init_mm.mm_count); me->active_mm = &init_mm; diff -puN include/asm-x86_64/processor.h~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch include/asm-x86_64/processor.h --- 25/include/asm-x86_64/processor.h~x86-64-copy-ioperm-bitmaps-more-efficiently-at-context-switch 2004-09-15 02:33:21.561568984 -0700 +++ 25-akpm/include/asm-x86_64/processor.h 2004-09-15 02:33:21.567568072 -0700 @@ -254,6 +254,7 @@ struct thread_struct { switch faster for a limited number of ioperm using tasks. -AK */ int ioperm; unsigned long *io_bitmap_ptr; + unsigned io_bitmap_max; /* cached TLS descriptors. */ u64 tls_array[GDT_ENTRY_TLS_ENTRIES]; } __attribute__((aligned(16))); _