diff -Nru a/fs/xfs/Makefile b/fs/xfs/Makefile --- a/fs/xfs/Makefile Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/Makefile Thu Jan 29 23:15:58 2004 @@ -30,7 +30,7 @@ # http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ # -EXTRA_CFLAGS += -Ifs/xfs -funsigned-char +EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux -funsigned-char ifeq ($(CONFIG_XFS_DEBUG),y) EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG -DXFSDEBUG @@ -83,6 +83,7 @@ xfs_alloc_btree.o \ xfs_attr.o \ xfs_attr_leaf.o \ + xfs_behavior.o \ xfs_bit.o \ xfs_bmap.o \ xfs_bmap_btree.o \ @@ -106,6 +107,7 @@ xfs_inode.o \ xfs_inode_item.o \ xfs_iocore.o \ + xfs_iomap.o \ xfs_itable.o \ xfs_dfrag.o \ xfs_log.o \ @@ -126,18 +128,15 @@ xfs-$(CONFIG_XFS_TRACE) += xfs_dir2_trace.o -# Objects in pagebuf/ -xfs-y += pagebuf/page_buf.o - # Objects in linux/ xfs-y += $(addprefix linux/, \ + mrlock.o \ xfs_aops.o \ - xfs_behavior.o \ + xfs_buf.o \ xfs_file.o \ xfs_fs_subr.o \ xfs_globals.o \ xfs_ioctl.o \ - xfs_iomap.o \ xfs_iops.o \ xfs_lrw.o \ xfs_super.o \ @@ -148,7 +147,6 @@ xfs-y += $(addprefix support/, \ debug.o \ move.o \ - mrlock.o \ qsort.o \ uuid.o) diff -Nru a/fs/xfs/linux/kmem.h b/fs/xfs/linux/kmem.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/kmem.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_KMEM_H__ +#define __XFS_SUPPORT_KMEM_H__ + +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> + +/* + * Cutoff point to use vmalloc instead of kmalloc. + */ +#define MAX_SLAB_SIZE 0x10000 + +/* + * XFS uses slightly different names for these due to the + * IRIX heritage.
+ */ +#define kmem_zone kmem_cache_s +#define kmem_zone_t kmem_cache_t + +#define KM_SLEEP 0x0001 +#define KM_NOSLEEP 0x0002 +#define KM_NOFS 0x0004 + +typedef unsigned long xfs_pflags_t; + +#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) + +#define PFLAGS_SET_FSTRANS(STATEP) do { \ + *(STATEP) = current->flags; \ + current->flags |= PF_FSTRANS; \ +} while (0) + +#define PFLAGS_RESTORE(STATEP) do { \ + current->flags = *(STATEP); \ +} while (0) + +#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \ + *(NSTATEP) = *(OSTATEP); \ +} while (0) + +/* + * XXX get rid of the unconditional __GFP_NOFAIL by adding + * a KM_FAIL flag and using it where we're allowed to fail. + */ +static __inline unsigned int +kmem_flags_convert(int flags) +{ + int lflags; + +#if DEBUG + if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS))) { + printk(KERN_WARNING + "XFS: memory allocation with wrong flags (%x)\n", flags); + BUG(); + } +#endif + + lflags = (flags & KM_NOSLEEP) ? GFP_ATOMIC : (GFP_KERNEL|__GFP_NOFAIL); + + /* avoid recursive callbacks to filesystem during transactions */ + if (PFLAGS_TEST_FSTRANS() || (flags & KM_NOFS)) + lflags &= ~__GFP_FS; + + return lflags; +} + +static __inline void * +kmem_alloc(size_t size, int flags) +{ + if (unlikely(MAX_SLAB_SIZE < size)) + /* Avoid doing filesystem sensitive stuff to get this */ + return __vmalloc(size, kmem_flags_convert(flags), PAGE_KERNEL); + return kmalloc(size, kmem_flags_convert(flags)); +} + +static __inline void * +kmem_zalloc(size_t size, int flags) +{ + void *ptr = kmem_alloc(size, flags); + if (likely(ptr != NULL)) + memset(ptr, 0, size); + return ptr; +} + +static __inline void +kmem_free(void *ptr, size_t size) +{ + if (unlikely((unsigned long)ptr < VMALLOC_START || + (unsigned long)ptr >= VMALLOC_END)) + kfree(ptr); + else + vfree(ptr); +} + +static __inline void * +kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) +{ + void *new = kmem_alloc(newsize, flags); + + if (likely(ptr != NULL)) { + if (likely(new != NULL)) + memcpy(new, ptr, min(oldsize, newsize)); + kmem_free(ptr, oldsize); + } + + return new; +} + +static __inline kmem_zone_t * +kmem_zone_init(int size, char *zone_name) +{ + return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL); +} + +static __inline void * +kmem_zone_alloc(kmem_zone_t *zone, int flags) +{ + return kmem_cache_alloc(zone, kmem_flags_convert(flags)); +} + +static __inline void * +kmem_zone_zalloc(kmem_zone_t *zone, int flags) +{ + void *ptr = kmem_zone_alloc(zone, flags); + if (likely(ptr != NULL)) + memset(ptr, 0, kmem_cache_size(zone)); + return ptr; +} + +static __inline void +kmem_zone_free(kmem_zone_t *zone, void *ptr) +{ + kmem_cache_free(zone, ptr); +} + +typedef struct shrinker *kmem_shaker_t; +typedef int (*kmem_shake_func_t)(int, unsigned int); + +static __inline kmem_shaker_t +kmem_shake_register(kmem_shake_func_t sfunc) +{ + return set_shrinker(DEFAULT_SEEKS, sfunc); +} + +static __inline void +kmem_shake_deregister(kmem_shaker_t shrinker) +{ + remove_shrinker(shrinker); +} + +static __inline int +kmem_shake_allow(unsigned int gfp_mask) +{ + return (gfp_mask & __GFP_WAIT); +} + +#endif /* __XFS_SUPPORT_KMEM_H__ */
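[Reviewer note: for context, this is how the IRIX-flavoured wrappers above compose in practice. A minimal sketch, not code from the patch; the zone name, structure and function are invented for illustration.]

    #include "kmem.h"

    typedef struct xfs_foo { int f_busy; } xfs_foo_t;  /* hypothetical */
    static kmem_zone_t *foo_zone;

    static void foo_example(void)
    {
        xfs_foo_t *fp;
        void *buf;

        foo_zone = kmem_zone_init(sizeof(xfs_foo_t), "xfs_foo");

        /* KM_SLEEP may block; KM_NOFS avoids recursing into the fs */
        fp = kmem_zone_zalloc(foo_zone, KM_SLEEP);
        kmem_zone_free(foo_zone, fp);

        /* sizes past MAX_SLAB_SIZE transparently use __vmalloc, and
         * kmem_free picks vfree/kfree by looking at the address range */
        buf = kmem_alloc(2 * MAX_SLAB_SIZE, KM_SLEEP | KM_NOFS);
        kmem_free(buf, 2 * MAX_SLAB_SIZE);
    }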
diff -Nru a/fs/xfs/linux/mrlock.c b/fs/xfs/linux/mrlock.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/mrlock.c Thu Jan 29 23:15:58 2004 @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include <linux/time.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/wait.h> +#include <linux/interrupt.h> + +#include "mrlock.h" + + +#if USE_RW_WAIT_QUEUE_SPINLOCK +# define wq_write_lock write_lock +#else +# define wq_write_lock spin_lock +#endif + +/* + * We don't seem to need lock_type (only one supported), name, or + * sequence. But, XFS will pass it so let's leave them here for now. + */ +/* ARGSUSED */ +void +mrlock_init(mrlock_t *mrp, int lock_type, char *name, long sequence) +{ + mrp->mr_count = 0; + mrp->mr_reads_waiting = 0; + mrp->mr_writes_waiting = 0; + init_waitqueue_head(&mrp->mr_readerq); + init_waitqueue_head(&mrp->mr_writerq); + mrp->mr_lock = SPIN_LOCK_UNLOCKED; +} + +/* + * Macros to lock/unlock the mrlock_t. + */ + +#define MRLOCK(m) spin_lock(&(m)->mr_lock); +#define MRUNLOCK(m) spin_unlock(&(m)->mr_lock); + + +/* + * lock_wait should never be called in an interrupt thread. + * + * mrlocks can sleep (i.e. call schedule) and so they can't ever + * be called from an interrupt thread. + * + * threads that wake-up should also never be invoked from interrupt threads. + * + * But, waitqueue_lock is locked from interrupt threads - and we are + * called with interrupts disabled, so it is all OK.
+ */ + +/* ARGSUSED */ +void +lock_wait(wait_queue_head_t *q, spinlock_t *lock, int rw) +{ + DECLARE_WAITQUEUE( wait, current ); + + __set_current_state(TASK_UNINTERRUPTIBLE); + + spin_lock(&q->lock); + if (rw) { + __add_wait_queue_tail(q, &wait); + } else { + __add_wait_queue(q, &wait); + } + + spin_unlock(&q->lock); + spin_unlock(lock); + + schedule(); + + spin_lock(&q->lock); + __remove_wait_queue(q, &wait); + spin_unlock(&q->lock); + + spin_lock(lock); + + /* return with lock held */ +} + +/* ARGSUSED */ +void +mrfree(mrlock_t *mrp) +{ +} + +/* ARGSUSED */ +void +mrlock(mrlock_t *mrp, int type, int flags) +{ + if (type == MR_ACCESS) + mraccess(mrp); + else + mrupdate(mrp); +} + +/* ARGSUSED */ +void +mraccessf(mrlock_t *mrp, int flags) +{ + MRLOCK(mrp); + if(mrp->mr_writes_waiting > 0) { + mrp->mr_reads_waiting++; + lock_wait(&mrp->mr_readerq, &mrp->mr_lock, 0); + mrp->mr_reads_waiting--; + } + while (mrp->mr_count < 0) { + mrp->mr_reads_waiting++; + lock_wait(&mrp->mr_readerq, &mrp->mr_lock, 0); + mrp->mr_reads_waiting--; + } + mrp->mr_count++; + MRUNLOCK(mrp); +} + +/* ARGSUSED */ +void +mrupdatef(mrlock_t *mrp, int flags) +{ + MRLOCK(mrp); + while(mrp->mr_count) { + mrp->mr_writes_waiting++; + lock_wait(&mrp->mr_writerq, &mrp->mr_lock, 1); + mrp->mr_writes_waiting--; + } + + mrp->mr_count = -1; /* writer on it */ + MRUNLOCK(mrp); +} + +int +mrtryaccess(mrlock_t *mrp) +{ + MRLOCK(mrp); + /* + * If anyone is waiting for update access or the lock is held for update + * fail the request. + */ + if(mrp->mr_writes_waiting > 0 || mrp->mr_count < 0) { + MRUNLOCK(mrp); + return 0; + } + mrp->mr_count++; + MRUNLOCK(mrp); + return 1; +} + +int +mrtrypromote(mrlock_t *mrp) +{ + MRLOCK(mrp); + + if(mrp->mr_count == 1) { /* We are the only thread with the lock */ + mrp->mr_count = -1; /* writer on it */ + MRUNLOCK(mrp); + return 1; + } + + MRUNLOCK(mrp); + return 0; +} + +int +mrtryupdate(mrlock_t *mrp) +{ + MRLOCK(mrp); + + if(mrp->mr_count) { + MRUNLOCK(mrp); + return 0; + } + + mrp->mr_count = -1; /* writer on it */ + MRUNLOCK(mrp); + return 1; +} + +static __inline__ void mrwake(mrlock_t *mrp) +{ + /* + * First, if the count is now 0, we need to wake-up anyone waiting. + */ + if (!mrp->mr_count) { + if (mrp->mr_writes_waiting) { /* Wake-up first writer waiting */ + wake_up(&mrp->mr_writerq); + } else if (mrp->mr_reads_waiting) { /* Wakeup any readers waiting */ + wake_up(&mrp->mr_readerq); + } + } +} + +void +mraccunlock(mrlock_t *mrp) +{ + MRLOCK(mrp); + mrp->mr_count--; + mrwake(mrp); + MRUNLOCK(mrp); +} + +void +mrunlock(mrlock_t *mrp) +{ + MRLOCK(mrp); + if (mrp->mr_count < 0) { + mrp->mr_count = 0; + } else { + mrp->mr_count--; + } + mrwake(mrp); + MRUNLOCK(mrp); +} + +int +ismrlocked(mrlock_t *mrp, int type) /* No need to lock since info can change */ +{ + if (type == MR_ACCESS) + return (mrp->mr_count > 0); /* Read lock */ + else if (type == MR_UPDATE) + return (mrp->mr_count < 0); /* Write lock */ + else if (type == (MR_UPDATE | MR_ACCESS)) + return (mrp->mr_count); /* Any type of lock held */ + else /* Any waiters */ + return (mrp->mr_reads_waiting | mrp->mr_writes_waiting); +} + +/* + * Demote from update to access. We better be the only thread with the + * lock in update mode so it should be easy to set to 1. + * Wake-up any readers waiting. 
+ */ + +void +mrdemote(mrlock_t *mrp) +{ + MRLOCK(mrp); + mrp->mr_count = 1; + if (mrp->mr_reads_waiting) { /* Wakeup all readers waiting */ + wake_up(&mrp->mr_readerq); + } + MRUNLOCK(mrp); +} diff -Nru a/fs/xfs/linux/mrlock.h b/fs/xfs/linux/mrlock.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/mrlock.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_MRLOCK_H__ +#define __XFS_SUPPORT_MRLOCK_H__ + +#include <linux/time.h> +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +/* + * Implement mrlocks on Linux that work for XFS. + * + * These are sleep locks and not spinlocks. If one wants read/write spinlocks, + * use read_lock, write_lock, ... see spinlock.h. + */ + +typedef struct mrlock_s { + int mr_count; + unsigned short mr_reads_waiting; + unsigned short mr_writes_waiting; + wait_queue_head_t mr_readerq; + wait_queue_head_t mr_writerq; + spinlock_t mr_lock; +} mrlock_t; + +#define MR_ACCESS 1 +#define MR_UPDATE 2 + +#define MRLOCK_BARRIER 0x1 +#define MRLOCK_ALLOW_EQUAL_PRI 0x8 + +/* + * mraccessf/mrupdatef take flags to be passed in while sleeping; + * only PLTWAIT is currently supported. + */ + +extern void mraccessf(mrlock_t *, int); +extern void mrupdatef(mrlock_t *, int); +extern void mrlock(mrlock_t *, int, int); +extern void mrunlock(mrlock_t *); +extern void mraccunlock(mrlock_t *); +extern int mrtryupdate(mrlock_t *); +extern int mrtryaccess(mrlock_t *); +extern int mrtrypromote(mrlock_t *); +extern void mrdemote(mrlock_t *); + +extern int ismrlocked(mrlock_t *, int); +extern void mrlock_init(mrlock_t *, int type, char *name, long sequence); +extern void mrfree(mrlock_t *); + +#define mrinit(mrp, name) mrlock_init(mrp, MRLOCK_BARRIER, name, -1) +#define mraccess(mrp) mraccessf(mrp, 0) /* grab for READ/ACCESS */ +#define mrupdate(mrp) mrupdatef(mrp, 0) /* grab for WRITE/UPDATE */ +#define mrislocked_access(mrp) ((mrp)->mr_count > 0) +#define mrislocked_update(mrp) ((mrp)->mr_count < 0) + +#endif /* __XFS_SUPPORT_MRLOCK_H__ */
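[Reviewer note: to make the mr_count convention concrete: a positive count is that many concurrent readers, -1 is a single writer. A minimal sketch of the calling pattern, with the lock variable and function invented for illustration.]

    #include "mrlock.h"

    static mrlock_t m_lock;  /* hypothetical lock */

    static void mrlock_example(void)
    {
        mrinit(&m_lock, "m_lock");

        mraccess(&m_lock);   /* shared: mr_count goes to 1 */
        /* ... more readers may pile up here concurrently ... */
        mraccunlock(&m_lock);

        mrupdate(&m_lock);   /* exclusive: mr_count goes to -1 */
        mrdemote(&m_lock);   /* writer drops back to a single reader */
        mrunlock(&m_lock);

        mrfree(&m_lock);
    }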
diff -Nru a/fs/xfs/linux/mutex.h b/fs/xfs/linux/mutex.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/mutex.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_MUTEX_H__ +#define __XFS_SUPPORT_MUTEX_H__ + +#include <linux/spinlock.h> +#include <asm/semaphore.h> + +/* + * Map the mutexes from IRIX to Linux semaphores. + * + * Destroy simply initializes to -99 which should block all other + * callers. + */ +#define MUTEX_DEFAULT 0x0 +typedef struct semaphore mutex_t; + +#define mutex_init(lock, type, name) sema_init(lock, 1) +#define mutex_destroy(lock) sema_init(lock, -99) +#define mutex_lock(lock, num) down(lock) +#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1) +#define mutex_unlock(lock) up(lock) + +#endif /* __XFS_SUPPORT_MUTEX_H__ */
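[Reviewer note: since the macros simply drop the extra IRIX arguments, call sites keep their original shape. A small sketch with invented names; the second mutex_lock argument is ignored on Linux.]

    #include "mutex.h"

    static mutex_t x_mutex;  /* hypothetical */

    static void mutex_example(void)
    {
        mutex_init(&x_mutex, MUTEX_DEFAULT, "x_mutex");

        mutex_lock(&x_mutex, 0);  /* IRIX priority argument, unused */
        /* ... serialized section ... */
        mutex_unlock(&x_mutex);

        if (mutex_trylock(&x_mutex)) {  /* 1 == acquired */
            mutex_unlock(&x_mutex);
        }
        mutex_destroy(&x_mutex);
    }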
diff -Nru a/fs/xfs/linux/sema.h b/fs/xfs/linux/sema.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/sema.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SEMA_H__ +#define __XFS_SUPPORT_SEMA_H__ + +#include <linux/time.h> +#include <linux/wait.h> +#include <asm/atomic.h> +#include <asm/semaphore.h> + +/* + * sema_t structure just maps to struct semaphore in Linux kernel. + */ + +typedef struct semaphore sema_t; + +#define init_sema(sp, val, c, d) sema_init(sp, val) +#define initsema(sp, val) sema_init(sp, val) +#define initnsema(sp, val, name) sema_init(sp, val) +#define psema(sp, b) down(sp) +#define vsema(sp) up(sp) +#define valusema(sp) (atomic_read(&(sp)->count)) +#define freesema(sema) + +/* + * Map cpsema (try to get the sema) to down_trylock. We need to switch + * the return values since cpsema returns 1 (acquired) 0 (failed) and + * down_trylock returns the reverse 0 (acquired) 1 (failed). + */ + +#define cpsema(sp) (down_trylock(sp) ? 0 : 1) + +/* + * Didn't do cvsema(sp). Not sure how to map this to up/down/... + * It does a vsema if the value is < 0, otherwise nothing. + */ + +#endif /* __XFS_SUPPORT_SEMA_H__ */
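[Reviewer note: the one behavioural trap here is cpsema()'s return value, inverted relative to down_trylock(). A short sketch with invented names.]

    #include "sema.h"

    static sema_t s_sema;  /* hypothetical */

    static void sema_example(void)
    {
        initnsema(&s_sema, 1, "s_sema");

        if (cpsema(&s_sema)) {  /* 1 == acquired, 0 == busy */
            /* ... critical section ... */
            vsema(&s_sema);
        }

        psema(&s_sema, 0);  /* blocking acquire; the flag arg is unused */
        vsema(&s_sema);
        freesema(&s_sema);
    }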
diff -Nru a/fs/xfs/linux/spin.h b/fs/xfs/linux/spin.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/spin.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SPIN_H__ +#define __XFS_SUPPORT_SPIN_H__ + +#include <linux/sched.h> /* preempt needs this */ +#include <linux/spinlock.h> + +/* + * Map lock_t from IRIX to Linux spinlocks. + * + * Note that linux turns on/off spinlocks depending on CONFIG_SMP. + * We don't need to worry about SMP or not here. + */ + +#define SPLDECL(s) unsigned long s + +typedef spinlock_t lock_t; + +#define spinlock_init(lock, name) spin_lock_init(lock) +#define spinlock_destroy(lock) + +static inline unsigned long mutex_spinlock(lock_t *lock) +{ + spin_lock(lock); + return 0; +} + +/*ARGSUSED*/ +static inline void mutex_spinunlock(lock_t *lock, unsigned long s) +{ + spin_unlock(lock); +} + +static inline void nested_spinlock(lock_t *lock) +{ + spin_lock(lock); +} + +static inline void nested_spinunlock(lock_t *lock) +{ + spin_unlock(lock); +} + +#endif /* __XFS_SUPPORT_SPIN_H__ */
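[Reviewer note: the dummy return value keeps IRIX's interrupt-level idiom compiling: callers still declare and pass an "spl" cookie even though Linux ignores it. Sketch with an invented lock name.]

    #include "spin.h"

    static lock_t x_lock;  /* hypothetical */

    static void spin_example(void)
    {
        SPLDECL(s);  /* expands to: unsigned long s */

        spinlock_init(&x_lock, "x_lock");
        s = mutex_spinlock(&x_lock);  /* s is always 0 on Linux */
        /* ... short, non-sleeping critical section ... */
        mutex_spinunlock(&x_lock, s);
        spinlock_destroy(&x_lock);
    }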
diff -Nru a/fs/xfs/linux/sv.h b/fs/xfs/linux/sv.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/sv.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_SV_H__ +#define __XFS_SUPPORT_SV_H__ + +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +/* + * Synchronisation variables. + * + * (Parameters "pri", "svf" and "rts" are not implemented) + */ + +typedef struct sv_s { + wait_queue_head_t waiters; +} sv_t; + +#define SV_FIFO 0x0 /* sv_t is FIFO type */ +#define SV_LIFO 0x2 /* sv_t is LIFO type */ +#define SV_PRIO 0x4 /* sv_t is PRIO type */ +#define SV_KEYED 0x6 /* sv_t is KEYED type */ +#define SV_DEFAULT SV_FIFO + + +static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, + unsigned long timeout) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue_exclusive(&sv->waiters, &wait); + __set_current_state(state); + spin_unlock(lock); + + schedule_timeout(timeout); + + remove_wait_queue(&sv->waiters, &wait); +} + +#define init_sv(sv,type,name,flag) \ init_waitqueue_head(&(sv)->waiters) +#define sv_init(sv,flag,name) \ init_waitqueue_head(&(sv)->waiters) +#define sv_destroy(sv) \ /*NOTHING*/ +#define sv_wait(sv, pri, lock, s) \ _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) +#define sv_wait_sig(sv, pri, lock, s) \ _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) +#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \ _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts)) +#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \ _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts)) +#define sv_signal(sv) \ wake_up(&(sv)->waiters) +#define sv_broadcast(sv) \ wake_up_all(&(sv)->waiters) + +#endif /* __XFS_SUPPORT_SV_H__ */
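[Reviewer note: these behave like condition variables, with one caveat worth spelling out: _sv_wait() drops the caller's spinlock before sleeping and does not retake it, so the waiter must re-lock on return. A hedged sketch; the names and the f_done flag are invented, and init_sv(&f_wait, SV_DEFAULT, "f_wait", 0) is assumed to have run at setup.]

    #include "spin.h"
    #include "sv.h"

    static lock_t f_lock;  /* hypothetical */
    static sv_t f_wait;
    static int f_done;

    static void sv_wait_side(void)
    {
        spin_lock(&f_lock);
        while (!f_done) {
            /* enter locked; sv_wait unlocks f_lock and sleeps */
            sv_wait(&f_wait, 0, &f_lock, 0);
            spin_lock(&f_lock);  /* sv_wait returns with the lock dropped */
        }
        spin_unlock(&f_lock);
    }

    static void sv_signal_side(void)
    {
        spin_lock(&f_lock);
        f_done = 1;
        spin_unlock(&f_lock);
        sv_broadcast(&f_wait);  /* wake every exclusive waiter */
    }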
diff -Nru a/fs/xfs/linux/time.h b/fs/xfs/linux/time.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/time.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_SUPPORT_TIME_H__ +#define __XFS_SUPPORT_TIME_H__ + +#include <linux/sched.h> +#include <linux/time.h> + +typedef struct timespec timespec_t; + +static inline void delay(long ticks) +{ + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(ticks); +} + +static inline void nanotime(struct timespec *tvp) +{ + *tvp = CURRENT_TIME; +} + +#endif /* __XFS_SUPPORT_TIME_H__ */ diff -Nru a/fs/xfs/linux/xfs_behavior.c b/fs/xfs/linux/xfs_behavior.c --- a/fs/xfs/linux/xfs_behavior.c Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - * - */ -#include "xfs.h" - -/* - * Source file used to associate/disassociate behaviors with virtualized - * objects. See xfs_behavior.h for more information about behaviors, etc. - * - * The implementation is split between functions in this file and macros - * in xfs_behavior.h. - */ - -/* - * Insert a new behavior descriptor into a behavior chain. - * - * The behavior chain is ordered based on the 'position' number which - * lives in the first field of the ops vector (higher numbers first). - * - * Attemps to insert duplicate ops result in an EINVAL return code. - * Otherwise, return 0 to indicate success. - */ -int -bhv_insert(bhv_head_t *bhp, bhv_desc_t *bdp) -{ - bhv_desc_t *curdesc, *prev; - int position; - - /* - * Validate the position value of the new behavior. - */ - position = BHV_POSITION(bdp); - ASSERT(position >= BHV_POSITION_BASE && position <= BHV_POSITION_TOP); - - /* - * Find location to insert behavior. Check for duplicates. - */ - prev = NULL; - for (curdesc = bhp->bh_first; - curdesc != NULL; - curdesc = curdesc->bd_next) { - - /* Check for duplication.
*/ - if (curdesc->bd_ops == bdp->bd_ops) { - ASSERT(0); - return EINVAL; - } - - /* Find correct position */ - if (position >= BHV_POSITION(curdesc)) { - ASSERT(position != BHV_POSITION(curdesc)); - break; /* found it */ - } - - prev = curdesc; - } - - if (prev == NULL) { - /* insert at front of chain */ - bdp->bd_next = bhp->bh_first; - bhp->bh_first = bdp; - } else { - /* insert after prev */ - bdp->bd_next = prev->bd_next; - prev->bd_next = bdp; - } - - return 0; -} - -/* - * Remove a behavior descriptor from a position in a behavior chain; - * the postition is guaranteed not to be the first position. - * Should only be called by the bhv_remove() macro. - */ -void -bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp) -{ - bhv_desc_t *curdesc, *prev; - - ASSERT(bhp->bh_first != NULL); - ASSERT(bhp->bh_first->bd_next != NULL); - - prev = bhp->bh_first; - for (curdesc = bhp->bh_first->bd_next; - curdesc != NULL; - curdesc = curdesc->bd_next) { - - if (curdesc == bdp) - break; /* found it */ - prev = curdesc; - } - - ASSERT(curdesc == bdp); - prev->bd_next = bdp->bd_next; /* remove from after prev */ -} - -/* - * Look for a specific ops vector on the specified behavior chain. - * Return the associated behavior descriptor. Or NULL, if not found. - */ -bhv_desc_t * -bhv_lookup(bhv_head_t *bhp, void *ops) -{ - bhv_desc_t *curdesc; - - for (curdesc = bhp->bh_first; - curdesc != NULL; - curdesc = curdesc->bd_next) { - - if (curdesc->bd_ops == ops) - return curdesc; - } - - return NULL; -} - -/* - * Looks for the first behavior within a specified range of positions. - * Return the associated behavior descriptor. Or NULL, if none found. - */ -bhv_desc_t * -bhv_lookup_range(bhv_head_t *bhp, int low, int high) -{ - bhv_desc_t *curdesc; - - for (curdesc = bhp->bh_first; - curdesc != NULL; - curdesc = curdesc->bd_next) { - - int position = BHV_POSITION(curdesc); - - if (position <= high) { - if (position >= low) - return curdesc; - return NULL; - } - } - - return NULL; -} - -/* - * Return the base behavior in the chain, or NULL if the chain - * is empty. - * - * The caller has not read locked the behavior chain, so acquire the - * lock before traversing the chain. - */ -bhv_desc_t * -bhv_base(bhv_head_t *bhp) -{ - bhv_desc_t *curdesc; - - for (curdesc = bhp->bh_first; - curdesc != NULL; - curdesc = curdesc->bd_next) { - - if (curdesc->bd_next == NULL) { - return curdesc; - } - } - - return NULL; -} - -void -bhv_head_init( - bhv_head_t *bhp, - char *name) -{ - bhp->bh_first = NULL; -} - -void -bhv_insert_initial( - bhv_head_t *bhp, - bhv_desc_t *bdp) -{ - ASSERT(bhp->bh_first == NULL); - (bhp)->bh_first = bdp; -} - -void -bhv_head_destroy( - bhv_head_t *bhp) -{ - ASSERT(bhp->bh_first == NULL); -} diff -Nru a/fs/xfs/linux/xfs_behavior.h b/fs/xfs/linux/xfs_behavior.h --- a/fs/xfs/linux/xfs_behavior.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_BEHAVIOR_H__ -#define __XFS_BEHAVIOR_H__ - -/* - * Header file used to associate behaviors with virtualized objects. - * - * A virtualized object is an internal, virtualized representation of - * OS entities such as persistent files, processes, or sockets. Examples - * of virtualized objects include vnodes, vprocs, and vsockets. Often - * a virtualized object is referred to simply as an "object." - * - * A behavior is essentially an implementation layer associated with - * an object. Multiple behaviors for an object are chained together, - * the order of chaining determining the order of invocation. Each - * behavior of a given object implements the same set of interfaces - * (e.g., the VOP interfaces). - * - * Behaviors may be dynamically inserted into an object's behavior chain, - * such that the addition is transparent to consumers that already have - * references to the object. Typically, a given behavior will be inserted - * at a particular location in the behavior chain. Insertion of new - * behaviors is synchronized with operations-in-progress (oip's) so that - * the oip's always see a consistent view of the chain. - * - * The term "interpostion" is used to refer to the act of inserting - * a behavior such that it interposes on (i.e., is inserted in front - * of) a particular other behavior. A key example of this is when a - * system implementing distributed single system image wishes to - * interpose a distribution layer (providing distributed coherency) - * in front of an object that is otherwise only accessed locally. - * - * Note that the traditional vnode/inode combination is simply a virtualized - * object that has exactly one associated behavior. - * - * Behavior synchronization is logic which is necessary under certain - * circumstances that there is no conflict between ongoing operations - * traversing the behavior chain and those dunamically modifying the - * behavior chain. Because behavior synchronization adds extra overhead - * to virtual operation invocation, we want to restrict, as much as - * we can, the requirement for this extra code, to those situations - * in which it is truly necessary. - * - * Behavior synchronization is needed whenever there's at least one class - * of object in the system for which: - * 1) multiple behaviors for a given object are supported, - * -- AND -- - * 2a) insertion of a new behavior can happen dynamically at any time during - * the life of an active object, - * -- AND -- - * 3a) insertion of a new behavior needs to synchronize with existing - * ops-in-progress. 
- * -- OR -- - * 3b) multiple different behaviors can be dynamically inserted at - * any time during the life of an active object - * -- OR -- - * 3c) removal of a behavior can occur at any time during the life of - * an active object. - * -- OR -- - * 2b) removal of a behavior can occur at any time during the life of an - * active object - * - */ - -struct bhv_head_lock; - -/* - * Behavior head. Head of the chain of behaviors. - * Contained within each virtualized object data structure. - */ -typedef struct bhv_head { - struct bhv_desc *bh_first; /* first behavior in chain */ - struct bhv_head_lock *bh_lockp; /* pointer to lock info struct */ -} bhv_head_t; - -/* - * Behavior descriptor. Descriptor associated with each behavior. - * Contained within the behavior's private data structure. - */ -typedef struct bhv_desc { - void *bd_pdata; /* private data for this behavior */ - void *bd_vobj; /* virtual object associated with */ - void *bd_ops; /* ops for this behavior */ - struct bhv_desc *bd_next; /* next behavior in chain */ -} bhv_desc_t; - -/* - * Behavior identity field. A behavior's identity determines the position - * where it lives within a behavior chain, and it's always the first field - * of the behavior's ops vector. The optional id field further identifies the - * subsystem responsible for the behavior. - */ -typedef struct bhv_identity { - __u16 bi_id; /* owning subsystem id */ - __u16 bi_position; /* position in chain */ -} bhv_identity_t; - -typedef bhv_identity_t bhv_position_t; - -#define BHV_IDENTITY_INIT(id,pos) {id, pos} -#define BHV_IDENTITY_INIT_POSITION(pos) BHV_IDENTITY_INIT(0, pos) - -/* - * Define boundaries of position values. - */ -#define BHV_POSITION_INVALID 0 /* invalid position number */ -#define BHV_POSITION_BASE 1 /* base (last) implementation layer */ -#define BHV_POSITION_TOP 63 /* top (first) implementation layer */ - -/* - * Plumbing macros. - */ -#define BHV_HEAD_FIRST(bhp) (ASSERT((bhp)->bh_first), (bhp)->bh_first) -#define BHV_NEXT(bdp) (ASSERT((bdp)->bd_next), (bdp)->bd_next) -#define BHV_NEXTNULL(bdp) ((bdp)->bd_next) -#define BHV_VOBJ(bdp) (ASSERT((bdp)->bd_vobj), (bdp)->bd_vobj) -#define BHV_VOBJNULL(bdp) ((bdp)->bd_vobj) -#define BHV_PDATA(bdp) (bdp)->bd_pdata -#define BHV_OPS(bdp) (bdp)->bd_ops -#define BHV_IDENTITY(bdp) ((bhv_identity_t *)(bdp)->bd_ops) -#define BHV_POSITION(bdp) (BHV_IDENTITY(bdp)->bi_position) - -extern void bhv_head_init(bhv_head_t *, char *); -extern void bhv_head_destroy(bhv_head_t *); -extern int bhv_insert(bhv_head_t *, bhv_desc_t *); -extern void bhv_insert_initial(bhv_head_t *, bhv_desc_t *); - -/* - * Initialize a new behavior descriptor. - * Arguments: - * bdp - pointer to behavior descriptor - * pdata - pointer to behavior's private data - * vobj - pointer to associated virtual object - * ops - pointer to ops for this behavior - */ -#define bhv_desc_init(bdp, pdata, vobj, ops) \ - { \ - (bdp)->bd_pdata = pdata; \ - (bdp)->bd_vobj = vobj; \ - (bdp)->bd_ops = ops; \ - (bdp)->bd_next = NULL; \ - } - -/* - * Remove a behavior descriptor from a behavior chain. - */ -#define bhv_remove(bhp, bdp) \ - { \ - if ((bhp)->bh_first == (bdp)) { \ - /* \ - * Remove from front of chain. \ - * Atomic wrt oip's. \ - */ \ - (bhp)->bh_first = (bdp)->bd_next; \ - } else { \ - /* remove from non-front of chain */ \ - bhv_remove_not_first(bhp, bdp); \ - } \ - (bdp)->bd_vobj = NULL; \ - } - -/* - * Behavior module prototypes. - */ -extern void bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp); -extern bhv_desc_t * bhv_lookup(bhv_head_t *bhp, void *ops); -extern bhv_desc_t * bhv_lookup_range(bhv_head_t *bhp, int low, int high); -extern bhv_desc_t * bhv_base(bhv_head_t *bhp); - -/* No bhv locking on Linux */ -#define bhv_lookup_unlocked bhv_lookup -#define bhv_base_unlocked bhv_base - -#endif /* __XFS_BEHAVIOR_H__ */
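[Reviewer note: the behavior files are not going away; per the Makefile hunk above they move up into fs/xfs/ proper. For orientation, this is roughly how a chain is assembled around a virtualized object. A hedged sketch only: the foo_ops vector, object and private pointers are all invented.]

    #include "xfs_behavior.h"

    /* hypothetical ops vector; the identity must be its first field */
    typedef struct foo_ops {
        bhv_position_t fo_position;
        int (*fo_read)(bhv_desc_t *);
    } foo_ops_t;

    static foo_ops_t base_ops = { BHV_IDENTITY_INIT_POSITION(BHV_POSITION_BASE) };
    static foo_ops_t top_ops = { BHV_IDENTITY_INIT_POSITION(BHV_POSITION_TOP) };

    static void behavior_example(void *vobj, void *base_pdata, void *top_pdata)
    {
        bhv_head_t head;
        bhv_desc_t base_bdp, top_bdp;

        bhv_head_init(&head, "foo");

        bhv_desc_init(&base_bdp, base_pdata, vobj, &base_ops);
        bhv_insert_initial(&head, &base_bdp);

        /* interpose: the higher position number sorts to the front */
        bhv_desc_init(&top_bdp, top_pdata, vobj, &top_ops);
        bhv_insert(&head, &top_bdp);

        /* ops now dispatch starting at BHV_HEAD_FIRST(&head), i.e. &top_bdp */
    }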
diff -Nru a/fs/xfs/linux/xfs_buf.c b/fs/xfs/linux/xfs_buf.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/xfs_buf.c Thu Jan 29 23:15:58 2004 @@ -0,0 +1,2058 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * page_buf.c + * + * The page_buf module provides an abstract buffer cache model on top of + * the Linux page cache. Cached metadata blocks for a file system are + * hashed to the inode for the block device. The page_buf module + * assembles buffer (page_buf_t) objects on demand to aggregate such + * cached pages for I/O. + * + * + * Written by Steve Lord, Jim Mostek, Russell Cattelan + * and Rajagopal Ananthanarayanan ("ananth") at SGI. + * + */ + +#include <linux/stddef.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/sysctl.h> +#include <linux/proc_fs.h> +#include <linux/workqueue.h> +#include <linux/suspend.h> +#include <linux/percpu.h> + +#include <support/ktrace.h> +#include <support/debug.h> +#include "kmem.h" + +#include "xfs_types.h" +#include "xfs_cred.h" +#include "xfs_lrw.h" +#include "xfs_buf.h" + +#define BBSHIFT 9 +#define BN_ALIGN_MASK ((1 << (PAGE_CACHE_SHIFT - BBSHIFT)) - 1) + +#ifndef GFP_READAHEAD +#define GFP_READAHEAD (__GFP_NOWARN|__GFP_NORETRY) +#endif + +/* + * File wide globals + */ + +STATIC kmem_cache_t *pagebuf_cache; +STATIC void pagebuf_daemon_wakeup(void); +STATIC void pagebuf_delwri_queue(page_buf_t *, int); +STATIC struct workqueue_struct *pagebuf_logio_workqueue; +STATIC struct workqueue_struct *pagebuf_dataio_workqueue; + +/* + * Pagebuf module configuration parameters, exported via + * /proc/sys/vm/pagebuf + */ + +typedef struct pb_sysctl_val { + int min; + int val; + int max; +} pb_sysctl_val_t; + +struct { + pb_sysctl_val_t flush_interval; /* interval between runs of the + * delwri flush daemon. */ + pb_sysctl_val_t age_buffer; /* time for buffer to age before + * we flush it.
 */ + pb_sysctl_val_t stats_clear; /* clear the pagebuf stats */ + pb_sysctl_val_t debug; /* debug tracing on or off */ +} pb_params = { + /* MIN DFLT MAX */ + .flush_interval = { HZ/2, HZ, 30*HZ }, + .age_buffer = { 1*HZ, 15*HZ, 300*HZ }, + .stats_clear = { 0, 0, 1 }, + .debug = { 0, 0, 1 }, +}; + +enum { + PB_FLUSH_INT = 1, + PB_FLUSH_AGE = 2, + PB_STATS_CLEAR = 3, + PB_DEBUG = 4, +}; + +/* + * Pagebuf statistics variables + */ + +struct pbstats { + u_int32_t pb_get; + u_int32_t pb_create; + u_int32_t pb_get_locked; + u_int32_t pb_get_locked_waited; + u_int32_t pb_busy_locked; + u_int32_t pb_miss_locked; + u_int32_t pb_page_retries; + u_int32_t pb_page_found; + u_int32_t pb_get_read; +} pbstats; +DEFINE_PER_CPU(struct pbstats, pbstats); + +/* We don't disable preempt, not too worried about poking the + * wrong cpu's stat for now */ +#define PB_STATS_INC(count) (__get_cpu_var(pbstats).count++) + +/* + * Pagebuf debugging + */ + +#ifdef PAGEBUF_TRACE +void +pagebuf_trace( + page_buf_t *pb, + char *id, + void *data, + void *ra) +{ + if (!pb_params.debug.val) + return; + ktrace_enter(pagebuf_trace_buf, + pb, id, + (void *)(unsigned long)pb->pb_flags, + (void *)(unsigned long)pb->pb_hold.counter, + (void *)(unsigned long)pb->pb_sema.count.counter, + (void *)current, + data, ra, + (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff), + (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff), + (void *)(unsigned long)pb->pb_buffer_length, + NULL, NULL, NULL, NULL, NULL); +} +ktrace_t *pagebuf_trace_buf; +#define PAGEBUF_TRACE_SIZE 4096 +#define PB_TRACE(pb, id, data) \ + pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0)) +#else +#define PB_TRACE(pb, id, data) do { } while (0) +#endif + +#ifdef PAGEBUF_LOCK_TRACKING +# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid) +# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1) +# define PB_GET_OWNER(pb) ((pb)->pb_last_holder) +#else +# define PB_SET_OWNER(pb) do { } while (0) +# define PB_CLEAR_OWNER(pb) do { } while (0) +# define PB_GET_OWNER(pb) do { } while (0) +#endif + +/* + * Pagebuf allocation / freeing. + */ + +#define pb_to_gfp(flags) \ + (((flags) & PBF_READ_AHEAD) ? GFP_READAHEAD : \ + ((flags) & PBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) + +#define pb_to_km(flags) \ + (((flags) & PBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) + + +#define pagebuf_allocate(flags) \ + kmem_zone_alloc(pagebuf_cache, pb_to_km(flags)) +#define pagebuf_deallocate(pb) \ + kmem_zone_free(pagebuf_cache, (pb)); + +/* + * Pagebuf hashing + */ + +#define NBITS 8 +#define NHASH (1<<NBITS) + +typedef struct { + struct list_head pb_hash; + spinlock_t pb_hash_lock; +} pb_hash_t; + +STATIC pb_hash_t pbhash[NHASH]; +#define pb_hash(pb) &pbhash[pb->pb_hash_index] + +STATIC int +_bhash( + struct block_device *bdev, + loff_t base) +{ + int bit, hval; + + base >>= 9; + base ^= (unsigned long)bdev / L1_CACHE_BYTES; + for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) { + hval ^= (int)base & (NHASH-1); + base >>= NBITS; + } + return hval; +}
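[Reviewer note: to see what the fold does, here is a standalone user-space rendition of the same hashing. Not code from the patch: the device "address" and the L1_CACHE_BYTES value are made up so it can run anywhere.]

    #include <stdio.h>

    #define NBITS 8
    #define NHASH (1 << NBITS)
    #define L1_CACHE_BYTES 64  /* assumed cacheline size */

    /* mirrors _bhash(): fold the 512-byte block number, seeded with
     * the block_device pointer, NBITS at a time into one bucket index */
    static int bhash(unsigned long bdev, long long base)
    {
        int bit, hval;

        base >>= 9;
        base ^= bdev / L1_CACHE_BYTES;
        for (bit = hval = 0; base && bit < (int)(sizeof(base) * 8); bit += NBITS) {
            hval ^= (int)base & (NHASH - 1);
            base >>= NBITS;
        }
        return hval;
    }

    int main(void)
    {
        unsigned long dev = 0xc0ffee00UL;  /* stand-in for a bdev pointer */

        printf("block 8192 -> bucket %d\n", bhash(dev, 8192LL << 9));
        printf("block 8193 -> bucket %d\n", bhash(dev, 8193LL << 9));
        return 0;
    }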
+ +/* + * Mapping of multi-page buffers into contiguous virtual space + */ + +STATIC void *pagebuf_mapout_locked(page_buf_t *); + +typedef struct a_list { + void *vm_addr; + struct a_list *next; +} a_list_t; + +STATIC a_list_t *as_free_head; +STATIC int as_list_len; +STATIC spinlock_t as_lock = SPIN_LOCK_UNLOCKED; + +/* + * Try to batch vunmaps because they are costly. + */ +STATIC void +free_address( + void *addr) +{ + a_list_t *aentry; + + aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC); + if (aentry) { + spin_lock(&as_lock); + aentry->next = as_free_head; + aentry->vm_addr = addr; + as_free_head = aentry; + as_list_len++; + spin_unlock(&as_lock); + } else { + vunmap(addr); + } +} + +STATIC void +purge_addresses(void) +{ + a_list_t *aentry, *old; + + if (as_free_head == NULL) + return; + + spin_lock(&as_lock); + aentry = as_free_head; + as_free_head = NULL; + as_list_len = 0; + spin_unlock(&as_lock); + + while ((old = aentry) != NULL) { + vunmap(aentry->vm_addr); + aentry = aentry->next; + kfree(old); + } +} + +/* + * Internal pagebuf object manipulation + */ + +STATIC void +_pagebuf_initialize( + page_buf_t *pb, + pb_target_t *target, + loff_t range_base, + size_t range_length, + page_buf_flags_t flags) +{ + /* + * We don't want certain flags to appear in pb->pb_flags. + */ + flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); + + memset(pb, 0, sizeof(page_buf_t)); + atomic_set(&pb->pb_hold, 1); + init_MUTEX_LOCKED(&pb->pb_iodonesema); + INIT_LIST_HEAD(&pb->pb_list); + INIT_LIST_HEAD(&pb->pb_hash_list); + init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */ + PB_SET_OWNER(pb); + pb->pb_target = target; + pb->pb_file_offset = range_base; + /* + * Set buffer_length and count_desired to the same value initially. + * IO routines should use count_desired, which will be the same in + * most cases but may be reset (e.g. XFS recovery). + */ + pb->pb_buffer_length = pb->pb_count_desired = range_length; + pb->pb_flags = flags | PBF_NONE; + pb->pb_bn = PAGE_BUF_DADDR_NULL; + atomic_set(&pb->pb_pin_count, 0); + init_waitqueue_head(&pb->pb_waiters); + + PB_STATS_INC(pb_create); + PB_TRACE(pb, "initialize", target); +} + +/* + * Allocate a page array capable of holding a specified number + * of pages, and point the page buf at it. + */ +STATIC int +_pagebuf_get_pages( + page_buf_t *pb, + int page_count, + page_buf_flags_t flags) +{ + int gpf_mask = pb_to_gfp(flags); + + /* Make sure that we have a page list */ + if (pb->pb_pages == NULL) { + pb->pb_offset = page_buf_poff(pb->pb_file_offset); + pb->pb_page_count = page_count; + if (page_count <= PB_PAGES) { + pb->pb_pages = pb->pb_page_array; + } else { + pb->pb_pages = kmalloc(sizeof(struct page *) * + page_count, gpf_mask); + if (pb->pb_pages == NULL) + return -ENOMEM; + } + memset(pb->pb_pages, 0, sizeof(struct page *) * page_count); + } + return 0; +} + +/* + * Walk a pagebuf releasing all the pages contained within it. + */ +STATIC inline void +_pagebuf_freepages( + page_buf_t *pb) +{ + int buf_index; + + for (buf_index = 0; buf_index < pb->pb_page_count; buf_index++) { + struct page *page = pb->pb_pages[buf_index]; + + if (page) { + pb->pb_pages[buf_index] = NULL; + page_cache_release(page); + } + } +} + +/* + * pagebuf_free + * + * pagebuf_free releases the specified buffer. The modification + * state of any associated pages is left unchanged. + */ +void +pagebuf_free( + page_buf_t *pb) +{ + PB_TRACE(pb, "free", 0); + + if (pb->pb_flags & _PBF_LOCKABLE) { + pb_hash_t *hash = pb_hash(pb); + + spin_lock(&hash->pb_hash_lock); + /* + * Someone grabbed a reference while we weren't looking, + * try again later.
+ */ + if (unlikely(atomic_read(&pb->pb_hold))) { + spin_unlock(&hash->pb_hash_lock); + return; + } else if (!list_empty(&pb->pb_hash_list)) + list_del_init(&pb->pb_hash_list); + spin_unlock(&hash->pb_hash_lock); + } + + /* release any virtual mapping */ ; + if (pb->pb_flags & _PBF_ADDR_ALLOCATED) { + void *vaddr = pagebuf_mapout_locked(pb); + if (vaddr) { + free_address(vaddr); + } + } + + if (pb->pb_flags & _PBF_MEM_ALLOCATED) { + if (pb->pb_pages) { + /* release the pages in the address list */ + if ((pb->pb_pages[0]) && + (pb->pb_flags & _PBF_MEM_SLAB)) { + kfree(pb->pb_addr); + } else { + _pagebuf_freepages(pb); + } + if (pb->pb_pages != pb->pb_page_array) + kfree(pb->pb_pages); + pb->pb_pages = NULL; + } + pb->pb_flags &= ~(_PBF_MEM_ALLOCATED|_PBF_MEM_SLAB); + } + + pagebuf_deallocate(pb); +} + +/* + * _pagebuf_lookup_pages + * + * _pagebuf_lookup_pages finds all pages which match the buffer + * in question and the range of file offsets supplied, + * and builds the page list for the buffer, if the + * page list is not already formed or if not all of the pages are + * already in the list. Invalid pages (pages which have not yet been + * read in from disk) are assigned for any pages which are not found. + */ +STATIC int +_pagebuf_lookup_pages( + page_buf_t *pb, + struct address_space *aspace, + page_buf_flags_t flags) +{ + loff_t next_buffer_offset; + unsigned long page_count, pi, index; + struct page *page; + int gfp_mask, retry_count = 5, rval = 0; + int all_mapped, good_pages, nbytes; + unsigned int blocksize, sectorshift; + size_t size, offset; + + + /* For pagebufs where we want to map an address, do not use + * highmem pages - so that we do not need to use kmap resources + * to access the data. + * + * For pages where the caller has indicated there may be resource + * contention (e.g. called from a transaction) do not flush + * delalloc pages to obtain memory. 
+ */ + + if (flags & PBF_READ_AHEAD) { + gfp_mask = GFP_READAHEAD; + retry_count = 0; + } else if (flags & PBF_DONT_BLOCK) { + gfp_mask = GFP_NOFS; + } else if (flags & PBF_MAPPABLE) { + gfp_mask = GFP_KERNEL; + } else { + gfp_mask = GFP_HIGHUSER; + } + + next_buffer_offset = pb->pb_file_offset + pb->pb_buffer_length; + + good_pages = page_count = (page_buf_btoc(next_buffer_offset) - + page_buf_btoct(pb->pb_file_offset)); + + if (pb->pb_flags & _PBF_ALL_PAGES_MAPPED) { + /* Bring pages forward in cache */ + for (pi = 0; pi < page_count; pi++) { + mark_page_accessed(pb->pb_pages[pi]); + } + if ((flags & PBF_MAPPED) && !(pb->pb_flags & PBF_MAPPED)) { + all_mapped = 1; + goto mapit; + } + return 0; + } + + /* Ensure pb_pages field has been initialised */ + rval = _pagebuf_get_pages(pb, page_count, flags); + if (rval) + return rval; + + rval = pi = 0; + blocksize = pb->pb_target->pbr_bsize; + sectorshift = pb->pb_target->pbr_sshift; + size = pb->pb_count_desired; + offset = pb->pb_offset; + + /* Enter the pages in the page list */ + index = (pb->pb_file_offset - pb->pb_offset) >> PAGE_CACHE_SHIFT; + for (all_mapped = 1; pi < page_count; pi++, index++) { + if (pb->pb_pages[pi] == 0) { + retry: + page = find_or_create_page(aspace, index, gfp_mask); + if (!page) { + if (--retry_count > 0) { + PB_STATS_INC(pb_page_retries); + pagebuf_daemon_wakeup(); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(10); + goto retry; + } + rval = -ENOMEM; + all_mapped = 0; + continue; + } + PB_STATS_INC(pb_page_found); + mark_page_accessed(page); + pb->pb_pages[pi] = page; + } else { + page = pb->pb_pages[pi]; + lock_page(page); + } + + nbytes = PAGE_CACHE_SIZE - offset; + if (nbytes > size) + nbytes = size; + size -= nbytes; + + if (!PageUptodate(page)) { + if (blocksize == PAGE_CACHE_SIZE) { + if (flags & PBF_READ) + pb->pb_locked = 1; + good_pages--; + } else if (!PagePrivate(page)) { + unsigned long i, range; + + /* + * In this case page->private holds a bitmap + * of uptodate sectors within the page + */ + ASSERT(blocksize < PAGE_CACHE_SIZE); + range = (offset + nbytes) >> sectorshift; + for (i = offset >> sectorshift; i < range; i++) + if (!test_bit(i, &page->private)) + break; + if (i != range) + good_pages--; + } else { + good_pages--; + } + } + offset = 0; + } + + if (!pb->pb_locked) { + for (pi = 0; pi < page_count; pi++) { + if (pb->pb_pages[pi]) + unlock_page(pb->pb_pages[pi]); + } + } + +mapit: + pb->pb_flags |= _PBF_MEM_ALLOCATED; + if (all_mapped) { + pb->pb_flags |= _PBF_ALL_PAGES_MAPPED; + + /* A single page buffer is always mappable */ + if (page_count == 1) { + pb->pb_addr = (caddr_t) + page_address(pb->pb_pages[0]) + pb->pb_offset; + pb->pb_flags |= PBF_MAPPED; + } else if (flags & PBF_MAPPED) { + if (as_list_len > 64) + purge_addresses(); + pb->pb_addr = vmap(pb->pb_pages, page_count, + VM_MAP, PAGE_KERNEL); + if (pb->pb_addr == NULL) + return -ENOMEM; + pb->pb_addr += pb->pb_offset; + pb->pb_flags |= PBF_MAPPED | _PBF_ADDR_ALLOCATED; + } + } + /* If some pages were found with data in them + * we are not in PBF_NONE state. + */ + if (good_pages != 0) { + pb->pb_flags &= ~(PBF_NONE); + if (good_pages != page_count) { + pb->pb_flags |= PBF_PARTIAL; + } + } + + PB_TRACE(pb, "lookup_pages", (long)good_pages); + + return rval; +} + +/* + * Finding and Reading Buffers + */ + +/* + * _pagebuf_find + * + * Looks up, and creates if absent, a lockable buffer for + * a given range of an inode. The buffer is returned + * locked. 
If other overlapping buffers exist, they are + * released before the new buffer is created and locked, + * which may imply that this call will block until those buffers + * are unlocked. No I/O is implied by this call. + */ +STATIC page_buf_t * +_pagebuf_find( /* find buffer for block */ + pb_target_t *target,/* target for block */ + loff_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + page_buf_flags_t flags, /* PBF_TRYLOCK */ + page_buf_t *new_pb)/* newly allocated buffer */ +{ + loff_t range_base; + size_t range_length; + int hval; + pb_hash_t *h; + struct list_head *p; + page_buf_t *pb; + int not_locked; + + range_base = (ioff << BBSHIFT); + range_length = (isize << BBSHIFT); + + /* Ensure we never do IOs smaller than the sector size */ + BUG_ON(range_length < (1 << target->pbr_sshift)); + + /* Ensure we never do IOs that are not sector aligned */ + BUG_ON(range_base & (loff_t)target->pbr_smask); + + hval = _bhash(target->pbr_bdev, range_base); + h = &pbhash[hval]; + + spin_lock(&h->pb_hash_lock); + list_for_each(p, &h->pb_hash) { + pb = list_entry(p, page_buf_t, pb_hash_list); + + if (pb->pb_target == target && + pb->pb_file_offset == range_base && + pb->pb_buffer_length == range_length && + atomic_read(&pb->pb_hold)) { + /* If we look at something bring it to the + * front of the list for next time + */ + atomic_inc(&pb->pb_hold); + list_move(&pb->pb_hash_list, &h->pb_hash); + goto found; + } + } + + /* No match found */ + if (new_pb) { + _pagebuf_initialize(new_pb, target, range_base, + range_length, flags | _PBF_LOCKABLE); + new_pb->pb_hash_index = hval; + list_add(&new_pb->pb_hash_list, &h->pb_hash); + } else { + PB_STATS_INC(pb_miss_locked); + } + + spin_unlock(&h->pb_hash_lock); + return (new_pb); + +found: + spin_unlock(&h->pb_hash_lock); + + /* Attempt to get the semaphore without sleeping, + * if this does not work then we need to drop the + * spinlock and do a hard attempt on the semaphore. + */ + not_locked = down_trylock(&pb->pb_sema); + if (not_locked) { + if (!(flags & PBF_TRYLOCK)) { + /* wait for buffer ownership */ + PB_TRACE(pb, "get_lock", 0); + pagebuf_lock(pb); + PB_STATS_INC(pb_get_locked_waited); + } else { + /* We asked for a trylock and failed, no need + * to look at file offset and length here, we + * know that this pagebuf at least overlaps our + * pagebuf and is locked, therefore our buffer + * either does not exist, or is this buffer + */ + + pagebuf_rele(pb); + PB_STATS_INC(pb_busy_locked); + return (NULL); + } + } else { + /* trylock worked */ + PB_SET_OWNER(pb); + } + + if (pb->pb_flags & PBF_STALE) + pb->pb_flags &= PBF_MAPPABLE | \ + PBF_MAPPED | \ + _PBF_LOCKABLE | \ + _PBF_ALL_PAGES_MAPPED | \ + _PBF_ADDR_ALLOCATED | \ + _PBF_MEM_ALLOCATED | \ + _PBF_MEM_SLAB; + PB_TRACE(pb, "got_lock", 0); + PB_STATS_INC(pb_get_locked); + return (pb); +} + + +/* + * pagebuf_find + * + * pagebuf_find returns a buffer matching the specified range of + * data for the specified target, if any of the relevant blocks + * are in memory. The buffer may have unallocated holes, if + * some, but not all, of the blocks are in memory. Even where + * pages are present in the buffer, not all of every page may be + * valid. 
+ */
+page_buf_t *
+pagebuf_find(			/* find buffer for block	*/
+				/* if the block is in memory	*/
+	pb_target_t		*target,/* target for block	*/
+	loff_t			ioff,	/* starting offset of range	*/
+	size_t			isize,	/* length of range		*/
+	page_buf_flags_t	flags)	/* PBF_TRYLOCK			*/
+{
+	return _pagebuf_find(target, ioff, isize, flags, NULL);
+}
+
+/*
+ * pagebuf_get
+ *
+ * pagebuf_get assembles a buffer covering the specified range.
+ * Some or all of the blocks in the range may be valid. Storage
+ * in memory for all portions of the buffer will be allocated,
+ * although backing storage may not be. If PBF_READ is set in
+ * flags, pagebuf_iostart is also called.
+ */
+page_buf_t *
+pagebuf_get(			/* allocate a buffer		*/
+	pb_target_t		*target,/* target for buffer	*/
+	loff_t			ioff,	/* starting offset of range	*/
+	size_t			isize,	/* length of range		*/
+	page_buf_flags_t	flags)	/* PBF_TRYLOCK			*/
+{
+	page_buf_t		*pb, *new_pb;
+	int			error;
+
+	new_pb = pagebuf_allocate(flags);
+	if (unlikely(!new_pb))
+		return (NULL);
+
+	pb = _pagebuf_find(target, ioff, isize, flags, new_pb);
+	if (pb != new_pb) {
+		pagebuf_deallocate(new_pb);
+		if (unlikely(!pb))
+			return (NULL);
+	}
+
+	PB_STATS_INC(pb_get);
+
+	/* fill in any missing pages */
+	error = _pagebuf_lookup_pages(pb, pb->pb_target->pbr_mapping, flags);
+	if (unlikely(error)) {
+		pagebuf_free(pb);
+		return (NULL);
+	}
+
+	/*
+	 * Always fill in the block number now, the mapped cases can do
+	 * their own overlay of this later.
+	 */
+	pb->pb_bn = ioff;
+	pb->pb_count_desired = pb->pb_buffer_length;
+
+	if (flags & PBF_READ) {
+		if (PBF_NOT_DONE(pb)) {
+			PB_TRACE(pb, "get_read", (unsigned long)flags);
+			PB_STATS_INC(pb_get_read);
+			pagebuf_iostart(pb, flags);
+		} else if (flags & PBF_ASYNC) {
+			PB_TRACE(pb, "get_read_async", (unsigned long)flags);
+			/*
+			 * Read-ahead request already satisfied,
+			 * drop the buffer
+			 */
+			if (flags & (PBF_LOCK | PBF_TRYLOCK))
+				pagebuf_unlock(pb);
+			pagebuf_rele(pb);
+			return NULL;
+		} else {
+			PB_TRACE(pb, "get_read_done", (unsigned long)flags);
+			/* We do not want read in the flags */
+			pb->pb_flags &= ~PBF_READ;
+		}
+	} else {
+		PB_TRACE(pb, "get_write", (unsigned long)flags);
+	}
+	return (pb);
+}
+
+/*
+ * Create a skeletal pagebuf (no pages associated with it).
+ */
+page_buf_t *
+pagebuf_lookup(
+	struct pb_target	*target,
+	loff_t			ioff,
+	size_t			isize,
+	page_buf_flags_t	flags)
+{
+	page_buf_t		*pb;
+
+	pb = pagebuf_allocate(flags);
+	if (pb) {
+		_pagebuf_initialize(pb, target, ioff, isize, flags);
+	}
+	return pb;
+}
+
+/*
+ * If we are not low on memory then do the readahead in a
+ * deadlock-safe manner.
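+ *
+ * An illustrative call, mirroring the xfs_baread() macro in xfs_buf.h
+ * ("blkno" and "len" are assumed to describe the range just read, so
+ * this prefetches the blocks that follow it):
+ *
+ *	pagebuf_readahead(target, blkno + len, len, PBF_DONT_BLOCK);
+ *
+ * The request is dropped silently if the device queues are congested.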
+ */
+void
+pagebuf_readahead(
+	pb_target_t		*target,
+	loff_t			ioff,
+	size_t			isize,
+	page_buf_flags_t	flags)
+{
+	struct backing_dev_info *bdi;
+
+	bdi = target->pbr_mapping->backing_dev_info;
+	if (bdi_read_congested(bdi))
+		return;
+	if (bdi_write_congested(bdi))
+		return;
+
+	flags |= (PBF_TRYLOCK|PBF_READ|PBF_ASYNC|PBF_MAPPABLE|PBF_READ_AHEAD);
+	pagebuf_get(target, ioff, isize, flags);
+}
+
+page_buf_t *
+pagebuf_get_empty(
+	size_t			len,
+	pb_target_t		*target)
+{
+	page_buf_t		*pb;
+
+	pb = pagebuf_allocate(_PBF_LOCKABLE);
+	if (pb)
+		_pagebuf_initialize(pb, target, 0, len, _PBF_LOCKABLE);
+	return pb;
+}
+
+static inline struct page *
+mem_to_page(
+	void			*addr)
+{
+	if (((unsigned long)addr < VMALLOC_START) ||
+	    ((unsigned long)addr >= VMALLOC_END)) {
+		return virt_to_page(addr);
+	} else {
+		return vmalloc_to_page(addr);
+	}
+}
+
+int
+pagebuf_associate_memory(
+	page_buf_t		*pb,
+	void			*mem,
+	size_t			len)
+{
+	int			rval;
+	int			i = 0;
+	size_t			ptr;
+	size_t			end, end_cur;
+	off_t			offset;
+	int			page_count;
+
+	page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+	offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK);
+	if (offset && (len > PAGE_CACHE_SIZE))
+		page_count++;
+
+	/* Free any previous set of page pointers */
+	if (pb->pb_pages && (pb->pb_pages != pb->pb_page_array)) {
+		kfree(pb->pb_pages);
+	}
+	pb->pb_pages = NULL;
+	pb->pb_addr = mem;
+
+	rval = _pagebuf_get_pages(pb, page_count, 0);
+	if (rval)
+		return rval;
+
+	pb->pb_offset = offset;
+	ptr = (size_t) mem & PAGE_CACHE_MASK;
+	end = PAGE_CACHE_ALIGN((size_t) mem + len);
+	end_cur = end;
+	/* set up first page */
+	pb->pb_pages[0] = mem_to_page(mem);
+
+	ptr += PAGE_CACHE_SIZE;
+	pb->pb_page_count = ++i;
+	while (ptr < end) {
+		pb->pb_pages[i] = mem_to_page((void *)ptr);
+		pb->pb_page_count = ++i;
+		ptr += PAGE_CACHE_SIZE;
+	}
+	pb->pb_locked = 0;
+
+	pb->pb_count_desired = pb->pb_buffer_length = len;
+	pb->pb_flags |= PBF_MAPPED;
+
+	return 0;
+}
+
+page_buf_t *
+pagebuf_get_no_daddr(
+	size_t			len,
+	pb_target_t		*target)
+{
+	int			rval;
+	void			*rmem = NULL;
+	page_buf_flags_t	flags = _PBF_LOCKABLE | PBF_FORCEIO;
+	page_buf_t		*pb;
+	size_t			tlen = 0;
+
+	if (unlikely(len > 0x20000))
+		return NULL;
+
+	pb = pagebuf_allocate(flags);
+	if (!pb)
+		return NULL;
+
+	_pagebuf_initialize(pb, target, 0, len, flags);
+
+	do {
+		if (tlen == 0) {
+			tlen = len;	/* first time */
+		} else {
+			kfree(rmem);	/* free the mem from the previous try */
+			tlen <<= 1;	/* double the size and try again */
+		}
+		if ((rmem = kmalloc(tlen, GFP_KERNEL)) == NULL) {
+			pagebuf_free(pb);
+			return NULL;
+		}
+	} while ((size_t)rmem != ((size_t)rmem & ~target->pbr_smask));
+
+	if ((rval = pagebuf_associate_memory(pb, rmem, len)) != 0) {
+		kfree(rmem);
+		pagebuf_free(pb);
+		return NULL;
+	}
+	/* otherwise pagebuf_free just ignores it */
+	pb->pb_flags |= (_PBF_MEM_ALLOCATED | _PBF_MEM_SLAB);
+	PB_CLEAR_OWNER(pb);
+	up(&pb->pb_sema);	/* Return unlocked pagebuf */
+
+	PB_TRACE(pb, "no_daddr", rmem);
+
+	return pb;
+}
+
+
+/*
+ * pagebuf_hold
+ *
+ * Increment reference count on buffer, to hold the buffer concurrently
+ * with another thread which may release (free) the buffer asynchronously.
+ *
+ * Must hold the buffer already to call this function.
+ */
+void
+pagebuf_hold(
+	page_buf_t		*pb)
+{
+	atomic_inc(&pb->pb_hold);
+	PB_TRACE(pb, "hold", 0);
+}
+
+/*
+ * pagebuf_rele
+ *
+ * pagebuf_rele releases a hold on the specified buffer. If the
+ * hold count is 1, pagebuf_rele calls pagebuf_free.
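+ *
+ * Holds and releases must balance; an illustrative pairing (sketch
+ * only, "another context" stands for any asynchronous user):
+ *
+ *	pagebuf_hold(pb);	... take an extra reference ...
+ *	... hand pb to another context ...
+ *	pagebuf_rele(pb);	... drop the reference when done ...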
+ */
+void
+pagebuf_rele(
+	page_buf_t		*pb)
+{
+	PB_TRACE(pb, "rele", pb->pb_relse);
+
+	if (atomic_dec_and_test(&pb->pb_hold)) {
+		int		do_free = 1;
+
+		if (pb->pb_relse) {
+			atomic_inc(&pb->pb_hold);
+			(*(pb->pb_relse)) (pb);
+			do_free = 0;
+		}
+		if (pb->pb_flags & PBF_DELWRI) {
+			pb->pb_flags |= PBF_ASYNC;
+			atomic_inc(&pb->pb_hold);
+			pagebuf_delwri_queue(pb, 0);
+			do_free = 0;
+		} else if (pb->pb_flags & PBF_FS_MANAGED) {
+			do_free = 0;
+		}
+
+		if (do_free) {
+			pagebuf_free(pb);
+		}
+	}
+}
+
+
+/*
+ * Mutual exclusion on buffers. Locking model:
+ *
+ * Buffers associated with inodes for which buffer locking
+ * is not enabled are not protected by semaphores, and are
+ * assumed to be exclusively owned by the caller. There is a
+ * spinlock in the buffer, used by the caller when concurrent
+ * access is possible.
+ */
+
+/*
+ * pagebuf_cond_lock
+ *
+ * pagebuf_cond_lock locks a buffer object, if it is not already locked.
+ * Note that this in no way locks the underlying pages, so it is only
+ * useful for synchronizing concurrent use of page buffer objects, not
+ * for synchronizing independent access to the underlying pages.
+ */
+int
+pagebuf_cond_lock(		/* lock buffer, if not locked	*/
+				/* (returns -EBUSY if locked)	*/
+	page_buf_t		*pb)
+{
+	int			locked;
+
+	ASSERT(pb->pb_flags & _PBF_LOCKABLE);
+	locked = down_trylock(&pb->pb_sema) == 0;
+	if (locked) {
+		PB_SET_OWNER(pb);
+	}
+	PB_TRACE(pb, "cond_lock", (long)locked);
+	return(locked ? 0 : -EBUSY);
+}
+
+/*
+ * pagebuf_lock_value
+ *
+ * Return lock value for a pagebuf
+ */
+int
+pagebuf_lock_value(
+	page_buf_t		*pb)
+{
+	ASSERT(pb->pb_flags & _PBF_LOCKABLE);
+	return(atomic_read(&pb->pb_sema.count));
+}
+
+/*
+ * pagebuf_lock
+ *
+ * pagebuf_lock locks a buffer object. Note that this in no way
+ * locks the underlying pages, so it is only useful for synchronizing
+ * concurrent use of page buffer objects, not for synchronizing independent
+ * access to the underlying pages.
+ */
+int
+pagebuf_lock(
+	page_buf_t		*pb)
+{
+	ASSERT(pb->pb_flags & _PBF_LOCKABLE);
+
+	PB_TRACE(pb, "lock", 0);
+	if (atomic_read(&pb->pb_io_remaining))
+		blk_run_queues();
+	down(&pb->pb_sema);
+	PB_SET_OWNER(pb);
+	PB_TRACE(pb, "locked", 0);
+	return 0;
+}
+
+/*
+ * pagebuf_unlock
+ *
+ * pagebuf_unlock releases the lock on the buffer object created by
+ * pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying
+ * pages created by pagebuf_pin).
+ */
+void
+pagebuf_unlock(			/* unlock buffer		*/
+	page_buf_t		*pb)	/* buffer to unlock	*/
+{
+	ASSERT(pb->pb_flags & _PBF_LOCKABLE);
+	PB_CLEAR_OWNER(pb);
+	up(&pb->pb_sema);
+	PB_TRACE(pb, "unlock", 0);
+}
+
+
+/*
+ * Pinning Buffer Storage in Memory
+ */
+
+/*
+ * pagebuf_pin
+ *
+ * pagebuf_pin locks all of the memory represented by a buffer in
+ * memory. Multiple calls to pagebuf_pin and pagebuf_unpin, for
+ * the same or different buffers affecting a given page, will
+ * properly count the number of outstanding "pin" requests. The
+ * buffer may be released after the pagebuf_pin and a different
+ * buffer used when calling pagebuf_unpin, if desired.
+ * pagebuf_pin should be used by the file system when it wants to
+ * be assured that no attempt will be made to force the affected
+ * memory to disk. It does not assure that a given logical page
+ * will not be moved to a different physical page.
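+ *
+ * An illustrative use, sketching a transaction-commit style caller
+ * (the "log the contents" step stands in for filesystem-specific work):
+ *
+ *	pagebuf_pin(pb);
+ *	... log the buffer contents; writeback is now held off ...
+ *	pagebuf_unpin(pb);
+ *
+ * A writer waits for the pin count to reach zero via
+ * _pagebuf_wait_unpin() below before issuing the write.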
+ */
+void
+pagebuf_pin(
+	page_buf_t		*pb)
+{
+	atomic_inc(&pb->pb_pin_count);
+	PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter);
+}
+
+/*
+ * pagebuf_unpin
+ *
+ * pagebuf_unpin reverses the locking of memory performed by
+ * pagebuf_pin. Note that both functions affect the logical
+ * pages associated with the buffer, not the buffer itself.
+ */
+void
+pagebuf_unpin(
+	page_buf_t		*pb)
+{
+	if (atomic_dec_and_test(&pb->pb_pin_count)) {
+		wake_up_all(&pb->pb_waiters);
+	}
+	PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter);
+}
+
+int
+pagebuf_ispin(
+	page_buf_t		*pb)
+{
+	return atomic_read(&pb->pb_pin_count);
+}
+
+/*
+ * pagebuf_wait_unpin
+ *
+ * pagebuf_wait_unpin waits until all of the memory associated
+ * with the buffer is no longer locked in memory. It returns
+ * immediately if none of the affected pages are locked.
+ */
+static inline void
+_pagebuf_wait_unpin(
+	page_buf_t		*pb)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	if (atomic_read(&pb->pb_pin_count) == 0)
+		return;
+
+	add_wait_queue(&pb->pb_waiters, &wait);
+	for (;;) {
+		current->state = TASK_UNINTERRUPTIBLE;
+		if (atomic_read(&pb->pb_pin_count) == 0)
+			break;
+		if (atomic_read(&pb->pb_io_remaining))
+			blk_run_queues();
+		schedule();
+	}
+	remove_wait_queue(&pb->pb_waiters, &wait);
+	current->state = TASK_RUNNING;
+}
+
+/*
+ * Buffer Utility Routines
+ */
+
+/*
+ * pagebuf_iodone
+ *
+ * pagebuf_iodone marks a buffer for which I/O is in progress as done
+ * with respect to that I/O. The pb_iodone routine, if present, will
+ * be called as a side-effect.
+ */
+void
+pagebuf_iodone_work(
+	void			*v)
+{
+	page_buf_t		*pb = (page_buf_t *)v;
+
+	if (pb->pb_iodone) {
+		(*(pb->pb_iodone)) (pb);
+		return;
+	}
+
+	if (pb->pb_flags & PBF_ASYNC) {
+		if ((pb->pb_flags & _PBF_LOCKABLE) && !pb->pb_relse)
+			pagebuf_unlock(pb);
+		pagebuf_rele(pb);
+	}
+}
+
+void
+pagebuf_iodone(
+	page_buf_t		*pb,
+	int			dataio,
+	int			schedule)
+{
+	pb->pb_flags &= ~(PBF_READ | PBF_WRITE);
+	if (pb->pb_error == 0) {
+		pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE);
+	}
+
+	PB_TRACE(pb, "iodone", pb->pb_iodone);
+
+	if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) {
+		if (schedule) {
+			INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb);
+			queue_work(dataio ? pagebuf_dataio_workqueue :
+				pagebuf_logio_workqueue, &pb->pb_iodone_work);
+		} else {
+			pagebuf_iodone_work(pb);
+		}
+	} else {
+		up(&pb->pb_iodonesema);
+	}
+}
+
+/*
+ * pagebuf_ioerror
+ *
+ * pagebuf_ioerror sets the error code for a buffer.
+ */
+void
+pagebuf_ioerror(		/* mark/clear buffer error flag */
+	page_buf_t		*pb,	/* buffer to mark		*/
+	unsigned int		error)	/* error to store (0 if none)	*/
+{
+	pb->pb_error = error;
+	PB_TRACE(pb, "ioerror", (unsigned long)error);
+}
+
+/*
+ * pagebuf_iostart
+ *
+ * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied.
+ * If necessary, it will arrange for any disk space allocation required,
+ * and it will break up the request if the block mappings require it.
+ * The pb_iodone routine in the buffer supplied will only be called
+ * when all of the subsidiary I/O requests, if any, have been completed.
+ * pagebuf_iostart calls the pagebuf_ioinitiate routine, or
+ * pagebuf_iorequest if the former routine is not defined, to start
+ * the I/O on a given low-level request.
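+ *
+ * A synchronous write can be issued as, for example ("pb" is assumed
+ * to have been set up via pagebuf_get() and pagebuf_iomove()):
+ *
+ *	error = pagebuf_iostart(pb, PBF_WRITE | PBF_SYNC);
+ *
+ * Without PBF_ASYNC this waits internally via pagebuf_iowait() and
+ * returns the final pb_error value.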
+ */ +int +pagebuf_iostart( /* start I/O on a buffer */ + page_buf_t *pb, /* buffer to start */ + page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ + /* PBF_WRITE, PBF_DELWRI, */ + /* PBF_SYNC, PBF_DONT_BLOCK */ +{ + int status = 0; + + PB_TRACE(pb, "iostart", (unsigned long)flags); + + if (flags & PBF_DELWRI) { + pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); + pb->pb_flags |= flags & + (PBF_DELWRI | PBF_ASYNC | PBF_SYNC); + pagebuf_delwri_queue(pb, 1); + return status; + } + + pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | \ + PBF_DELWRI | PBF_READ_AHEAD | PBF_RUN_QUEUES); + pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ + PBF_SYNC | PBF_READ_AHEAD | PBF_RUN_QUEUES); + + BUG_ON(pb->pb_bn == PAGE_BUF_DADDR_NULL); + + /* For writes allow an alternate strategy routine to precede + * the actual I/O request (which may not be issued at all in + * a shutdown situation, for example). + */ + status = (flags & PBF_WRITE) ? + pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); + + /* Wait for I/O if we are not an async request. + * Note: async I/O request completion will release the buffer, + * and that can already be done by this point. So using the + * buffer pointer from here on, after async I/O, is invalid. + */ + if (!status && !(flags & PBF_ASYNC)) + status = pagebuf_iowait(pb); + + return status; +} + +/* + * Helper routine for pagebuf_iorequest + */ + +STATIC __inline__ int +_pagebuf_iolocked( + page_buf_t *pb) +{ + ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); + if (pb->pb_flags & PBF_READ) + return pb->pb_locked; + return ((pb->pb_flags & _PBF_LOCKABLE) == 0); +} + +STATIC __inline__ void +_pagebuf_iodone( + page_buf_t *pb, + int schedule) +{ + if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { + pb->pb_locked = 0; + pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule); + } +} + +STATIC int +bio_end_io_pagebuf( + struct bio *bio, + unsigned int bytes_done, + int error) +{ + page_buf_t *pb = (page_buf_t *)bio->bi_private; + unsigned int i, blocksize = pb->pb_target->pbr_bsize; + unsigned int sectorshift = pb->pb_target->pbr_sshift; + struct bio_vec *bvec = bio->bi_io_vec; + + if (bio->bi_size) + return 1; + + if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + pb->pb_error = EIO; + + for (i = 0; i < bio->bi_vcnt; i++, bvec++) { + struct page *page = bvec->bv_page; + + if (pb->pb_error) { + SetPageError(page); + } else if (blocksize == PAGE_CACHE_SIZE) { + SetPageUptodate(page); + } else if (!PagePrivate(page)) { + unsigned int j, range; + + ASSERT(blocksize < PAGE_CACHE_SIZE); + range = (bvec->bv_offset + bvec->bv_len) >> sectorshift; + for (j = bvec->bv_offset >> sectorshift; j < range; j++) + set_bit(j, &page->private); + if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1)) + SetPageUptodate(page); + } + + if (_pagebuf_iolocked(pb)) { + unlock_page(page); + } + } + + _pagebuf_iodone(pb, 1); + bio_put(bio); + return 0; +} + +void +_pagebuf_ioapply( + page_buf_t *pb) +{ + int i, map_i, total_nr_pages, nr_pages; + struct bio *bio; + int offset = pb->pb_offset; + int size = pb->pb_count_desired; + sector_t sector = pb->pb_bn; + unsigned int blocksize = pb->pb_target->pbr_bsize; + int locking = _pagebuf_iolocked(pb); + + total_nr_pages = pb->pb_page_count; + map_i = 0; + + /* Special code path for reading a sub page size pagebuf in -- + * we populate up the whole page, and hence the other metadata + * in the same page. This optimization is only valid when the + * filesystem block size and the page size are equal. 
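+	 * For example, a single sector read is rounded up here to a
+	 * full page, so other metadata sharing that page is brought
+	 * uptodate as a side effect of the one I/O.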
+ */ + if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && + (pb->pb_flags & PBF_READ) && locking && + (blocksize == PAGE_CACHE_SIZE)) { + bio = bio_alloc(GFP_NOIO, 1); + + bio->bi_bdev = pb->pb_target->pbr_bdev; + bio->bi_sector = sector - (offset >> BBSHIFT); + bio->bi_end_io = bio_end_io_pagebuf; + bio->bi_private = pb; + + bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); + size = 0; + + atomic_inc(&pb->pb_io_remaining); + + goto submit_io; + } + + /* Lock down the pages which we need to for the request */ + if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { + for (i = 0; size; i++) { + int nbytes = PAGE_CACHE_SIZE - offset; + struct page *page = pb->pb_pages[i]; + + if (nbytes > size) + nbytes = size; + + lock_page(page); + + size -= nbytes; + offset = 0; + } + offset = pb->pb_offset; + size = pb->pb_count_desired; + } + +next_chunk: + atomic_inc(&pb->pb_io_remaining); + nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); + if (nr_pages > total_nr_pages) + nr_pages = total_nr_pages; + + bio = bio_alloc(GFP_NOIO, nr_pages); + bio->bi_bdev = pb->pb_target->pbr_bdev; + bio->bi_sector = sector; + bio->bi_end_io = bio_end_io_pagebuf; + bio->bi_private = pb; + + for (; size && nr_pages; nr_pages--, map_i++) { + int nbytes = PAGE_CACHE_SIZE - offset; + + if (nbytes > size) + nbytes = size; + + if (bio_add_page(bio, pb->pb_pages[map_i], + nbytes, offset) < nbytes) + break; + + offset = 0; + sector += nbytes >> BBSHIFT; + size -= nbytes; + total_nr_pages--; + } + +submit_io: + if (likely(bio->bi_size)) { + submit_bio((pb->pb_flags & PBF_READ) ? READ : WRITE, bio); + if (size) + goto next_chunk; + } else { + bio_put(bio); + pagebuf_ioerror(pb, EIO); + } + + if (pb->pb_flags & PBF_RUN_QUEUES) { + pb->pb_flags &= ~PBF_RUN_QUEUES; + if (atomic_read(&pb->pb_io_remaining) > 1) + blk_run_queues(); + } +} + +/* + * pagebuf_iorequest + * + * pagebuf_iorequest is the core I/O request routine. + * It assumes that the buffer is well-formed and + * mapped and ready for physical I/O, unlike + * pagebuf_iostart() and pagebuf_iophysio(). Those + * routines call the pagebuf_ioinitiate routine to start I/O, + * if it is present, or else call pagebuf_iorequest() + * directly if the pagebuf_ioinitiate routine is not present. + * + * This function will be responsible for ensuring access to the + * pages is restricted whilst I/O is in progress - for locking + * pagebufs the pagebuf lock is the mediator, for non-locking + * pagebufs the pages will be locked. In the locking case we + * need to use the pagebuf lock as multiple meta-data buffers + * will reference the same page. + */ +int +pagebuf_iorequest( /* start real I/O */ + page_buf_t *pb) /* buffer to convey to device */ +{ + PB_TRACE(pb, "iorequest", 0); + + if (pb->pb_flags & PBF_DELWRI) { + pagebuf_delwri_queue(pb, 1); + return 0; + } + + if (pb->pb_flags & PBF_WRITE) { + _pagebuf_wait_unpin(pb); + } + + pagebuf_hold(pb); + + /* Set the count to 1 initially, this will stop an I/O + * completion callout which happens before we have started + * all the I/O from calling pagebuf_iodone too early. + */ + atomic_set(&pb->pb_io_remaining, 1); + _pagebuf_ioapply(pb); + _pagebuf_iodone(pb, 0); + + pagebuf_rele(pb); + return 0; +} + +/* + * pagebuf_iowait + * + * pagebuf_iowait waits for I/O to complete on the buffer supplied. + * It returns immediately if no I/O is pending. In any case, it returns + * the error code, if any, or 0 if there is no error. 
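+ *
+ * The usual split between starting and waiting, essentially the
+ * XFS_bwrite() pattern from xfs_buf.h (sketch only):
+ *
+ *	pagebuf_iorequest(pb);		... queue the I/O ...
+ *	error = pagebuf_iowait(pb);	... sleep until it completes ...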
+ */ +int +pagebuf_iowait( + page_buf_t *pb) +{ + PB_TRACE(pb, "iowait", 0); + if (atomic_read(&pb->pb_io_remaining)) + blk_run_queues(); + down(&pb->pb_iodonesema); + PB_TRACE(pb, "iowaited", (long)pb->pb_error); + return pb->pb_error; +} + +STATIC void * +pagebuf_mapout_locked( + page_buf_t *pb) +{ + void *old_addr = NULL; + + if (pb->pb_flags & PBF_MAPPED) { + if (pb->pb_flags & _PBF_ADDR_ALLOCATED) + old_addr = pb->pb_addr - pb->pb_offset; + pb->pb_addr = NULL; + pb->pb_flags &= ~(PBF_MAPPED | _PBF_ADDR_ALLOCATED); + } + + return old_addr; /* Caller must free the address space, + * we are under a spin lock, probably + * not safe to do vfree here + */ +} + +caddr_t +pagebuf_offset( + page_buf_t *pb, + size_t offset) +{ + struct page *page; + + offset += pb->pb_offset; + + page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; + return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); +} + +/* + * pagebuf_iomove + * + * Move data into or out of a buffer. + */ +void +pagebuf_iomove( + page_buf_t *pb, /* buffer to process */ + size_t boff, /* starting buffer offset */ + size_t bsize, /* length to copy */ + caddr_t data, /* data address */ + page_buf_rw_t mode) /* read/write flag */ +{ + size_t bend, cpoff, csize; + struct page *page; + + bend = boff + bsize; + while (boff < bend) { + page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; + cpoff = page_buf_poff(boff + pb->pb_offset); + csize = min_t(size_t, + PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); + + ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); + + switch (mode) { + case PBRW_ZERO: + memset(page_address(page) + cpoff, 0, csize); + break; + case PBRW_READ: + memcpy(data, page_address(page) + cpoff, csize); + break; + case PBRW_WRITE: + memcpy(page_address(page) + cpoff, data, csize); + } + + boff += csize; + data += csize; + } +} + + +/* + * Pagebuf delayed write buffer handling + */ + +STATIC LIST_HEAD(pbd_delwrite_queue); +STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED; + +STATIC void +pagebuf_delwri_queue( + page_buf_t *pb, + int unlock) +{ + PB_TRACE(pb, "delwri_q", (long)unlock); + spin_lock(&pbd_delwrite_lock); + /* If already in the queue, dequeue and place at tail */ + if (!list_empty(&pb->pb_list)) { + if (unlock) { + atomic_dec(&pb->pb_hold); + } + list_del(&pb->pb_list); + } + + list_add_tail(&pb->pb_list, &pbd_delwrite_queue); + pb->pb_flushtime = jiffies + pb_params.age_buffer.val; + spin_unlock(&pbd_delwrite_lock); + + if (unlock && (pb->pb_flags & _PBF_LOCKABLE)) { + pagebuf_unlock(pb); + } +} + +void +pagebuf_delwri_dequeue( + page_buf_t *pb) +{ + PB_TRACE(pb, "delwri_uq", 0); + spin_lock(&pbd_delwrite_lock); + list_del_init(&pb->pb_list); + pb->pb_flags &= ~PBF_DELWRI; + spin_unlock(&pbd_delwrite_lock); +} + +STATIC void +pagebuf_runall_queues( + struct workqueue_struct *queue) +{ + flush_workqueue(queue); +} + +/* Defines for pagebuf daemon */ +STATIC DECLARE_COMPLETION(pagebuf_daemon_done); +STATIC struct task_struct *pagebuf_daemon_task; +STATIC int pagebuf_daemon_active; +STATIC int force_flush; + +STATIC void +pagebuf_daemon_wakeup(void) +{ + force_flush = 1; + barrier(); + wake_up_process(pagebuf_daemon_task); +} + +STATIC int +pagebuf_daemon( + void *data) +{ + int count; + page_buf_t *pb; + struct list_head *curr, *next, tmp; + + /* Set up the thread */ + daemonize("pagebufd"); + current->flags |= PF_MEMALLOC; + + pagebuf_daemon_task = current; + pagebuf_daemon_active = 1; + barrier(); + + INIT_LIST_HEAD(&tmp); + do { + /* swsusp */ + if (current->flags & PF_FREEZE) + 
refrigerator(PF_IOTHREAD); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(pb_params.flush_interval.val); + + spin_lock(&pbd_delwrite_lock); + + count = 0; + list_for_each_safe(curr, next, &pbd_delwrite_queue) { + pb = list_entry(curr, page_buf_t, pb_list); + + PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); + + if ((pb->pb_flags & PBF_DELWRI) && !pagebuf_ispin(pb) && + (((pb->pb_flags & _PBF_LOCKABLE) == 0) || + !pagebuf_cond_lock(pb))) { + + if (!force_flush && + time_before(jiffies, pb->pb_flushtime)) { + pagebuf_unlock(pb); + break; + } + + pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags |= PBF_WRITE; + + list_del(&pb->pb_list); + list_add(&pb->pb_list, &tmp); + + count++; + } + } + + spin_unlock(&pbd_delwrite_lock); + while (!list_empty(&tmp)) { + pb = list_entry(tmp.next, page_buf_t, pb_list); + list_del_init(&pb->pb_list); + + pagebuf_iostrategy(pb); + } + + if (as_list_len > 0) + purge_addresses(); + if (count) + blk_run_queues(); + + force_flush = 0; + } while (pagebuf_daemon_active); + + complete_and_exit(&pagebuf_daemon_done, 0); +} + +void +pagebuf_delwri_flush( + pb_target_t *target, + u_long flags, + int *pinptr) +{ + page_buf_t *pb; + struct list_head *curr, *next, tmp; + int pincount = 0; + int flush_cnt = 0; + + pagebuf_runall_queues(pagebuf_dataio_workqueue); + pagebuf_runall_queues(pagebuf_logio_workqueue); + + spin_lock(&pbd_delwrite_lock); + INIT_LIST_HEAD(&tmp); + + list_for_each_safe(curr, next, &pbd_delwrite_queue) { + pb = list_entry(curr, page_buf_t, pb_list); + + /* + * Skip other targets, markers and in progress buffers + */ + + if ((pb->pb_flags == 0) || (pb->pb_target != target) || + !(pb->pb_flags & PBF_DELWRI)) { + continue; + } + + PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); + if (pagebuf_ispin(pb)) { + pincount++; + continue; + } + + pb->pb_flags &= ~PBF_DELWRI; + pb->pb_flags |= PBF_WRITE; + list_move(&pb->pb_list, &tmp); + } + /* ok found all the items that can be worked on + * drop the lock and process the private list */ + spin_unlock(&pbd_delwrite_lock); + + list_for_each_safe(curr, next, &tmp) { + pb = list_entry(curr, page_buf_t, pb_list); + + if (flags & PBDF_WAIT) + pb->pb_flags &= ~PBF_ASYNC; + else + list_del_init(curr); + + pagebuf_lock(pb); + pagebuf_iostrategy(pb); + if (++flush_cnt > 32) { + blk_run_queues(); + flush_cnt = 0; + } + } + + blk_run_queues(); + + while (!list_empty(&tmp)) { + pb = list_entry(tmp.next, page_buf_t, pb_list); + + list_del_init(&pb->pb_list); + pagebuf_iowait(pb); + if (!pb->pb_relse) + pagebuf_unlock(pb); + pagebuf_rele(pb); + } + + if (pinptr) + *pinptr = pincount; +} + +STATIC int +pagebuf_daemon_start(void) +{ + int rval; + + pagebuf_logio_workqueue = create_workqueue("xfslogd"); + if (!pagebuf_logio_workqueue) + return -ENOMEM; + + pagebuf_dataio_workqueue = create_workqueue("xfsdatad"); + if (!pagebuf_dataio_workqueue) { + destroy_workqueue(pagebuf_logio_workqueue); + return -ENOMEM; + } + + rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES); + if (rval < 0) { + destroy_workqueue(pagebuf_logio_workqueue); + destroy_workqueue(pagebuf_dataio_workqueue); + } + + return rval; +} + +/* + * pagebuf_daemon_stop + * + * Note: do not mark as __exit, it is called from pagebuf_terminate. 
+ */ +STATIC void +pagebuf_daemon_stop(void) +{ + pagebuf_daemon_active = 0; + barrier(); + wait_for_completion(&pagebuf_daemon_done); + + destroy_workqueue(pagebuf_logio_workqueue); + destroy_workqueue(pagebuf_dataio_workqueue); +} + + +/* + * Pagebuf sysctl interface + */ + +STATIC int +pb_stats_clear_handler( + ctl_table *ctl, + int write, + struct file *filp, + void *buffer, + size_t *lenp) +{ + int c, ret; + int *valp = ctl->data; + + ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp); + + if (!ret && write && *valp) { + printk("XFS Clearing pbstats\n"); + for (c = 0; c < NR_CPUS; c++) { + if (!cpu_possible(c)) continue; + memset(&per_cpu(pbstats, c), 0, + sizeof(struct pbstats)); + } + pb_params.stats_clear.val = 0; + } + + return ret; +} + +STATIC struct ctl_table_header *pagebuf_table_header; + +STATIC ctl_table pagebuf_table[] = { + {PB_FLUSH_INT, "flush_int", &pb_params.flush_interval.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &pb_params.flush_interval.min, &pb_params.flush_interval.max}, + + {PB_FLUSH_AGE, "flush_age", &pb_params.age_buffer.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &pb_params.age_buffer.min, &pb_params.age_buffer.max}, + + {PB_STATS_CLEAR, "stats_clear", &pb_params.stats_clear.val, + sizeof(int), 0644, NULL, &pb_stats_clear_handler, + &sysctl_intvec, NULL, + &pb_params.stats_clear.min, &pb_params.stats_clear.max}, + +#ifdef PAGEBUF_TRACE + {PB_DEBUG, "debug", &pb_params.debug.val, + sizeof(int), 0644, NULL, &proc_dointvec_minmax, + &sysctl_intvec, NULL, + &pb_params.debug.min, &pb_params.debug.max}, +#endif + {0} +}; + +STATIC ctl_table pagebuf_dir_table[] = { + {VM_PAGEBUF, "pagebuf", NULL, 0, 0555, pagebuf_table}, + {0} +}; + +STATIC ctl_table pagebuf_root_table[] = { + {CTL_VM, "vm", NULL, 0, 0555, pagebuf_dir_table}, + {0} +}; + +#ifdef CONFIG_PROC_FS +STATIC int +pagebuf_readstats( + char *buffer, + char **start, + off_t offset, + int count, + int *eof, + void *data) +{ + int c, i, len, val; + + len = 0; + len += sprintf(buffer + len, "pagebuf"); + for (i = 0; i < sizeof(struct pbstats) / sizeof(u_int32_t); i++) { + val = 0; + for (c = 0 ; c < NR_CPUS; c++) { + if (!cpu_possible(c)) continue; + val += *(((u_int32_t*)&per_cpu(pbstats, c) + i)); + } + len += sprintf(buffer + len, " %u", val); + } + buffer[len++] = '\n'; + + if (offset >= len) { + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) + return count; + *eof = 1; + + return len; +} +#endif /* CONFIG_PROC_FS */ + +/* + * Initialization and Termination + */ + +int __init +pagebuf_init(void) +{ + int i; + + pagebuf_table_header = register_sysctl_table(pagebuf_root_table, 1); + +#ifdef CONFIG_PROC_FS + if (proc_mkdir("fs/pagebuf", 0)) + create_proc_read_entry( + "fs/pagebuf/stat", 0, 0, pagebuf_readstats, NULL); +#endif + + pagebuf_cache = kmem_cache_create("page_buf_t", sizeof(page_buf_t), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (pagebuf_cache == NULL) { + printk("pagebuf: couldn't init pagebuf cache\n"); + pagebuf_terminate(); + return -ENOMEM; + } + + for (i = 0; i < NHASH; i++) { + spin_lock_init(&pbhash[i].pb_hash_lock); + INIT_LIST_HEAD(&pbhash[i].pb_hash); + } + +#ifdef PAGEBUF_TRACE + pagebuf_trace_buf = ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); +#endif + + pagebuf_daemon_start(); + return 0; +} + + +/* + * pagebuf_terminate. + * + * Note: do not mark as __exit, this is also called from the __init code. 
+ */ +void +pagebuf_terminate(void) +{ + pagebuf_daemon_stop(); + +#ifdef PAGEBUF_TRACE + ktrace_free(pagebuf_trace_buf); +#endif + + kmem_cache_destroy(pagebuf_cache); + + unregister_sysctl_table(pagebuf_table_header); +#ifdef CONFIG_PROC_FS + remove_proc_entry("fs/pagebuf/stat", NULL); + remove_proc_entry("fs/pagebuf", NULL); +#endif +} diff -Nru a/fs/xfs/linux/xfs_buf.h b/fs/xfs/linux/xfs_buf.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/linux/xfs_buf.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,616 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +/* + * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI + */ + +#ifndef __XFS_BUF_H__ +#define __XFS_BUF_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Base types + */ + +/* daddr must be signed since -1 is used for bmaps that are not yet allocated */ +typedef loff_t page_buf_daddr_t; + +#define PAGE_BUF_DADDR_NULL ((page_buf_daddr_t) (-1LL)) + +#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) +#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) +#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) +#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) + +typedef enum page_buf_rw_e { + PBRW_READ = 1, /* transfer into target memory */ + PBRW_WRITE = 2, /* transfer from target memory */ + PBRW_ZERO = 3 /* Zero target memory */ +} page_buf_rw_t; + + +typedef enum page_buf_flags_e { /* pb_flags values */ + PBF_READ = (1 << 0), /* buffer intended for reading from device */ + PBF_WRITE = (1 << 1), /* buffer intended for writing to device */ + PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */ + PBF_PARTIAL = (1 << 3), /* buffer partially read */ + PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ + PBF_NONE = (1 << 5), /* buffer not read at all */ + PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ + PBF_SYNC = (1 << 8), /* force updates to disk */ + PBF_MAPPABLE = (1 << 9),/* use directly-addressable pages */ + PBF_STALE = (1 << 10), /* buffer has been staled, do not find it */ + PBF_FS_MANAGED = (1 << 11), /* filesystem controls freeing memory */ + PBF_FS_DATAIOD = (1 << 12), /* schedule IO completion on fs datad */ + + /* flags 
used only as arguments to access routines */
+	PBF_LOCK = (1 << 13),		/* lock requested		*/
+	PBF_TRYLOCK = (1 << 14),	/* lock requested, but do not wait */
+	PBF_DONT_BLOCK = (1 << 15),	/* do not block in current thread */
+
+	/* flags used only internally */
+	_PBF_LOCKABLE = (1 << 16),	/* page_buf_t may be locked	*/
+	_PBF_ALL_PAGES_MAPPED = (1 << 18), /* all pages in range mapped	*/
+	_PBF_ADDR_ALLOCATED = (1 << 19), /* pb_addr space was allocated */
+	_PBF_MEM_ALLOCATED = (1 << 20),	/* underlying pages are allocated */
+	_PBF_MEM_SLAB = (1 << 21),	/* underlying pages are slab allocated */
+
+	PBF_FORCEIO = (1 << 22),	/* ignore any cache state	*/
+	PBF_FLUSH = (1 << 23),		/* flush disk write cache	*/
+	PBF_READ_AHEAD = (1 << 24),	/* asynchronous read-ahead	*/
+	PBF_RUN_QUEUES = (1 << 25),	/* run block device task queue	*/
+
+} page_buf_flags_t;
+
+#define PBF_UPDATE	(PBF_READ | PBF_WRITE)
+#define PBF_NOT_DONE(pb)	(((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0)
+#define PBF_DONE(pb)	(((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0)
+
+typedef struct pb_target {
+	dev_t			pbr_dev;
+	struct block_device	*pbr_bdev;
+	struct address_space	*pbr_mapping;
+	unsigned int		pbr_bsize;
+	unsigned int		pbr_sshift;
+	size_t			pbr_smask;
+} pb_target_t;
+
+/*
+ * page_buf_t: Buffer structure for page cache-based buffers
+ *
+ * This buffer structure is used by the page cache buffer management routines
+ * to refer to an assembly of pages forming a logical buffer. The actual
+ * I/O is performed with buffer_head or bio structures, as required by the
+ * underlying drivers, which do not understand this structure. The buffer
+ * structure is used on a temporary basis only, and discarded when released.
+ *
+ * The real data storage is recorded in the page cache. Metadata is
+ * hashed to the inode for the block device on which the file system resides.
+ * File data is hashed to the inode for the file. Pages which are only
+ * partially filled with data have bits set in their block_map entry
+ * to indicate which disk blocks in the page are valid.
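+ *
+ * For example (assuming 512 byte sectors and 4K pages, i.e.
+ * pbr_sshift == 9), page->private carries one bit per sector; a page
+ * with only its first 1K valid has bits 0 and 1 set. Both
+ * _pagebuf_lookup_pages() and bio_end_io_pagebuf() use this encoding.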
+ */ + +struct page_buf_s; +typedef void (*page_buf_iodone_t)(struct page_buf_s *); + /* call-back function on I/O completion */ +typedef void (*page_buf_relse_t)(struct page_buf_s *); + /* call-back function on I/O completion */ +typedef int (*page_buf_bdstrat_t)(struct page_buf_s *); + +#define PB_PAGES 4 + +typedef struct page_buf_s { + struct semaphore pb_sema; /* semaphore for lockables */ + unsigned long pb_flushtime; /* time to flush pagebuf */ + atomic_t pb_pin_count; /* pin count */ + wait_queue_head_t pb_waiters; /* unpin waiters */ + struct list_head pb_list; + page_buf_flags_t pb_flags; /* status flags */ + struct list_head pb_hash_list; + struct pb_target *pb_target; /* logical object */ + atomic_t pb_hold; /* reference count */ + page_buf_daddr_t pb_bn; /* block number for I/O */ + loff_t pb_file_offset; /* offset in file */ + size_t pb_buffer_length; /* size of buffer in bytes */ + size_t pb_count_desired; /* desired transfer size */ + void *pb_addr; /* virtual address of buffer */ + struct work_struct pb_iodone_work; + atomic_t pb_io_remaining;/* #outstanding I/O requests */ + page_buf_iodone_t pb_iodone; /* I/O completion function */ + page_buf_relse_t pb_relse; /* releasing function */ + page_buf_bdstrat_t pb_strat; /* pre-write function */ + struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */ + void *pb_fspriv; + void *pb_fspriv2; + void *pb_fspriv3; + unsigned short pb_error; /* error code on I/O */ + unsigned short pb_page_count; /* size of page array */ + unsigned short pb_offset; /* page offset in first page */ + unsigned char pb_locked; /* page array is locked */ + unsigned char pb_hash_index; /* hash table index */ + struct page **pb_pages; /* array of page pointers */ + struct page *pb_page_array[PB_PAGES]; /* inline pages */ +#ifdef PAGEBUF_LOCK_TRACKING + int pb_last_holder; +#endif +} page_buf_t; + + +/* Finding and Reading Buffers */ + +extern page_buf_t *pagebuf_find( /* find buffer for block if */ + /* the block is in memory */ + struct pb_target *, /* inode for block */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_LOCK */ + +extern page_buf_t *pagebuf_get( /* allocate a buffer */ + struct pb_target *, /* inode for buffer */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_LOCK, PBF_READ, */ + /* PBF_ASYNC */ + +extern page_buf_t *pagebuf_lookup( + struct pb_target *, + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ + /* PBF_FORCEIO, _PBF_LOCKABLE */ + +extern page_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ + /* no memory or disk address */ + size_t len, + struct pb_target *); /* mount point "fake" inode */ + +extern page_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */ + /* without disk address */ + size_t len, + struct pb_target *); /* mount point "fake" inode */ + +extern int pagebuf_associate_memory( + page_buf_t *, + void *, + size_t); + +extern void pagebuf_hold( /* increment reference count */ + page_buf_t *); /* buffer to hold */ + +extern void pagebuf_readahead( /* read ahead into cache */ + struct pb_target *, /* target for buffer (or NULL) */ + loff_t, /* starting offset of range */ + size_t, /* length of range */ + page_buf_flags_t); /* additional read flags */ + +/* Releasing Buffers */ + +extern void pagebuf_free( /* deallocate a buffer */ + page_buf_t *); /* buffer to deallocate */ + +extern void pagebuf_rele( /* release hold on 
a buffer */ + page_buf_t *); /* buffer to release */ + +/* Locking and Unlocking Buffers */ + +extern int pagebuf_cond_lock( /* lock buffer, if not locked */ + /* (returns -EBUSY if locked) */ + page_buf_t *); /* buffer to lock */ + +extern int pagebuf_lock_value( /* return count on lock */ + page_buf_t *); /* buffer to check */ + +extern int pagebuf_lock( /* lock buffer */ + page_buf_t *); /* buffer to lock */ + +extern void pagebuf_unlock( /* unlock buffer */ + page_buf_t *); /* buffer to unlock */ + +/* Buffer Read and Write Routines */ + +extern void pagebuf_iodone( /* mark buffer I/O complete */ + page_buf_t *, /* buffer to mark */ + int, /* use data/log helper thread. */ + int); /* run completion locally, or in + * a helper thread. */ + +extern void pagebuf_ioerror( /* mark buffer in error (or not) */ + page_buf_t *, /* buffer to mark */ + unsigned int); /* error to store (0 if none) */ + +extern int pagebuf_iostart( /* start I/O on a buffer */ + page_buf_t *, /* buffer to start */ + page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */ + /* PBF_READ, PBF_WRITE, */ + /* PBF_DELWRI, PBF_SYNC */ + +extern int pagebuf_iorequest( /* start real I/O */ + page_buf_t *); /* buffer to convey to device */ + +extern int pagebuf_iowait( /* wait for buffer I/O done */ + page_buf_t *); /* buffer to wait on */ + +extern void pagebuf_iomove( /* move data in/out of pagebuf */ + page_buf_t *, /* buffer to manipulate */ + size_t, /* starting buffer offset */ + size_t, /* length in buffer */ + caddr_t, /* data pointer */ + page_buf_rw_t); /* direction */ + +static inline int pagebuf_iostrategy(page_buf_t *pb) +{ + return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); +} + +static inline int pagebuf_geterror(page_buf_t *pb) +{ + return pb ? pb->pb_error : ENOMEM; +} + +/* Buffer Utility Routines */ + +extern caddr_t pagebuf_offset( /* pointer at offset in buffer */ + page_buf_t *, /* buffer to offset into */ + size_t); /* offset */ + +/* Pinning Buffer Storage in Memory */ + +extern void pagebuf_pin( /* pin buffer in memory */ + page_buf_t *); /* buffer to pin */ + +extern void pagebuf_unpin( /* unpin buffered data */ + page_buf_t *); /* buffer to unpin */ + +extern int pagebuf_ispin( /* check if buffer is pinned */ + page_buf_t *); /* buffer to check */ + +/* Delayed Write Buffer Routines */ + +#define PBDF_WAIT 0x01 +extern void pagebuf_delwri_flush( + pb_target_t *, + unsigned long, + int *); + +extern void pagebuf_delwri_dequeue( + page_buf_t *); + +/* Buffer Daemon Setup Routines */ + +extern int pagebuf_init(void); +extern void pagebuf_terminate(void); + + +#ifdef PAGEBUF_TRACE +extern ktrace_t *pagebuf_trace_buf; +extern void pagebuf_trace( + page_buf_t *, /* buffer being traced */ + char *, /* description of operation */ + void *, /* arbitrary diagnostic value */ + void *); /* return address */ +#else +# define pagebuf_trace(pb, id, ptr, ra) do { } while (0) +#endif + +#define pagebuf_target_name(target) \ + ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; }) + + + + + +/* These are just for xfs_syncsub... 
it sets an internal variable + * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t + */ +#define XFS_B_ASYNC PBF_ASYNC +#define XFS_B_DELWRI PBF_DELWRI +#define XFS_B_READ PBF_READ +#define XFS_B_WRITE PBF_WRITE +#define XFS_B_STALE PBF_STALE + +#define XFS_BUF_TRYLOCK PBF_TRYLOCK +#define XFS_INCORE_TRYLOCK PBF_TRYLOCK +#define XFS_BUF_LOCK PBF_LOCK +#define XFS_BUF_MAPPED PBF_MAPPED + +#define BUF_BUSY PBF_DONT_BLOCK + +#define XFS_BUF_BFLAGS(x) ((x)->pb_flags) +#define XFS_BUF_ZEROFLAGS(x) \ + ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_SYNC|PBF_DELWRI)) + +#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE) +#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE) +#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE) +#define XFS_BUF_SUPER_STALE(x) do { \ + XFS_BUF_STALE(x); \ + xfs_buf_undelay(x); \ + XFS_BUF_DONE(x); \ + } while (0) + +#define XFS_BUF_MANAGE PBF_FS_MANAGED +#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) + +static inline void xfs_buf_undelay(page_buf_t *pb) +{ + if (pb->pb_flags & PBF_DELWRI) { + if (pb->pb_list.next != &pb->pb_list) { + pagebuf_delwri_dequeue(pb); + pagebuf_rele(pb); + } else { + pb->pb_flags &= ~PBF_DELWRI; + } + } +} + +#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI) +#define XFS_BUF_UNDELAYWRITE(x) xfs_buf_undelay(x) +#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI) + +#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no) +#define XFS_BUF_GETERROR(x) pagebuf_geterror(x) +#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0) + +#define XFS_BUF_DONE(x) ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE)) +#define XFS_BUF_UNDONE(x) ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE) +#define XFS_BUF_ISDONE(x) (!(PBF_NOT_DONE(x))) + +#define XFS_BUF_BUSY(x) ((x)->pb_flags |= PBF_FORCEIO) +#define XFS_BUF_UNBUSY(x) ((x)->pb_flags &= ~PBF_FORCEIO) +#define XFS_BUF_ISBUSY(x) (1) + +#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC) +#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC) +#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC) + +#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH) +#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH) +#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH) + +#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n") +#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n") +#define XFS_BUF_ISSHUT(x) (0) + +#define XFS_BUF_HOLD(x) pagebuf_hold(x) +#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ) +#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= ~PBF_READ) +#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ) + +#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE) +#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE) +#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE) + +#define XFS_BUF_ISUNINITIAL(x) (0) +#define XFS_BUF_UNUNINITIAL(x) (0) + +#define XFS_BUF_BP_ISMAPPED(bp) 1 + +typedef struct page_buf_s xfs_buf_t; +#define xfs_buf page_buf_s + +typedef struct pb_target xfs_buftarg_t; +#define xfs_buftarg pb_target + +#define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD) +#define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD) + +#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone +#define XFS_BUF_SET_IODONE_FUNC(buf, func) \ + (buf)->pb_iodone = (func) +#define XFS_BUF_CLR_IODONE_FUNC(buf) \ + (buf)->pb_iodone = NULL +#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \ + (buf)->pb_strat = (func) +#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \ + (buf)->pb_strat = NULL + +#define XFS_BUF_FSPRIVATE(buf, type) \ 
+ ((type)(buf)->pb_fspriv) +#define XFS_BUF_SET_FSPRIVATE(buf, value) \ + (buf)->pb_fspriv = (void *)(value) +#define XFS_BUF_FSPRIVATE2(buf, type) \ + ((type)(buf)->pb_fspriv2) +#define XFS_BUF_SET_FSPRIVATE2(buf, value) \ + (buf)->pb_fspriv2 = (void *)(value) +#define XFS_BUF_FSPRIVATE3(buf, type) \ + ((type)(buf)->pb_fspriv3) +#define XFS_BUF_SET_FSPRIVATE3(buf, value) \ + (buf)->pb_fspriv3 = (void *)(value) +#define XFS_BUF_SET_START(buf) + +#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \ + (buf)->pb_relse = (value) + +#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) + +extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset) +{ + if (bp->pb_flags & PBF_MAPPED) + return XFS_BUF_PTR(bp) + offset; + return (xfs_caddr_t) pagebuf_offset(bp, offset); +} + +#define XFS_BUF_SET_PTR(bp, val, count) \ + pagebuf_associate_memory(bp, val, count) +#define XFS_BUF_ADDR(bp) ((bp)->pb_bn) +#define XFS_BUF_SET_ADDR(bp, blk) \ + ((bp)->pb_bn = (page_buf_daddr_t)(blk)) +#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) +#define XFS_BUF_SET_OFFSET(bp, off) \ + ((bp)->pb_file_offset = (off)) +#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired) +#define XFS_BUF_SET_COUNT(bp, cnt) \ + ((bp)->pb_count_desired = (cnt)) +#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length) +#define XFS_BUF_SET_SIZE(bp, cnt) \ + ((bp)->pb_buffer_length = (cnt)) +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) +#define XFS_BUF_SET_VTYPE(bp, type) +#define XFS_BUF_SET_REF(bp, ref) + +#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp) + +#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp) +#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0) +#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp) +#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp) +#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema); + +/* setup the buffer target from a buftarg structure */ +#define XFS_BUF_SET_TARGET(bp, target) \ + (bp)->pb_target = (target) +#define XFS_BUF_TARGET(bp) ((bp)->pb_target) +#define XFS_BUFTARG_NAME(target) \ + pagebuf_target_name(target) + +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) +#define XFS_BUF_SET_VTYPE(bp, type) +#define XFS_BUF_SET_REF(bp, ref) + +#define xfs_buf_read(target, blkno, len, flags) \ + pagebuf_get((target), (blkno), (len), \ + PBF_LOCK | PBF_READ | PBF_MAPPED | PBF_MAPPABLE) +#define xfs_buf_get(target, blkno, len, flags) \ + pagebuf_get((target), (blkno), (len), \ + PBF_LOCK | PBF_MAPPED | PBF_MAPPABLE) + +#define xfs_buf_read_flags(target, blkno, len, flags) \ + pagebuf_get((target), (blkno), (len), \ + PBF_READ | PBF_MAPPABLE | flags) +#define xfs_buf_get_flags(target, blkno, len, flags) \ + pagebuf_get((target), (blkno), (len), \ + PBF_MAPPABLE | flags) + +static inline int xfs_bawrite(void *mp, page_buf_t *bp) +{ + bp->pb_fspriv3 = mp; + bp->pb_strat = xfs_bdstrat_cb; + xfs_buf_undelay(bp); + return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | PBF_RUN_QUEUES); +} + +static inline void xfs_buf_relse(page_buf_t *bp) +{ + if ((bp->pb_flags & _PBF_LOCKABLE) && !bp->pb_relse) + pagebuf_unlock(bp); + pagebuf_rele(bp); +} + +#define xfs_bpin(bp) pagebuf_pin(bp) +#define xfs_bunpin(bp) pagebuf_unpin(bp) + +#define xfs_buftrace(id, bp) \ + pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) + +#define xfs_biodone(pb) \ + pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0) + +#define xfs_incore(buftarg,blkno,len,lockit) \ + pagebuf_find(buftarg, blkno ,len, lockit) + + +#define xfs_biomove(pb, off, len, data, rw) \ + pagebuf_iomove((pb), (off), (len), (data), \ + ((rw) == XFS_B_WRITE) ? 
PBRW_WRITE : PBRW_READ) + +#define xfs_biozero(pb, off, len) \ + pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) + + +static inline int XFS_bwrite(page_buf_t *pb) +{ + int iowait = (pb->pb_flags & PBF_ASYNC) == 0; + int error = 0; + + pb->pb_flags |= PBF_SYNC; + if (!iowait) + pb->pb_flags |= PBF_RUN_QUEUES; + + xfs_buf_undelay(pb); + pagebuf_iostrategy(pb); + if (iowait) { + error = pagebuf_iowait(pb); + xfs_buf_relse(pb); + } + return error; +} + +#define XFS_bdwrite(pb) \ + pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC) + +static inline int xfs_bdwrite(void *mp, page_buf_t *bp) +{ + bp->pb_strat = xfs_bdstrat_cb; + bp->pb_fspriv3 = mp; + + return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC); +} + +#define XFS_bdstrat(bp) pagebuf_iorequest(bp) + +#define xfs_iowait(pb) pagebuf_iowait(pb) + + +/* + * Go through all incore buffers, and release buffers + * if they belong to the given device. This is used in + * filesystem error handling to preserve the consistency + * of its metadata. + */ + +#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg) + +#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg) + +#define xfs_incore_relse(buftarg,delwri_only,wait) \ + xfs_relse_buftarg(buftarg) + +#define xfs_baread(target, rablkno, ralen) \ + pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) + +#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target)) +#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target)) +#define xfs_buf_free(bp) pagebuf_free(bp) + +#endif /* __XFS_BUF_H__ */ + diff -Nru a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c --- a/fs/xfs/linux/xfs_globals.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/linux/xfs_globals.c Thu Jan 29 23:15:58 2004 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -36,57 +36,8 @@ */ #include "xfs.h" - -#include "xfs_fs.h" -#include "xfs_buf.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_clnt.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir.h" -#include "xfs_dir2.h" -#include "xfs_imap.h" -#include "xfs_alloc.h" -#include "xfs_dmapi.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_alloc_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_attr_sf.h" -#include "xfs_dir_sf.h" -#include "xfs_dir2_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_bit.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_rw.h" -#include "xfs_da_btree.h" -#include "xfs_dir_leaf.h" -#include "xfs_dir2_data.h" -#include "xfs_dir2_leaf.h" -#include "xfs_dir2_block.h" -#include "xfs_dir2_node.h" -#include "xfs_dir2_trace.h" -#include "xfs_acl.h" -#include "xfs_cap.h" -#include "xfs_mac.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_inode_item.h" -#include "xfs_buf_item.h" -#include "xfs_extfree_item.h" -#include "xfs_log_priv.h" -#include "xfs_trans_priv.h" -#include "xfs_trans_space.h" -#include "xfs_utils.h" +#include "xfs_cred.h" +#include "xfs_sysctl.h" /* * System memory size - used to scale certain data structures in XFS. 
@@ -117,34 +68,3 @@ */ cred_t sys_cred_val, *sys_cred = &sys_cred_val; -/* - * Export symbols used for XFS debugging - */ -EXPORT_SYMBOL(xfs_next_bit); -EXPORT_SYMBOL(xfs_contig_bits); -EXPORT_SYMBOL(xfs_bmbt_get_all); -#if ARCH_CONVERT != ARCH_NOCONVERT -EXPORT_SYMBOL(xfs_bmbt_disk_get_all); -#endif - -/* - * Export symbols used for XFS tracing - */ -#ifdef XFS_ALLOC_TRACE -EXPORT_SYMBOL(xfs_alloc_trace_buf); -#endif -#ifdef XFS_BMAP_TRACE -EXPORT_SYMBOL(xfs_bmap_trace_buf); -#endif -#ifdef XFS_BMBT_TRACE -EXPORT_SYMBOL(xfs_bmbt_trace_buf); -#endif -#ifdef XFS_ATTR_TRACE -EXPORT_SYMBOL(xfs_attr_trace_buf); -#endif -#ifdef XFS_DIR2_TRACE -EXPORT_SYMBOL(xfs_dir2_trace_buf); -#endif -#ifdef XFS_DIR_TRACE -EXPORT_SYMBOL(xfs_dir_trace_buf); -#endif diff -Nru a/fs/xfs/linux/xfs_iomap.c b/fs/xfs/linux/xfs_iomap.c --- a/fs/xfs/linux/xfs_iomap.c Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,795 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
- * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ - -#include "xfs.h" - -#include "xfs_fs.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir.h" -#include "xfs_dir2.h" -#include "xfs_alloc.h" -#include "xfs_dmapi.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_alloc_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_attr_sf.h" -#include "xfs_dir_sf.h" -#include "xfs_dir2_sf.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_bit.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_rw.h" -#include "xfs_acl.h" -#include "xfs_cap.h" -#include "xfs_mac.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_space.h" -#include "xfs_utils.h" -#include "xfs_iomap.h" - -#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ - << mp->m_writeio_log) -#define XFS_STRAT_WRITE_IMAPS 2 -#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP - -STATIC int -xfs_imap_to_bmap( - xfs_iocore_t *io, - xfs_off_t offset, - xfs_bmbt_irec_t *imap, - xfs_iomap_t *iomapp, - int imaps, /* Number of imap entries */ - int iomaps, /* Number of iomap entries */ - int flags) -{ - xfs_mount_t *mp; - xfs_fsize_t nisize; - int pbm; - xfs_fsblock_t start_block; - - mp = io->io_mount; - nisize = XFS_SIZE(mp, io); - if (io->io_new_size > nisize) - nisize = io->io_new_size; - - for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) { - iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ? 
- mp->m_rtdev_targp : mp->m_ddev_targp; - iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); - iomapp->iomap_delta = offset - iomapp->iomap_offset; - iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); - iomapp->iomap_flags = flags; - - start_block = imap->br_startblock; - if (start_block == HOLESTARTBLOCK) { - iomapp->iomap_bn = IOMAP_DADDR_NULL; - iomapp->iomap_flags = IOMAP_HOLE; - } else if (start_block == DELAYSTARTBLOCK) { - iomapp->iomap_bn = IOMAP_DADDR_NULL; - iomapp->iomap_flags = IOMAP_DELAY; - } else { - iomapp->iomap_bn = XFS_FSB_TO_DB_IO(io, start_block); - if (ISUNWRITTEN(imap)) - iomapp->iomap_flags |= IOMAP_UNWRITTEN; - } - - if ((iomapp->iomap_offset + iomapp->iomap_bsize) >= nisize) { - iomapp->iomap_flags |= IOMAP_EOF; - } - - offset += iomapp->iomap_bsize - iomapp->iomap_delta; - } - return pbm; /* Return the number filled */ -} - -int -xfs_iomap( - xfs_iocore_t *io, - xfs_off_t offset, - ssize_t count, - int flags, - xfs_iomap_t *iomapp, - int *niomaps) -{ - xfs_mount_t *mp = io->io_mount; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int lockmode = 0; - xfs_bmbt_irec_t imap; - int nimaps = 1; - int bmapi_flags = 0; - int iomap_flags = 0; - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - switch (flags & - (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE | - BMAPI_UNWRITTEN | BMAPI_DEVICE)) { - case BMAPI_READ: - lockmode = XFS_LCK_MAP_SHARED(mp, io); - bmapi_flags = XFS_BMAPI_ENTIRE; - if (flags & BMAPI_IGNSTATE) - bmapi_flags |= XFS_BMAPI_IGSTATE; - break; - case BMAPI_WRITE: - lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; - bmapi_flags = 0; - XFS_ILOCK(mp, io, lockmode); - break; - case BMAPI_ALLOCATE: - lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; - bmapi_flags = XFS_BMAPI_ENTIRE; - /* Attempt non-blocking lock */ - if (flags & BMAPI_TRYLOCK) { - if (!XFS_ILOCK_NOWAIT(mp, io, lockmode)) - return XFS_ERROR(EAGAIN); - } else { - XFS_ILOCK(mp, io, lockmode); - } - break; - case BMAPI_UNWRITTEN: - goto phase2; - case BMAPI_DEVICE: - lockmode = XFS_LCK_MAP_SHARED(mp, io); - iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ? 
- mp->m_rtdev_targp : mp->m_ddev_targp; - error = 0; - *niomaps = 1; - goto out; - default: - BUG(); - } - - ASSERT(offset <= mp->m_maxioffset); - if ((xfs_fsize_t)offset + count > mp->m_maxioffset) - count = mp->m_maxioffset - offset; - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); - offset_fsb = XFS_B_TO_FSBT(mp, offset); - - error = XFS_BMAPI(mp, NULL, io, offset_fsb, - (xfs_filblks_t)(end_fsb - offset_fsb), - bmapi_flags, NULL, 0, &imap, - &nimaps, NULL); - - if (error) - goto out; - -phase2: - switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE|BMAPI_UNWRITTEN)) { - case BMAPI_WRITE: - /* If we found an extent, return it */ - if (nimaps && (imap.br_startblock != HOLESTARTBLOCK)) - break; - - if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { - error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset, - count, flags, &imap, &nimaps, nimaps); - } else { - error = XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, - flags, &imap, &nimaps); - } - iomap_flags = IOMAP_NEW; - break; - case BMAPI_ALLOCATE: - /* If we found an extent, return it */ - XFS_IUNLOCK(mp, io, lockmode); - lockmode = 0; - - if (nimaps && !ISNULLSTARTBLOCK(imap.br_startblock)) - break; - - error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, &imap, &nimaps); - break; - case BMAPI_UNWRITTEN: - lockmode = 0; - error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count); - nimaps = 0; - break; - } - - if (nimaps) { - *niomaps = xfs_imap_to_bmap(io, offset, &imap, - iomapp, nimaps, *niomaps, iomap_flags); - } else if (niomaps) { - *niomaps = 0; - } - -out: - if (lockmode) - XFS_IUNLOCK(mp, io, lockmode); - return XFS_ERROR(error); -} - -STATIC int -xfs_flush_space( - xfs_inode_t *ip, - int *fsynced, - int *ioflags) -{ - switch (*fsynced) { - case 0: - if (ip->i_delayed_blks) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_inode(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - *fsynced = 1; - } else { - *ioflags |= BMAPI_SYNC; - *fsynced = 2; - } - return 0; - case 1: - *fsynced = 2; - *ioflags |= BMAPI_SYNC; - return 0; - case 2: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_device(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - *fsynced = 3; - return 0; - } - return 1; -} - -int -xfs_iomap_write_direct( - xfs_inode_t *ip, - loff_t offset, - size_t count, - int flags, - xfs_bmbt_irec_t *ret_imap, - int *nmaps, - int found) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; - xfs_fileoff_t offset_fsb; - xfs_fileoff_t last_fsb; - xfs_filblks_t count_fsb; - xfs_fsize_t isize; - xfs_fsblock_t firstfsb; - int nimaps, maps; - int error; - int bmapi_flag; - int rt; - xfs_trans_t *tp; - xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; - xfs_bmap_free_t free_list; - int aeof; - xfs_filblks_t datablocks; - int committed; - int numrtextents; - uint resblks; - - /* - * Make sure that the dquots are there. This doesn't hold - * the ilock across a disk read. 
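The hole/delay classification that xfs_imap_to_bmap performs above comes down to two sentinel start-block values; everything else in the translation is arithmetic on the mount's block-size shifts. A minimal userspace sketch of that test, with the sentinel constants written out literally rather than taken from the XFS bmap headers, and with illustrative enum names:

	#include <stdio.h>

	#define HOLESTARTBLOCK	(-2LL)	/* no blocks allocated or reserved */
	#define DELAYSTARTBLOCK	(-1LL)	/* space reserved, not yet allocated */

	enum map_kind { MAP_HOLE, MAP_DELAY, MAP_REAL };

	static enum map_kind classify(long long start_block)
	{
		if (start_block == HOLESTARTBLOCK)
			return MAP_HOLE;
		if (start_block == DELAYSTARTBLOCK)
			return MAP_DELAY;
		return MAP_REAL;	/* a real disk block number */
	}

	int main(void)
	{
		printf("%d %d %d\n", classify(-2LL), classify(-1LL), classify(8192LL));
		return 0;
	}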
- */ - - error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED); - if (error) - return XFS_ERROR(error); - - maps = min(XFS_WRITE_IMAPS, *nmaps); - nimaps = maps; - - isize = ip->i_d.di_size; - aeof = (offset + count) > isize; - - if (io->io_new_size > isize) - isize = io->io_new_size; - - offset_fsb = XFS_B_TO_FSBT(mp, offset); - last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); - count_fsb = last_fsb - offset_fsb; - if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) { - xfs_fileoff_t map_last_fsb; - - map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff; - - if (map_last_fsb < last_fsb) { - last_fsb = map_last_fsb; - count_fsb = last_fsb - offset_fsb; - } - ASSERT(count_fsb > 0); - } - - /* - * determine whether to reserve space on - * the data or the realtime partition. - */ - if ((rt = XFS_IS_REALTIME_INODE(ip))) { - int sbrtextsize, iprtextsize; - - sbrtextsize = mp->m_sb.sb_rextsize; - iprtextsize = - ip->i_d.di_extsize ? ip->i_d.di_extsize : sbrtextsize; - numrtextents = (count_fsb + iprtextsize - 1); - do_div(numrtextents, sbrtextsize); - datablocks = 0; - } else { - datablocks = count_fsb; - numrtextents = 0; - } - - /* - * allocate and set up the transaction - */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); - tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); - - resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); - - error = xfs_trans_reserve(tp, resblks, - XFS_WRITE_LOG_RES(mp), numrtextents, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); - - /* - * check for running out of space - */ - if (error) - /* - * Free the transaction structure. - */ - xfs_trans_cancel(tp, 0); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - if (error) - goto error_out; /* don't return above on a reserve error, - we need the ilock held to return */ - - if (XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, resblks)) { - error = (EDQUOT); - goto error1; - } - nimaps = 1; - - bmapi_flag = XFS_BMAPI_WRITE; - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - - if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt)) - bmapi_flag |= XFS_BMAPI_PREALLOC; - - /* - * issue the bmapi() call to allocate the blocks - */ - XFS_BMAP_INIT(&free_list, &firstfsb); - imapp = &imap[0]; - error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, - bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list); - if (error) { - goto error0; - } - - /* - * complete the transaction - */ - - error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); - if (error) { - goto error0; - } - - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); - if (error) { - goto error_out; - } - - /* copy any maps to caller's array and return any error. */
- if (nimaps == 0) { - error = (ENOSPC); - goto error_out; - } - - *ret_imap = imap[0]; - *nmaps = 1; - return 0; - - error0: /* Cancel bmap, unlock inode, and cancel trans */ - xfs_bmap_cancel(&free_list); - - error1: /* Just cancel transaction */ - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); - *nmaps = 0; /* nothing set up here */ - -error_out: - return XFS_ERROR(error); -} - -int -xfs_iomap_write_delay( - xfs_inode_t *ip, - loff_t offset, - size_t count, - int ioflag, - xfs_bmbt_irec_t *ret_imap, - int *nmaps) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_iocore_t *io = &ip->i_iocore; - xfs_fileoff_t offset_fsb; - xfs_fileoff_t last_fsb; - xfs_fsize_t isize; - xfs_fsblock_t firstblock; - int nimaps; - int error; - xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; - int aeof; - int fsynced = 0; - - ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); - - /* - * Make sure that the dquots are there. This doesn't hold - * the ilock across a disk read. - */ - - error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); - if (error) - return XFS_ERROR(error); - -retry: - isize = ip->i_d.di_size; - if (io->io_new_size > isize) { - isize = io->io_new_size; - } - - aeof = 0; - offset_fsb = XFS_B_TO_FSBT(mp, offset); - last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); - /* - * If the caller is doing a write at the end of the file, - * then extend the allocation (and the buffer used for the write) - * out to the file system's write iosize. We clean up any extra - * space left over when the file is closed in xfs_inactive(). - * - * We don't bother with this for sync writes, because we need - * to minimize the amount we write for good performance. - */ - if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) { - xfs_off_t aligned_offset; - unsigned int iosize; - xfs_fileoff_t ioalign; - - iosize = mp->m_writeio_blocks; - aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); - ioalign = XFS_B_TO_FSBT(mp, aligned_offset); - last_fsb = ioalign + iosize; - aeof = 1; - } - - nimaps = XFS_WRITE_IMAPS; - firstblock = NULLFSBLOCK; - - /* - * round up the allocation request to an m_dalign boundary if the file - * size is greater than 512K and we are allocating past the allocation eof - */ - if (mp->m_dalign && (isize >= mp->m_dalign) && aeof) { - int eof; - xfs_fileoff_t new_last_fsb; - new_last_fsb = roundup_64(last_fsb, mp->m_dalign); - error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); - if (error) { - return error; - } - if (eof) { - last_fsb = new_last_fsb; - } - } - - error = xfs_bmapi(NULL, ip, offset_fsb, - (xfs_filblks_t)(last_fsb - offset_fsb), - XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | - XFS_BMAPI_ENTIRE, &firstblock, 1, imap, - &nimaps, NULL); - /* - * This can be EDQUOT, if nimaps == 0 - */ - if (error && (error != ENOSPC)) { - return XFS_ERROR(error); - } - /* - * If bmapi returned us nothing, and if we didn't get back EDQUOT, - * then we must have run out of space. - */ - - if (nimaps == 0) { - if (xfs_flush_space(ip, &fsynced, &ioflag)) - return XFS_ERROR(ENOSPC); - - error = 0; - goto retry; - } - - *ret_imap = imap[0]; - *nmaps = 1; - return 0; -} -
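When the delayed-allocate path above gets nothing back from xfs_bmapi, it retries through xfs_flush_space, which escalates from flushing the one inode, to forcing synchronous allocation, to flushing the whole device, before the caller finally reports ENOSPC. A standalone sketch of the shape of that ladder; the stage names and helpers are illustrative, not kernel API:

	#include <stdio.h>

	enum { TRY_INODE, TRY_SYNC, TRY_DEVICE, GIVE_UP };

	/* returns 0 while there is still something heavier to try */
	static int escalate(int *stage)
	{
		switch (*stage) {
		case TRY_INODE:
			puts("flush this inode's delayed allocations");
			*stage = TRY_SYNC;
			return 0;
		case TRY_SYNC:
			puts("retry with synchronous allocation");
			*stage = TRY_DEVICE;
			return 0;
		case TRY_DEVICE:
			puts("flush the whole device, last retry");
			*stage = GIVE_UP;
			return 0;
		}
		return 1;	/* nothing left: report ENOSPC */
	}

	int main(void)
	{
		int stage = TRY_INODE;

		while (!escalate(&stage))
			;	/* each pass would retry the allocation */
		return 0;
	}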
-/* - * Pass in a delayed allocate extent, convert it to real extents; - * return to the caller the extent we create which maps on top of - * the originating caller's request. - * - * Called without a lock on the inode. - */ -int -xfs_iomap_write_allocate( - xfs_inode_t *ip, - xfs_bmbt_irec_t *map, - int *retmap) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, last_block; - xfs_fileoff_t end_fsb, map_start_fsb; - xfs_fsblock_t first_block; - xfs_bmap_free_t free_list; - xfs_filblks_t count_fsb; - xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS]; - xfs_trans_t *tp; - int i, nimaps, committed; - int error = 0; - int nres; - - *retmap = 0; - - /* - * Make sure that the dquots are there. - */ - - if ((error = XFS_QM_DQATTACH(mp, ip, 0))) - return XFS_ERROR(error); - - offset_fsb = map->br_startoff; - count_fsb = map->br_blockcount; - map_start_fsb = offset_fsb; - - XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); - - while (count_fsb != 0) { - /* - * Set up a transaction with which to allocate the - * backing store for the file. Do allocations in a - * loop until we get some space in the range we are - * interested in. The other space that might be allocated - * is in the delayed allocation extent on which we sit - * but before our buffer starts. - */ - - nimaps = 0; - while (nimaps == 0) { - tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); - nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); - error = xfs_trans_reserve(tp, nres, - XFS_WRITE_LOG_RES(mp), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); - - if (error == ENOSPC) { - error = xfs_trans_reserve(tp, 0, - XFS_WRITE_LOG_RES(mp), - 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); - } - if (error) { - xfs_trans_cancel(tp, 0); - return XFS_ERROR(error); - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - - XFS_BMAP_INIT(&free_list, &first_block); - - nimaps = XFS_STRAT_WRITE_IMAPS; - /* - * Ensure we don't go beyond eof - it is possible - * the extents changed since we did the read call, as - * we dropped the ilock in the interim. - */ - - end_fsb = XFS_B_TO_FSB(mp, ip->i_d.di_size); - xfs_bmap_last_offset(NULL, ip, &last_block, - XFS_DATA_FORK); - last_block = XFS_FILEOFF_MAX(last_block, end_fsb); - if ((map_start_fsb + count_fsb) > last_block) { - count_fsb = last_block - map_start_fsb; - if (count_fsb == 0) { - error = EAGAIN; - goto trans_cancel; - } - } - - /* Go get the actual blocks */ - error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, - XFS_BMAPI_WRITE, &first_block, 1, - imap, &nimaps, &free_list); - - if (error) - goto trans_cancel; - - error = xfs_bmap_finish(&tp, &free_list, - first_block, &committed); - - if (error) - goto trans_cancel; - - error = xfs_trans_commit(tp, - XFS_TRANS_RELEASE_LOG_RES, NULL); - - if (error) - goto error0; - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } - - /* - * See if we were able to allocate an extent that - * covers at least part of the caller's request - */ - - for (i = 0; i < nimaps; i++) { - if ((map->br_startoff >= imap[i].br_startoff) && - (map->br_startoff < (imap[i].br_startoff + - imap[i].br_blockcount))) { - *map = imap[i]; - *retmap = 1; - XFS_STATS_INC(xs_xstrat_quick); - return 0; - } - count_fsb -= imap[i].br_blockcount; - } - - /* So far we have not mapped the requested part of the - * file, just surrounding data; try again.
- */ - nimaps--; - offset_fsb = imap[nimaps].br_startoff + - imap[nimaps].br_blockcount; - map_start_fsb = offset_fsb; - } - -trans_cancel: - xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); -error0: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return XFS_ERROR(error); -} - -int -xfs_iomap_write_unwritten( - xfs_inode_t *ip, - loff_t offset, - size_t count) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_trans_t *tp; - xfs_fileoff_t offset_fsb; - xfs_filblks_t count_fsb; - xfs_filblks_t numblks_fsb; - xfs_bmbt_irec_t imap; - int committed; - int error; - int nres; - int nimaps; - xfs_fsblock_t firstfsb; - xfs_bmap_free_t free_list; - - offset_fsb = XFS_B_TO_FSBT(mp, offset); - count_fsb = XFS_B_TO_FSB(mp, count); - - do { - nres = XFS_DIOSTRAT_SPACE_RES(mp, 0); - - /* - * set up a transaction to convert the range of extents - * from unwritten to real. Do allocations in a loop until - * we have covered the range passed in. - */ - - tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); - error = xfs_trans_reserve(tp, nres, - XFS_WRITE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); - if (error) { - xfs_trans_cancel(tp, 0); - goto error0; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - xfs_trans_ihold(tp, ip); - - /* - * Modify the unwritten extent state of the buffer. - */ - XFS_BMAP_INIT(&free_list, &firstfsb); - nimaps = 1; - error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, - XFS_BMAPI_WRITE, &firstfsb, - 1, &imap, &nimaps, &free_list); - if (error) - goto error_on_bmapi_transaction; - - error = xfs_bmap_finish(&(tp), &(free_list), - firstfsb, &committed); - if (error) - goto error_on_bmapi_transaction; - - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - goto error0; - - if ((numblks_fsb = imap.br_blockcount) == 0) { - /* - * The numblks_fsb value should always get - * smaller, otherwise the loop is stuck. - */ - ASSERT(imap.br_blockcount); - break; - } - offset_fsb += numblks_fsb; - count_fsb -= numblks_fsb; - } while (count_fsb > 0); - - return 0; - -error_on_bmapi_transaction: - xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); - xfs_iunlock(ip, XFS_ILOCK_EXCL); -error0: - return XFS_ERROR(error); -} diff -Nru a/fs/xfs/linux/xfs_linux.h b/fs/xfs/linux/xfs_linux.h --- a/fs/xfs/linux/xfs_linux.h Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/linux/xfs_linux.h Thu Jan 29 23:15:58 2004 @@ -32,6 +32,44 @@ #ifndef __XFS_LINUX__ #define __XFS_LINUX__ +#include +#include + +/* + * Some types are conditional depending on the target system. + * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. + * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well + * as requiring XFS_BIG_BLKNOS to be set. 
+ */ +#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) +# define XFS_BIG_BLKNOS 1 +# if BITS_PER_LONG == 64 +# define XFS_BIG_INUMS 1 +# else +# define XFS_BIG_INUMS 0 +# endif +#else +# define XFS_BIG_BLKNOS 0 +# define XFS_BIG_INUMS 0 +#endif + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + #include #include #include @@ -55,19 +93,18 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* * Feature macros (disable/enable) diff -Nru a/fs/xfs/linux/xfs_super.h b/fs/xfs/linux/xfs_super.h --- a/fs/xfs/linux/xfs_super.h Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/linux/xfs_super.h Thu Jan 29 23:15:58 2004 @@ -60,6 +60,14 @@ # define set_posix_acl_flag(sb) do { } while (0) #endif +#ifdef CONFIG_XFS_SECURITY +# define XFS_SECURITY_STRING "security attrs, " +# define ENOSECURITY 0 +#else +# define XFS_SECURITY_STRING +# define ENOSECURITY EOPNOTSUPP +#endif + #ifdef CONFIG_XFS_RT # define XFS_REALTIME_STRING "realtime, " #else @@ -89,6 +97,7 @@ #endif #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ + XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ XFS_BIGFS_STRING \ XFS_TRACE_STRING \ diff -Nru a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c --- a/fs/xfs/pagebuf/page_buf.c Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,2107 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ - -/* - * page_buf.c - * - * The page_buf module provides an abstract buffer cache model on top of - * the Linux page cache. Cached metadata blocks for a file system are - * hashed to the inode for the block device. The page_buf module - * assembles buffer (page_buf_t) objects on demand to aggregate such - * cached pages for I/O. - * - * - * Written by Steve Lord, Jim Mostek, Russell Cattelan - * and Rajagopal Ananthanarayanan ("ananth") at SGI. 
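The XFS_BIG_BLKNOS / XFS_BIG_INUMS block in the xfs_linux.h hunk above picks address-width capabilities at compile time from the kernel configuration and the word size. The same pattern in isolation, with CONFIG_LBD standing in for the real config symbol and GCC's __SIZEOF_LONG__ replacing BITS_PER_LONG:

	#include <stdio.h>

	#if defined(CONFIG_LBD) || (__SIZEOF_LONG__ == 8)
	# define BIG_BLKNOS	1
	#else
	# define BIG_BLKNOS	0
	#endif

	int main(void)
	{
		printf("64-bit block numbers: %s\n", BIG_BLKNOS ? "yes" : "no");
		return 0;
	}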
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "page_buf.h" - -#define BBSHIFT 9 -#define BN_ALIGN_MASK ((1 << (PAGE_CACHE_SHIFT - BBSHIFT)) - 1) - -#ifndef GFP_READAHEAD -#define GFP_READAHEAD (__GFP_NOWARN|__GFP_NORETRY) -#endif - -/* - * File wide globals - */ - -STATIC kmem_cache_t *pagebuf_cache; -STATIC void pagebuf_daemon_wakeup(int); -STATIC void pagebuf_delwri_queue(page_buf_t *, int); -STATIC struct workqueue_struct *pagebuf_logio_workqueue; -STATIC struct workqueue_struct *pagebuf_dataio_workqueue; - -/* - * Pagebuf module configuration parameters, exported via - * /proc/sys/vm/pagebuf - */ - -typedef struct pb_sysctl_val { - int min; - int val; - int max; -} pb_sysctl_val_t; - -struct { - pb_sysctl_val_t flush_interval; /* interval between runs of the - * delwri flush daemon. */ - pb_sysctl_val_t age_buffer; /* time for buffer to age before - * we flush it. */ - pb_sysctl_val_t stats_clear; /* clear the pagebuf stats */ - pb_sysctl_val_t debug; /* debug tracing on or off */ -} pb_params = { - /* MIN DFLT MAX */ - .flush_interval = { HZ/2, HZ, 30*HZ }, - .age_buffer = { 1*HZ, 15*HZ, 300*HZ }, - .stats_clear = { 0, 0, 1 }, - .debug = { 0, 0, 1 }, -}; - -enum { - PB_FLUSH_INT = 1, - PB_FLUSH_AGE = 2, - PB_STATS_CLEAR = 3, - PB_DEBUG = 4, -}; - -/* - * Pagebuf statistics variables - */ - -struct pbstats { - u_int32_t pb_get; - u_int32_t pb_create; - u_int32_t pb_get_locked; - u_int32_t pb_get_locked_waited; - u_int32_t pb_busy_locked; - u_int32_t pb_miss_locked; - u_int32_t pb_page_retries; - u_int32_t pb_page_found; - u_int32_t pb_get_read; -} pbstats; -DEFINE_PER_CPU(struct pbstats, pbstats); - -/* We don't disable preempt, not too worried about poking the - * wrong cpu's stat for now */ -#define PB_STATS_INC(count) (__get_cpu_var(pbstats).count++) - -/* - * Pagebuf debugging - */ - -#ifdef PAGEBUF_TRACE -void -pagebuf_trace( - page_buf_t *pb, - char *id, - void *data, - void *ra) -{ - if (!pb_params.debug.val) - return; - ktrace_enter(pagebuf_trace_buf, - pb, id, - (void *)(unsigned long)pb->pb_flags, - (void *)(unsigned long)pb->pb_hold.counter, - (void *)(unsigned long)pb->pb_sema.count.counter, - (void *)current, - data, ra, - (void *)(unsigned long)((pb->pb_file_offset>>32) & 0xffffffff), - (void *)(unsigned long)(pb->pb_file_offset & 0xffffffff), - (void *)(unsigned long)pb->pb_buffer_length, - NULL, NULL, NULL, NULL, NULL); -} -ktrace_t *pagebuf_trace_buf; -EXPORT_SYMBOL(pagebuf_trace_buf); -#define PAGEBUF_TRACE_SIZE 4096 -#define PB_TRACE(pb, id, data) \ - pagebuf_trace(pb, id, (void *)data, (void *)__builtin_return_address(0)) -#else -#define PB_TRACE(pb, id, data) do { } while (0) -#endif - -#ifdef PAGEBUF_LOCK_TRACKING -# define PB_SET_OWNER(pb) ((pb)->pb_last_holder = current->pid) -# define PB_CLEAR_OWNER(pb) ((pb)->pb_last_holder = -1) -# define PB_GET_OWNER(pb) ((pb)->pb_last_holder) -#else -# define PB_SET_OWNER(pb) do { } while (0) -# define PB_CLEAR_OWNER(pb) do { } while (0) -# define PB_GET_OWNER(pb) do { } while (0) -#endif - -/* - * Pagebuf allocation / freeing. - */ - -#define pb_to_gfp(flags) \ - (((flags) & PBF_READ_AHEAD) ? GFP_READAHEAD : \ - ((flags) & PBF_DONT_BLOCK) ? 
GFP_NOFS : GFP_KERNEL) - -#define pagebuf_allocate(flags) \ - kmem_cache_alloc(pagebuf_cache, pb_to_gfp(flags)) -#define pagebuf_deallocate(pb) \ - kmem_cache_free(pagebuf_cache, (pb)); - -/* - * Pagebuf hashing - */ - -#define NBITS 8 -#define NHASH (1<<NBITS) - -typedef struct { - struct list_head pb_hash; - int pb_count; - spinlock_t pb_hash_lock; -} pb_hash_t; - -STATIC pb_hash_t pbhash[NHASH]; -#define pb_hash(pb) &pbhash[pb->pb_hash_index] - -STATIC int -_bhash( - struct block_device *bdev, - loff_t base) -{ - int bit, hval; - - base >>= 9; - base ^= (unsigned long)bdev / L1_CACHE_BYTES; - for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) { - hval ^= (int)base & (NHASH-1); - base >>= NBITS; - } - return hval; -} - -/* - * Mapping of multi-page buffers into contiguous virtual space - */ - -STATIC void *pagebuf_mapout_locked(page_buf_t *); - -typedef struct a_list { - void *vm_addr; - struct a_list *next; -} a_list_t; - -STATIC a_list_t *as_free_head; -STATIC int as_list_len; -STATIC spinlock_t as_lock = SPIN_LOCK_UNLOCKED; - -/* - * Try to batch vunmaps because they are costly. - */ -STATIC void -free_address( - void *addr) -{ - a_list_t *aentry; - - aentry = kmalloc(sizeof(a_list_t), GFP_ATOMIC); - if (aentry) { - spin_lock(&as_lock); - aentry->next = as_free_head; - aentry->vm_addr = addr; - as_free_head = aentry; - as_list_len++; - spin_unlock(&as_lock); - } else { - vunmap(addr); - } -} - -STATIC void -purge_addresses(void) -{ - a_list_t *aentry, *old; - - if (as_free_head == NULL) - return; - - spin_lock(&as_lock); - aentry = as_free_head; - as_free_head = NULL; - as_list_len = 0; - spin_unlock(&as_lock); - - while ((old = aentry) != NULL) { - vunmap(aentry->vm_addr); - aentry = aentry->next; - kfree(old); - } -} - -/* - * Internal pagebuf object manipulation - */ - -STATIC void -_pagebuf_initialize( - page_buf_t *pb, - pb_target_t *target, - loff_t range_base, - size_t range_length, - page_buf_flags_t flags) -{ - /* - * We don't want certain flags to appear in pb->pb_flags. - */ - flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); - - memset(pb, 0, sizeof(page_buf_t)); - atomic_set(&pb->pb_hold, 1); - init_MUTEX_LOCKED(&pb->pb_iodonesema); - INIT_LIST_HEAD(&pb->pb_list); - INIT_LIST_HEAD(&pb->pb_hash_list); - init_MUTEX_LOCKED(&pb->pb_sema); /* held, no waiters */ - PB_SET_OWNER(pb); - pb->pb_target = target; - pb->pb_file_offset = range_base; - /* - * Set buffer_length and count_desired to the same value initially. - * IO routines should use count_desired, which will be the same in - * most cases but may be reset (e.g. XFS recovery). - */ - pb->pb_buffer_length = pb->pb_count_desired = range_length; - pb->pb_flags = flags | PBF_NONE; - pb->pb_bn = PAGE_BUF_DADDR_NULL; - atomic_set(&pb->pb_pin_count, 0); - init_waitqueue_head(&pb->pb_waiters); - - PB_STATS_INC(pb_create); - PB_TRACE(pb, "initialize", target); -} - -/* - * Allocate a page array capable of holding a specified number - * of pages, and point the page buf at it. - */ -STATIC int -_pagebuf_get_pages( - page_buf_t *pb, - int page_count, - page_buf_flags_t flags) -{ - int gpf_mask = pb_to_gfp(flags); - - /* Make sure that we have a page list */ - if (pb->pb_pages == NULL) { - pb->pb_offset = page_buf_poff(pb->pb_file_offset); - pb->pb_page_count = page_count; - if (page_count <= PB_PAGES) { - pb->pb_pages = pb->pb_page_array; - } else { - pb->pb_pages = kmalloc(sizeof(struct page *) * - page_count, gpf_mask); - if (pb->pb_pages == NULL) - return -ENOMEM; - } - memset(pb->pb_pages, 0, sizeof(struct page *) * page_count); - } - return 0; -} -
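_bhash above folds a sector-aligned offset, xor'd with the block_device pointer, into one of NHASH buckets, NBITS at a time. The same fold as a standalone function, with a plain integer standing in for the device pointer and L1_CACHE_BYTES fixed at 64:

	#include <stdio.h>

	#define NBITS	8
	#define NHASH	(1 << NBITS)

	static int bhash(unsigned long dev, long long base)
	{
		int bit, hval;

		base >>= 9;		/* bytes to 512-byte basic blocks */
		base ^= dev / 64;	/* stand-in for L1_CACHE_BYTES */
		for (bit = hval = 0; base && bit < (int)(sizeof(base) * 8); bit += NBITS) {
			hval ^= (int)base & (NHASH - 1);
			base >>= NBITS;
		}
		return hval;
	}

	int main(void)
	{
		printf("bucket %d\n", bhash(0xc0ffee00UL, 123456789LL));
		return 0;
	}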
-/* - * Walk a pagebuf releasing all the pages contained within it. - */ -STATIC inline void -_pagebuf_freepages( - page_buf_t *pb) -{ - int buf_index; - - for (buf_index = 0; buf_index < pb->pb_page_count; buf_index++) { - struct page *page = pb->pb_pages[buf_index]; - - if (page) { - pb->pb_pages[buf_index] = NULL; - page_cache_release(page); - } - } -} - -/* - * _pagebuf_free_object - * - * _pagebuf_free_object releases the contents of the specified buffer. - * The modification state of any associated pages is left unchanged. - */ -void -_pagebuf_free_object( - pb_hash_t *hash, /* hash bucket for buffer */ - page_buf_t *pb) /* buffer to deallocate */ -{ - page_buf_flags_t pb_flags = pb->pb_flags; - - PB_TRACE(pb, "free_object", 0); - pb->pb_flags |= PBF_FREED; - - if (hash) { - if (!list_empty(&pb->pb_hash_list)) { - hash->pb_count--; - list_del_init(&pb->pb_hash_list); - } - spin_unlock(&hash->pb_hash_lock); - } - - if (!(pb_flags & PBF_FREED)) { - /* release any virtual mapping */ ; - if (pb->pb_flags & _PBF_ADDR_ALLOCATED) { - void *vaddr = pagebuf_mapout_locked(pb); - if (vaddr) { - free_address(vaddr); - } - } - - if (pb->pb_flags & _PBF_MEM_ALLOCATED) { - if (pb->pb_pages) { - /* release the pages in the address list */ - if ((pb->pb_pages[0]) && - (pb->pb_flags & _PBF_MEM_SLAB)) { - kfree(pb->pb_addr); - } else { - _pagebuf_freepages(pb); - } - if (pb->pb_pages != pb->pb_page_array) - kfree(pb->pb_pages); - pb->pb_pages = NULL; - } - pb->pb_flags &= ~(_PBF_MEM_ALLOCATED|_PBF_MEM_SLAB); - } - } - - pagebuf_deallocate(pb); -} - -/* - * _pagebuf_lookup_pages - * - * _pagebuf_lookup_pages finds all pages which match the buffer - * in question and the range of file offsets supplied, - * and builds the page list for the buffer, if the - * page list is not already formed or if not all of the pages are - * already in the list. Invalid pages (pages which have not yet been - * read in from disk) are assigned for any pages which are not found. - */ -STATIC int -_pagebuf_lookup_pages( - page_buf_t *pb, - struct address_space *aspace, - page_buf_flags_t flags) -{ - loff_t next_buffer_offset; - unsigned long page_count, pi, index; - struct page *page; - int gfp_mask, retry_count = 5, rval = 0; - int all_mapped, good_pages, nbytes; - unsigned int blocksize, sectorshift; - size_t size, offset; - - - /* For pagebufs where we want to map an address, do not use - * highmem pages - so that we do not need to use kmap resources - * to access the data. - * - * For pages where the caller has indicated there may be resource - * contention (e.g. called from a transaction) do not flush - * delalloc pages to obtain memory.
- */ - - if (flags & PBF_READ_AHEAD) { - gfp_mask = GFP_READAHEAD; - retry_count = 0; - } else if (flags & PBF_DONT_BLOCK) { - gfp_mask = GFP_NOFS; - } else if (flags & PBF_MAPPABLE) { - gfp_mask = GFP_KERNEL; - } else { - gfp_mask = GFP_HIGHUSER; - } - - next_buffer_offset = pb->pb_file_offset + pb->pb_buffer_length; - - good_pages = page_count = (page_buf_btoc(next_buffer_offset) - - page_buf_btoct(pb->pb_file_offset)); - - if (pb->pb_flags & _PBF_ALL_PAGES_MAPPED) { - /* Bring pages forward in cache */ - for (pi = 0; pi < page_count; pi++) { - mark_page_accessed(pb->pb_pages[pi]); - } - if ((flags & PBF_MAPPED) && !(pb->pb_flags & PBF_MAPPED)) { - all_mapped = 1; - goto mapit; - } - return 0; - } - - /* Ensure pb_pages field has been initialised */ - rval = _pagebuf_get_pages(pb, page_count, flags); - if (rval) - return rval; - - rval = pi = 0; - blocksize = pb->pb_target->pbr_bsize; - sectorshift = pb->pb_target->pbr_sshift; - size = pb->pb_count_desired; - offset = pb->pb_offset; - - /* Enter the pages in the page list */ - index = (pb->pb_file_offset - pb->pb_offset) >> PAGE_CACHE_SHIFT; - for (all_mapped = 1; pi < page_count; pi++, index++) { - if (pb->pb_pages[pi] == 0) { - retry: - page = find_or_create_page(aspace, index, gfp_mask); - if (!page) { - if (--retry_count > 0) { - PB_STATS_INC(pb_page_retries); - pagebuf_daemon_wakeup(1); - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(10); - goto retry; - } - rval = -ENOMEM; - all_mapped = 0; - continue; - } - PB_STATS_INC(pb_page_found); - mark_page_accessed(page); - pb->pb_pages[pi] = page; - } else { - page = pb->pb_pages[pi]; - lock_page(page); - } - - nbytes = PAGE_CACHE_SIZE - offset; - if (nbytes > size) - nbytes = size; - size -= nbytes; - - if (!PageUptodate(page)) { - if (blocksize == PAGE_CACHE_SIZE) { - if (flags & PBF_READ) - pb->pb_locked = 1; - good_pages--; - } else if (!PagePrivate(page)) { - unsigned long i, range; - - /* - * In this case page->private holds a bitmap - * of uptodate sectors within the page - */ - ASSERT(blocksize < PAGE_CACHE_SIZE); - range = (offset + nbytes) >> sectorshift; - for (i = offset >> sectorshift; i < range; i++) - if (!test_bit(i, &page->private)) - break; - if (i != range) - good_pages--; - } else { - good_pages--; - } - } - offset = 0; - } - - if (!pb->pb_locked) { - for (pi = 0; pi < page_count; pi++) { - if (pb->pb_pages[pi]) - unlock_page(pb->pb_pages[pi]); - } - } - -mapit: - pb->pb_flags |= _PBF_MEM_ALLOCATED; - if (all_mapped) { - pb->pb_flags |= _PBF_ALL_PAGES_MAPPED; - - /* A single page buffer is always mappable */ - if (page_count == 1) { - pb->pb_addr = (caddr_t) - page_address(pb->pb_pages[0]) + pb->pb_offset; - pb->pb_flags |= PBF_MAPPED; - } else if (flags & PBF_MAPPED) { - if (as_list_len > 64) - purge_addresses(); - pb->pb_addr = vmap(pb->pb_pages, page_count, - VM_MAP, PAGE_KERNEL); - if (pb->pb_addr == NULL) - return -ENOMEM; - pb->pb_addr += pb->pb_offset; - pb->pb_flags |= PBF_MAPPED | _PBF_ADDR_ALLOCATED; - } - } - /* If some pages were found with data in them - * we are not in PBF_NONE state. - */ - if (good_pages != 0) { - pb->pb_flags &= ~(PBF_NONE); - if (good_pages != page_count) { - pb->pb_flags |= PBF_PARTIAL; - } - } - - PB_TRACE(pb, "lookup_pages", (long)good_pages); - - return rval; -} - -/* - * Finding and Reading Buffers - */ - -/* - * _pagebuf_find - * - * Looks up, and creates if absent, a lockable buffer for - * a given range of an inode. The buffer is returned - * locked. 
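When the block size is smaller than the page size, the lookup loop above uses page->private as a bitmap of valid sectors instead of the all-or-nothing PageUptodate bit. The range test it performs, reduced to a standalone check with a 512-byte sector assumed:

	#include <stdio.h>

	#define SECTOR_SHIFT	9

	/* 1 if every sector covering [off, off+len) is marked valid */
	static int range_uptodate(unsigned long bits, unsigned off, unsigned len)
	{
		unsigned i, first = off >> SECTOR_SHIFT;
		unsigned last = (off + len) >> SECTOR_SHIFT;

		for (i = first; i < last; i++)
			if (!(bits & (1UL << i)))
				return 0;
		return 1;
	}

	int main(void)
	{
		unsigned long bits = 0x0f;	/* first four sectors valid */

		printf("%d\n", range_uptodate(bits, 0, 2048));		/* 1 */
		printf("%d\n", range_uptodate(bits, 2048, 2048));	/* 0 */
		return 0;
	}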
If other overlapping buffers exist, they are - * released before the new buffer is created and locked, - * which may imply that this call will block until those buffers - * are unlocked. No I/O is implied by this call. - */ -STATIC page_buf_t * -_pagebuf_find( /* find buffer for block */ - pb_target_t *target,/* target for block */ - loff_t ioff, /* starting offset of range */ - size_t isize, /* length of range */ - page_buf_flags_t flags, /* PBF_TRYLOCK */ - page_buf_t *new_pb)/* newly allocated buffer */ -{ - loff_t range_base; - size_t range_length; - int hval; - pb_hash_t *h; - struct list_head *p; - page_buf_t *pb; - int not_locked; - - range_base = (ioff << BBSHIFT); - range_length = (isize << BBSHIFT); - - /* Ensure we never do IOs smaller than the sector size */ - BUG_ON(range_length < (1 << target->pbr_sshift)); - - /* Ensure we never do IOs that are not sector aligned */ - BUG_ON(range_base & (loff_t)target->pbr_smask); - - hval = _bhash(target->pbr_bdev, range_base); - h = &pbhash[hval]; - - spin_lock(&h->pb_hash_lock); - list_for_each(p, &h->pb_hash) { - pb = list_entry(p, page_buf_t, pb_hash_list); - - if ((target == pb->pb_target) && - (pb->pb_file_offset == range_base) && - (pb->pb_buffer_length == range_length)) { - if (pb->pb_flags & PBF_FREED) - break; - /* If we look at something bring it to the - * front of the list for next time - */ - list_del(&pb->pb_hash_list); - list_add(&pb->pb_hash_list, &h->pb_hash); - goto found; - } - } - - /* No match found */ - if (new_pb) { - _pagebuf_initialize(new_pb, target, range_base, - range_length, flags | _PBF_LOCKABLE); - new_pb->pb_hash_index = hval; - h->pb_count++; - list_add(&new_pb->pb_hash_list, &h->pb_hash); - } else { - PB_STATS_INC(pb_miss_locked); - } - - spin_unlock(&h->pb_hash_lock); - return (new_pb); - -found: - atomic_inc(&pb->pb_hold); - spin_unlock(&h->pb_hash_lock); - - /* Attempt to get the semaphore without sleeping, - * if this does not work then we need to drop the - * spinlock and do a hard attempt on the semaphore. - */ - not_locked = down_trylock(&pb->pb_sema); - if (not_locked) { - if (!(flags & PBF_TRYLOCK)) { - /* wait for buffer ownership */ - PB_TRACE(pb, "get_lock", 0); - pagebuf_lock(pb); - PB_STATS_INC(pb_get_locked_waited); - } else { - /* We asked for a trylock and failed, no need - * to look at file offset and length here, we - * know that this pagebuf at least overlaps our - * pagebuf and is locked, therefore our buffer - * either does not exist, or is this buffer - */ - - pagebuf_rele(pb); - PB_STATS_INC(pb_busy_locked); - return (NULL); - } - } else { - /* trylock worked */ - PB_SET_OWNER(pb); - } - - if (pb->pb_flags & PBF_STALE) - pb->pb_flags &= PBF_MAPPABLE | \ - PBF_MAPPED | \ - _PBF_LOCKABLE | \ - _PBF_ALL_PAGES_MAPPED | \ - _PBF_ADDR_ALLOCATED | \ - _PBF_MEM_ALLOCATED | \ - _PBF_MEM_SLAB; - PB_TRACE(pb, "got_lock", 0); - PB_STATS_INC(pb_get_locked); - return (pb); -} - - -/* - * pagebuf_find - * - * pagebuf_find returns a buffer matching the specified range of - * data for the specified target, if any of the relevant blocks - * are in memory. The buffer may have unallocated holes, if - * some, but not all, of the blocks are in memory. Even where - * pages are present in the buffer, not all of every page may be - * valid. 
- */ -page_buf_t * -pagebuf_find( /* find buffer for block */ - /* if the block is in memory */ - pb_target_t *target,/* target for block */ - loff_t ioff, /* starting offset of range */ - size_t isize, /* length of range */ - page_buf_flags_t flags) /* PBF_TRYLOCK */ -{ - return _pagebuf_find(target, ioff, isize, flags, NULL); -} - -/* - * pagebuf_get - * - * pagebuf_get assembles a buffer covering the specified range. - * Some or all of the blocks in the range may be valid. Storage - * in memory for all portions of the buffer will be allocated, - * although backing storage may not be. If PBF_READ is set in - * flags, pagebuf_iostart is called also. - */ -page_buf_t * -pagebuf_get( /* allocate a buffer */ - pb_target_t *target,/* target for buffer */ - loff_t ioff, /* starting offset of range */ - size_t isize, /* length of range */ - page_buf_flags_t flags) /* PBF_TRYLOCK */ -{ - page_buf_t *pb, *new_pb; - int error; - - new_pb = pagebuf_allocate(flags); - if (unlikely(!new_pb)) - return (NULL); - - pb = _pagebuf_find(target, ioff, isize, flags, new_pb); - if (pb != new_pb) { - pagebuf_deallocate(new_pb); - if (unlikely(!pb)) - return (NULL); - } - - PB_STATS_INC(pb_get); - - /* fill in any missing pages */ - error = _pagebuf_lookup_pages(pb, pb->pb_target->pbr_mapping, flags); - if (unlikely(error)) { - pagebuf_free(pb); - return (NULL); - } - - /* - * Always fill in the block number now, the mapped cases can do - * their own overlay of this later. - */ - pb->pb_bn = ioff; - pb->pb_count_desired = pb->pb_buffer_length; - - if (flags & PBF_READ) { - if (PBF_NOT_DONE(pb)) { - PB_TRACE(pb, "get_read", (unsigned long)flags); - PB_STATS_INC(pb_get_read); - pagebuf_iostart(pb, flags); - } else if (flags & PBF_ASYNC) { - PB_TRACE(pb, "get_read_async", (unsigned long)flags); - /* - * Read ahead call which is already satisfied, - * drop the buffer - */ - if (flags & (PBF_LOCK | PBF_TRYLOCK)) - pagebuf_unlock(pb); - pagebuf_rele(pb); - return NULL; - } else { - PB_TRACE(pb, "get_read_done", (unsigned long)flags); - /* We do not want read in the flags */ - pb->pb_flags &= ~PBF_READ; - } - } else { - PB_TRACE(pb, "get_write", (unsigned long)flags); - } - return (pb); -} - -/* - * Create a skeletal pagebuf (no pages associated with it). - */ -page_buf_t * -pagebuf_lookup( - struct pb_target *target, - loff_t ioff, - size_t isize, - page_buf_flags_t flags) -{ - page_buf_t *pb; - - pb = pagebuf_allocate(flags); - if (pb) { - _pagebuf_initialize(pb, target, ioff, isize, flags); - } - return pb; -} - -/* - * If we are not low on memory then do the readahead in a deadlock - * safe manner. 
- */ -void -pagebuf_readahead( - pb_target_t *target, - loff_t ioff, - size_t isize, - page_buf_flags_t flags) -{ - struct backing_dev_info *bdi; - - bdi = target->pbr_mapping->backing_dev_info; - if (bdi_read_congested(bdi)) - return; - if (bdi_write_congested(bdi)) - return; - - flags |= (PBF_TRYLOCK|PBF_READ|PBF_ASYNC|PBF_MAPPABLE|PBF_READ_AHEAD); - pagebuf_get(target, ioff, isize, flags); -} - -page_buf_t * -pagebuf_get_empty( - size_t len, - pb_target_t *target) -{ - page_buf_t *pb; - - pb = pagebuf_allocate(_PBF_LOCKABLE); - if (pb) - _pagebuf_initialize(pb, target, 0, len, _PBF_LOCKABLE); - return pb; -} - -static inline struct page * -mem_to_page( - void *addr) -{ - if (((unsigned long)addr < VMALLOC_START) || - ((unsigned long)addr >= VMALLOC_END)) { - return virt_to_page(addr); - } else { - return vmalloc_to_page(addr); - } -} - -int -pagebuf_associate_memory( - page_buf_t *pb, - void *mem, - size_t len) -{ - int rval; - int i = 0; - size_t ptr; - size_t end, end_cur; - off_t offset; - int page_count; - - page_count = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; - offset = (off_t) mem - ((off_t)mem & PAGE_CACHE_MASK); - if (offset && (len > PAGE_CACHE_SIZE)) - page_count++; - - /* Free any previous set of page pointers */ - if (pb->pb_pages && (pb->pb_pages != pb->pb_page_array)) { - kfree(pb->pb_pages); - } - pb->pb_pages = NULL; - pb->pb_addr = mem; - - rval = _pagebuf_get_pages(pb, page_count, 0); - if (rval) - return rval; - - pb->pb_offset = offset; - ptr = (size_t) mem & PAGE_CACHE_MASK; - end = PAGE_CACHE_ALIGN((size_t) mem + len); - end_cur = end; - /* set up first page */ - pb->pb_pages[0] = mem_to_page(mem); - - ptr += PAGE_CACHE_SIZE; - pb->pb_page_count = ++i; - while (ptr < end) { - pb->pb_pages[i] = mem_to_page((void *)ptr); - pb->pb_page_count = ++i; - ptr += PAGE_CACHE_SIZE; - } - pb->pb_locked = 0; - - pb->pb_count_desired = pb->pb_buffer_length = len; - pb->pb_flags |= PBF_MAPPED; - - return 0; -} - -page_buf_t * -pagebuf_get_no_daddr( - size_t len, - pb_target_t *target) -{ - int rval; - void *rmem = NULL; - page_buf_flags_t flags = _PBF_LOCKABLE | PBF_FORCEIO; - page_buf_t *pb; - size_t tlen = 0; - - if (unlikely(len > 0x20000)) - return NULL; - - pb = pagebuf_allocate(flags); - if (!pb) - return NULL; - - _pagebuf_initialize(pb, target, 0, len, flags); - - do { - if (tlen == 0) { - tlen = len; /* first time */ - } else { - kfree(rmem); /* free the mem from the previous try */ - tlen <<= 1; /* double the size and try again */ - } - if ((rmem = kmalloc(tlen, GFP_KERNEL)) == 0) { - pagebuf_free(pb); - return NULL; - } - } while ((size_t)rmem != ((size_t)rmem & ~target->pbr_smask)); - - if ((rval = pagebuf_associate_memory(pb, rmem, len)) != 0) { - kfree(rmem); - pagebuf_free(pb); - return NULL; - } - /* otherwise pagebuf_free just ignores it */ - pb->pb_flags |= (_PBF_MEM_ALLOCATED | _PBF_MEM_SLAB); - PB_CLEAR_OWNER(pb); - up(&pb->pb_sema); /* Return unlocked pagebuf */ - - PB_TRACE(pb, "no_daddr", rmem); - - return pb; -} - - -/* - * pagebuf_hold - * - * Increment reference count on buffer, to hold the buffer concurrently - * with another thread which may release (free) the buffer asynchronously. - * - * Must hold the buffer already to call this function. - */ -void -pagebuf_hold( - page_buf_t *pb) -{ - atomic_inc(&pb->pb_hold); - PB_TRACE(pb, "hold", 0); -} - -/* - * pagebuf_free - * - * pagebuf_free releases the specified buffer. The modification - * state of any associated pages is left unchanged. 
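pagebuf_get_no_daddr above cannot ask kmalloc for a specific alignment, so it doubles the request size until the allocator happens to return a pointer that satisfies the target's sector mask. The same trick in userspace terms, with a retry cap added for safety (the kernel loop has none); posix_memalign would be the modern equivalent:

	#include <stdio.h>
	#include <stdlib.h>

	static void *alloc_aligned(size_t len, size_t align, size_t *got)
	{
		void *mem = NULL;
		size_t tlen = len;
		int tries = 20;

		while (tries--) {
			free(mem);	/* discard the previous attempt */
			mem = malloc(tlen);
			if (mem == NULL)
				return NULL;
			if (((size_t)mem & (align - 1)) == 0) {
				*got = tlen;
				return mem;	/* landed on an aligned address */
			}
			tlen <<= 1;	/* double the size and try again */
		}
		free(mem);
		return NULL;
	}

	int main(void)
	{
		size_t got = 0;
		void *p = alloc_aligned(600, 64, &got);

		if (p)
			printf("%p, %zu bytes\n", p, got);
		free(p);
		return 0;
	}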
- */ -void -pagebuf_free( - page_buf_t *pb) -{ - if (pb->pb_flags & _PBF_LOCKABLE) { - pb_hash_t *h = pb_hash(pb); - - spin_lock(&h->pb_hash_lock); - _pagebuf_free_object(h, pb); - } else { - _pagebuf_free_object(NULL, pb); - } -} - -/* - * pagebuf_rele - * - * pagebuf_rele releases a hold on the specified buffer. If the - * hold count is 1, pagebuf_rele calls pagebuf_free. - */ -void -pagebuf_rele( - page_buf_t *pb) -{ - pb_hash_t *h; - - PB_TRACE(pb, "rele", pb->pb_relse); - if (pb->pb_flags & _PBF_LOCKABLE) { - h = pb_hash(pb); - spin_lock(&h->pb_hash_lock); - } else { - h = NULL; - } - - if (atomic_dec_and_test(&pb->pb_hold)) { - int do_free = 1; - - if (pb->pb_relse) { - atomic_inc(&pb->pb_hold); - if (h) - spin_unlock(&h->pb_hash_lock); - (*(pb->pb_relse)) (pb); - do_free = 0; - } - if (pb->pb_flags & PBF_DELWRI) { - pb->pb_flags |= PBF_ASYNC; - atomic_inc(&pb->pb_hold); - if (h && do_free) - spin_unlock(&h->pb_hash_lock); - pagebuf_delwri_queue(pb, 0); - do_free = 0; - } else if (pb->pb_flags & PBF_FS_MANAGED) { - if (h) - spin_unlock(&h->pb_hash_lock); - do_free = 0; - } - - if (do_free) { - _pagebuf_free_object(h, pb); - } - } else if (h) { - spin_unlock(&h->pb_hash_lock); - } -} - - -/* - * Mutual exclusion on buffers. Locking model: - * - * Buffers associated with inodes for which buffer locking - * is not enabled are not protected by semaphores, and are - * assumed to be exclusively owned by the caller. There is a - * spinlock in the buffer, used by the caller when concurrent - * access is possible. - */ - -/* - * pagebuf_cond_lock - * - * pagebuf_cond_lock locks a buffer object, if it is not already locked. - * Note that this in no way - * locks the underlying pages, so it is only useful for synchronizing - * concurrent use of page buffer objects, not for synchronizing independent - * access to the underlying pages. - */ -int -pagebuf_cond_lock( /* lock buffer, if not locked */ - /* returns -EBUSY if locked */ - page_buf_t *pb) -{ - int locked; - - ASSERT(pb->pb_flags & _PBF_LOCKABLE); - locked = down_trylock(&pb->pb_sema) == 0; - if (locked) { - PB_SET_OWNER(pb); - } - PB_TRACE(pb, "cond_lock", (long)locked); - return(locked ? 0 : -EBUSY); -} - -/* - * pagebuf_lock_value - * - * Return lock value for a pagebuf - */ -int -pagebuf_lock_value( - page_buf_t *pb) -{ - ASSERT(pb->pb_flags & _PBF_LOCKABLE); - return(atomic_read(&pb->pb_sema.count)); -} - -/* - * pagebuf_lock - * - * pagebuf_lock locks a buffer object. Note that this in no way - * locks the underlying pages, so it is only useful for synchronizing - * concurrent use of page buffer objects, not for synchronizing independent - * access to the underlying pages. - */ -int -pagebuf_lock( - page_buf_t *pb) -{ - ASSERT(pb->pb_flags & _PBF_LOCKABLE); - - PB_TRACE(pb, "lock", 0); - if (atomic_read(&pb->pb_io_remaining)) - blk_run_queues(); - down(&pb->pb_sema); - PB_SET_OWNER(pb); - PB_TRACE(pb, "locked", 0); - return 0; -} - -/* - * pagebuf_unlock - * - * pagebuf_unlock releases the lock on the buffer object created by - * pagebuf_lock or pagebuf_cond_lock (not any - * pinning of underlying pages created by pagebuf_pin). - */ -void -pagebuf_unlock( /* unlock buffer */ - page_buf_t *pb) /* buffer to unlock */ -{ - ASSERT(pb->pb_flags & _PBF_LOCKABLE); - PB_CLEAR_OWNER(pb); - up(&pb->pb_sema); - PB_TRACE(pb, "unlock", 0); -} - - -/* - * Pinning Buffer Storage in Memory - */ - -/* - * pagebuf_pin - * - * pagebuf_pin locks all of the memory represented by a buffer in - * memory.
Multiple calls to pagebuf_pin and pagebuf_unpin, for - * the same or different buffers affecting a given page, will - * properly count the number of outstanding "pin" requests. The - * buffer may be released after the pagebuf_pin and a different - * buffer used when calling pagebuf_unpin, if desired. - * pagebuf_pin should be used by the file system when it wants to be - * assured that no attempt will be made to force the affected - * memory to disk. It does not assure that a given logical page - * will not be moved to a different physical page. - */ -void -pagebuf_pin( - page_buf_t *pb) -{ - atomic_inc(&pb->pb_pin_count); - PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter); -} - -/* - * pagebuf_unpin - * - * pagebuf_unpin reverses the locking of memory performed by - * pagebuf_pin. Note that both functions affect the logical - * pages associated with the buffer, not the buffer itself. - */ -void -pagebuf_unpin( - page_buf_t *pb) -{ - if (atomic_dec_and_test(&pb->pb_pin_count)) { - wake_up_all(&pb->pb_waiters); - } - PB_TRACE(pb, "unpin", (long)pb->pb_pin_count.counter); -} - -int -pagebuf_ispin( - page_buf_t *pb) -{ - return atomic_read(&pb->pb_pin_count); -} - -/* - * pagebuf_wait_unpin - * - * pagebuf_wait_unpin waits until all of the memory associated - * with the buffer is no longer locked in memory. It returns - * immediately if none of the affected pages are locked. - */ -static inline void -_pagebuf_wait_unpin( - page_buf_t *pb) -{ - DECLARE_WAITQUEUE (wait, current); - - if (atomic_read(&pb->pb_pin_count) == 0) - return; - - add_wait_queue(&pb->pb_waiters, &wait); - for (;;) { - current->state = TASK_UNINTERRUPTIBLE; - if (atomic_read(&pb->pb_pin_count) == 0) - break; - if (atomic_read(&pb->pb_io_remaining)) - blk_run_queues(); - schedule(); - } - remove_wait_queue(&pb->pb_waiters, &wait); - current->state = TASK_RUNNING; -} - -/* - * Buffer Utility Routines - */ - -/* - * pagebuf_iodone - * - * pagebuf_iodone marks a buffer for which I/O is in progress - * done with respect to that I/O. The pb_iodone routine, if - * present, will be called as a side-effect. - */ -void -pagebuf_iodone_work( - void *v) -{ - page_buf_t *pb = (page_buf_t *)v; - - if (pb->pb_iodone) { - (*(pb->pb_iodone)) (pb); - return; - } - - if (pb->pb_flags & PBF_ASYNC) { - if ((pb->pb_flags & _PBF_LOCKABLE) && !pb->pb_relse) - pagebuf_unlock(pb); - pagebuf_rele(pb); - } -} - -void -pagebuf_iodone( - page_buf_t *pb, - int dataio, - int schedule) -{ - pb->pb_flags &= ~(PBF_READ | PBF_WRITE); - if (pb->pb_error == 0) { - pb->pb_flags &= ~(PBF_PARTIAL | PBF_NONE); - } - - PB_TRACE(pb, "iodone", pb->pb_iodone); - - if ((pb->pb_iodone) || (pb->pb_flags & PBF_ASYNC)) { - if (schedule) { - INIT_WORK(&pb->pb_iodone_work, pagebuf_iodone_work, pb); - queue_work(dataio ? pagebuf_dataio_workqueue : - pagebuf_logio_workqueue, &pb->pb_iodone_work); - } else { - pagebuf_iodone_work(pb); - } - } else { - up(&pb->pb_iodonesema); - } -} - -/* - * pagebuf_ioerror - * - * pagebuf_ioerror sets the error code for a buffer. - */ -void -pagebuf_ioerror( /* mark/clear buffer error flag */ - page_buf_t *pb, /* buffer to mark */ - unsigned int error) /* error to store (0 if none) */ -{ - pb->pb_error = error; - PB_TRACE(pb, "ioerror", (unsigned long)error); -} - -/* - * pagebuf_iostart - * - * pagebuf_iostart initiates I/O on a buffer, based on the flags supplied. - * If necessary, it will arrange for any disk space allocation required, - * and it will break up the request if the block mappings require it.
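The pin/unpin pair above is plain reference counting: only the transition to zero matters, because that is when _pagebuf_wait_unpin's sleepers may proceed. The counting rule in C11 atomics, with the waitqueue elided:

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int pin_count;

	static void pin(void)
	{
		atomic_fetch_add(&pin_count, 1);
	}

	/* returns 1 when the last pin is dropped, i.e. when waiters should wake */
	static int unpin(void)
	{
		return atomic_fetch_sub(&pin_count, 1) == 1;
	}

	int main(void)
	{
		pin();
		pin();
		printf("%d\n", unpin());	/* 0: a pin remains */
		printf("%d\n", unpin());	/* 1: last pin gone, wake waiters */
		return 0;
	}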
- * The pb_iodone routine in the buffer supplied will only be called - * when all of the subsidiary I/O requests, if any, have been completed. - * pagebuf_iostart calls the pagebuf_ioinitiate routine or - * pagebuf_iorequest, if the former routine is not defined, to start - * the I/O on a given low-level request. - */ -int -pagebuf_iostart( /* start I/O on a buffer */ - page_buf_t *pb, /* buffer to start */ - page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ - /* PBF_WRITE, PBF_DELWRI, */ - /* PBF_SYNC, PBF_DONT_BLOCK */ -{ - int status = 0; - - PB_TRACE(pb, "iostart", (unsigned long)flags); - - if (flags & PBF_DELWRI) { - pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC); - pb->pb_flags |= flags & - (PBF_DELWRI | PBF_ASYNC | PBF_SYNC); - pagebuf_delwri_queue(pb, 1); - return status; - } - - pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | \ - PBF_DELWRI | PBF_READ_AHEAD | PBF_RUN_QUEUES); - pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ - PBF_SYNC | PBF_READ_AHEAD | PBF_RUN_QUEUES); - - BUG_ON(pb->pb_bn == PAGE_BUF_DADDR_NULL); - - /* For writes allow an alternate strategy routine to precede - * the actual I/O request (which may not be issued at all in - * a shutdown situation, for example). - */ - status = (flags & PBF_WRITE) ? - pagebuf_iostrategy(pb) : pagebuf_iorequest(pb); - - /* Wait for I/O if we are not an async request. - * Note: async I/O request completion will release the buffer, - * and that can already be done by this point. So using the - * buffer pointer from here on, after async I/O, is invalid. - */ - if (!status && !(flags & PBF_ASYNC)) - status = pagebuf_iowait(pb); - - return status; -} - -/* - * Helper routine for pagebuf_iorequest - */ - -STATIC __inline__ int -_pagebuf_iolocked( - page_buf_t *pb) -{ - ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); - if (pb->pb_flags & PBF_READ) - return pb->pb_locked; - return ((pb->pb_flags & _PBF_LOCKABLE) == 0); -} - -STATIC __inline__ void -_pagebuf_iodone( - page_buf_t *pb, - int schedule) -{ - if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { - pb->pb_locked = 0; - pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), schedule); - } -} - -STATIC int -bio_end_io_pagebuf( - struct bio *bio, - unsigned int bytes_done, - int error) -{ - page_buf_t *pb = (page_buf_t *)bio->bi_private; - unsigned int i, blocksize = pb->pb_target->pbr_bsize; - unsigned int sectorshift = pb->pb_target->pbr_sshift; - struct bio_vec *bvec = bio->bi_io_vec; - - if (bio->bi_size) - return 1; - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - pb->pb_error = EIO; - - for (i = 0; i < bio->bi_vcnt; i++, bvec++) { - struct page *page = bvec->bv_page; - - if (pb->pb_error) { - SetPageError(page); - } else if (blocksize == PAGE_CACHE_SIZE) { - SetPageUptodate(page); - } else if (!PagePrivate(page)) { - unsigned int j, range; - - ASSERT(blocksize < PAGE_CACHE_SIZE); - range = (bvec->bv_offset + bvec->bv_len) >> sectorshift; - for (j = bvec->bv_offset >> sectorshift; j < range; j++) - set_bit(j, &page->private); - if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1)) - SetPageUptodate(page); - } - - if (_pagebuf_iolocked(pb)) { - unlock_page(page); - } - } - - _pagebuf_iodone(pb, 1); - bio_put(bio); - return 0; -} - -void -_pagebuf_ioapply( - page_buf_t *pb) -{ - int i, map_i, total_nr_pages, nr_pages; - struct bio *bio; - int offset = pb->pb_offset; - int size = pb->pb_count_desired; - sector_t sector = pb->pb_bn; - unsigned int blocksize = pb->pb_target->pbr_bsize; - int locking = _pagebuf_iolocked(pb); - - total_nr_pages = 
pb->pb_page_count; - map_i = 0; - - /* Special code path for reading a sub page size pagebuf in -- - * we populate up the whole page, and hence the other metadata - * in the same page. This optimization is only valid when the - * filesystem block size and the page size are equal. - */ - if ((pb->pb_buffer_length < PAGE_CACHE_SIZE) && - (pb->pb_flags & PBF_READ) && locking && - (blocksize == PAGE_CACHE_SIZE)) { - bio = bio_alloc(GFP_NOIO, 1); - - bio->bi_bdev = pb->pb_target->pbr_bdev; - bio->bi_sector = sector - (offset >> BBSHIFT); - bio->bi_end_io = bio_end_io_pagebuf; - bio->bi_private = pb; - - bio_add_page(bio, pb->pb_pages[0], PAGE_CACHE_SIZE, 0); - size = 0; - - atomic_inc(&pb->pb_io_remaining); - - goto submit_io; - } - - /* Lock down the pages which we need to for the request */ - if (locking && (pb->pb_flags & PBF_WRITE) && (pb->pb_locked == 0)) { - for (i = 0; size; i++) { - int nbytes = PAGE_CACHE_SIZE - offset; - struct page *page = pb->pb_pages[i]; - - if (nbytes > size) - nbytes = size; - - lock_page(page); - - size -= nbytes; - offset = 0; - } - offset = pb->pb_offset; - size = pb->pb_count_desired; - } - -next_chunk: - atomic_inc(&pb->pb_io_remaining); - nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); - if (nr_pages > total_nr_pages) - nr_pages = total_nr_pages; - - bio = bio_alloc(GFP_NOIO, nr_pages); - bio->bi_bdev = pb->pb_target->pbr_bdev; - bio->bi_sector = sector; - bio->bi_end_io = bio_end_io_pagebuf; - bio->bi_private = pb; - - for (; size && nr_pages; nr_pages--, map_i++) { - int nbytes = PAGE_CACHE_SIZE - offset; - - if (nbytes > size) - nbytes = size; - - if (bio_add_page(bio, pb->pb_pages[map_i], - nbytes, offset) < nbytes) - break; - - offset = 0; - sector += nbytes >> BBSHIFT; - size -= nbytes; - total_nr_pages--; - } - -submit_io: - if (likely(bio->bi_size)) { - submit_bio((pb->pb_flags & PBF_READ) ? READ : WRITE, bio); - if (size) - goto next_chunk; - } else { - bio_put(bio); - pagebuf_ioerror(pb, EIO); - } - - if (pb->pb_flags & PBF_RUN_QUEUES) { - pb->pb_flags &= ~PBF_RUN_QUEUES; - if (atomic_read(&pb->pb_io_remaining) > 1) - blk_run_queues(); - } -} - -/* - * pagebuf_iorequest - * - * pagebuf_iorequest is the core I/O request routine. - * It assumes that the buffer is well-formed and - * mapped and ready for physical I/O, unlike - * pagebuf_iostart() and pagebuf_iophysio(). Those - * routines call the pagebuf_ioinitiate routine to start I/O, - * if it is present, or else call pagebuf_iorequest() - * directly if the pagebuf_ioinitiate routine is not present. - * - * This function will be responsible for ensuring access to the - * pages is restricted whilst I/O is in progress - for locking - * pagebufs the pagebuf lock is the mediator, for non-locking - * pagebufs the pages will be locked. In the locking case we - * need to use the pagebuf lock as multiple meta-data buffers - * will reference the same page. - */ -int -pagebuf_iorequest( /* start real I/O */ - page_buf_t *pb) /* buffer to convey to device */ -{ - PB_TRACE(pb, "iorequest", 0); - - if (pb->pb_flags & PBF_DELWRI) { - pagebuf_delwri_queue(pb, 1); - return 0; - } - - if (pb->pb_flags & PBF_WRITE) { - _pagebuf_wait_unpin(pb); - } - - pagebuf_hold(pb); - - /* Set the count to 1 initially, this will stop an I/O - * completion callout which happens before we have started - * all the I/O from calling pagebuf_iodone too early. 
- */ - atomic_set(&pb->pb_io_remaining, 1); - _pagebuf_ioapply(pb); - _pagebuf_iodone(pb, 0); - - pagebuf_rele(pb); - return 0; -} - -/* - * pagebuf_iowait - * - * pagebuf_iowait waits for I/O to complete on the buffer supplied. - * It returns immediately if no I/O is pending. In any case, it returns - * the error code, if any, or 0 if there is no error. - */ -int -pagebuf_iowait( - page_buf_t *pb) -{ - PB_TRACE(pb, "iowait", 0); - if (atomic_read(&pb->pb_io_remaining)) - blk_run_queues(); - down(&pb->pb_iodonesema); - PB_TRACE(pb, "iowaited", (long)pb->pb_error); - return pb->pb_error; -} - -STATIC void * -pagebuf_mapout_locked( - page_buf_t *pb) -{ - void *old_addr = NULL; - - if (pb->pb_flags & PBF_MAPPED) { - if (pb->pb_flags & _PBF_ADDR_ALLOCATED) - old_addr = pb->pb_addr - pb->pb_offset; - pb->pb_addr = NULL; - pb->pb_flags &= ~(PBF_MAPPED | _PBF_ADDR_ALLOCATED); - } - - return old_addr; /* Caller must free the address space, - * we are under a spin lock, probably - * not safe to do vfree here - */ -} - -caddr_t -pagebuf_offset( - page_buf_t *pb, - size_t offset) -{ - struct page *page; - - offset += pb->pb_offset; - - page = pb->pb_pages[offset >> PAGE_CACHE_SHIFT]; - return (caddr_t) page_address(page) + (offset & (PAGE_CACHE_SIZE - 1)); -} - -/* - * pagebuf_iomove - * - * Move data into or out of a buffer. - */ -void -pagebuf_iomove( - page_buf_t *pb, /* buffer to process */ - size_t boff, /* starting buffer offset */ - size_t bsize, /* length to copy */ - caddr_t data, /* data address */ - page_buf_rw_t mode) /* read/write flag */ -{ - size_t bend, cpoff, csize; - struct page *page; - - bend = boff + bsize; - while (boff < bend) { - page = pb->pb_pages[page_buf_btoct(boff + pb->pb_offset)]; - cpoff = page_buf_poff(boff + pb->pb_offset); - csize = min_t(size_t, - PAGE_CACHE_SIZE-cpoff, pb->pb_count_desired-boff); - - ASSERT(((csize + cpoff) <= PAGE_CACHE_SIZE)); - - switch (mode) { - case PBRW_ZERO: - memset(page_address(page) + cpoff, 0, csize); - break; - case PBRW_READ: - memcpy(data, page_address(page) + cpoff, csize); - break; - case PBRW_WRITE: - memcpy(page_address(page) + cpoff, data, csize); - } - - boff += csize; - data += csize; - } -} - - -/* - * Pagebuf delayed write buffer handling - */ - -STATIC int pbd_active = 1; -STATIC LIST_HEAD(pbd_delwrite_queue); -STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED; - -STATIC void -pagebuf_delwri_queue( - page_buf_t *pb, - int unlock) -{ - PB_TRACE(pb, "delwri_q", (long)unlock); - spin_lock(&pbd_delwrite_lock); - /* If already in the queue, dequeue and place at tail */ - if (!list_empty(&pb->pb_list)) { - if (unlock) { - atomic_dec(&pb->pb_hold); - } - list_del(&pb->pb_list); - } - - list_add_tail(&pb->pb_list, &pbd_delwrite_queue); - pb->pb_flushtime = jiffies + pb_params.age_buffer.val; - spin_unlock(&pbd_delwrite_lock); - - if (unlock && (pb->pb_flags & _PBF_LOCKABLE)) { - pagebuf_unlock(pb); - } -} - -void -pagebuf_delwri_dequeue( - page_buf_t *pb) -{ - PB_TRACE(pb, "delwri_uq", 0); - spin_lock(&pbd_delwrite_lock); - list_del_init(&pb->pb_list); - pb->pb_flags &= ~PBF_DELWRI; - spin_unlock(&pbd_delwrite_lock); -} - -STATIC void -pagebuf_runall_queues( - struct workqueue_struct *queue) -{ - flush_workqueue(queue); -} - -/* Defines for pagebuf daemon */ -DECLARE_WAIT_QUEUE_HEAD(pbd_waitq); -STATIC int force_flush; - -STATIC void -pagebuf_daemon_wakeup( - int flag) -{ - force_flush = flag; - if (waitqueue_active(&pbd_waitq)) { - wake_up_interruptible(&pbd_waitq); - } -} - -typedef void (*timeout_fn)(unsigned long); - 
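pagebuf_delwri_queue above stamps each buffer with a flush deadline of now plus the age_buffer tunable; the daemon below only writes a buffer back once that deadline has passed, unless a flush is being forced. The decision in isolation, with a wrap-safe comparison in the spirit of the kernel's time_before():

	#include <stdio.h>

	/* wrap-safe "a is before b", in the spirit of the kernel macro */
	static int time_before(unsigned long a, unsigned long b)
	{
		return (long)(a - b) < 0;
	}

	static int should_flush(unsigned long now, unsigned long deadline, int force)
	{
		return force || !time_before(now, deadline);
	}

	int main(void)
	{
		unsigned long now = 1000, age = 15 * 100;	/* ~15s at HZ=100 */
		unsigned long deadline = now + age;

		printf("%d\n", should_flush(now, deadline, 0));		/* 0: too young */
		printf("%d\n", should_flush(now + age, deadline, 0));	/* 1: aged out */
		printf("%d\n", should_flush(now, deadline, 1));		/* 1: forced */
		return 0;
	}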
-STATIC int -pagebuf_daemon( - void *data) -{ - int count; - page_buf_t *pb; - struct list_head *curr, *next, tmp; - struct timer_list pb_daemon_timer = - TIMER_INITIALIZER((timeout_fn)pagebuf_daemon_wakeup, 0, 0); - - /* Set up the thread */ - daemonize("pagebufd"); - - current->flags |= PF_MEMALLOC; - - INIT_LIST_HEAD(&tmp); - do { - /* swsusp */ - if (current->flags & PF_FREEZE) - refrigerator(PF_IOTHREAD); - - if (pbd_active == 1) { - mod_timer(&pb_daemon_timer, - jiffies + pb_params.flush_interval.val); - interruptible_sleep_on(&pbd_waitq); - } - - if (pbd_active == 0) { - del_timer_sync(&pb_daemon_timer); - } - - spin_lock(&pbd_delwrite_lock); - - count = 0; - list_for_each_safe(curr, next, &pbd_delwrite_queue) { - pb = list_entry(curr, page_buf_t, pb_list); - - PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); - - if ((pb->pb_flags & PBF_DELWRI) && !pagebuf_ispin(pb) && - (((pb->pb_flags & _PBF_LOCKABLE) == 0) || - !pagebuf_cond_lock(pb))) { - - if (!force_flush && - time_before(jiffies, pb->pb_flushtime)) { - pagebuf_unlock(pb); - break; - } - - pb->pb_flags &= ~PBF_DELWRI; - pb->pb_flags |= PBF_WRITE; - - list_del(&pb->pb_list); - list_add(&pb->pb_list, &tmp); - - count++; - } - } - - spin_unlock(&pbd_delwrite_lock); - while (!list_empty(&tmp)) { - pb = list_entry(tmp.next, page_buf_t, pb_list); - list_del_init(&pb->pb_list); - - pagebuf_iostrategy(pb); - } - - if (as_list_len > 0) - purge_addresses(); - if (count) - blk_run_queues(); - - force_flush = 0; - } while (pbd_active == 1); - - pbd_active = -1; - wake_up_interruptible(&pbd_waitq); - - return 0; -} - -void -pagebuf_delwri_flush( - pb_target_t *target, - u_long flags, - int *pinptr) -{ - page_buf_t *pb; - struct list_head *curr, *next, tmp; - int pincount = 0; - int flush_cnt = 0; - - pagebuf_runall_queues(pagebuf_dataio_workqueue); - pagebuf_runall_queues(pagebuf_logio_workqueue); - - spin_lock(&pbd_delwrite_lock); - INIT_LIST_HEAD(&tmp); - - list_for_each_safe(curr, next, &pbd_delwrite_queue) { - pb = list_entry(curr, page_buf_t, pb_list); - - /* - * Skip other targets, markers and in progress buffers - */ - - if ((pb->pb_flags == 0) || (pb->pb_target != target) || - !(pb->pb_flags & PBF_DELWRI)) { - continue; - } - - PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); - if (pagebuf_ispin(pb)) { - pincount++; - continue; - } - - pb->pb_flags &= ~PBF_DELWRI; - pb->pb_flags |= PBF_WRITE; - list_move(&pb->pb_list, &tmp); - } - /* ok found all the items that can be worked on - * drop the lock and process the private list */ - spin_unlock(&pbd_delwrite_lock); - - list_for_each_safe(curr, next, &tmp) { - pb = list_entry(curr, page_buf_t, pb_list); - - if (flags & PBDF_WAIT) - pb->pb_flags &= ~PBF_ASYNC; - else - list_del_init(curr); - - pagebuf_lock(pb); - pagebuf_iostrategy(pb); - if (++flush_cnt > 32) { - blk_run_queues(); - flush_cnt = 0; - } - } - - blk_run_queues(); - - while (!list_empty(&tmp)) { - pb = list_entry(tmp.next, page_buf_t, pb_list); - - list_del_init(&pb->pb_list); - pagebuf_iowait(pb); - if (!pb->pb_relse) - pagebuf_unlock(pb); - pagebuf_rele(pb); - } - - if (pinptr) - *pinptr = pincount; -} - -STATIC int -pagebuf_daemon_start(void) -{ - int rval; - - pagebuf_logio_workqueue = create_workqueue("xfslogd"); - if (!pagebuf_logio_workqueue) - return -ENOMEM; - - pagebuf_dataio_workqueue = create_workqueue("xfsdatad"); - if (!pagebuf_dataio_workqueue) { - destroy_workqueue(pagebuf_logio_workqueue); - return -ENOMEM; - } - - rval = kernel_thread(pagebuf_daemon, NULL, CLONE_FS|CLONE_FILES); - if (rval < 0) { - 
destroy_workqueue(pagebuf_logio_workqueue); - destroy_workqueue(pagebuf_dataio_workqueue); - } - - return rval; -} - -/* - * pagebuf_daemon_stop - * - * Note: do not mark as __exit, it is called from pagebuf_terminate. - */ -STATIC void -pagebuf_daemon_stop(void) -{ - pbd_active = 0; - wake_up_interruptible(&pbd_waitq); - wait_event_interruptible(pbd_waitq, pbd_active); - destroy_workqueue(pagebuf_logio_workqueue); - destroy_workqueue(pagebuf_dataio_workqueue); -} - - -/* - * Pagebuf sysctl interface - */ - -STATIC int -pb_stats_clear_handler( - ctl_table *ctl, - int write, - struct file *filp, - void *buffer, - size_t *lenp) -{ - int c, ret; - int *valp = ctl->data; - - ret = proc_dointvec_minmax(ctl, write, filp, buffer, lenp); - - if (!ret && write && *valp) { - printk("XFS Clearing pbstats\n"); - for (c = 0; c < NR_CPUS; c++) { - if (!cpu_possible(c)) continue; - memset(&per_cpu(pbstats, c), 0, - sizeof(struct pbstats)); - } - pb_params.stats_clear.val = 0; - } - - return ret; -} - -STATIC struct ctl_table_header *pagebuf_table_header; - -STATIC ctl_table pagebuf_table[] = { - {PB_FLUSH_INT, "flush_int", &pb_params.flush_interval.val, - sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, - &pb_params.flush_interval.min, &pb_params.flush_interval.max}, - - {PB_FLUSH_AGE, "flush_age", &pb_params.age_buffer.val, - sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, - &pb_params.age_buffer.min, &pb_params.age_buffer.max}, - - {PB_STATS_CLEAR, "stats_clear", &pb_params.stats_clear.val, - sizeof(int), 0644, NULL, &pb_stats_clear_handler, - &sysctl_intvec, NULL, - &pb_params.stats_clear.min, &pb_params.stats_clear.max}, - -#ifdef PAGEBUF_TRACE - {PB_DEBUG, "debug", &pb_params.debug.val, - sizeof(int), 0644, NULL, &proc_dointvec_minmax, - &sysctl_intvec, NULL, - &pb_params.debug.min, &pb_params.debug.max}, -#endif - {0} -}; - -STATIC ctl_table pagebuf_dir_table[] = { - {VM_PAGEBUF, "pagebuf", NULL, 0, 0555, pagebuf_table}, - {0} -}; - -STATIC ctl_table pagebuf_root_table[] = { - {CTL_VM, "vm", NULL, 0, 0555, pagebuf_dir_table}, - {0} -}; - -#ifdef CONFIG_PROC_FS -STATIC int -pagebuf_readstats( - char *buffer, - char **start, - off_t offset, - int count, - int *eof, - void *data) -{ - int c, i, len, val; - - len = 0; - len += sprintf(buffer + len, "pagebuf"); - for (i = 0; i < sizeof(struct pbstats) / sizeof(u_int32_t); i++) { - val = 0; - for (c = 0 ; c < NR_CPUS; c++) { - if (!cpu_possible(c)) continue; - val += *(((u_int32_t*)&per_cpu(pbstats, c) + i)); - } - len += sprintf(buffer + len, " %u", val); - } - buffer[len++] = '\n'; - - if (offset >= len) { - *start = buffer; - *eof = 1; - return 0; - } - *start = buffer + offset; - if ((len -= offset) > count) - return count; - *eof = 1; - - return len; -} -#endif /* CONFIG_PROC_FS */ - -/* - * Initialization and Termination - */ - -int __init -pagebuf_init(void) -{ - int i; - - pagebuf_table_header = register_sysctl_table(pagebuf_root_table, 1); - -#ifdef CONFIG_PROC_FS - if (proc_mkdir("fs/pagebuf", 0)) - create_proc_read_entry( - "fs/pagebuf/stat", 0, 0, pagebuf_readstats, NULL); -#endif - - pagebuf_cache = kmem_cache_create("page_buf_t", sizeof(page_buf_t), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (pagebuf_cache == NULL) { - printk("pagebuf: couldn't init pagebuf cache\n"); - pagebuf_terminate(); - return -ENOMEM; - } - - for (i = 0; i < NHASH; i++) { - spin_lock_init(&pbhash[i].pb_hash_lock); - INIT_LIST_HEAD(&pbhash[i].pb_hash); - } - -#ifdef PAGEBUF_TRACE - pagebuf_trace_buf = 
ktrace_alloc(PAGEBUF_TRACE_SIZE, KM_SLEEP); -#endif - - pagebuf_daemon_start(); - return 0; -} - - -/* - * pagebuf_terminate. - * - * Note: do not mark as __exit, this is also called from the __init code. - */ -void -pagebuf_terminate(void) -{ - pagebuf_daemon_stop(); - - kmem_cache_destroy(pagebuf_cache); - - unregister_sysctl_table(pagebuf_table_header); -#ifdef CONFIG_PROC_FS - remove_proc_entry("fs/pagebuf/stat", NULL); - remove_proc_entry("fs/pagebuf", NULL); -#endif -} - - -/* - * Module management (for kernel debugger module) - */ -EXPORT_SYMBOL(pagebuf_offset); -#ifdef DEBUG -EXPORT_SYMBOL(pbd_delwrite_queue); -#endif diff -Nru a/fs/xfs/pagebuf/page_buf.h b/fs/xfs/pagebuf/page_buf.h --- a/fs/xfs/pagebuf/page_buf.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,340 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
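pagebuf_readstats() above walks each per-CPU pbstats structure as a flat array of u_int32_t counters and sums entry i across all possible CPUs. Here is that idiom in stand-alone form; the three-field layout below is a hypothetical stand-in, since the real pbstats definition is outside this patch.

#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

struct stats {                            /* hypothetical counter block */
	uint32_t pb_get;
	uint32_t pb_create;
	uint32_t pb_miss;
};

static struct stats percpu[NCPUS];        /* one copy per CPU */

int main(void)
{
	size_t nfields = sizeof(struct stats) / sizeof(uint32_t);
	size_t i, c;

	percpu[0].pb_get = 2;
	percpu[3].pb_get = 5;

	for (i = 0; i < nfields; i++) {
		uint32_t val = 0;
		for (c = 0; c < NCPUS; c++)
			val += *((uint32_t *)&percpu[c] + i);  /* field i, CPU c */
		printf(" %u", (unsigned)val);
	}
	putchar('\n');                            /* prints " 7 0 0" */
	return 0;
}

The cast-and-index trick is safe here only because the structure is built purely from u_int32_t fields, so there is no padding between counters.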
- * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ - -/* - * Written by Steve Lord, Jim Mostek, Russell Cattelan at SGI - */ - -#ifndef __PAGE_BUF_H__ -#define __PAGE_BUF_H__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Base types - */ - -/* daddr must be signed since -1 is used for bmaps that are not yet allocated */ -typedef loff_t page_buf_daddr_t; - -#define PAGE_BUF_DADDR_NULL ((page_buf_daddr_t) (-1LL)) - -#define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) -#define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -#define page_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) -#define page_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) - -typedef enum page_buf_rw_e { - PBRW_READ = 1, /* transfer into target memory */ - PBRW_WRITE = 2, /* transfer from target memory */ - PBRW_ZERO = 3 /* Zero target memory */ -} page_buf_rw_t; - - -typedef enum page_buf_flags_e { /* pb_flags values */ - PBF_READ = (1 << 0), /* buffer intended for reading from device */ - PBF_WRITE = (1 << 1), /* buffer intended for writing to device */ - PBF_MAPPED = (1 << 2), /* buffer mapped (pb_addr valid) */ - PBF_PARTIAL = (1 << 3), /* buffer partially read */ - PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ - PBF_NONE = (1 << 5), /* buffer not read at all */ - PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ - PBF_FREED = (1 << 7), /* buffer has been freed and is invalid */ - PBF_SYNC = (1 << 8), /* force updates to disk */ - PBF_MAPPABLE = (1 << 9),/* use directly-addressable pages */ - PBF_STALE = (1 << 10), /* buffer has been staled, do not find it */ - PBF_FS_MANAGED = (1 << 11), /* filesystem controls freeing memory */ - PBF_FS_DATAIOD = (1 << 12), /* schedule IO completion on fs datad */ - - /* flags used only as arguments to access routines */ - PBF_LOCK = (1 << 13), /* lock requested */ - PBF_TRYLOCK = (1 << 14), /* lock requested, but do not wait */ - PBF_DONT_BLOCK = (1 << 15), /* do not block in current thread */ - - /* flags used only internally */ - _PBF_LOCKABLE = (1 << 16), /* page_buf_t may be locked */ - _PBF_PRIVATE_BH = (1 << 17), /* do not use public buffer heads */ - _PBF_ALL_PAGES_MAPPED = (1 << 18), /* all pages in range mapped */ - _PBF_ADDR_ALLOCATED = (1 << 19), /* pb_addr space was allocated */ - _PBF_MEM_ALLOCATED = (1 << 20), /* underlying pages are allocated */ - _PBF_MEM_SLAB = (1 << 21), /* underlying pages are slab allocated */ - - PBF_FORCEIO = (1 << 22), /* ignore any cache state */ - PBF_FLUSH = (1 << 23), /* flush disk write cache */ - PBF_READ_AHEAD = (1 << 24), /* asynchronous read-ahead */ - PBF_RUN_QUEUES = (1 << 25), /* run block device task queue */ - -} page_buf_flags_t; - -#define PBF_UPDATE (PBF_READ | PBF_WRITE) -#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0) -#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0) - -typedef struct pb_target { - dev_t pbr_dev; - struct block_device *pbr_bdev; - struct address_space *pbr_mapping; - unsigned int pbr_bsize; - unsigned int pbr_sshift; - size_t pbr_smask; -} pb_target_t; - -/* - * page_buf_t: Buffer structure for page cache-based buffers - * - * This buffer structure is used by the page cache buffer management routines - * to refer to an assembly of pages forming a logical buffer. 
The actual - * I/O is performed with buffer_head or bio structures, as required by drivers, - * for drivers which do not understand this structure. The buffer structure is - * used on temporary basis only, and discarded when released. - * - * The real data storage is recorded in the page cache. Metadata is - * hashed to the inode for the block device on which the file system resides. - * File data is hashed to the inode for the file. Pages which are only - * partially filled with data have bits set in their block_map entry - * to indicate which disk blocks in the page are not valid. - */ - -struct page_buf_s; -typedef void (*page_buf_iodone_t)(struct page_buf_s *); - /* call-back function on I/O completion */ -typedef void (*page_buf_relse_t)(struct page_buf_s *); - /* call-back function on I/O completion */ -typedef int (*page_buf_bdstrat_t)(struct page_buf_s *); - -#define PB_PAGES 4 - -typedef struct page_buf_s { - struct semaphore pb_sema; /* semaphore for lockables */ - unsigned long pb_flushtime; /* time to flush pagebuf */ - atomic_t pb_pin_count; /* pin count */ - wait_queue_head_t pb_waiters; /* unpin waiters */ - struct list_head pb_list; - page_buf_flags_t pb_flags; /* status flags */ - struct list_head pb_hash_list; - struct pb_target *pb_target; /* logical object */ - atomic_t pb_hold; /* reference count */ - page_buf_daddr_t pb_bn; /* block number for I/O */ - loff_t pb_file_offset; /* offset in file */ - size_t pb_buffer_length; /* size of buffer in bytes */ - size_t pb_count_desired; /* desired transfer size */ - void *pb_addr; /* virtual address of buffer */ - struct work_struct pb_iodone_work; - atomic_t pb_io_remaining;/* #outstanding I/O requests */ - page_buf_iodone_t pb_iodone; /* I/O completion function */ - page_buf_relse_t pb_relse; /* releasing function */ - page_buf_bdstrat_t pb_strat; /* pre-write function */ - struct semaphore pb_iodonesema; /* Semaphore for I/O waiters */ - void *pb_fspriv; - void *pb_fspriv2; - void *pb_fspriv3; - unsigned short pb_error; /* error code on I/O */ - unsigned short pb_page_count; /* size of page array */ - unsigned short pb_offset; /* page offset in first page */ - unsigned char pb_locked; /* page array is locked */ - unsigned char pb_hash_index; /* hash table index */ - struct page **pb_pages; /* array of page pointers */ - struct page *pb_page_array[PB_PAGES]; /* inline pages */ -#ifdef PAGEBUF_LOCK_TRACKING - int pb_last_holder; -#endif -} page_buf_t; - - -/* Finding and Reading Buffers */ - -extern page_buf_t *pagebuf_find( /* find buffer for block if */ - /* the block is in memory */ - struct pb_target *, /* inode for block */ - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* PBF_LOCK */ - -extern page_buf_t *pagebuf_get( /* allocate a buffer */ - struct pb_target *, /* inode for buffer */ - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* PBF_LOCK, PBF_READ, */ - /* PBF_ASYNC */ - -extern page_buf_t *pagebuf_lookup( - struct pb_target *, - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ - /* PBF_FORCEIO, _PBF_LOCKABLE */ - -extern page_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ - /* no memory or disk address */ - size_t len, - struct pb_target *); /* mount point "fake" inode */ - -extern page_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */ - /* without disk address */ - size_t len, - struct pb_target *); /* mount point "fake" inode */ 
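The page_buf_btoc()/page_buf_btoct()/page_buf_poff() macros removed earlier in this header convert a byte count into a page count (rounded up, or truncated) and extract the offset within a page; pagebuf_iomove() above leans on the latter two to locate each copy. The same arithmetic in self-contained form, assuming 4 KiB pages purely for the example:

#include <assert.h>
#include <stddef.h>

#define EX_PAGE_SHIFT 12                  /* assumed: 4 KiB pages */
#define EX_PAGE_SIZE  (1UL << EX_PAGE_SHIFT)
#define EX_PAGE_MASK  (~(EX_PAGE_SIZE - 1))

#define ex_btoc(dd)  (((dd) + EX_PAGE_SIZE - 1) >> EX_PAGE_SHIFT) /* pages, rounded up */
#define ex_btoct(dd) ((dd) >> EX_PAGE_SHIFT)                      /* pages, truncated */
#define ex_poff(aa)  ((aa) & ~EX_PAGE_MASK)                       /* offset within page */

int main(void)
{
	size_t off = 5000;

	assert(ex_btoc(off) == 2);        /* 5000 bytes span two 4 KiB pages */
	assert(ex_btoct(off) == 1);       /* byte 5000 lives in page index 1 */
	assert(ex_poff(off) == 904);      /* 5000 - 4096 */
	return 0;
}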
- -extern int pagebuf_associate_memory( - page_buf_t *, - void *, - size_t); - -extern void pagebuf_hold( /* increment reference count */ - page_buf_t *); /* buffer to hold */ - -extern void pagebuf_readahead( /* read ahead into cache */ - struct pb_target *, /* target for buffer (or NULL) */ - loff_t, /* starting offset of range */ - size_t, /* length of range */ - page_buf_flags_t); /* additional read flags */ - -/* Releasing Buffers */ - -extern void pagebuf_free( /* deallocate a buffer */ - page_buf_t *); /* buffer to deallocate */ - -extern void pagebuf_rele( /* release hold on a buffer */ - page_buf_t *); /* buffer to release */ - -/* Locking and Unlocking Buffers */ - -extern int pagebuf_cond_lock( /* lock buffer, if not locked */ - /* (returns -EBUSY if locked) */ - page_buf_t *); /* buffer to lock */ - -extern int pagebuf_lock_value( /* return count on lock */ - page_buf_t *); /* buffer to check */ - -extern int pagebuf_lock( /* lock buffer */ - page_buf_t *); /* buffer to lock */ - -extern void pagebuf_unlock( /* unlock buffer */ - page_buf_t *); /* buffer to unlock */ - -/* Buffer Read and Write Routines */ - -extern void pagebuf_iodone( /* mark buffer I/O complete */ - page_buf_t *, /* buffer to mark */ - int, /* use data/log helper thread. */ - int); /* run completion locally, or in - * a helper thread. */ - -extern void pagebuf_ioerror( /* mark buffer in error (or not) */ - page_buf_t *, /* buffer to mark */ - unsigned int); /* error to store (0 if none) */ - -extern int pagebuf_iostart( /* start I/O on a buffer */ - page_buf_t *, /* buffer to start */ - page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */ - /* PBF_READ, PBF_WRITE, */ - /* PBF_DELWRI, PBF_SYNC */ - -extern int pagebuf_iorequest( /* start real I/O */ - page_buf_t *); /* buffer to convey to device */ - -extern int pagebuf_iowait( /* wait for buffer I/O done */ - page_buf_t *); /* buffer to wait on */ - -extern void pagebuf_iomove( /* move data in/out of pagebuf */ - page_buf_t *, /* buffer to manipulate */ - size_t, /* starting buffer offset */ - size_t, /* length in buffer */ - caddr_t, /* data pointer */ - page_buf_rw_t); /* direction */ - -static inline int pagebuf_iostrategy(page_buf_t *pb) -{ - return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); -} - -static inline int pagebuf_geterror(page_buf_t *pb) -{ - return pb ? 
pb->pb_error : ENOMEM; -} - -/* Buffer Utility Routines */ - -extern caddr_t pagebuf_offset( /* pointer at offset in buffer */ - page_buf_t *, /* buffer to offset into */ - size_t); /* offset */ - -/* Pinning Buffer Storage in Memory */ - -extern void pagebuf_pin( /* pin buffer in memory */ - page_buf_t *); /* buffer to pin */ - -extern void pagebuf_unpin( /* unpin buffered data */ - page_buf_t *); /* buffer to unpin */ - -extern int pagebuf_ispin( /* check if buffer is pinned */ - page_buf_t *); /* buffer to check */ - -/* Delayed Write Buffer Routines */ - -#define PBDF_WAIT 0x01 -extern void pagebuf_delwri_flush( - pb_target_t *, - unsigned long, - int *); - -extern void pagebuf_delwri_dequeue( - page_buf_t *); - -/* Buffer Daemon Setup Routines */ - -extern int pagebuf_init(void); -extern void pagebuf_terminate(void); - - -#ifdef PAGEBUF_TRACE -extern ktrace_t *pagebuf_trace_buf; -extern void pagebuf_trace( - page_buf_t *, /* buffer being traced */ - char *, /* description of operation */ - void *, /* arbitrary diagnostic value */ - void *); /* return address */ -#else -# define pagebuf_trace(pb, id, ptr, ra) do { } while (0) -#endif - -#define pagebuf_target_name(target) \ - ({ char __b[BDEVNAME_SIZE]; bdevname((target)->pbr_bdev, __b); __b; }) - -#endif /* __PAGE_BUF_H__ */ diff -Nru a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c --- a/fs/xfs/quota/xfs_qm.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/quota/xfs_qm.c Thu Jan 29 23:15:58 2004 @@ -78,7 +78,6 @@ */ mutex_t xfs_Gqm_lock; struct xfs_qm *xfs_Gqm; -EXPORT_SYMBOL(xfs_Gqm); /* used by xfsidbg */ kmem_zone_t *qm_dqzone; kmem_zone_t *qm_dqtrxzone; diff -Nru a/fs/xfs/support/kmem.h b/fs/xfs/support/kmem.h --- a/fs/xfs/support/kmem.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_SUPPORT_KMEM_H__ -#define __XFS_SUPPORT_KMEM_H__ - -#include -#include -#include -#include - -/* - * Cutoff point to use vmalloc instead of kmalloc. - */ -#define MAX_SLAB_SIZE 0x10000 - -/* - * XFS uses slightly different names for these due to the - * IRIX heritage. 
- */ -#define kmem_zone kmem_cache_s -#define kmem_zone_t kmem_cache_t - -#define KM_SLEEP 0x0001 -#define KM_NOSLEEP 0x0002 -#define KM_NOFS 0x0004 - -typedef unsigned long xfs_pflags_t; - -#define PFLAGS_TEST_FSTRANS() (current->flags & PF_FSTRANS) - -#define PFLAGS_SET_FSTRANS(STATEP) do { \ - *(STATEP) = current->flags; \ - current->flags |= PF_FSTRANS; \ -} while (0) - -#define PFLAGS_RESTORE(STATEP) do { \ - current->flags = *(STATEP); \ -} while (0) - -#define PFLAGS_DUP(OSTATEP, NSTATEP) do { \ - *(NSTATEP) = *(OSTATEP); \ -} while (0) - -/* - * XXX get rid of the unconditional __GFP_NOFAIL by adding - * a KM_FAIL flag and using it where we're allowed to fail. - */ -static __inline unsigned int -kmem_flags_convert(int flags) -{ - int lflags; - -#if DEBUG - if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS))) { - printk(KERN_WARNING - "XFS: memory allocation with wrong flags (%x)\n", flags); - BUG(); - } -#endif - - lflags = (flags & KM_NOSLEEP) ? GFP_ATOMIC : (GFP_KERNEL|__GFP_NOFAIL); - - /* avoid recusive callbacks to filesystem during transactions */ - if (PFLAGS_TEST_FSTRANS()) - lflags &= ~__GFP_FS; - - return lflags; -} - -static __inline void * -kmem_alloc(size_t size, int flags) -{ - if (unlikely(MAX_SLAB_SIZE < size)) - /* Avoid doing filesystem sensitive stuff to get this */ - return __vmalloc(size, kmem_flags_convert(flags), PAGE_KERNEL); - return kmalloc(size, kmem_flags_convert(flags)); -} - -static __inline void * -kmem_zalloc(size_t size, int flags) -{ - void *ptr = kmem_alloc(size, flags); - if (likely(ptr != NULL)) - memset(ptr, 0, size); - return ptr; -} - -static __inline void -kmem_free(void *ptr, size_t size) -{ - if (unlikely((unsigned long)ptr < VMALLOC_START || - (unsigned long)ptr >= VMALLOC_END)) - kfree(ptr); - else - vfree(ptr); -} - -static __inline void * -kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags) -{ - void *new = kmem_alloc(newsize, flags); - - if (likely(ptr != NULL)) { - if (likely(new != NULL)) - memcpy(new, ptr, min(oldsize, newsize)); - kmem_free(ptr, oldsize); - } - - return new; -} - -static __inline kmem_zone_t * -kmem_zone_init(int size, char *zone_name) -{ - return kmem_cache_create(zone_name, size, 0, 0, NULL, NULL); -} - -static __inline void * -kmem_zone_alloc(kmem_zone_t *zone, int flags) -{ - return kmem_cache_alloc(zone, kmem_flags_convert(flags)); -} - -static __inline void * -kmem_zone_zalloc(kmem_zone_t *zone, int flags) -{ - void *ptr = kmem_zone_alloc(zone, flags); - if (likely(ptr != NULL)) - memset(ptr, 0, kmem_cache_size(zone)); - return ptr; -} - -static __inline void -kmem_zone_free(kmem_zone_t *zone, void *ptr) -{ - kmem_cache_free(zone, ptr); -} - -typedef struct shrinker *kmem_shaker_t; -typedef int (*kmem_shake_func_t)(int, unsigned int); - -static __inline kmem_shaker_t -kmem_shake_register(kmem_shake_func_t sfunc) -{ - return set_shrinker(DEFAULT_SEEKS, sfunc); -} - -static __inline void -kmem_shake_deregister(kmem_shaker_t shrinker) -{ - remove_shrinker(shrinker); -} - -static __inline int -kmem_shake_allow(unsigned int gfp_mask) -{ - return (gfp_mask & __GFP_WAIT); -} - -#endif /* __XFS_SUPPORT_KMEM_H__ */ diff -Nru a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c --- a/fs/xfs/support/ktrace.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/support/ktrace.c Thu Jan 29 23:15:58 2004 @@ -30,13 +30,12 @@ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ */ -#include #include #include #include -#include "kmem.h" -#include "spin.h" +#include +#include #include "debug.h" #include "ktrace.h" @@ 
-273,7 +272,6 @@ } return ktep; } -EXPORT_SYMBOL(ktrace_first); /* * ktrace_next() @@ -308,7 +306,6 @@ return ktep; } -EXPORT_SYMBOL(ktrace_next); /* * ktrace_skip() diff -Nru a/fs/xfs/support/ktrace.h b/fs/xfs/support/ktrace.h --- a/fs/xfs/support/ktrace.h Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/support/ktrace.h Thu Jan 29 23:15:58 2004 @@ -32,7 +32,7 @@ #ifndef __XFS_SUPPORT_KTRACE_H__ #define __XFS_SUPPORT_KTRACE_H__ -#include +#include /* * Trace buffer entry structure. diff -Nru a/fs/xfs/support/mrlock.c b/fs/xfs/support/mrlock.c --- a/fs/xfs/support/mrlock.c Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ - -#include -#include -#include -#include -#include - -#include "mrlock.h" - - -#if USE_RW_WAIT_QUEUE_SPINLOCK -# define wq_write_lock write_lock -#else -# define wq_write_lock spin_lock -#endif - -/* - * We don't seem to need lock_type (only one supported), name, or - * sequence. But, XFS will pass it so let's leave them here for now. - */ -/* ARGSUSED */ -void -mrlock_init(mrlock_t *mrp, int lock_type, char *name, long sequence) -{ - mrp->mr_count = 0; - mrp->mr_reads_waiting = 0; - mrp->mr_writes_waiting = 0; - init_waitqueue_head(&mrp->mr_readerq); - init_waitqueue_head(&mrp->mr_writerq); - mrp->mr_lock = SPIN_LOCK_UNLOCKED; -} - -/* - * Macros to lock/unlock the mrlock_t. - */ - -#define MRLOCK(m) spin_lock(&(m)->mr_lock); -#define MRUNLOCK(m) spin_unlock(&(m)->mr_lock); - - -/* - * lock_wait should never be called in an interrupt thread. - * - * mrlocks can sleep (i.e. call schedule) and so they can't ever - * be called from an interrupt thread. - * - * threads that wake-up should also never be invoked from interrupt threads. - * - * But, waitqueue_lock is locked from interrupt threads - and we are - * called with interrupts disabled, so it is all OK. 
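The comment just above states the contract of lock_wait(), whose body follows: the caller goes to sleep with its spinlock atomically released and wakes up with the lock re-held. In POSIX terms that is the pthread_cond_wait() contract; a minimal analogue, assuming a mutex stands in for mr_lock and a condition variable for the wait queue:

#include <pthread.h>

struct mr_waiter {
	pthread_mutex_t lock;             /* stands in for mrp->mr_lock */
	pthread_cond_t q;                 /* stands in for the wait queue */
};

static void lock_wait_analogue(struct mr_waiter *w)
{
	/*
	 * Caller holds w->lock. pthread_cond_wait() releases it while
	 * sleeping and reacquires it before returning, matching
	 * lock_wait()'s "return with lock held" behaviour.
	 */
	pthread_cond_wait(&w->q, &w->lock);
}

As with any condition-variable style wait, callers must re-check the guarded predicate after waking, which is exactly what the while loops around lock_wait() in mraccessf() and mrupdatef() below do.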
- */ - -/* ARGSUSED */ -void -lock_wait(wait_queue_head_t *q, spinlock_t *lock, int rw) -{ - DECLARE_WAITQUEUE( wait, current ); - - __set_current_state(TASK_UNINTERRUPTIBLE); - - spin_lock(&q->lock); - if (rw) { - __add_wait_queue_tail(q, &wait); - } else { - __add_wait_queue(q, &wait); - } - - spin_unlock(&q->lock); - spin_unlock(lock); - - schedule(); - - spin_lock(&q->lock); - __remove_wait_queue(q, &wait); - spin_unlock(&q->lock); - - spin_lock(lock); - - /* return with lock held */ -} - -/* ARGSUSED */ -void -mrfree(mrlock_t *mrp) -{ -} - -/* ARGSUSED */ -void -mrlock(mrlock_t *mrp, int type, int flags) -{ - if (type == MR_ACCESS) - mraccess(mrp); - else - mrupdate(mrp); -} - -/* ARGSUSED */ -void -mraccessf(mrlock_t *mrp, int flags) -{ - MRLOCK(mrp); - if(mrp->mr_writes_waiting > 0) { - mrp->mr_reads_waiting++; - lock_wait(&mrp->mr_readerq, &mrp->mr_lock, 0); - mrp->mr_reads_waiting--; - } - while (mrp->mr_count < 0) { - mrp->mr_reads_waiting++; - lock_wait(&mrp->mr_readerq, &mrp->mr_lock, 0); - mrp->mr_reads_waiting--; - } - mrp->mr_count++; - MRUNLOCK(mrp); -} - -/* ARGSUSED */ -void -mrupdatef(mrlock_t *mrp, int flags) -{ - MRLOCK(mrp); - while(mrp->mr_count) { - mrp->mr_writes_waiting++; - lock_wait(&mrp->mr_writerq, &mrp->mr_lock, 1); - mrp->mr_writes_waiting--; - } - - mrp->mr_count = -1; /* writer on it */ - MRUNLOCK(mrp); -} - -int -mrtryaccess(mrlock_t *mrp) -{ - MRLOCK(mrp); - /* - * If anyone is waiting for update access or the lock is held for update - * fail the request. - */ - if(mrp->mr_writes_waiting > 0 || mrp->mr_count < 0) { - MRUNLOCK(mrp); - return 0; - } - mrp->mr_count++; - MRUNLOCK(mrp); - return 1; -} - -int -mrtrypromote(mrlock_t *mrp) -{ - MRLOCK(mrp); - - if(mrp->mr_count == 1) { /* We are the only thread with the lock */ - mrp->mr_count = -1; /* writer on it */ - MRUNLOCK(mrp); - return 1; - } - - MRUNLOCK(mrp); - return 0; -} - -int -mrtryupdate(mrlock_t *mrp) -{ - MRLOCK(mrp); - - if(mrp->mr_count) { - MRUNLOCK(mrp); - return 0; - } - - mrp->mr_count = -1; /* writer on it */ - MRUNLOCK(mrp); - return 1; -} - -static __inline__ void mrwake(mrlock_t *mrp) -{ - /* - * First, if the count is now 0, we need to wake-up anyone waiting. - */ - if (!mrp->mr_count) { - if (mrp->mr_writes_waiting) { /* Wake-up first writer waiting */ - wake_up(&mrp->mr_writerq); - } else if (mrp->mr_reads_waiting) { /* Wakeup any readers waiting */ - wake_up(&mrp->mr_readerq); - } - } -} - -void -mraccunlock(mrlock_t *mrp) -{ - MRLOCK(mrp); - mrp->mr_count--; - mrwake(mrp); - MRUNLOCK(mrp); -} - -void -mrunlock(mrlock_t *mrp) -{ - MRLOCK(mrp); - if (mrp->mr_count < 0) { - mrp->mr_count = 0; - } else { - mrp->mr_count--; - } - mrwake(mrp); - MRUNLOCK(mrp); -} - -int -ismrlocked(mrlock_t *mrp, int type) /* No need to lock since info can change */ -{ - if (type == MR_ACCESS) - return (mrp->mr_count > 0); /* Read lock */ - else if (type == MR_UPDATE) - return (mrp->mr_count < 0); /* Write lock */ - else if (type == (MR_UPDATE | MR_ACCESS)) - return (mrp->mr_count); /* Any type of lock held */ - else /* Any waiters */ - return (mrp->mr_reads_waiting | mrp->mr_writes_waiting); -} - -/* - * Demote from update to access. We better be the only thread with the - * lock in update mode so it should be easy to set to 1. - * Wake-up any readers waiting. 
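The single mr_count field carries the whole lock state in the code above: a positive value counts readers, -1 marks a writer, and 0 means free, which is why mrtryaccess() and mrtryupdate() reduce to one comparison each. A compact user-space rendering of those two predicates, assuming an ordinary mutex in place of mr_lock:

#include <pthread.h>

struct mr {
	pthread_mutex_t lock;
	int count;                        /* >0: readers, -1: writer, 0: free */
	int writes_waiting;
};

static int mr_tryaccess(struct mr *m)    /* reader */
{
	int ok;

	pthread_mutex_lock(&m->lock);
	/* fail if a writer holds it, or writers are queued (writer priority) */
	ok = !(m->writes_waiting > 0 || m->count < 0);
	if (ok)
		m->count++;
	pthread_mutex_unlock(&m->lock);
	return ok;
}

static int mr_tryupdate(struct mr *m)    /* writer */
{
	int ok;

	pthread_mutex_lock(&m->lock);
	ok = (m->count == 0);             /* any holder at all blocks a writer */
	if (ok)
		m->count = -1;            /* writer on it */
	pthread_mutex_unlock(&m->lock);
	return ok;
}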
- */ - -void -mrdemote(mrlock_t *mrp) -{ - MRLOCK(mrp); - mrp->mr_count = 1; - if (mrp->mr_reads_waiting) { /* Wakeup all readers waiting */ - wake_up(&mrp->mr_readerq); - } - MRUNLOCK(mrp); -} diff -Nru a/fs/xfs/support/mrlock.h b/fs/xfs/support/mrlock.h --- a/fs/xfs/support/mrlock.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_SUPPORT_MRLOCK_H__ -#define __XFS_SUPPORT_MRLOCK_H__ - -#include -#include -#include -#include - -/* - * Implement mrlocks on Linux that work for XFS. - * - * These are sleep locks and not spinlocks. If one wants read/write spinlocks, - * use read_lock, write_lock, ... see spinlock.h. - */ - -typedef struct mrlock_s { - int mr_count; - unsigned short mr_reads_waiting; - unsigned short mr_writes_waiting; - wait_queue_head_t mr_readerq; - wait_queue_head_t mr_writerq; - spinlock_t mr_lock; -} mrlock_t; - -#define MR_ACCESS 1 -#define MR_UPDATE 2 - -#define MRLOCK_BARRIER 0x1 -#define MRLOCK_ALLOW_EQUAL_PRI 0x8 - -/* - * mraccessf/mrupdatef take flags to be passed in while sleeping; - * only PLTWAIT is currently supported. 
- */ - -extern void mraccessf(mrlock_t *, int); -extern void mrupdatef(mrlock_t *, int); -extern void mrlock(mrlock_t *, int, int); -extern void mrunlock(mrlock_t *); -extern void mraccunlock(mrlock_t *); -extern int mrtryupdate(mrlock_t *); -extern int mrtryaccess(mrlock_t *); -extern int mrtrypromote(mrlock_t *); -extern void mrdemote(mrlock_t *); - -extern int ismrlocked(mrlock_t *, int); -extern void mrlock_init(mrlock_t *, int type, char *name, long sequence); -extern void mrfree(mrlock_t *); - -#define mrinit(mrp, name) mrlock_init(mrp, MRLOCK_BARRIER, name, -1) -#define mraccess(mrp) mraccessf(mrp, 0) /* grab for READ/ACCESS */ -#define mrupdate(mrp) mrupdatef(mrp, 0) /* grab for WRITE/UPDATE */ -#define mrislocked_access(mrp) ((mrp)->mr_count > 0) -#define mrislocked_update(mrp) ((mrp)->mr_count < 0) - -#endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff -Nru a/fs/xfs/support/mutex.h b/fs/xfs/support/mutex.h --- a/fs/xfs/support/mutex.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * Portions Copyright (c) 2002 Christoph Hellwig. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_SUPPORT_MUTEX_H__ -#define __XFS_SUPPORT_MUTEX_H__ - -#include -#include - -/* - * Map the mutex'es from IRIX to Linux semaphores. - * - * Destroy just simply initializes to -99 which should block all other - * callers. - */ -#define MUTEX_DEFAULT 0x0 -typedef struct semaphore mutex_t; - -#define mutex_init(lock, type, name) sema_init(lock, 1) -#define mutex_destroy(lock) sema_init(lock, -99) -#define mutex_lock(lock, num) down(lock) -#define mutex_trylock(lock) (down_trylock(lock) ? 0 : 1) -#define mutex_unlock(lock) up(lock) - -#endif /* __XFS_SUPPORT_MUTEX_H__ */ diff -Nru a/fs/xfs/support/sema.h b/fs/xfs/support/sema.h --- a/fs/xfs/support/sema.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_SUPPORT_SEMA_H__ -#define __XFS_SUPPORT_SEMA_H__ - -#include -#include -#include -#include - -/* - * sema_t structure just maps to struct semaphore in Linux kernel. - */ - -typedef struct semaphore sema_t; - -#define init_sema(sp, val, c, d) sema_init(sp, val) -#define initsema(sp, val) sema_init(sp, val) -#define initnsema(sp, val, name) sema_init(sp, val) -#define psema(sp, b) down(sp) -#define vsema(sp) up(sp) -#define valusema(sp) (atomic_read(&(sp)->count)) -#define freesema(sema) - -/* - * Map cpsema (try to get the sema) to down_trylock. We need to switch - * the return values since cpsema returns 1 (acquired) 0 (failed) and - * down_trylock returns the reverse 0 (acquired) 1 (failed). - */ - -#define cpsema(sp) (down_trylock(sp) ? 0 : 1) - -/* - * Didn't do cvsema(sp). Not sure how to map this to up/down/... - * It does a vsema if the values is < 0 other wise nothing. - */ - -#endif /* __XFS_SUPPORT_SEMA_H__ */ diff -Nru a/fs/xfs/support/spin.h b/fs/xfs/support/spin.h --- a/fs/xfs/support/spin.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. - * Portions Copyright (c) 2002 Christoph Hellwig. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
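Note the inversion hidden in the cpsema() mapping above: IRIX cpsema() returns 1 when the semaphore is acquired and 0 when not, while down_trylock() returns 0 on success. POSIX sem_trywait() follows the down_trylock() convention, so a user-space analogue needs the same flip:

#include <assert.h>
#include <semaphore.h>

static int cpsema_analogue(sem_t *sp)
{
	return sem_trywait(sp) == 0;      /* 0 from sem_trywait means acquired */
}

int main(void)
{
	sem_t s;

	sem_init(&s, 0, 1);
	assert(cpsema_analogue(&s) == 1); /* first attempt succeeds */
	assert(cpsema_analogue(&s) == 0); /* semaphore now unavailable */
	sem_destroy(&s);
	return 0;
}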
- * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_SUPPORT_SPIN_H__ -#define __XFS_SUPPORT_SPIN_H__ - -#include /* preempt needs this */ -#include - -/* - * Map lock_t from IRIX to Linux spinlocks. - * - * Note that linux turns on/off spinlocks depending on CONFIG_SMP. - * We don't need to worry about SMP or not here. - */ - -#define SPLDECL(s) unsigned long s - -typedef spinlock_t lock_t; - -#define spinlock_init(lock, name) spin_lock_init(lock) -#define spinlock_destroy(lock) - -static inline unsigned long mutex_spinlock(lock_t *lock) -{ - spin_lock(lock); - return 0; -} - -/*ARGSUSED*/ -static inline void mutex_spinunlock(lock_t *lock, unsigned long s) -{ - spin_unlock(lock); -} - -static inline void nested_spinlock(lock_t *lock) -{ - spin_lock(lock); -} - -static inline void nested_spinunlock(lock_t *lock) -{ - spin_unlock(lock); -} - -#endif /* __XFS_SUPPORT_SPIN_H__ */ diff -Nru a/fs/xfs/support/sv.h b/fs/xfs/support/sv.h --- a/fs/xfs/support/sv.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. - * Portions Copyright (c) 2002 Christoph Hellwig. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_SUPPORT_SV_H__ -#define __XFS_SUPPORT_SV_H__ - -#include -#include -#include - -/* - * Synchronisation variables. 
- * - * (Parameters "pri", "svf" and "rts" are not implemented) - */ - -typedef struct sv_s { - wait_queue_head_t waiters; -} sv_t; - -#define SV_FIFO 0x0 /* sv_t is FIFO type */ -#define SV_LIFO 0x2 /* sv_t is LIFO type */ -#define SV_PRIO 0x4 /* sv_t is PRIO type */ -#define SV_KEYED 0x6 /* sv_t is KEYED type */ -#define SV_DEFAULT SV_FIFO - - -static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, - unsigned long timeout) -{ - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&sv->waiters, &wait); - __set_current_state(state); - spin_unlock(lock); - - schedule_timeout(timeout); - - remove_wait_queue(&sv->waiters, &wait); -} - -#define init_sv(sv,type,name,flag) \ - init_waitqueue_head(&(sv)->waiters) -#define sv_init(sv,flag,name) \ - init_waitqueue_head(&(sv)->waiters) -#define sv_destroy(sv) \ - /*NOTHING*/ -#define sv_wait(sv, pri, lock, s) \ - _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) -#define sv_wait_sig(sv, pri, lock, s) \ - _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) -#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \ - _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts)) -#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \ - _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts)) -#define sv_signal(sv) \ - wake_up(&(sv)->waiters) -#define sv_broadcast(sv) \ - wake_up_all(&(sv)->waiters) - -#endif /* __XFS_SUPPORT_SV_H__ */ diff -Nru a/fs/xfs/support/time.h b/fs/xfs/support/time.h --- a/fs/xfs/support/time.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
- * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_SUPPORT_TIME_H__ -#define __XFS_SUPPORT_TIME_H__ - -#include -#include - -typedef struct timespec timespec_t; - -static inline void delay(long ticks) -{ - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(ticks); -} - -static inline void nanotime(struct timespec *tvp) -{ - *tvp = CURRENT_TIME; -} - -#endif /* __XFS_SUPPORT_TIME_H__ */ diff -Nru a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c --- a/fs/xfs/support/uuid.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/support/uuid.c Thu Jan 29 23:15:58 2004 @@ -30,14 +30,7 @@ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ */ -#include -#include -#include -#include "time.h" -#include "uuid.h" -#include "kmem.h" -#include "debug.h" -#include "mutex.h" +#include static mutex_t uuid_monitor; static int uuid_table_size; diff -Nru a/fs/xfs/xfs.h b/fs/xfs/xfs.h --- a/fs/xfs/xfs.h Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs.h Thu Jan 29 23:15:58 2004 @@ -32,28 +32,8 @@ #ifndef __XFS_H__ #define __XFS_H__ -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - #include #include -#include #endif /* __XFS_H__ */ diff -Nru a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c --- a/fs/xfs/xfs_attr.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_attr.c Thu Jan 29 23:15:58 2004 @@ -106,12 +106,11 @@ #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ #define ATTR_RMTVALUE_TRANSBLKS 8 /* max # of blks in a transaction */ -#if defined(DEBUG) +#if defined(XFS_ATTR_TRACE) ktrace_t *xfs_attr_trace_buf; #endif - /*======================================================================== * Overall external interface routines. 
*========================================================================*/ @@ -2589,6 +2588,14 @@ } STATIC int +attr_secure_capable( + struct vnode *vp, + cred_t *cred) +{ + return -ENOSECURITY; +} + +STATIC int attr_system_set( struct vnode *vp, char *name, void *data, size_t size, int xflags) { @@ -2651,6 +2658,16 @@ .attr_capable = attr_trusted_capable, }; +struct attrnames attr_secure = { + .attr_name = "security.", + .attr_namelen = sizeof("security.") - 1, + .attr_flag = ATTR_SECURE, + .attr_get = attr_generic_get, + .attr_set = attr_generic_set, + .attr_remove = attr_generic_remove, + .attr_capable = attr_secure_capable, +}; + struct attrnames attr_user = { .attr_name = "user.", .attr_namelen = sizeof("user.") - 1, @@ -2661,4 +2678,4 @@ }; struct attrnames *attr_namespaces[] = - { &attr_system, &attr_trusted, &attr_user }; + { &attr_system, &attr_trusted, &attr_secure, &attr_user }; diff -Nru a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h --- a/fs/xfs/xfs_attr.h Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_attr.h Thu Jan 29 23:15:58 2004 @@ -69,8 +69,9 @@ attrcapable_t attr_capable; } attrnames_t; -#define ATTR_NAMECOUNT 3 +#define ATTR_NAMECOUNT 4 extern struct attrnames attr_user; +extern struct attrnames attr_secure; extern struct attrnames attr_system; extern struct attrnames attr_trusted; extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT]; @@ -86,6 +87,7 @@ #define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ #define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ #define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */ +#define ATTR_SECURE 0x0008 /* use attrs in security namespace */ #define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */ #define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */ #define ATTR_SYSTEM 0x0100 /* use attrs in system (pseudo) namespace */ diff -Nru a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c --- a/fs/xfs/xfs_attr_leaf.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_attr_leaf.c Thu Jan 29 23:15:58 2004 @@ -159,6 +159,9 @@ continue; if (memcmp(args->name, sfe->nameval, args->namelen) != 0) continue; + if (((args->flags & ATTR_SECURE) != 0) != + ((sfe->flags & XFS_ATTR_SECURE) != 0)) + continue; if (((args->flags & ATTR_ROOT) != 0) != ((sfe->flags & XFS_ATTR_ROOT) != 0)) continue; @@ -173,7 +176,8 @@ sfe->namelen = args->namelen; INT_SET(sfe->valuelen, ARCH_CONVERT, args->valuelen); - sfe->flags = (args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0; + sfe->flags = (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE : + ((args->flags & ATTR_ROOT) ? 
XFS_ATTR_ROOT : 0); memcpy(sfe->nameval, args->name, args->namelen); memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); INT_MOD(sf->hdr.count, ARCH_CONVERT, 1); @@ -209,6 +213,9 @@ continue; if (memcmp(sfe->nameval, args->name, args->namelen) != 0) continue; + if (((args->flags & ATTR_SECURE) != 0) != + ((sfe->flags & XFS_ATTR_SECURE) != 0)) + continue; if (((args->flags & ATTR_ROOT) != 0) != ((sfe->flags & XFS_ATTR_ROOT) != 0)) continue; @@ -253,6 +260,9 @@ continue; if (memcmp(args->name, sfe->nameval, args->namelen) != 0) continue; + if (((args->flags & ATTR_SECURE) != 0) != + ((sfe->flags & XFS_ATTR_SECURE) != 0)) + continue; if (((args->flags & ATTR_ROOT) != 0) != ((sfe->flags & XFS_ATTR_ROOT) != 0)) continue; @@ -281,6 +291,9 @@ continue; if (memcmp(args->name, sfe->nameval, args->namelen) != 0) continue; + if (((args->flags & ATTR_SECURE) != 0) != + ((sfe->flags & XFS_ATTR_SECURE) != 0)) + continue; if (((args->flags & ATTR_ROOT) != 0) != ((sfe->flags & XFS_ATTR_ROOT) != 0)) continue; @@ -369,7 +382,8 @@ nargs.valuelen = INT_GET(sfe->valuelen, ARCH_CONVERT); nargs.hashval = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); - nargs.flags = (sfe->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0; + nargs.flags = (sfe->flags & XFS_ATTR_SECURE) ? ATTR_SECURE : + ((sfe->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0); error = xfs_attr_leaf_lookup_int(bp, &nargs); /* set a->index */ ASSERT(error == ENOATTR); error = xfs_attr_leaf_add(bp, &nargs); @@ -446,14 +460,15 @@ i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) { attrnames_t *namesp; - namesp = (sfe->flags & XFS_ATTR_ROOT) ? &attr_trusted : - &attr_user; if (((context->flags & ATTR_ROOT) != 0) != ((sfe->flags & XFS_ATTR_ROOT) != 0) && !(context->flags & ATTR_KERNFULLS)) { sfe = XFS_ATTR_SF_NEXTENTRY(sfe); continue; } + namesp = (sfe->flags & XFS_ATTR_SECURE) ? &attr_secure: + ((sfe->flags & XFS_ATTR_ROOT) ? &attr_trusted : + &attr_user); if (context->flags & ATTR_KERNOVAL) { ASSERT(context->flags & ATTR_KERNAMELS); context->count += namesp->attr_namelen + @@ -548,8 +563,9 @@ for ( ; i < nsbuf; i++, sbp++) { attrnames_t *namesp; - namesp = (sfe->flags & XFS_ATTR_ROOT) ? &attr_trusted : - &attr_user; + namesp = (sfe->flags & XFS_ATTR_SECURE) ? &attr_secure : + ((sfe->flags & XFS_ATTR_ROOT) ? &attr_trusted : + &attr_user); if (cursor->hashval != INT_GET(sbp->hash, ARCH_CONVERT)) { cursor->hashval = INT_GET(sbp->hash, ARCH_CONVERT); @@ -668,7 +684,8 @@ nargs.value = (char *)&name_loc->nameval[nargs.namelen]; nargs.valuelen = INT_GET(name_loc->valuelen, ARCH_CONVERT); nargs.hashval = INT_GET(entry->hashval, ARCH_CONVERT); - nargs.flags = (entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0; + nargs.flags = (entry->flags & XFS_ATTR_SECURE) ? ATTR_SECURE : + ((entry->flags & XFS_ATTR_ROOT) ? ATTR_ROOT : 0); xfs_attr_shortform_add(&nargs); } error = 0; @@ -963,7 +980,8 @@ + INT_GET(map->size, ARCH_CONVERT)); INT_SET(entry->hashval, ARCH_CONVERT, args->hashval); entry->flags = tmp ? XFS_ATTR_LOCAL : 0; - entry->flags |= (args->flags & ATTR_ROOT) ? XFS_ATTR_ROOT : 0; + entry->flags |= (args->flags & ATTR_SECURE) ? XFS_ATTR_SECURE : + ((args->flags & ATTR_ROOT) ? 
XFS_ATTR_ROOT : 0); if (args->rename) { entry->flags |= XFS_ATTR_INCOMPLETE; if ((args->blkno2 == args->blkno) && @@ -1881,6 +1899,9 @@ if (memcmp(args->name, (char *)name_loc->nameval, args->namelen) != 0) continue; + if (((args->flags & ATTR_SECURE) != 0) != + ((entry->flags & XFS_ATTR_SECURE) != 0)) + continue; if (((args->flags & ATTR_ROOT) != 0) != ((entry->flags & XFS_ATTR_ROOT) != 0)) continue; @@ -1893,6 +1914,9 @@ if (memcmp(args->name, (char *)name_rmt->name, args->namelen) != 0) continue; + if (((args->flags & ATTR_SECURE) != 0) != + ((entry->flags & XFS_ATTR_SECURE) != 0)) + continue; if (((args->flags & ATTR_ROOT) != 0) != ((entry->flags & XFS_ATTR_ROOT) != 0)) continue; @@ -2290,8 +2314,9 @@ !(context->flags & ATTR_KERNFULLS)) continue; /* skip non-matching entries */ - namesp = (entry->flags & XFS_ATTR_ROOT) ? &attr_trusted : - &attr_user; + namesp = (entry->flags & XFS_ATTR_SECURE) ? &attr_secure : + ((entry->flags & XFS_ATTR_ROOT) ? &attr_trusted : + &attr_user); if (entry->flags & XFS_ATTR_LOCAL) { name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); diff -Nru a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h --- a/fs/xfs/xfs_attr_leaf.h Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_attr_leaf.h Thu Jan 29 23:15:58 2004 @@ -73,9 +73,9 @@ * to work "forw"ard. If none matches, continue with the "forw"ard leaf * nodes until the hash key changes or the attribute name is found. * - * We store the fact that an attribute is a ROOT versus USER attribute in + * We store the fact that an attribute is a ROOT/USER/SECURE attribute in * the leaf_entry. The namespaces are independent only because we also look - * at the root/user bit when we are looking for a matching attribute name. + * at the namespace bit when we are looking for a matching attribute name. * * We also store a "incomplete" bit in the leaf_entry. It shows that an * attribute is in the middle of being created and should not be shown to @@ -102,7 +102,7 @@ struct xfs_attr_leaf_entry { /* sorted on key, not name */ xfs_dahash_t hashval; /* hash value of name */ __uint16_t nameidx; /* index into buffer of name/value */ - __uint8_t flags; /* LOCAL, ROOT and INCOMPLETE flags */ + __uint8_t flags; /* LOCAL/ROOT/SECURE/INCOMPLETE flag */ __uint8_t pad2; /* unused pad byte */ } entries[1]; /* variable sized array */ struct xfs_attr_leaf_name_local { @@ -130,9 +130,11 @@ */ #define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */ #define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */ +#define XFS_ATTR_SECURE_BIT 2 /* limit access to secure attrs */ #define XFS_ATTR_INCOMPLETE_BIT 7 /* attr in middle of create/delete */ #define XFS_ATTR_LOCAL (1 << XFS_ATTR_LOCAL_BIT) #define XFS_ATTR_ROOT (1 << XFS_ATTR_ROOT_BIT) +#define XFS_ATTR_SECURE (1 << XFS_ATTR_SECURE_BIT) #define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT) /* diff -Nru a/fs/xfs/xfs_behavior.c b/fs/xfs/xfs_behavior.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/xfs_behavior.c Thu Jan 29 23:15:58 2004 @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
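Each lookup hunk above skips a leaf entry unless its namespace bit agrees with the caller's flags. Since ATTR_SECURE (0x0008) and XFS_ATTR_SECURE (1 << 2) sit at different bit positions, both sides are normalized with "!= 0" before they can be compared as booleans. The predicate, distilled into a stand-alone check (flag values taken from the hunks above):

#include <assert.h>

#define ATTR_SECURE     0x0008            /* caller-side flag (xfs_attr.h) */
#define XFS_ATTR_SECURE (1 << 2)          /* on-disk entry flag (xfs_attr_leaf.h) */

/* 1 if the caller's flags and the entry's flags name the same namespace */
static int same_namespace(int argflags, int entflags)
{
	return ((argflags & ATTR_SECURE) != 0) == ((entflags & XFS_ATTR_SECURE) != 0);
}

int main(void)
{
	assert(same_namespace(ATTR_SECURE, XFS_ATTR_SECURE)); /* both secure */
	assert(same_namespace(0, 0));                         /* neither secure */
	assert(!same_namespace(ATTR_SECURE, 0));              /* mismatch: skip */
	return 0;
}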
+ * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * + */ +#include "xfs.h" + +/* + * Source file used to associate/disassociate behaviors with virtualized + * objects. See xfs_behavior.h for more information about behaviors, etc. + * + * The implementation is split between functions in this file and macros + * in xfs_behavior.h. + */ + +/* + * Insert a new behavior descriptor into a behavior chain. + * + * The behavior chain is ordered based on the 'position' number which + * lives in the first field of the ops vector (higher numbers first). + * + * Attempts to insert duplicate ops result in an EINVAL return code. + * Otherwise, return 0 to indicate success. + */ +int +bhv_insert(bhv_head_t *bhp, bhv_desc_t *bdp) +{ + bhv_desc_t *curdesc, *prev; + int position; + + /* + * Validate the position value of the new behavior. + */ + position = BHV_POSITION(bdp); + ASSERT(position >= BHV_POSITION_BASE && position <= BHV_POSITION_TOP); + + /* + * Find location to insert behavior. Check for duplicates. + */ + prev = NULL; + for (curdesc = bhp->bh_first; + curdesc != NULL; + curdesc = curdesc->bd_next) { + + /* Check for duplication. */ + if (curdesc->bd_ops == bdp->bd_ops) { + ASSERT(0); + return EINVAL; + } + + /* Find correct position */ + if (position >= BHV_POSITION(curdesc)) { + ASSERT(position != BHV_POSITION(curdesc)); + break; /* found it */ + } + + prev = curdesc; + } + + if (prev == NULL) { + /* insert at front of chain */ + bdp->bd_next = bhp->bh_first; + bhp->bh_first = bdp; + } else { + /* insert after prev */ + bdp->bd_next = prev->bd_next; + prev->bd_next = bdp; + } + + return 0; +} + +/* + * Remove a behavior descriptor from a position in a behavior chain; + * the position is guaranteed not to be the first position. + * Should only be called by the bhv_remove() macro. + */ +void +bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp) +{ + bhv_desc_t *curdesc, *prev; + + ASSERT(bhp->bh_first != NULL); + ASSERT(bhp->bh_first->bd_next != NULL); + + prev = bhp->bh_first; + for (curdesc = bhp->bh_first->bd_next; + curdesc != NULL; + curdesc = curdesc->bd_next) { + + if (curdesc == bdp) + break; /* found it */ + prev = curdesc; + } + + ASSERT(curdesc == bdp); + prev->bd_next = bdp->bd_next; /* remove from after prev */ +} + +/* + * Look for a specific ops vector on the specified behavior chain. + * Return the associated behavior descriptor. Or NULL, if not found.
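+ * + * For example, a caller holding a chain head could recover the descriptor + * for a known ops vector with a (hypothetical) call such as + * bdp = bhv_lookup(bhp, &my_ops); + * and then reach that behavior's private data through BHV_PDATA(bdp).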
+ */ +bhv_desc_t * +bhv_lookup(bhv_head_t *bhp, void *ops) +{ + bhv_desc_t *curdesc; + + for (curdesc = bhp->bh_first; + curdesc != NULL; + curdesc = curdesc->bd_next) { + + if (curdesc->bd_ops == ops) + return curdesc; + } + + return NULL; +} + +/* + * Looks for the first behavior within a specified range of positions. + * Return the associated behavior descriptor. Or NULL, if none found. + */ +bhv_desc_t * +bhv_lookup_range(bhv_head_t *bhp, int low, int high) +{ + bhv_desc_t *curdesc; + + for (curdesc = bhp->bh_first; + curdesc != NULL; + curdesc = curdesc->bd_next) { + + int position = BHV_POSITION(curdesc); + + if (position <= high) { + if (position >= low) + return curdesc; + return NULL; + } + } + + return NULL; +} + +/* + * Return the base behavior in the chain, or NULL if the chain + * is empty. + * + * The caller has not read locked the behavior chain, so acquire the + * lock before traversing the chain. + */ +bhv_desc_t * +bhv_base(bhv_head_t *bhp) +{ + bhv_desc_t *curdesc; + + for (curdesc = bhp->bh_first; + curdesc != NULL; + curdesc = curdesc->bd_next) { + + if (curdesc->bd_next == NULL) { + return curdesc; + } + } + + return NULL; +} + +void +bhv_head_init( + bhv_head_t *bhp, + char *name) +{ + bhp->bh_first = NULL; +} + +void +bhv_insert_initial( + bhv_head_t *bhp, + bhv_desc_t *bdp) +{ + ASSERT(bhp->bh_first == NULL); + (bhp)->bh_first = bdp; +} + +void +bhv_head_destroy( + bhv_head_t *bhp) +{ + ASSERT(bhp->bh_first == NULL); +} diff -Nru a/fs/xfs/xfs_behavior.h b/fs/xfs/xfs_behavior.h --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/xfs_behavior.h Thu Jan 29 23:15:58 2004 @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ +#ifndef __XFS_BEHAVIOR_H__ +#define __XFS_BEHAVIOR_H__ + +/* + * Header file used to associate behaviors with virtualized objects. + * + * A virtualized object is an internal, virtualized representation of + * OS entities such as persistent files, processes, or sockets. Examples + * of virtualized objects include vnodes, vprocs, and vsockets. Often + * a virtualized object is referred to simply as an "object." + * + * A behavior is essentially an implementation layer associated with + * an object. 
Multiple behaviors for an object are chained together, + * the order of chaining determining the order of invocation. Each + * behavior of a given object implements the same set of interfaces + * (e.g., the VOP interfaces). + * + * Behaviors may be dynamically inserted into an object's behavior chain, + * such that the addition is transparent to consumers that already have + * references to the object. Typically, a given behavior will be inserted + * at a particular location in the behavior chain. Insertion of new + * behaviors is synchronized with operations-in-progress (oip's) so that + * the oip's always see a consistent view of the chain. + * + * The term "interposition" is used to refer to the act of inserting + * a behavior such that it interposes on (i.e., is inserted in front + * of) a particular other behavior. A key example of this is when a + * system implementing distributed single system image wishes to + * interpose a distribution layer (providing distributed coherency) + * in front of an object that is otherwise only accessed locally. + * + * Note that the traditional vnode/inode combination is simply a virtualized + * object that has exactly one associated behavior. + * + * Behavior synchronization is logic which is necessary under certain + * circumstances to ensure that there is no conflict between ongoing operations + * traversing the behavior chain and those dynamically modifying the + * behavior chain. Because behavior synchronization adds extra overhead + * to virtual operation invocation, we want to restrict, as much as + * we can, the requirement for this extra code, to those situations + * in which it is truly necessary. + * + * Behavior synchronization is needed whenever there's at least one class + * of object in the system for which: + * 1) multiple behaviors for a given object are supported, + * -- AND -- + * 2a) insertion of a new behavior can happen dynamically at any time during + * the life of an active object, + * -- AND -- + * 3a) insertion of a new behavior needs to synchronize with existing + * ops-in-progress. + * -- OR -- + * 3b) multiple different behaviors can be dynamically inserted at + * any time during the life of an active object + * -- OR -- + * 3c) removal of a behavior can occur at any time during the life of + * an active object. + * -- OR -- + * 2b) removal of a behavior can occur at any time during the life of an + * active object + * + */ + +struct bhv_head_lock; + +/* + * Behavior head. Head of the chain of behaviors. + * Contained within each virtualized object data structure. + */ +typedef struct bhv_head { + struct bhv_desc *bh_first; /* first behavior in chain */ + struct bhv_head_lock *bh_lockp; /* pointer to lock info struct */ +} bhv_head_t; + +/* + * Behavior descriptor. Descriptor associated with each behavior. + * Contained within the behavior's private data structure. + */ +typedef struct bhv_desc { + void *bd_pdata; /* private data for this behavior */ + void *bd_vobj; /* virtual object associated with */ + void *bd_ops; /* ops for this behavior */ + struct bhv_desc *bd_next; /* next behavior in chain */ +} bhv_desc_t; + +/* + * Behavior identity field. A behavior's identity determines the position + * where it lives within a behavior chain, and it's always the first field + * of the behavior's ops vector. The optional id field further identifies the + * subsystem responsible for the behavior.
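+ * + * For example, an ops vector meant to sit at the bottom of a chain could + * lead with an identity built as BHV_IDENTITY_INIT_POSITION(BHV_POSITION_BASE) + * (see below), so that bhv_insert() places every other behavior in front of it.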
+ */ +typedef struct bhv_identity { + __u16 bi_id; /* owning subsystem id */ + __u16 bi_position; /* position in chain */ +} bhv_identity_t; + +typedef bhv_identity_t bhv_position_t; + +#define BHV_IDENTITY_INIT(id,pos) {id, pos} +#define BHV_IDENTITY_INIT_POSITION(pos) BHV_IDENTITY_INIT(0, pos) + +/* + * Define boundaries of position values. + */ +#define BHV_POSITION_INVALID 0 /* invalid position number */ +#define BHV_POSITION_BASE 1 /* base (last) implementation layer */ +#define BHV_POSITION_TOP 63 /* top (first) implementation layer */ + +/* + * Plumbing macros. + */ +#define BHV_HEAD_FIRST(bhp) (ASSERT((bhp)->bh_first), (bhp)->bh_first) +#define BHV_NEXT(bdp) (ASSERT((bdp)->bd_next), (bdp)->bd_next) +#define BHV_NEXTNULL(bdp) ((bdp)->bd_next) +#define BHV_VOBJ(bdp) (ASSERT((bdp)->bd_vobj), (bdp)->bd_vobj) +#define BHV_VOBJNULL(bdp) ((bdp)->bd_vobj) +#define BHV_PDATA(bdp) (bdp)->bd_pdata +#define BHV_OPS(bdp) (bdp)->bd_ops +#define BHV_IDENTITY(bdp) ((bhv_identity_t *)(bdp)->bd_ops) +#define BHV_POSITION(bdp) (BHV_IDENTITY(bdp)->bi_position) + +extern void bhv_head_init(bhv_head_t *, char *); +extern void bhv_head_destroy(bhv_head_t *); +extern int bhv_insert(bhv_head_t *, bhv_desc_t *); +extern void bhv_insert_initial(bhv_head_t *, bhv_desc_t *); + +/* + * Initialize a new behavior descriptor. + * Arguments: + * bdp - pointer to behavior descriptor + * pdata - pointer to behavior's private data + * vobj - pointer to associated virtual object + * ops - pointer to ops for this behavior + */ +#define bhv_desc_init(bdp, pdata, vobj, ops) \ + { \ + (bdp)->bd_pdata = pdata; \ + (bdp)->bd_vobj = vobj; \ + (bdp)->bd_ops = ops; \ + (bdp)->bd_next = NULL; \ + } + +/* + * Remove a behavior descriptor from a behavior chain. + */ +#define bhv_remove(bhp, bdp) \ + { \ + if ((bhp)->bh_first == (bdp)) { \ + /* \ + * Remove from front of chain. \ + * Atomic wrt oip's. \ + */ \ + (bhp)->bh_first = (bdp)->bd_next; \ + } else { \ + /* remove from non-front of chain */ \ + bhv_remove_not_first(bhp, bdp); \ + } \ + (bdp)->bd_vobj = NULL; \ + } + +/* + * Behavior module prototypes. + */ +extern void bhv_remove_not_first(bhv_head_t *bhp, bhv_desc_t *bdp); +extern bhv_desc_t * bhv_lookup(bhv_head_t *bhp, void *ops); +extern bhv_desc_t * bhv_lookup_range(bhv_head_t *bhp, int low, int high); +extern bhv_desc_t * bhv_base(bhv_head_t *bhp); + +/* No bhv locking on Linux */ +#define bhv_lookup_unlocked bhv_lookup +#define bhv_base_unlocked bhv_base + +#endif /* __XFS_BEHAVIOR_H__ */ diff -Nru a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c --- a/fs/xfs/xfs_bmap_btree.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_bmap_btree.c Thu Jan 29 23:15:58 2004 @@ -61,7 +61,7 @@ #include "xfs_error.h" #include "xfs_quota.h" -#ifdef DEBUG +#if defined(XFS_BMBT_TRACE) ktrace_t *xfs_bmbt_trace_buf; #endif diff -Nru a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h --- a/fs/xfs/xfs_buf.h Thu Jan 29 23:15:58 2004 +++ /dev/null Wed Dec 31 16:00:00 1969 @@ -1,308 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
- * - * Further, this software is distributed without any warranty that it is - * free of the rightful claim of any third person regarding infringement - * or the like. Any license provided herein, whether implied or - * otherwise, applies only to this software file. Patent licenses, if - * any, provided herein do not apply to combinations of this program with - * other software, or any other product whatsoever. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write the Free Software Foundation, Inc., 59 - * Temple Place - Suite 330, Boston MA 02111-1307, USA. - * - * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, - * Mountain View, CA 94043, or: - * - * http://www.sgi.com - * - * For further information regarding this notice, see: - * - * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ - */ -#ifndef __XFS_BUF_H__ -#define __XFS_BUF_H__ - -/* These are just for xfs_syncsub... it sets an internal variable - * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t - */ -#define XFS_B_ASYNC PBF_ASYNC -#define XFS_B_DELWRI PBF_DELWRI -#define XFS_B_READ PBF_READ -#define XFS_B_WRITE PBF_WRITE -#define XFS_B_STALE PBF_STALE - -#define XFS_BUF_TRYLOCK PBF_TRYLOCK -#define XFS_INCORE_TRYLOCK PBF_TRYLOCK -#define XFS_BUF_LOCK PBF_LOCK -#define XFS_BUF_MAPPED PBF_MAPPED - -#define BUF_BUSY PBF_DONT_BLOCK - -#define XFS_BUF_BFLAGS(x) ((x)->pb_flags) -#define XFS_BUF_ZEROFLAGS(x) \ - ((x)->pb_flags &= ~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_SYNC|PBF_DELWRI)) - -#define XFS_BUF_STALE(x) ((x)->pb_flags |= XFS_B_STALE) -#define XFS_BUF_UNSTALE(x) ((x)->pb_flags &= ~XFS_B_STALE) -#define XFS_BUF_ISSTALE(x) ((x)->pb_flags & XFS_B_STALE) -#define XFS_BUF_SUPER_STALE(x) do { \ - XFS_BUF_STALE(x); \ - xfs_buf_undelay(x); \ - XFS_BUF_DONE(x); \ - } while (0) - -#define XFS_BUF_MANAGE PBF_FS_MANAGED -#define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) - -static inline void xfs_buf_undelay(page_buf_t *pb) -{ - if (pb->pb_flags & PBF_DELWRI) { - if (pb->pb_list.next != &pb->pb_list) { - pagebuf_delwri_dequeue(pb); - pagebuf_rele(pb); - } else { - pb->pb_flags &= ~PBF_DELWRI; - } - } -} - -#define XFS_BUF_DELAYWRITE(x) ((x)->pb_flags |= PBF_DELWRI) -#define XFS_BUF_UNDELAYWRITE(x) xfs_buf_undelay(x) -#define XFS_BUF_ISDELAYWRITE(x) ((x)->pb_flags & PBF_DELWRI) - -#define XFS_BUF_ERROR(x,no) pagebuf_ioerror(x,no) -#define XFS_BUF_GETERROR(x) pagebuf_geterror(x) -#define XFS_BUF_ISERROR(x) (pagebuf_geterror(x)?1:0) - -#define XFS_BUF_DONE(x) ((x)->pb_flags &= ~(PBF_PARTIAL|PBF_NONE)) -#define XFS_BUF_UNDONE(x) ((x)->pb_flags |= PBF_PARTIAL|PBF_NONE) -#define XFS_BUF_ISDONE(x) (!(PBF_NOT_DONE(x))) - -#define XFS_BUF_BUSY(x) ((x)->pb_flags |= PBF_FORCEIO) -#define XFS_BUF_UNBUSY(x) ((x)->pb_flags &= ~PBF_FORCEIO) -#define XFS_BUF_ISBUSY(x) (1) - -#define XFS_BUF_ASYNC(x) ((x)->pb_flags |= PBF_ASYNC) -#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC) -#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC) - -#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH) -#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH) -#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH) - -#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n") -#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n") -#define XFS_BUF_ISSHUT(x) (0) - -#define XFS_BUF_HOLD(x) pagebuf_hold(x) -#define XFS_BUF_READ(x) ((x)->pb_flags |= PBF_READ) -#define XFS_BUF_UNREAD(x) ((x)->pb_flags &= 
~PBF_READ) -#define XFS_BUF_ISREAD(x) ((x)->pb_flags & PBF_READ) - -#define XFS_BUF_WRITE(x) ((x)->pb_flags |= PBF_WRITE) -#define XFS_BUF_UNWRITE(x) ((x)->pb_flags &= ~PBF_WRITE) -#define XFS_BUF_ISWRITE(x) ((x)->pb_flags & PBF_WRITE) - -#define XFS_BUF_ISUNINITIAL(x) (0) -#define XFS_BUF_UNUNINITIAL(x) (0) - -#define XFS_BUF_BP_ISMAPPED(bp) 1 - -typedef struct page_buf_s xfs_buf_t; -#define xfs_buf page_buf_s - -typedef struct pb_target xfs_buftarg_t; -#define xfs_buftarg pb_target - -#define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD) -#define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD) - -#define XFS_BUF_IODONE_FUNC(buf) (buf)->pb_iodone -#define XFS_BUF_SET_IODONE_FUNC(buf, func) \ - (buf)->pb_iodone = (func) -#define XFS_BUF_CLR_IODONE_FUNC(buf) \ - (buf)->pb_iodone = NULL -#define XFS_BUF_SET_BDSTRAT_FUNC(buf, func) \ - (buf)->pb_strat = (func) -#define XFS_BUF_CLR_BDSTRAT_FUNC(buf) \ - (buf)->pb_strat = NULL - -#define XFS_BUF_FSPRIVATE(buf, type) \ - ((type)(buf)->pb_fspriv) -#define XFS_BUF_SET_FSPRIVATE(buf, value) \ - (buf)->pb_fspriv = (void *)(value) -#define XFS_BUF_FSPRIVATE2(buf, type) \ - ((type)(buf)->pb_fspriv2) -#define XFS_BUF_SET_FSPRIVATE2(buf, value) \ - (buf)->pb_fspriv2 = (void *)(value) -#define XFS_BUF_FSPRIVATE3(buf, type) \ - ((type)(buf)->pb_fspriv3) -#define XFS_BUF_SET_FSPRIVATE3(buf, value) \ - (buf)->pb_fspriv3 = (void *)(value) -#define XFS_BUF_SET_START(buf) - -#define XFS_BUF_SET_BRELSE_FUNC(buf, value) \ - (buf)->pb_relse = (value) - -#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) - -extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset) -{ - if (bp->pb_flags & PBF_MAPPED) - return XFS_BUF_PTR(bp) + offset; - return (xfs_caddr_t) pagebuf_offset(bp, offset); -} - -#define XFS_BUF_SET_PTR(bp, val, count) \ - pagebuf_associate_memory(bp, val, count) -#define XFS_BUF_ADDR(bp) ((bp)->pb_bn) -#define XFS_BUF_SET_ADDR(bp, blk) \ - ((bp)->pb_bn = (page_buf_daddr_t)(blk)) -#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) -#define XFS_BUF_SET_OFFSET(bp, off) \ - ((bp)->pb_file_offset = (off)) -#define XFS_BUF_COUNT(bp) ((bp)->pb_count_desired) -#define XFS_BUF_SET_COUNT(bp, cnt) \ - ((bp)->pb_count_desired = (cnt)) -#define XFS_BUF_SIZE(bp) ((bp)->pb_buffer_length) -#define XFS_BUF_SET_SIZE(bp, cnt) \ - ((bp)->pb_buffer_length = (cnt)) -#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) -#define XFS_BUF_SET_VTYPE(bp, type) -#define XFS_BUF_SET_REF(bp, ref) - -#define XFS_BUF_ISPINNED(bp) pagebuf_ispin(bp) - -#define XFS_BUF_VALUSEMA(bp) pagebuf_lock_value(bp) -#define XFS_BUF_CPSEMA(bp) (pagebuf_cond_lock(bp) == 0) -#define XFS_BUF_VSEMA(bp) pagebuf_unlock(bp) -#define XFS_BUF_PSEMA(bp,x) pagebuf_lock(bp) -#define XFS_BUF_V_IODONESEMA(bp) up(&bp->pb_iodonesema); - -/* setup the buffer target from a buftarg structure */ -#define XFS_BUF_SET_TARGET(bp, target) \ - (bp)->pb_target = (target) -#define XFS_BUF_TARGET(bp) ((bp)->pb_target) -#define XFS_BUFTARG_NAME(target) \ - pagebuf_target_name(target) - -#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) -#define XFS_BUF_SET_VTYPE(bp, type) -#define XFS_BUF_SET_REF(bp, ref) - -#define xfs_buf_read(target, blkno, len, flags) \ - pagebuf_get((target), (blkno), (len), \ - PBF_LOCK | PBF_READ | PBF_MAPPED | PBF_MAPPABLE) -#define xfs_buf_get(target, blkno, len, flags) \ - pagebuf_get((target), (blkno), (len), \ - PBF_LOCK | PBF_MAPPED | PBF_MAPPABLE) - -#define xfs_buf_read_flags(target, blkno, len, flags) \ - pagebuf_get((target), (blkno), (len), \ - PBF_READ | PBF_MAPPABLE | flags) 
-#define xfs_buf_get_flags(target, blkno, len, flags) \ - pagebuf_get((target), (blkno), (len), \ - PBF_MAPPABLE | flags) - -static inline int xfs_bawrite(void *mp, page_buf_t *bp) -{ - bp->pb_fspriv3 = mp; - bp->pb_strat = xfs_bdstrat_cb; - xfs_buf_undelay(bp); - return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | PBF_RUN_QUEUES); -} - -static inline void xfs_buf_relse(page_buf_t *bp) -{ - if ((bp->pb_flags & _PBF_LOCKABLE) && !bp->pb_relse) - pagebuf_unlock(bp); - pagebuf_rele(bp); -} - -#define xfs_bpin(bp) pagebuf_pin(bp) -#define xfs_bunpin(bp) pagebuf_unpin(bp) - -#define xfs_buftrace(id, bp) \ - pagebuf_trace(bp, id, NULL, (void *)__builtin_return_address(0)) - -#define xfs_biodone(pb) \ - pagebuf_iodone(pb, (pb->pb_flags & PBF_FS_DATAIOD), 0) - -#define xfs_incore(buftarg,blkno,len,lockit) \ - pagebuf_find(buftarg, blkno ,len, lockit) - - -#define xfs_biomove(pb, off, len, data, rw) \ - pagebuf_iomove((pb), (off), (len), (data), \ - ((rw) == XFS_B_WRITE) ? PBRW_WRITE : PBRW_READ) - -#define xfs_biozero(pb, off, len) \ - pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) - - -static inline int XFS_bwrite(page_buf_t *pb) -{ - int iowait = (pb->pb_flags & PBF_ASYNC) == 0; - int error = 0; - - pb->pb_flags |= PBF_SYNC; - if (!iowait) - pb->pb_flags |= PBF_RUN_QUEUES; - - xfs_buf_undelay(pb); - pagebuf_iostrategy(pb); - if (iowait) { - error = pagebuf_iowait(pb); - xfs_buf_relse(pb); - } - return error; -} - -#define XFS_bdwrite(pb) \ - pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC) - -static inline int xfs_bdwrite(void *mp, page_buf_t *bp) -{ - bp->pb_strat = xfs_bdstrat_cb; - bp->pb_fspriv3 = mp; - - return pagebuf_iostart(bp, PBF_DELWRI | PBF_ASYNC); -} - -#define XFS_bdstrat(bp) pagebuf_iorequest(bp) - -#define xfs_iowait(pb) pagebuf_iowait(pb) - - -/* - * Go through all incore buffers, and release buffers - * if they belong to the given device. This is used in - * filesystem error handling to preserve the consistency - * of its metadata. - */ - -#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg) - -#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg) - -#define xfs_incore_relse(buftarg,delwri_only,wait) \ - xfs_relse_buftarg(buftarg) - -#define xfs_baread(target, rablkno, ralen) \ - pagebuf_readahead((target), (rablkno), (ralen), PBF_DONT_BLOCK) - -#define xfs_buf_get_empty(len, target) pagebuf_get_empty((len), (target)) -#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target)) -#define xfs_buf_free(bp) pagebuf_free(bp) - -#endif /* __XFS_BUF_H__ */ diff -Nru a/fs/xfs/xfs_dir.c b/fs/xfs/xfs_dir.c --- a/fs/xfs/xfs_dir.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_dir.c Thu Jan 29 23:15:58 2004 @@ -162,7 +162,7 @@ xfs_dir_put_t put); STATIC int xfs_dir_node_replace(xfs_da_args_t *args); -#if defined(DEBUG) +#if defined(XFS_DIR_TRACE) ktrace_t *xfs_dir_trace_buf; #endif diff -Nru a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c --- a/fs/xfs/xfs_dir2_trace.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_dir2_trace.c Thu Jan 29 23:15:58 2004 @@ -49,11 +49,9 @@ #include "xfs_da_btree.h" #include "xfs_dir2_trace.h" -#ifdef DEBUG +#ifdef XFS_DIR2_TRACE ktrace_t *xfs_dir2_trace_buf; -#endif /* DEBUG */ -#ifdef XFS_DIR2_TRACE /* * Enter something in the trace buffers. */ diff -Nru a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c --- /dev/null Wed Dec 31 16:00:00 1969 +++ b/fs/xfs/xfs_iomap.c Thu Jan 29 23:15:58 2004 @@ -0,0 +1,795 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + */ + +#include "xfs.h" + +#include "xfs_fs.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_dmapi.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_attr_sf.h" +#include "xfs_dir_sf.h" +#include "xfs_dir2_sf.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bit.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_acl.h" +#include "xfs_cap.h" +#include "xfs_mac.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_space.h" +#include "xfs_utils.h" +#include "xfs_iomap.h" + +#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \ + << mp->m_writeio_log) +#define XFS_STRAT_WRITE_IMAPS 2 +#define XFS_WRITE_IMAPS XFS_BMAP_MAX_NMAP + +STATIC int +xfs_imap_to_bmap( + xfs_iocore_t *io, + xfs_off_t offset, + xfs_bmbt_irec_t *imap, + xfs_iomap_t *iomapp, + int imaps, /* Number of imap entries */ + int iomaps, /* Number of iomap entries */ + int flags) +{ + xfs_mount_t *mp; + xfs_fsize_t nisize; + int pbm; + xfs_fsblock_t start_block; + + mp = io->io_mount; + nisize = XFS_SIZE(mp, io); + if (io->io_new_size > nisize) + nisize = io->io_new_size; + + for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) { + iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ? 
+ mp->m_rtdev_targp : mp->m_ddev_targp; + iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff); + iomapp->iomap_delta = offset - iomapp->iomap_offset; + iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount); + iomapp->iomap_flags = flags; + + start_block = imap->br_startblock; + if (start_block == HOLESTARTBLOCK) { + iomapp->iomap_bn = IOMAP_DADDR_NULL; + iomapp->iomap_flags = IOMAP_HOLE; + } else if (start_block == DELAYSTARTBLOCK) { + iomapp->iomap_bn = IOMAP_DADDR_NULL; + iomapp->iomap_flags = IOMAP_DELAY; + } else { + iomapp->iomap_bn = XFS_FSB_TO_DB_IO(io, start_block); + if (ISUNWRITTEN(imap)) + iomapp->iomap_flags |= IOMAP_UNWRITTEN; + } + + if ((iomapp->iomap_offset + iomapp->iomap_bsize) >= nisize) { + iomapp->iomap_flags |= IOMAP_EOF; + } + + offset += iomapp->iomap_bsize - iomapp->iomap_delta; + } + return pbm; /* Return the number filled */ +} + +int +xfs_iomap( + xfs_iocore_t *io, + xfs_off_t offset, + ssize_t count, + int flags, + xfs_iomap_t *iomapp, + int *niomaps) +{ + xfs_mount_t *mp = io->io_mount; + xfs_fileoff_t offset_fsb, end_fsb; + int error = 0; + int lockmode = 0; + xfs_bmbt_irec_t imap; + int nimaps = 1; + int bmapi_flags = 0; + int iomap_flags = 0; + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + switch (flags & + (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE | + BMAPI_UNWRITTEN | BMAPI_DEVICE)) { + case BMAPI_READ: + lockmode = XFS_LCK_MAP_SHARED(mp, io); + bmapi_flags = XFS_BMAPI_ENTIRE; + if (flags & BMAPI_IGNSTATE) + bmapi_flags |= XFS_BMAPI_IGSTATE; + break; + case BMAPI_WRITE: + lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR; + bmapi_flags = 0; + XFS_ILOCK(mp, io, lockmode); + break; + case BMAPI_ALLOCATE: + lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD; + bmapi_flags = XFS_BMAPI_ENTIRE; + /* Attempt non-blocking lock */ + if (flags & BMAPI_TRYLOCK) { + if (!XFS_ILOCK_NOWAIT(mp, io, lockmode)) + return XFS_ERROR(EAGAIN); + } else { + XFS_ILOCK(mp, io, lockmode); + } + break; + case BMAPI_UNWRITTEN: + goto phase2; + case BMAPI_DEVICE: + lockmode = XFS_LCK_MAP_SHARED(mp, io); + iomapp->iomap_target = io->io_flags & XFS_IOCORE_RT ? 
+ mp->m_rtdev_targp : mp->m_ddev_targp; + error = 0; + *niomaps = 1; + goto out; + default: + BUG(); + } + + ASSERT(offset <= mp->m_maxioffset); + if ((xfs_fsize_t)offset + count > mp->m_maxioffset) + count = mp->m_maxioffset - offset; + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); + offset_fsb = XFS_B_TO_FSBT(mp, offset); + + error = XFS_BMAPI(mp, NULL, io, offset_fsb, + (xfs_filblks_t)(end_fsb - offset_fsb), + bmapi_flags, NULL, 0, &imap, + &nimaps, NULL); + + if (error) + goto out; + +phase2: + switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE|BMAPI_UNWRITTEN)) { + case BMAPI_WRITE: + /* If we found an extent, return it */ + if (nimaps && (imap.br_startblock != HOLESTARTBLOCK)) + break; + + if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) { + error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset, + count, flags, &imap, &nimaps, nimaps); + } else { + error = XFS_IOMAP_WRITE_DELAY(mp, io, offset, count, + flags, &imap, &nimaps); + } + iomap_flags = IOMAP_NEW; + break; + case BMAPI_ALLOCATE: + /* If we found an extent, return it */ + XFS_IUNLOCK(mp, io, lockmode); + lockmode = 0; + + if (nimaps && !ISNULLSTARTBLOCK(imap.br_startblock)) + break; + + error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, &imap, &nimaps); + break; + case BMAPI_UNWRITTEN: + lockmode = 0; + error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count); + nimaps = 0; + break; + } + + if (nimaps) { + *niomaps = xfs_imap_to_bmap(io, offset, &imap, + iomapp, nimaps, *niomaps, iomap_flags); + } else if (niomaps) { + *niomaps = 0; + } + +out: + if (lockmode) + XFS_IUNLOCK(mp, io, lockmode); + return XFS_ERROR(error); +} + +STATIC int +xfs_flush_space( + xfs_inode_t *ip, + int *fsynced, + int *ioflags) +{ + switch (*fsynced) { + case 0: + if (ip->i_delayed_blks) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_inode(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + *fsynced = 1; + } else { + *ioflags |= BMAPI_SYNC; + *fsynced = 2; + } + return 0; + case 1: + *fsynced = 2; + *ioflags |= BMAPI_SYNC; + return 0; + case 2: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_device(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + *fsynced = 3; + return 0; + } + return 1; +} + +int +xfs_iomap_write_direct( + xfs_inode_t *ip, + loff_t offset, + size_t count, + int flags, + xfs_bmbt_irec_t *ret_imap, + int *nmaps, + int found) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_iocore_t *io = &ip->i_iocore; + xfs_fileoff_t offset_fsb; + xfs_fileoff_t last_fsb; + xfs_filblks_t count_fsb; + xfs_fsize_t isize; + xfs_fsblock_t firstfsb; + int nimaps, maps; + int error; + int bmapi_flag; + int rt; + xfs_trans_t *tp; + xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp; + xfs_bmap_free_t free_list; + int aeof; + xfs_filblks_t datablocks; + int committed; + int numrtextents; + uint resblks; + + /* + * Make sure that the dquots are there. This doesn't hold + * the ilock across a disk read. 
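+ * (XFS_QMOPT_ILOCKED indicates the caller already holds the ilock, so + * the quota code will not try to take it again.)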
+ */ + + error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED); + if (error) + return XFS_ERROR(error); + + maps = min(XFS_WRITE_IMAPS, *nmaps); + nimaps = maps; + + isize = ip->i_d.di_size; + aeof = (offset + count) > isize; + + if (io->io_new_size > isize) + isize = io->io_new_size; + + offset_fsb = XFS_B_TO_FSBT(mp, offset); + last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); + count_fsb = last_fsb - offset_fsb; + if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) { + xfs_fileoff_t map_last_fsb; + + map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff; + + if (map_last_fsb < last_fsb) { + last_fsb = map_last_fsb; + count_fsb = last_fsb - offset_fsb; + } + ASSERT(count_fsb > 0); + } + + /* + * determine if reserving space on + * the data or realtime partition. + */ + if ((rt = XFS_IS_REALTIME_INODE(ip))) { + int sbrtextsize, iprtextsize; + + sbrtextsize = mp->m_sb.sb_rextsize; + iprtextsize = + ip->i_d.di_extsize ? ip->i_d.di_extsize : sbrtextsize; + numrtextents = (count_fsb + iprtextsize - 1); + do_div(numrtextents, sbrtextsize); + datablocks = 0; + } else { + datablocks = count_fsb; + numrtextents = 0; + } + + /* + * allocate and setup the transaction + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); + + resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks); + + error = xfs_trans_reserve(tp, resblks, + XFS_WRITE_LOG_RES(mp), numrtextents, + XFS_TRANS_PERM_LOG_RES, + XFS_WRITE_LOG_COUNT); + + /* + * check for running out of space + */ + if (error) + /* + * Free the transaction structure. + */ + xfs_trans_cancel(tp, 0); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + if (error) + goto error_out; /* Don't return in above if .. trans .., + need lock to return */ + + if (XFS_TRANS_RESERVE_BLKQUOTA(mp, tp, ip, resblks)) { + error = (EDQUOT); + goto error1; + } + nimaps = 1; + + bmapi_flag = XFS_BMAPI_WRITE; + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + + if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt)) + bmapi_flag |= XFS_BMAPI_PREALLOC; + + /* + * issue the bmapi() call to allocate the blocks + */ + XFS_BMAP_INIT(&free_list, &firstfsb); + imapp = &imap[0]; + error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, + bmapi_flag, &firstfsb, 0, imapp, &nimaps, &free_list); + if (error) { + goto error0; + } + + /* + * complete the transaction + */ + + error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); + if (error) { + goto error0; + } + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + if (error) { + goto error_out; + } + + /* copy any maps to caller's array and return any error. 
*/ + if (nimaps == 0) { + error = (ENOSPC); + goto error_out; + } + + *ret_imap = imap[0]; + *nmaps = 1; + return 0; + + error0: /* Cancel bmap, unlock inode, and cancel trans */ + xfs_bmap_cancel(&free_list); + + error1: /* Just cancel transaction */ + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); + *nmaps = 0; /* nothing set-up here */ + +error_out: + return XFS_ERROR(error); +} + +int +xfs_iomap_write_delay( + xfs_inode_t *ip, + loff_t offset, + size_t count, + int ioflag, + xfs_bmbt_irec_t *ret_imap, + int *nmaps) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_iocore_t *io = &ip->i_iocore; + xfs_fileoff_t offset_fsb; + xfs_fileoff_t last_fsb; + xfs_fsize_t isize; + xfs_fsblock_t firstblock; + int nimaps; + int error; + xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; + int aeof; + int fsynced = 0; + + ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0); + + /* + * Make sure that the dquots are there. This doesn't hold + * the ilock across a disk read. + */ + + error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED); + if (error) + return XFS_ERROR(error); + +retry: + isize = ip->i_d.di_size; + if (io->io_new_size > isize) { + isize = io->io_new_size; + } + + aeof = 0; + offset_fsb = XFS_B_TO_FSBT(mp, offset); + last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); + /* + * If the caller is doing a write at the end of the file, + * then extend the allocation (and the buffer used for the write) + * out to the file system's write iosize. We clean up any extra + * space left over when the file is closed in xfs_inactive(). + * + * We don't bother with this for sync writes, because we need + * to minimize the amount we write for good performance. + */ + if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) { + xfs_off_t aligned_offset; + unsigned int iosize; + xfs_fileoff_t ioalign; + + iosize = mp->m_writeio_blocks; + aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); + ioalign = XFS_B_TO_FSBT(mp, aligned_offset); + last_fsb = ioalign + iosize; + aeof = 1; + } + + nimaps = XFS_WRITE_IMAPS; + firstblock = NULLFSBLOCK; + + /* + * Round up the allocation request to an m_dalign boundary if the file + * size is greater than 512K and we are allocating past the allocation eof + */ + if (mp->m_dalign && (isize >= mp->m_dalign) && aeof) { + int eof; + xfs_fileoff_t new_last_fsb; + new_last_fsb = roundup_64(last_fsb, mp->m_dalign); + error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof); + if (error) { + return error; + } + if (eof) { + last_fsb = new_last_fsb; + } + } + + error = xfs_bmapi(NULL, ip, offset_fsb, + (xfs_filblks_t)(last_fsb - offset_fsb), + XFS_BMAPI_DELAY | XFS_BMAPI_WRITE | + XFS_BMAPI_ENTIRE, &firstblock, 1, imap, + &nimaps, NULL); + /* + * This can be EDQUOT, if nimaps == 0 + */ + if (error && (error != ENOSPC)) { + return XFS_ERROR(error); + } + /* + * If bmapi returned us nothing, and if we didn't get back EDQUOT, + * then we must have run out of space. + */ + + if (nimaps == 0) { + if (xfs_flush_space(ip, &fsynced, &ioflag)) + return XFS_ERROR(ENOSPC); + + error = 0; + goto retry; + } + + *ret_imap = imap[0]; + *nmaps = 1; + return 0; +} + +/* + * Pass in a delayed allocate extent, convert it to real extents; + * return to the caller the extent we create which maps on top of + * the originating caller's request. + * + * Called without a lock on the inode.
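+ * + * For example, the delalloc writeout path reaches this function through + * xfs_iomap(..., BMAPI_ALLOCATE, ...) above: the delayed mapping it found + * comes in as *map, and on return *retmap indicates whether *map now + * describes real blocks covering the original request.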
+ */ +int +xfs_iomap_write_allocate( + xfs_inode_t *ip, + xfs_bmbt_irec_t *map, + int *retmap) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_fileoff_t offset_fsb, last_block; + xfs_fileoff_t end_fsb, map_start_fsb; + xfs_fsblock_t first_block; + xfs_bmap_free_t free_list; + xfs_filblks_t count_fsb; + xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS]; + xfs_trans_t *tp; + int i, nimaps, committed; + int error = 0; + int nres; + + *retmap = 0; + + /* + * Make sure that the dquots are there. + */ + + if ((error = XFS_QM_DQATTACH(mp, ip, 0))) + return XFS_ERROR(error); + + offset_fsb = map->br_startoff; + count_fsb = map->br_blockcount; + map_start_fsb = offset_fsb; + + XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); + + while (count_fsb != 0) { + /* + * Set up a transaction with which to allocate the + * backing store for the file. Do allocations in a + * loop until we get some space in the range we are + * interested in. The other space that might be allocated + * is in the delayed allocation extent on which we sit + * but before our buffer starts. + */ + + nimaps = 0; + while (nimaps == 0) { + tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); + error = xfs_trans_reserve(tp, nres, + XFS_WRITE_LOG_RES(mp), + 0, XFS_TRANS_PERM_LOG_RES, + XFS_WRITE_LOG_COUNT); + + if (error == ENOSPC) { + error = xfs_trans_reserve(tp, 0, + XFS_WRITE_LOG_RES(mp), + 0, + XFS_TRANS_PERM_LOG_RES, + XFS_WRITE_LOG_COUNT); + } + if (error) { + xfs_trans_cancel(tp, 0); + return XFS_ERROR(error); + } + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + + XFS_BMAP_INIT(&free_list, &first_block); + + nimaps = XFS_STRAT_WRITE_IMAPS; + /* + * Ensure we don't go beyond eof - it is possible + * the extents changed since we did the read call, + * we dropped the ilock in the interim. + */ + + end_fsb = XFS_B_TO_FSB(mp, ip->i_d.di_size); + xfs_bmap_last_offset(NULL, ip, &last_block, + XFS_DATA_FORK); + last_block = XFS_FILEOFF_MAX(last_block, end_fsb); + if ((map_start_fsb + count_fsb) > last_block) { + count_fsb = last_block - map_start_fsb; + if (count_fsb == 0) { + error = EAGAIN; + goto trans_cancel; + } + } + + /* Go get the actual blocks */ + error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, + XFS_BMAPI_WRITE, &first_block, 1, + imap, &nimaps, &free_list); + + if (error) + goto trans_cancel; + + error = xfs_bmap_finish(&tp, &free_list, + first_block, &committed); + + if (error) + goto trans_cancel; + + error = xfs_trans_commit(tp, + XFS_TRANS_RELEASE_LOG_RES, NULL); + + if (error) + goto error0; + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + + /* + * See if we were able to allocate an extent that + * covers at least part of the callers request + */ + + for (i = 0; i < nimaps; i++) { + if ((map->br_startoff >= imap[i].br_startoff) && + (map->br_startoff < (imap[i].br_startoff + + imap[i].br_blockcount))) { + *map = imap[i]; + *retmap = 1; + XFS_STATS_INC(xs_xstrat_quick); + return 0; + } + count_fsb -= imap[i].br_blockcount; + } + + /* So far we have not mapped the requested part of the + * file, just surrounding data, try again. 
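+ * Below, nimaps is decremented to index the last extent returned, and + * map_start_fsb is advanced past its end so the next pass through the + * outer loop works on the part of the range that is still unmapped.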
+ */ + nimaps--; + offset_fsb = imap[nimaps].br_startoff + + imap[nimaps].br_blockcount; + map_start_fsb = offset_fsb; + } + +trans_cancel: + xfs_bmap_cancel(&free_list); + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); +error0: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return XFS_ERROR(error); +} + +int +xfs_iomap_write_unwritten( + xfs_inode_t *ip, + loff_t offset, + size_t count) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_trans_t *tp; + xfs_fileoff_t offset_fsb; + xfs_filblks_t count_fsb; + xfs_filblks_t numblks_fsb; + xfs_bmbt_irec_t imap; + int committed; + int error; + int nres; + int nimaps; + xfs_fsblock_t firstfsb; + xfs_bmap_free_t free_list; + + offset_fsb = XFS_B_TO_FSBT(mp, offset); + count_fsb = XFS_B_TO_FSB(mp, count); + + do { + nres = XFS_DIOSTRAT_SPACE_RES(mp, 0); + + /* + * set up a transaction to convert the range of extents + * from unwritten to real. Do allocations in a loop until + * we have covered the range passed in. + */ + + tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); + error = xfs_trans_reserve(tp, nres, + XFS_WRITE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_WRITE_LOG_COUNT); + if (error) { + xfs_trans_cancel(tp, 0); + goto error0; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + + /* + * Modify the unwritten extent state of the buffer. + */ + XFS_BMAP_INIT(&free_list, &firstfsb); + nimaps = 1; + error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, + XFS_BMAPI_WRITE, &firstfsb, + 1, &imap, &nimaps, &free_list); + if (error) + goto error_on_bmapi_transaction; + + error = xfs_bmap_finish(&(tp), &(free_list), + firstfsb, &committed); + if (error) + goto error_on_bmapi_transaction; + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + goto error0; + + if ((numblks_fsb = imap.br_blockcount) == 0) { + /* + * The numblks_fsb value should always get + * smaller, otherwise the loop is stuck. + */ + ASSERT(imap.br_blockcount); + break; + } + offset_fsb += numblks_fsb; + count_fsb -= numblks_fsb; + } while (count_fsb > 0); + + return 0; + +error_on_bmapi_transaction: + xfs_bmap_cancel(&free_list); + xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +error0: + return XFS_ERROR(error); +} diff -Nru a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c --- a/fs/xfs/xfs_log_recover.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_log_recover.c Thu Jan 29 23:15:58 2004 @@ -1531,7 +1531,7 @@ xlog_recover_item_t *first_item, *itemq, *itemq_next; xfs_buf_log_format_t *buf_f; xfs_buf_log_format_v1_t *obuf_f; - ushort flags; + ushort flags = 0; first_item = itemq = trans->r_itemq; trans->r_itemq = NULL; diff -Nru a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h --- a/fs/xfs/xfs_types.h Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_types.h Thu Jan 29 23:15:58 2004 @@ -75,24 +75,6 @@ #error BITS_PER_LONG must be 32 or 64 #endif -/* - * Some types are conditional depending on the target system. - * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. - * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well - * as requiring XFS_BIG_BLKNOS to be set. - */ -#if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) -# define XFS_BIG_BLKNOS 1 -# if BITS_PER_LONG == 64 -# define XFS_BIG_INUMS 1 -# else -# define XFS_BIG_INUMS 0 -# endif -#else -# define XFS_BIG_BLKNOS 0 -# define XFS_BIG_INUMS 0 -#endif - #endif /* __KERNEL__ */ typedef __uint32_t xfs_agblock_t; /* blockno in alloc. 
group */ diff -Nru a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c --- a/fs/xfs/xfs_vfsops.c Thu Jan 29 23:15:58 2004 +++ b/fs/xfs/xfs_vfsops.c Thu Jan 29 23:15:58 2004 @@ -171,6 +171,25 @@ xfs_sysctl_unregister(); xfs_refcache_destroy(); +#ifdef XFS_DIR2_TRACE + ktrace_free(xfs_dir2_trace_buf); +#endif +#ifdef XFS_ATTR_TRACE + ktrace_free(xfs_attr_trace_buf); +#endif +#ifdef XFS_DIR_TRACE + ktrace_free(xfs_dir_trace_buf); +#endif +#ifdef XFS_BMBT_TRACE + ktrace_free(xfs_bmbt_trace_buf); +#endif +#ifdef XFS_BMAP_TRACE + ktrace_free(xfs_bmap_trace_buf); +#endif +#ifdef XFS_ALLOC_TRACE + ktrace_free(xfs_alloc_trace_buf); +#endif + kmem_cache_destroy(xfs_bmap_free_item_zone); kmem_cache_destroy(xfs_btree_cur_zone); kmem_cache_destroy(xfs_inode_zone);
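
As a quick illustration of the ordering contract enforced by the new bhv_insert() (higher positions first, duplicate ops vectors rejected), here is a minimal userspace sketch. It is not kernel code: the types are pared-down copies of bhv_head_t/bhv_desc_t from xfs_behavior.h, ASSERT is dropped, EINVAL is hard-coded, and the two ops vectors are hypothetical.

#include <assert.h>
#include <stdio.h>

typedef struct bhv_identity {
	unsigned short	bi_id;		/* owning subsystem id */
	unsigned short	bi_position;	/* position in chain */
} bhv_identity_t;

typedef struct bhv_desc {
	void		*bd_pdata;	/* private data for this behavior */
	void		*bd_vobj;	/* associated virtual object */
	void		*bd_ops;	/* ops vector; begins with identity */
	struct bhv_desc	*bd_next;	/* next behavior in chain */
} bhv_desc_t;

typedef struct bhv_head {
	bhv_desc_t	*bh_first;	/* first (highest-position) behavior */
} bhv_head_t;

#define BHV_IDENTITY(bdp)	((bhv_identity_t *)(bdp)->bd_ops)
#define BHV_POSITION(bdp)	(BHV_IDENTITY(bdp)->bi_position)

/* Same ordering rule as the kernel bhv_insert(): higher positions first. */
static int
bhv_insert(bhv_head_t *bhp, bhv_desc_t *bdp)
{
	bhv_desc_t	*curdesc, *prev = NULL;
	int		position = BHV_POSITION(bdp);

	for (curdesc = bhp->bh_first; curdesc != NULL;
	     curdesc = curdesc->bd_next) {
		if (curdesc->bd_ops == bdp->bd_ops)
			return 22;	/* duplicate ops vector: EINVAL */
		if (position >= BHV_POSITION(curdesc))
			break;		/* new behavior goes before curdesc */
		prev = curdesc;
	}
	if (prev == NULL) {
		bdp->bd_next = bhp->bh_first;	/* new front of chain */
		bhp->bh_first = bdp;
	} else {
		bdp->bd_next = prev->bd_next;	/* splice in after prev */
		prev->bd_next = bdp;
	}
	return 0;
}

int
main(void)
{
	/* Hypothetical ops vectors: a base layer and an interposer. */
	bhv_identity_t	base_ops = { 0, 1 };	/* BHV_POSITION_BASE */
	bhv_identity_t	top_ops = { 0, 63 };	/* BHV_POSITION_TOP */
	bhv_desc_t	base = { NULL, NULL, &base_ops, NULL };
	bhv_desc_t	top = { NULL, NULL, &top_ops, NULL };
	bhv_head_t	head = { NULL };
	bhv_desc_t	*bdp;

	assert(bhv_insert(&head, &base) == 0);
	assert(bhv_insert(&head, &top) == 0);	/* lands in front of base */

	/* The chain is walked front to back: 63 prints before 1. */
	for (bdp = head.bh_first; bdp != NULL; bdp = bdp->bd_next)
		printf("position %d\n", BHV_POSITION(bdp));
	return 0;
}

Compiled and run, this prints position 63 before position 1, matching the front-to-back invocation order described in the xfs_behavior.h comments.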