bk://linux-ntfs.bkbits.net/ntfs-2.6-devel aia21@cantab.net|ChangeSet|20040528152423|29302 aia21 # This is a BitKeeper generated diff -Nru style patch. # # ChangeSet # 2004/05/28 16:24:23+01:00 aia21@cantab.net # NTFS: Implement ->write_inode (fs/ntfs/inode.c::ntfs_write_inode()) for the # ntfs super operations. This gives us inode writing via the VFS inode # dirty code paths. Note: Access time updates are not implemented yet. # # Signed-off-by: Anton Altaparmakov # # fs/ntfs/super.c # 2004/05/28 16:24:17+01:00 aia21@cantab.net +2 -2 # Set ntfs_write_inode() to be our sops->write_inode. # # fs/ntfs/inode.c # 2004/05/28 16:24:17+01:00 aia21@cantab.net +101 -16 # Implement ntfs_write_inode(). # # fs/ntfs/ChangeLog # 2004/05/28 16:24:17+01:00 aia21@cantab.net +7 -1 # Update. # # ChangeSet # 2004/05/28 12:38:37+01:00 aia21@cantab.net # NTFS: Commit open system inodes at umount time. This should make it # virtually impossible for sync_mft_mirror_umount() to ever be needed. # # Signed-off-by: Anton Altaparmakov # # fs/ntfs/super.c # 2004/05/28 12:38:31+01:00 aia21@cantab.net +35 -0 # Commit open system inodes at umount time. # # fs/ntfs/ChangeLog # 2004/05/28 12:38:31+01:00 aia21@cantab.net +2 -0 # Update. # # ChangeSet # 2004/05/28 12:29:35+01:00 aia21@cantab.net # NTFS: Implement writing of mft records (fs/ntfs/mft.[hc]), which includes # keeping the mft mirror in sync with the mft when mirrored mft records # are written. The functions are write_mft_record{,_nolock}(). The # implementation is quite rudimentary for now with lots of things not # implemented yet but I am not sure any of them can actually occur so # I will wait for people to hit each one and only then implement it. # # Signed-off-by: Anton Altaparmakov # # fs/ntfs/mft.h # 2004/05/28 12:29:29+01:00 aia21@cantab.net +35 -0 # Add write_mft_record{,_nolock}(). # # fs/ntfs/mft.c # 2004/05/28 12:29:29+01:00 aia21@cantab.net +387 -0 # Add write_mft_record{,_nolock}(). 
# # fs/ntfs/compress.c # 2004/05/28 12:29:29+01:00 aia21@cantab.net +2 -2 # Error messages typo fixes. # # fs/ntfs/attrib.c # 2004/05/28 12:29:29+01:00 aia21@cantab.net +2 -2 # Debug and error messages typo fixes. # # fs/ntfs/aops.c # 2004/05/28 12:29:29+01:00 aia21@cantab.net +1 -1 # Debug message typo fix. # # fs/ntfs/Makefile # 2004/05/28 12:29:29+01:00 aia21@cantab.net +1 -1 # Update. # # fs/ntfs/ChangeLog # 2004/05/28 12:29:29+01:00 aia21@cantab.net +15 -0 # Update. # # ChangeSet # 2004/05/26 16:32:13+01:00 aia21@cantab.net # NTFS: 2.1.12 release - Fix the second fix to the decompression engine. # # fs/ntfs/super.c # 2004/05/26 16:32:04+01:00 aia21@cantab.net +1 -1 # Fix typo in error message. # # fs/ntfs/compress.c # 2004/05/26 16:32:04+01:00 aia21@cantab.net +6 -14 # Undo the second decompression engine fix (see 2.1.9 release ChangeLog # entry) as it was only fixing a theoretical bug but at the same time # it badly broke the handling of sparse and uncompressed compression # blocks. # # fs/ntfs/Makefile # 2004/05/26 16:32:04+01:00 aia21@cantab.net +1 -1 # Update for 2.1.12 release. # # fs/ntfs/ChangeLog # 2004/05/26 16:32:04+01:00 aia21@cantab.net +5 -1 # Update # # Documentation/filesystems/ntfs.txt # 2004/05/26 16:32:03+01:00 aia21@cantab.net +3 -0 # Update # # ChangeSet # 2004/05/25 16:31:28+01:00 aia21@cantab.net # NTFS: Cleanup dirty ntfs inode handling (fs/ntfs/inode.[hc]) which also # includes an adapted ntfs_commit_inode() and an implementation of # ntfs_write_inode() which for now just cleans dirty inodes without # writing them (it does emit a warning that this is happening). # # fs/ntfs/inode.h # 2004/05/25 16:31:22+01:00 aia21@cantab.net +9 -0 # Cleanup dirty ntfs inode handling (fs/ntfs/inode.[hc]) which also # includes an adapted ntfs_commit_inode() and an implementation of # ntfs_write_inode() which for now just cleans dirty inodes without # writing them (it does emit a warning that this is happening). 
# # fs/ntfs/inode.c # 2004/05/25 16:31:22+01:00 aia21@cantab.net +84 -41 # Cleanup dirty ntfs inode handling (fs/ntfs/inode.[hc]) which also # includes an adapted ntfs_commit_inode() and an implementation of # ntfs_write_inode() which for now just cleans dirty inodes without # writing them (it does emit a warning that this is happening). # # fs/ntfs/dir.c # 2004/05/25 16:31:22+01:00 aia21@cantab.net +1 -1 # Change a rogue debug message to print inode number in hex, not decimal. # # fs/ntfs/ChangeLog # 2004/05/25 16:31:21+01:00 aia21@cantab.net +4 -0 # Update. # # fs/ntfs/super.c # 2004/05/20 14:52:43+01:00 aia21@cantab.net +1 -1 # Improve comment. # # fs/ntfs/inode.c # 2004/05/20 14:52:43+01:00 aia21@cantab.net +1 -1 # Improve comment. # # ChangeSet # 2004/05/17 22:45:04+01:00 aia21@cantab.net # NTFS: Add a new address space operations struct, ntfs_mst_aops, for mst # protected attributes. This is because the default ntfs_aops do not # make sense with mst protected data and were they to write anything to # such an attribute they would cause data corruption so we provide # ntfs_mst_aops which does not have any write related operations set. # # fs/ntfs/ntfs.h # 2004/05/17 22:42:28+01:00 aia21@cantab.net +1 -0 # Add a new address space operations struct, ntfs_mst_aops, for mst # protected attributes. # # fs/ntfs/inode.c # 2004/05/17 22:42:15+01:00 aia21@cantab.net +5 -2 # Add a new address space operations struct, ntfs_mst_aops, for mst # protected attributes. # # fs/ntfs/aops.c # 2004/05/17 22:41:44+01:00 aia21@cantab.net +9 -0 # Add a new address space operations struct, ntfs_mst_aops, for mst # protected attributes. 
# # fs/ntfs/Makefile # 2004/05/17 22:41:38+01:00 aia21@cantab.net +1 -1 # Update # # fs/ntfs/ChangeLog # 2004/05/17 22:41:31+01:00 aia21@cantab.net +8 -0 # Update # diff -Nru a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt --- a/Documentation/filesystems/ntfs.txt 2004-05-31 16:20:49 -07:00 +++ b/Documentation/filesystems/ntfs.txt 2004-05-31 16:20:49 -07:00 @@ -273,6 +273,9 @@ Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. +2.1.12: + - Fix the second fix to the decompression engine from the 2.1.9 release + and some further internals cleanups. 2.1.11: - Driver internal cleanups. 2.1.10: diff -Nru a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog --- a/fs/ntfs/ChangeLog 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/ChangeLog 2004-05-31 16:20:49 -07:00 @@ -11,7 +11,10 @@ pages as nothing can dirty a page other than ourselves. Should this change, we will really need to roll our own ->set_page_dirty(). - Implement sops->dirty_inode() to implement {a,m,c}time updates and - such things. + such things. This should probably just flag the ntfs inode such that + sops->write_inode(), i.e. ntfs_write_inode(), will copy the times + when it is invoked rather than having to update the mft record + every time. - Implement sops->write_inode(). - In between ntfs_prepare/commit_write, need exclusion between simultaneous file extensions. Need perhaps an NInoResizeUnderway() @@ -24,6 +27,42 @@ OTOH, perhaps i_sem, which is held accross generic_file_write is sufficient for synchronisation here. We then just need to make sure ntfs_readpage/writepage/truncate interoperate properly with us. + - Implement mft.c::sync_mft_mirror_umount(). We currently will just + leave the volume dirty on umount if the final iput(vol->mft_ino) + causes a write of any mirrored mft records due to the mft mirror + inode having been discarded already. Whether this can actually ever + happen is unclear however so it is worth waiting until someone hits + the problem. 
+ +2.1.13 - WIP. + + - Implement writing of mft records (fs/ntfs/mft.[hc]), which includes + keeping the mft mirror in sync with the mft when mirrored mft records + are written. The functions are write_mft_record{,_nolock}(). The + implementation is quite rudimentary for now with lots of things not + implemented yet but I am not sure any of them can actually occur so + I will wait for people to hit each one and only then implement it. + - Commit open system inodes at umount time. This should make it + virtually impossible for sync_mft_mirror_umount() to ever be needed. + - Implement ->write_inode (fs/ntfs/inode.c::ntfs_write_inode()) for the + ntfs super operations. This gives us inode writing via the VFS inode + dirty code paths. Note: Access time updates are not implemented yet. + +2.1.12 - Fix the second fix to the decompression engine and some cleanups. + + - Add a new address space operations struct, ntfs_mst_aops, for mst + protected attributes. This is because the default ntfs_aops do not + make sense with mst protected data and were they to write anything to + such an attribute they would cause data corruption so we provide + ntfs_mst_aops which does not have any write related operations set. + - Cleanup dirty ntfs inode handling (fs/ntfs/inode.[hc]) which also + includes an adapted ntfs_commit_inode() and an implementation of + ntfs_write_inode() which for now just cleans dirty inodes without + writing them (it does emit a warning that this is happening). + - Undo the second decompression engine fix (see 2.1.9 release ChangeLog + entry) as it was only fixing a theoretical bug but at the same time + it badly broke the handling of sparse and uncompressed compression + blocks. 2.1.11 - Driver internal cleanups. 
diff -Nru a/fs/ntfs/Makefile b/fs/ntfs/Makefile --- a/fs/ntfs/Makefile 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/Makefile 2004-05-31 16:20:49 -07:00 @@ -5,7 +5,7 @@ ntfs-objs := aops.o attrib.o compress.o debug.o dir.o file.o inode.o mft.o \ mst.o namei.o super.o sysctl.o unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.11\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.13-WIP\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff -Nru a/fs/ntfs/aops.c b/fs/ntfs/aops.c --- a/fs/ntfs/aops.c 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/aops.c 2004-05-31 16:20:49 -07:00 @@ -479,7 +479,7 @@ vol = ni->vol; ntfs_debug("Entering for inode %li, attribute type 0x%x, page index " - "0x%lx.\n", vi->i_ino, ni->type, page->index); + "0x%lx.", vi->i_ino, ni->type, page->index); BUG_ON(!NInoNonResident(ni)); BUG_ON(NInoMstProtected(ni)); @@ -1788,3 +1788,12 @@ #endif }; +/** + * ntfs_mst_aops - general address space operations for mst protecteed inodes + * and attributes + */ +struct address_space_operations ntfs_mst_aops = { + .readpage = ntfs_readpage, /* Fill page with data. */ + .sync_page = block_sync_page, /* Currently, just unplugs the + disk request queue. */ +}; diff -Nru a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c --- a/fs/ntfs/attrib.c 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/attrib.c 2004-05-31 16:20:49 -07:00 @@ -624,7 +624,7 @@ if (drl[ds].vcn == marker_vcn) { ntfs_debug("Old marker = 0x%llx, replacing " - "with LCN_ENOENT.\n", + "with LCN_ENOENT.", (unsigned long long) drl[ds].lcn); drl[ds].lcn = (LCN)LCN_ENOENT; @@ -1565,7 +1565,7 @@ goto do_next_attr_loop; } ntfs_error(base_ni->vol->sb, "Inode contains corrupt attribute list " - "attribute.\n"); + "attribute."); if (ni != base_ni) { unmap_extent_mft_record(ni); ctx->ntfs_ino = base_ni; diff -Nru a/fs/ntfs/compress.c b/fs/ntfs/compress.c --- a/fs/ntfs/compress.c 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/compress.c 2004-05-31 16:20:49 -07:00 @@ -433,7 +433,7 @@ goto do_next_tag; return_overflow: - ntfs_error(NULL, "Failed. 
Returning -EOVERFLOW.\n"); + ntfs_error(NULL, "Failed. Returning -EOVERFLOW."); goto return_error; } @@ -507,7 +507,7 @@ */ unsigned int nr_pages = (end_vcn - start_vcn) << vol->cluster_size_bits >> PAGE_CACHE_SHIFT; - unsigned int xpage, max_page, max_ofs, cur_page, cur_ofs, i; + unsigned int xpage, max_page, cur_page, cur_ofs, i; unsigned int cb_clusters, cb_max_ofs; int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0; struct page **pages; @@ -550,11 +550,8 @@ */ max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) - offset; - max_ofs = (VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) & ~PAGE_CACHE_MASK; - if (nr_pages < max_page) { + if (nr_pages < max_page) max_page = nr_pages; - max_ofs = 0; - } for (i = 0; i < max_page; i++, offset++) { if (i != xpage) pages[i] = grab_cache_page_nowait(mapping, offset); @@ -722,14 +719,8 @@ cb_max_page >>= PAGE_CACHE_SHIFT; /* Catch end of file inside a compression block. */ - if (cb_max_page >= max_page) { - if (cb_max_page > max_page) { - cb_max_page = max_page; - cb_max_ofs = max_ofs; - } else if (cb_max_ofs > max_ofs) { - cb_max_ofs = max_ofs; - } - } + if (cb_max_page > max_page) + cb_max_page = max_page; if (vcn == start_vcn - cb_clusters) { /* Sparse cb, zero out page range overlapping the cb. */ @@ -860,7 +851,7 @@ if (err) { ntfs_error(vol->sb, "ntfs_decompress() failed in inode " "0x%lx with error code %i. Skipping " - "this compression block.\n", + "this compression block.", ni->mft_no, -err); /* Release the unfinished pages. */ for (; prev_cur_page < cur_page; prev_cur_page++) { @@ -897,7 +888,8 @@ if (page) { ntfs_error(vol->sb, "Still have pages left! " "Terminating them with extreme " - "prejudice."); + "prejudice. 
Inode 0x%lx, page index " + "0x%lx.", ni->mft_no, page->index); if (cur_page == xpage && !xpage_done) SetPageError(page); flush_dcache_page(page); diff -Nru a/fs/ntfs/dir.c b/fs/ntfs/dir.c --- a/fs/ntfs/dir.c 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/dir.c 2004-05-31 16:20:49 -07:00 @@ -1196,7 +1196,7 @@ ia_mapping = vdir->i_mapping; bmp_vi = ndir->itype.index.bmp_ino; if (unlikely(!bmp_vi)) { - ntfs_debug("Inode %lu, regetting index bitmap.", vdir->i_ino); + ntfs_debug("Inode 0x%lx, regetting index bitmap.", vdir->i_ino); bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); if (unlikely(IS_ERR(bmp_vi))) { ntfs_error(sb, "Failed to get bitmap attribute."); diff -Nru a/fs/ntfs/inode.c b/fs/ntfs/inode.c --- a/fs/ntfs/inode.c 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/inode.c 2004-05-31 16:20:49 -07:00 @@ -872,7 +872,7 @@ /* Setup the operations for this inode. */ vi->i_op = &ntfs_dir_inode_ops; vi->i_fop = &ntfs_dir_ops; - vi->i_mapping->a_ops = &ntfs_aops; + vi->i_mapping->a_ops = &ntfs_mst_aops; } else { /* It is a file. */ reinit_attr_search_ctx(ctx); @@ -1249,7 +1249,10 @@ /* Setup the operations for this attribute inode. */ vi->i_op = NULL; vi->i_fop = NULL; - vi->i_mapping->a_ops = &ntfs_aops; + if (NInoMstProtected(ni)) + vi->i_mapping->a_ops = &ntfs_mst_aops; + else + vi->i_mapping->a_ops = &ntfs_aops; if (!NInoCompressed(ni)) vi->i_blocks = ni->allocated_size >> 9; @@ -1339,7 +1342,7 @@ ni->name_len = 0; /* - * This sets up our little cheat allowing us to reuse the async io + * This sets up our little cheat allowing us to reuse the async read io * completion handler for directories. 
*/ ni->itype.index.block_size = vol->mft_record_size; @@ -1703,18 +1706,6 @@ } /** - * ntfs_commit_inode - write out a dirty inode - * @ni: inode to write out - * - */ -int ntfs_commit_inode(ntfs_inode *ni) -{ - ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); - NInoClearDirty(ni); - return 0; -} - -/** * ntfs_put_inode - handler for when the inode reference count is decremented * @vi: vfs inode * @@ -1742,34 +1733,6 @@ void __ntfs_clear_inode(ntfs_inode *ni) { - int err; - - ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); - if (NInoDirty(ni)) { - err = ntfs_commit_inode(ni); - if (err) { - ntfs_error(ni->vol->sb, "Failed to commit dirty " - "inode synchronously."); - // FIXME: Do something!!! - } - } - /* Synchronize with ntfs_commit_inode(). */ - down(&ni->mrec_lock); - up(&ni->mrec_lock); - if (NInoDirty(ni)) { - ntfs_error(ni->vol->sb, "Failed to commit dirty inode " - "asynchronously."); - // FIXME: Do something!!! - } - /* No need to lock at this stage as no one else has a reference. */ - if (ni->nr_extents > 0) { - int i; - - // FIXME: Handle dirty case for each extent inode! - for (i = 0; i < ni->nr_extents; i++) - ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]); - kfree(ni->ext.extent_ntfs_inos); - } /* Free all alocated memory. */ down_write(&ni->run_list.lock); if (ni->run_list.rl) { @@ -1799,6 +1762,20 @@ void ntfs_clear_extent_inode(ntfs_inode *ni) { + ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); + + BUG_ON(NInoAttr(ni)); + BUG_ON(ni->nr_extents != -1); + +#ifdef NTFS_RW + if (NInoDirty(ni)) { + if (!is_bad_inode(VFS_I(ni->ext.base_ntfs_ino))) + ntfs_error(ni->vol->sb, "Clearing dirty extent inode! " + "Losing data! This is a BUG!!!"); + // FIXME: Do something!!! + } +#endif /* NTFS_RW */ + __ntfs_clear_inode(ni); /* Bye, bye... */ @@ -1819,6 +1796,30 @@ { ntfs_inode *ni = NTFS_I(vi); +#ifdef NTFS_RW + if (NInoDirty(ni)) { + BOOL was_bad = (is_bad_inode(vi)); + + /* Committing the inode also commits all extent inodes. 
*/ + ntfs_commit_inode(vi); + + if (!was_bad && (is_bad_inode(vi) || NInoDirty(ni))) { + ntfs_error(vi->i_sb, "Failed to commit dirty inode " + "0x%lx. Losing data!", vi->i_ino); + // FIXME: Do something!!! + } + } +#endif /* NTFS_RW */ + + /* No need to lock at this stage as no one else has a reference. */ + if (ni->nr_extents > 0) { + int i; + + for (i = 0; i < ni->nr_extents; i++) + ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]); + kfree(ni->ext.extent_ntfs_inos); + } + __ntfs_clear_inode(ni); if (NInoAttr(ni)) { @@ -1959,4 +1960,134 @@ return err; } +/** + * ntfs_write_inode - write out a dirty inode + * @vi: inode to write out + * @sync: if true, write out synchronously + * + * Write out a dirty inode to disk including any extent inodes if present. + * + * If @sync is true, commit the inode to disk and wait for io completion. This + * is done using write_mft_record(). + * + * If @sync is false, just schedule the write to happen but do not wait for i/o + * completion. In 2.6 kernels, scheduling usually happens just by virtue of + * marking the page (and in this case mft record) dirty but we do not implement + * this yet as write_mft_record() largely ignores the @sync parameter and + * always performs synchronous writes. + */ +void ntfs_write_inode(struct inode *vi, int sync) +{ + ntfs_inode *ni = NTFS_I(vi); +#if 0 + attr_search_context *ctx; #endif + MFT_RECORD *m; + int err = 0; + + ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "", + vi->i_ino); + /* + * Dirty attribute inodes are written via their real inodes so just + * clean them here. TODO: Take care of access time updates. + */ + if (NInoAttr(ni)) { + NInoClearDirty(ni); + return; + } + /* Map, pin, and lock the mft record belonging to the inode. */ + m = map_mft_record(ni); + if (unlikely(IS_ERR(m))) { + err = PTR_ERR(m); + goto err_out; + } +#if 0 + /* Obtain the standard information attribute. 
*/ + ctx = get_attr_search_ctx(ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto unm_err_out; + } + if (unlikely(!lookup_attr(AT_STANDARD_INFORMATION, NULL, 0, + IGNORE_CASE, 0, NULL, 0, ctx))) { + put_attr_search_ctx(ctx); + err = -ENOENT; + goto unm_err_out; + } + // TODO: Update the access times in the standard information attribute + // which is now in ctx->attr. + // - Probably want to have use sops->dirty_inode() to set a flag that + // we need to update the times here rather than having to blindly do + // it every time. Or even don't do it here at all and do it in + // sops->dirty_inode() instead. Problem with this would be that + // sops->dirty_inode() must be atomic under certain circumstances + // and mapping mft records and such like is not atomic. + // - For atime updates also need to check whether they are enabled in + // the superblock flags. + ntfs_warning(vi->i_sb, "Access time updates not implement yet."); + /* + * We just modified the mft record containing the standard information + * attribute. So need to mark the mft record dirty, too, but we do it + * manually so that mark_inode_dirty() is not called again. + * TODO: Only do this if there was a change in any of the times! + */ + if (!NInoTestSetDirty(ctx->ntfs_ino)) + __set_page_dirty_nobuffers(ctx->ntfs_ino->page); + put_attr_search_ctx(ctx); +#endif + /* Write this base mft record. */ + if (NInoDirty(ni)) + err = write_mft_record(ni, m, sync); + /* Write all attached extent mft records. 
*/ + down(&ni->extent_lock); + if (ni->nr_extents > 0) { + ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos; + int i; + + ntfs_debug("Writing %i extent inodes.", ni->nr_extents); + for (i = 0; i < ni->nr_extents; i++) { + ntfs_inode *tni = extent_nis[i]; + + if (NInoDirty(tni)) { + MFT_RECORD *tm = map_mft_record(tni); + int ret; + + if (unlikely(IS_ERR(tm))) { + if (!err || err == -ENOMEM) + err = PTR_ERR(tm); + continue; + } + ret = write_mft_record(tni, tm, sync); + unmap_mft_record(tni); + if (unlikely(ret)) { + if (!err || err == -ENOMEM) + err = ret; + } + } + } + } + up(&ni->extent_lock); + unmap_mft_record(ni); + if (unlikely(err)) + goto err_out; + ntfs_debug("Done."); + return; +#if 0 +unm_err_out: + unmap_mft_record(ni); +#endif +err_out: + if (err == -ENOMEM) { + ntfs_warning(vi->i_sb, "Not enough memory to write inode. " + "Marking the inode dirty again, so the VFS " + "retries later."); + mark_inode_dirty(vi); + } else { + ntfs_error(vi->i_sb, "Failed (error code %i): Marking inode " + "as bad. You should run chkdsk.", -err); + make_bad_inode(vi); + } + return; +} + +#endif /* NTFS_RW */ diff -Nru a/fs/ntfs/inode.h b/fs/ntfs/inode.h --- a/fs/ntfs/inode.h 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/inode.h 2004-05-31 16:20:49 -07:00 @@ -281,6 +281,15 @@ extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); +extern void ntfs_write_inode(struct inode *vi, int sync); + +static inline void ntfs_commit_inode(struct inode *vi) +{ + if (!is_bad_inode(vi)) + ntfs_write_inode(vi, 1); + return; +} + #endif /* NTFS_RW */ #endif /* _LINUX_NTFS_INODE_H */ diff -Nru a/fs/ntfs/mft.c b/fs/ntfs/mft.c --- a/fs/ntfs/mft.c 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/mft.c 2004-05-31 16:20:49 -07:00 @@ -429,3 +429,390 @@ ntfs_clear_extent_inode(ni); return m; } + +#ifdef NTFS_RW + +static const char *ntfs_please_email = "Please email " + "linux-ntfs-dev@lists.sourceforge.net and say that you saw " + "this message. 
Thank you."; + +/** + * sync_mft_mirror_umount - synchronise an mft record to the mft mirror + * @ni: ntfs inode whose mft record to synchronize + * @m: mapped, mst protected (extent) mft record to synchronize + * + * Write the mapped, mst protected (extent) mft record @m described by the + * (regular or extent) ntfs inode @ni to the mft mirror ($MFTMirr) bypassing + * the page cache and the $MFTMirr inode itself. + * + * This function is only for use at umount time when the mft mirror inode has + * already been disposed off. We BUG() if we are called while the mft mirror + * inode is still attached to the volume. + * + * On success return 0. On error return -errno. + * + * NOTE: This function is not implemented yet as I am not convinced it can + * actually be triggered considering the sequence of commits we do in super.c:: + * ntfs_put_super(). But just in case we provide this place holder as the + * alternative would be either to BUG() or to get a NULL pointer dereference + * and Oops. + */ +static int sync_mft_mirror_umount(ntfs_inode *ni, MFT_RECORD *m) +{ + ntfs_volume *vol = ni->vol; + + BUG_ON(vol->mftmirr_ino); + ntfs_error(vol->sb, "Umount time mft mirror syncing is not " + "implemented yet. %s", ntfs_please_email); + return -EOPNOTSUPP; +} + +/** + * sync_mft_mirror - synchronize an mft record to the mft mirror + * @ni: ntfs inode whose mft record to synchronize + * @m: mapped, mst protected (extent) mft record to synchronize + * @sync: if true, wait for i/o completion + * + * Write the mapped, mst protected (extent) mft record @m described by the + * (regular or extent) ntfs inode @ni to the mft mirror ($MFTMirr). + * + * On success return 0. On error return -errno and set the volume errors flag + * in the ntfs_volume to which @ni belongs. + * + * NOTE: We always perform synchronous i/o and ignore the @sync parameter. + * + * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. 
just + * schedule i/o via ->writepage or do it via kntfsd or whatever. + */ +static int sync_mft_mirror(ntfs_inode *ni, MFT_RECORD *m, int sync) +{ + ntfs_volume *vol = ni->vol; + struct page *page; + unsigned int blocksize = vol->sb->s_blocksize; + int max_bhs = vol->mft_record_size / blocksize; + struct buffer_head *bhs[max_bhs]; + struct buffer_head *bh, *head; + u8 *kmirr; + unsigned int block_start, block_end, m_start, m_end; + int i_bhs, nr_bhs, err = 0; + + ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); + BUG_ON(!max_bhs); + if (unlikely(!vol->mftmirr_ino)) { + /* This could happen during umount... */ + err = sync_mft_mirror_umount(ni, m); + if (likely(!err)) + return err; + goto err_out; + } + /* Get the page containing the mirror copy of the mft record @m. */ + page = ntfs_map_page(vol->mftmirr_ino->i_mapping, ni->mft_no >> + (PAGE_CACHE_SHIFT - vol->mft_record_size_bits)); + if (unlikely(IS_ERR(page))) { + ntfs_error(vol->sb, "Failed to map mft mirror page."); + err = PTR_ERR(page); + goto err_out; + } + /* + * Exclusion against other writers. This should never be a problem + * since the page in which the mft record @m resides is also locked and + * hence any other writers would be held up there but it is better to + * make sure no one is writing from elsewhere. + */ + lock_page(page); + /* The address in the page of the mirror copy of the mft record @m. */ + kmirr = page_address(page) + ((ni->mft_no << vol->mft_record_size_bits) + & ~PAGE_CACHE_MASK); + /* Copy the mst protected mft record to the mirror. */ + memcpy(kmirr, m, vol->mft_record_size); + /* Make sure we have mapped buffers. */ + if (!page_has_buffers(page)) { +no_buffers_err_out: + ntfs_error(vol->sb, "Writing mft mirror records without " + "existing buffers is not implemented yet. 
%s", + ntfs_please_email); + err = -EOPNOTSUPP; + goto unlock_err_out; + } + bh = head = page_buffers(page); + if (!bh) + goto no_buffers_err_out; + nr_bhs = 0; + block_start = 0; + m_start = kmirr - (u8*)page_address(page); + m_end = m_start + vol->mft_record_size; + do { + block_end = block_start + blocksize; + /* + * If the buffer is outside the mft record, just skip it, + * clearing it if it is dirty to make sure it is not written + * out. It should never be marked dirty but better be safe. + */ + if ((block_end <= m_start) || (block_start >= m_end)) { + if (buffer_dirty(bh)) { + ntfs_warning(vol->sb, "Clearing dirty mft " + "record page buffer. %s", + ntfs_please_email); + clear_buffer_dirty(bh); + } + continue; + } + if (!buffer_mapped(bh)) { + ntfs_error(vol->sb, "Writing mft mirror records " + "without existing mapped buffers is " + "not implemented yet. %s", + ntfs_please_email); + err = -EOPNOTSUPP; + continue; + } + if (!buffer_uptodate(bh)) { + ntfs_error(vol->sb, "Writing mft mirror records " + "without existing uptodate buffers is " + "not implemented yet. %s", + ntfs_please_email); + err = -EOPNOTSUPP; + continue; + } + BUG_ON(!nr_bhs && (m_start != block_start)); + BUG_ON(nr_bhs >= max_bhs); + bhs[nr_bhs++] = bh; + BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end)); + } while (block_start = block_end, (bh = bh->b_this_page) != head); + if (likely(!err)) { + /* Lock buffers and start synchronous write i/o on them. */ + for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { + struct buffer_head *tbh = bhs[i_bhs]; + + if (unlikely(test_set_buffer_locked(tbh))) + BUG(); + BUG_ON(!buffer_uptodate(tbh)); + if (buffer_dirty(tbh)) + clear_buffer_dirty(tbh); + get_bh(tbh); + tbh->b_end_io = end_buffer_write_sync; + submit_bh(WRITE, tbh); + } + /* Wait on i/o completion of buffers. 
*/ + for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { + struct buffer_head *tbh = bhs[i_bhs]; + + wait_on_buffer(tbh); + if (unlikely(!buffer_uptodate(tbh))) { + err = -EIO; + /* + * Set the buffer uptodate so the page & buffer + * states don't become out of sync. + */ + if (PageUptodate(page)) + set_buffer_uptodate(tbh); + } + } + } else /* if (unlikely(err)) */ { + /* Clean the buffers. */ + for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) + clear_buffer_dirty(bhs[i_bhs]); + } +unlock_err_out: + /* Current state: all buffers are clean, unlocked, and uptodate. */ + /* Remove the mst protection fixups again. */ + post_write_mst_fixup((NTFS_RECORD*)kmirr); + flush_dcache_page(page); + unlock_page(page); + ntfs_unmap_page(page); + if (unlikely(err)) { + /* I/O error during writing. This is really bad! */ + ntfs_error(vol->sb, "I/O error while writing mft mirror " + "record 0x%lx! You should unmount the volume " + "and run chkdsk or ntfsfix.", ni->mft_no); + goto err_out; + } + ntfs_debug("Done."); + return 0; +err_out: + ntfs_error(vol->sb, "Failed to synchronize $MFTMirr (error code %i). " + "Volume will be left marked dirty on umount. Run " + "ntfsfix on the partition after umounting to correct " + "this.", -err); + /* We don't want to clear the dirty bit on umount. */ + NVolSetErrors(vol); + return err; +} + +/** + * write_mft_record_nolock - write out a mapped (extent) mft record + * @ni: ntfs inode describing the mapped (extent) mft record + * @m: mapped (extent) mft record to write + * @sync: if true, wait for i/o completion + * + * Write the mapped (extent) mft record @m described by the (regular or extent) + * ntfs inode @ni to backing store. If the mft record @m has a counterpart in + * the mft mirror, that is also updated. + * + * On success, clean the mft record and return 0. On error, leave the mft + * record dirty and return -errno. The caller should call make_bad_inode() on + * the base inode to ensure no more access happens to this inode. 
We do not do + * it here as the caller may want to finish writing other extent mft records + * first to minimize on-disk metadata inconsistencies. + * + * NOTE: We always perform synchronous i/o and ignore the @sync parameter. + * However, if the mft record has a counterpart in the mft mirror and @sync is + * true, we write the mft record, wait for i/o completion, and only then write + * the mft mirror copy. This ensures that if the system crashes either the mft + * or the mft mirror will contain a self-consistent mft record @m. If @sync is + * false on the other hand, we start i/o on both and then wait for completion + * on them. This provides a speedup but no longer guarantees that you will end + * up with a self-consistent mft record in the case of a crash but if you asked + * for asynchronous writing you probably do not care about that anyway. + * + * TODO: If @sync is false, want to do truly asynchronous i/o, i.e. just + * schedule i/o via ->writepage or do it via kntfsd or whatever. + */ +int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) +{ + ntfs_volume *vol = ni->vol; + struct page *page = ni->page; + unsigned int blocksize = vol->sb->s_blocksize; + int max_bhs = vol->mft_record_size / blocksize; + struct buffer_head *bhs[max_bhs]; + struct buffer_head *bh, *head; + unsigned int block_start, block_end, m_start, m_end; + int i_bhs, nr_bhs, err = 0; + + ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); + BUG_ON(NInoAttr(ni)); + BUG_ON(!max_bhs); + BUG_ON(!page); + BUG_ON(!PageLocked(page)); + /* + * If the ntfs_inode is clean no need to do anything. If it is dirty, + * mark it as clean now so that it can be redirtied later on if needed. + * There is no danger of races as long as the caller is holding the + * locks for the mft record @m and the page it is in. + */ + if (!NInoTestClearDirty(ni)) + goto done; + /* Make sure we have mapped buffers. 
*/ + if (!page_has_buffers(page)) { +no_buffers_err_out: + ntfs_error(vol->sb, "Writing mft records without existing " + "buffers is not implemented yet. %s", + ntfs_please_email); + err = -EOPNOTSUPP; + goto err_out; + } + bh = head = page_buffers(page); + if (!bh) + goto no_buffers_err_out; + nr_bhs = 0; + block_start = 0; + m_start = ni->page_ofs; + m_end = m_start + vol->mft_record_size; + do { + block_end = block_start + blocksize; + /* + * If the buffer is outside the mft record, just skip it, + * clearing it if it is dirty to make sure it is not written + * out. It should never be marked dirty but better be safe. + * + * Note that every continue in this loop jumps to the while () + * condition below, which is where block_start and bh advance, + * so a skipped or failed buffer does not stall the walk; err is + * only acted upon once the walk over the page has finished. + */ + if ((block_end <= m_start) || (block_start >= m_end)) { + if (buffer_dirty(bh)) { + ntfs_warning(vol->sb, "Clearing dirty mft " + "record page buffer. %s", + ntfs_please_email); + clear_buffer_dirty(bh); + } + continue; + } + if (!buffer_mapped(bh)) { + ntfs_error(vol->sb, "Writing mft records without " + "existing mapped buffers is not " + "implemented yet. %s", + ntfs_please_email); + err = -EOPNOTSUPP; + continue; + } + if (!buffer_uptodate(bh)) { + ntfs_error(vol->sb, "Writing mft records without " + "existing uptodate buffers is not " + "implemented yet. %s", + ntfs_please_email); + err = -EOPNOTSUPP; + continue; + } + BUG_ON(!nr_bhs && (m_start != block_start)); + BUG_ON(nr_bhs >= max_bhs); + bhs[nr_bhs++] = bh; + BUG_ON((nr_bhs >= max_bhs) && (m_end != block_end)); + } while (block_start = block_end, (bh = bh->b_this_page) != head); + if (unlikely(err)) + goto cleanup_out; + /* Apply the mst protection fixups. */ + err = pre_write_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size); + if (err) { + ntfs_error(vol->sb, "Failed to apply mst fixups!"); + goto cleanup_out; + } + flush_dcache_mft_record_page(ni); + /* Lock buffers and start synchronous write i/o on them. 
*/ + for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { + struct buffer_head *tbh = bhs[i_bhs]; + + if (unlikely(test_set_buffer_locked(tbh))) + BUG(); + BUG_ON(!buffer_uptodate(tbh)); + if (buffer_dirty(tbh)) + clear_buffer_dirty(tbh); + get_bh(tbh); + tbh->b_end_io = end_buffer_write_sync; + submit_bh(WRITE, tbh); + } + /* Synchronize the mft mirror now if not @sync. */ + if (!sync && ni->mft_no < vol->mftmirr_size) + sync_mft_mirror(ni, m, sync); + /* Wait on i/o completion of buffers. */ + for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { + struct buffer_head *tbh = bhs[i_bhs]; + + wait_on_buffer(tbh); + if (unlikely(!buffer_uptodate(tbh))) { + err = -EIO; + /* + * Set the buffer uptodate so the page & buffer states + * don't become out of sync. + */ + if (PageUptodate(page)) + set_buffer_uptodate(tbh); + } + } + /* If @sync, now synchronize the mft mirror. */ + if (sync && ni->mft_no < vol->mftmirr_size) + sync_mft_mirror(ni, m, sync); + /* Remove the mst protection fixups again. */ + post_write_mst_fixup((NTFS_RECORD*)m); + flush_dcache_mft_record_page(ni); + if (unlikely(err)) { + /* I/O error during writing. This is really bad! */ + ntfs_error(vol->sb, "I/O error while writing mft record " + "0x%lx! Marking base inode as bad. You " + "should unmount the volume and run chkdsk.", + ni->mft_no); + goto err_out; + } +done: + ntfs_debug("Done."); + return 0; +cleanup_out: + /* Clean the buffers. */ + for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) + clear_buffer_dirty(bhs[i_bhs]); +err_out: + /* + * Current state: all buffers are clean, unlocked, and uptodate. + * The caller should mark the base inode as bad so that no more i/o + * happens. ->clear_inode() will still be invoked so all extent inodes + * and other allocated memory will be freed. 
+ */ + return err; +} + +#endif /* NTFS_RW */ diff -Nru a/fs/ntfs/mft.h b/fs/ntfs/mft.h --- a/fs/ntfs/mft.h 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/mft.h 2004-05-31 16:20:49 -07:00 @@ -57,6 +57,41 @@ flush_dcache_page(ni->page); } +extern int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync); + +/** + * write_mft_record - write out a mapped (extent) mft record + * @ni: ntfs inode describing the mapped (extent) mft record + * @m: mapped (extent) mft record to write + * @sync: if true, wait for i/o completion + * + * This is just a wrapper for write_mft_record_nolock() (see mft.c), which + * locks the page for the duration of the write. This ensures that there are + * no race conditions between writing the mft record via the dirty inode code + * paths and via the page cache write back code paths or between writing + * neighbouring mft records residing in the same page. + * + * Locking the page also serializes us against ->readpage() if the page is not + * uptodate. + * + * On success, clean the mft record and return 0. On error, leave the mft + * record dirty and return -errno. The caller should call make_bad_inode() on + * the base inode to ensure no more access happens to this inode. We do not do + * it here as the caller may want to finish writing other extent mft records + * first to minimize on-disk metadata inconsistencies. + * + * NOTE(review): write_mft_record_nolock() clears the ntfs inode dirty flag + * with NInoTestClearDirty() before it starts the write and none of its error + * paths set the flag again, so confirm that the mft record really is left + * dirty after a failure. + */ +static inline int write_mft_record(ntfs_inode *ni, MFT_RECORD *m, int sync) +{ + struct page *page = ni->page; + int err; + + BUG_ON(!page); + lock_page(page); + err = write_mft_record_nolock(ni, m, sync); + unlock_page(page); + return err; +} + #endif /* NTFS_RW */ #endif /* _LINUX_NTFS_MFT_H */ diff -Nru a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h --- a/fs/ntfs/ntfs.h 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/ntfs.h 2004-05-31 16:20:49 -07:00 @@ -62,6 +62,7 @@ /* The various operations structs defined throughout the driver files. 
*/ extern struct super_operations ntfs_sops; extern struct address_space_operations ntfs_aops; +extern struct address_space_operations ntfs_mst_aops; extern struct address_space_operations ntfs_mft_aops; extern struct file_operations ntfs_file_ops; diff -Nru a/fs/ntfs/super.c b/fs/ntfs/super.c --- a/fs/ntfs/super.c 2004-05-31 16:20:49 -07:00 +++ b/fs/ntfs/super.c 2004-05-31 16:20:49 -07:00 @@ -763,7 +763,7 @@ /* The $MFTMirr, like the $MFT is multi sector transfer protected. */ NInoSetMstProtected(tmp_ni); /* - * Set up our little cheat allowing us to reuse the async io + * Set up our little cheat allowing us to reuse the async read io * completion handler for directories. */ tmp_ni->itype.index.block_size = vol->mft_record_size; @@ -1142,7 +1142,7 @@ #ifdef NTFS_RW /* Make sure that no unsupported volume flags are set. */ if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { - static const char *es1 = "Volume has unsupported flags set "; + static const char *es1 = "Volume has unsupported flags set"; static const char *es2 = ". Run chkdsk and mount in Windows."; /* If a read-write mount, convert it to a read-only mount. */ @@ -1302,6 +1302,38 @@ ntfs_debug("Entering."); +#ifdef NTFS_RW + /* + * Commit all inodes while they are still open in case some of them + * cause others to be dirtied. + */ + ntfs_commit_inode(vol->vol_ino); + + /* NTFS 3.0+ specific. 
*/ + if (vol->major_ver >= 3) { + if (vol->secure_ino) + ntfs_commit_inode(vol->secure_ino); + } + + ntfs_commit_inode(vol->root_ino); + + down_write(&vol->lcnbmp_lock); + ntfs_commit_inode(vol->lcnbmp_ino); + up_write(&vol->lcnbmp_lock); + + down_write(&vol->mftbmp_lock); + ntfs_commit_inode(vol->mftbmp_ino); + up_write(&vol->mftbmp_lock); + + if (vol->logfile_ino) + ntfs_commit_inode(vol->logfile_ino); + + if (vol->mftmirr_ino) + ntfs_commit_inode(vol->mftmirr_ino); + + ntfs_commit_inode(vol->mft_ino); +#endif /* NTFS_RW */ + iput(vol->vol_ino); vol->vol_ino = NULL; @@ -1333,6 +1365,9 @@ } if (vol->mftmirr_ino) { + /* Re-commit the mft mirror and mft just in case. */ + ntfs_commit_inode(vol->mftmirr_ino); + ntfs_commit_inode(vol->mft_ino); iput(vol->mftmirr_ino); vol->mftmirr_ino = NULL; } @@ -1629,8 +1664,8 @@ #ifdef NTFS_RW //.dirty_inode = NULL, /* VFS: Called from // __mark_inode_dirty(). */ - //.write_inode = NULL, /* VFS: Write dirty inode to - // disk. */ + .write_inode = ntfs_write_inode, /* VFS: Write dirty inode to + disk. */ //.drop_inode = NULL, /* VFS: Called just after the // inode reference count has // been decreased to zero.