diff options
Diffstat (limited to 'fs/xfs/linux-2.6/xfs_super.c')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 206 |
1 files changed, 162 insertions, 44 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 77414db10dc2..f2d1718c9165 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -61,6 +61,7 @@ #include <linux/namei.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/mount.h> #include <linux/mempool.h> #include <linux/writeback.h> @@ -118,6 +119,8 @@ mempool_t *xfs_ioend_pool; #define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */ #define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ #define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ +#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */ +#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */ /* * Table driven mount option parser. @@ -373,6 +376,13 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_DMI)) { mp->m_flags |= XFS_MOUNT_DMAPI; + } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { + mp->m_flags |= XFS_MOUNT_DELAYLOG; + cmn_err(CE_WARN, + "Enabling EXPERIMENTAL delayed logging feature " + "- use at your own risk.\n"); + } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { + mp->m_flags &= ~XFS_MOUNT_DELAYLOG; } else if (!strcmp(this_char, "ihashsize")) { cmn_err(CE_WARN, "XFS: ihashsize no longer used, option is deprecated."); @@ -534,6 +544,7 @@ xfs_showargs( { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI }, { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, + { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, { 0, NULL } }; static struct proc_xfs_info xfs_info_unset[] = { @@ -724,7 +735,8 @@ void xfs_blkdev_issue_flush( xfs_buftarg_t *buftarg) { - blkdev_issue_flush(buftarg->bt_bdev, NULL); + blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, + BLKDEV_IFL_WAIT); } STATIC void @@ -788,18 +800,18 @@ xfs_open_devices( * Setup xfs_mount buffer target pointers */ error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0); + mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); if (!mp->m_ddev_targp) goto out_close_rtdev; if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1); + mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); if (!mp->m_rtdev_targp) goto out_free_ddev_targ; } if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1); + mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); if (!mp->m_logdev_targp) goto out_free_rtdev_targ; } else { @@ -877,12 +889,11 @@ xfsaild( { struct xfs_ail *ailp = data; xfs_lsn_t last_pushed_lsn = 0; - long tout = 0; + long tout = 0; /* milliseconds */ while (!kthread_should_stop()) { - if (tout) - schedule_timeout_interruptible(msecs_to_jiffies(tout)); - tout = 1000; + schedule_timeout_interruptible(tout ? + msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT); /* swsusp */ try_to_freeze(); @@ -902,7 +913,8 @@ xfsaild_start( struct xfs_ail *ailp) { ailp->xa_target = 0; - ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild"); + ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s", + ailp->xa_mount->m_fsname); if (IS_ERR(ailp->xa_task)) return -PTR_ERR(ailp->xa_task); return 0; @@ -1022,59 +1034,109 @@ xfs_fs_dirty_inode( XFS_I(inode)->i_update_core = 1; } -/* - * Attempt to flush the inode, this will actually fail - * if the inode is pinned, but we dirty the inode again - * at the point when it is unpinned after a log write, - * since this is when the inode itself becomes flushable. - */ +STATIC int +xfs_log_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + + if (error) { + xfs_trans_cancel(tp, 0); + /* we need to return with the lock hold shared */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out of the + * way during trans_reserve which would flush the inode. But there's + * no guarantee that the inode buffer has actually gone out yet (it's + * delwri). Plus the buffer could be pinned anyway if it's part of + * an inode in another recent transaction. So we play it safe and + * fire off the transaction anyway. + */ + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + + return error; +} + STATIC int xfs_fs_write_inode( struct inode *inode, - int sync) + struct writeback_control *wbc) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - int error = 0; + int error = EAGAIN; xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - if (sync) { - error = xfs_wait_on_pages(ip, 0, -1); - if (error) + if (wbc->sync_mode == WB_SYNC_ALL) { + /* + * Make sure the inode has hit stable storage. By using the + * log and the fsync transactions we reduce the IOs we have + * to do here from two (log and inode) to just the log. + * + * Note: We still need to do a delwri write of the inode after + * this to flush it to the backing buffer so that bulkstat + * works properly if this is the first time the inode has been + * written. Because we hold the ilock atomically over the + * transaction commit and the inode flush we are guaranteed + * that the inode is not pinned when it returns. If the flush + * lock is already held, then the inode has already been + * flushed once and we don't need to flush it again. Hence + * the code will only flush the inode if it isn't already + * being flushed. + */ + xfs_ioend_wait(ip); + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (ip->i_update_core) { + error = xfs_log_inode(ip); + if (error) + goto out_unlock; + } + } else { + /* + * We make this non-blocking if the inode is contended, return + * EAGAIN to indicate to the caller that they did not succeed. + * This prevents the flush path from blocking on inodes inside + * another operation right now, they get caught later by xfs_sync. + */ + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) goto out; } - /* - * Bypass inodes which have already been cleaned by - * the inode flush clustering code inside xfs_iflush - */ - if (xfs_inode_clean(ip)) - goto out; + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; /* - * We make this non-blocking if the inode is contended, return - * EAGAIN to indicate to the caller that they did not succeed. - * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by xfs_sync. + * Now we have the flush lock and the inode is not pinned, we can check + * if the inode is really clean as we know that there are no pending + * transaction completions, it is not waiting on the delayed write + * queue and there is no IO in progress. */ - if (sync) { - xfs_ilock(ip, XFS_ILOCK_SHARED); - xfs_iflock(ip); - - error = xfs_iflush(ip, XFS_IFLUSH_SYNC); - } else { - error = EAGAIN; - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) - goto out; - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; - - error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK); + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; } + error = xfs_iflush(ip, 0); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -1160,6 +1222,7 @@ xfs_fs_put_super( xfs_unmountfs(mp); xfs_freesb(mp); + xfs_inode_shrinker_unregister(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); xfs_dmops_put(mp); @@ -1257,6 +1320,29 @@ xfs_fs_statfs( return 0; } +STATIC void +xfs_save_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks = 0; + + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC void +xfs_restore_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks; + + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else + resblks = xfs_default_resblks(mp); + + xfs_reserve_blocks(mp, &resblks, NULL); +} + STATIC int xfs_fs_remount( struct super_block *sb, @@ -1336,11 +1422,27 @@ xfs_fs_remount( } mp->m_update_flags = 0; } + + /* + * Fill out the reserve pool if it is empty. Use the stashed + * value if it is non-zero, otherwise go with the default. + */ + xfs_restore_resvblks(mp); } /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* + * After we have synced the data but before we sync the + * metadata, we need to free up the reserve block pool so that + * the used block count in the superblock on disk is correct at + * the end of the remount. Stash the current reserve pool size + * so that if we get remounted rw, we can return it to the same + * size. + */ + xfs_quiesce_data(mp); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1359,11 +1461,22 @@ xfs_fs_freeze( { struct xfs_mount *mp = XFS_M(sb); + xfs_save_resvblks(mp); xfs_quiesce_attr(mp); return -xfs_fs_log_dummy(mp); } STATIC int +xfs_fs_unfreeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_restore_resvblks(mp); + return 0; +} + +STATIC int xfs_fs_show_options( struct seq_file *m, struct vfsmount *mnt) @@ -1523,6 +1636,8 @@ xfs_fs_fill_super( if (error) goto fail_vnrele; + xfs_inode_shrinker_register(mp); + kfree(mtpt); return 0; @@ -1585,6 +1700,7 @@ static const struct super_operations xfs_super_operations = { .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, + .unfreeze_fs = xfs_fs_unfreeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, @@ -1649,7 +1765,7 @@ xfs_init_zones(void) * but it is much faster. */ xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + - (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) / + (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD) * sizeof(int))), "xfs_buf_item"); if (!xfs_buf_item_zone) goto out_destroy_trans_zone; @@ -1767,6 +1883,7 @@ init_xfs_fs(void) goto out_cleanup_procfs; vfs_initquota(); + xfs_inode_shrinker_init(); error = register_filesystem(&xfs_fs_type); if (error) @@ -1794,6 +1911,7 @@ exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); + xfs_inode_shrinker_destroy(); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_buf_terminate(); |