diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/Kconfig | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 27 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_defer.c | 28 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_defer.h | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_inode_buf.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot_item_recover.c | 21 | ||||
-rw-r--r-- | fs/xfs/xfs_fsops.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item_recover.c | 46 | ||||
-rw-r--r-- | fs/xfs/xfs_ioctl.c | 30 | ||||
-rw-r--r-- | fs/xfs/xfs_iops.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 24 |
16 files changed, 155 insertions, 78 deletions
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index ed0bc8cbc703..567fb37274d3 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS bool "XFS online metadata check usage data collection" default y depends on XFS_ONLINE_SCRUB - select XFS_DEBUG + select DEBUG_FS help If you say Y here, the kernel will gather usage data about the online metadata check subsystem. This includes the number diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 3069194527dd..100ab5931b31 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist( ASSERT(mp->m_alloc_maxlevels > 0); + /* + * For a btree shorter than the maximum height, the worst case is that + * every level gets split and a new level is added, then while inserting + * another entry to refill the AGFL, every level under the old root gets + * split again. This is: + * + * (full height split reservation) + (AGFL refill split height) + * = (current height + 1) + (current height - 1) + * = (new height) + (new height - 2) + * = 2 * new height - 2 + * + * For a btree of maximum height, the worst case is that every level + * under the root gets split, then while inserting another entry to + * refill the AGFL, every level under the root gets split again. This is + * also: + * + * 2 * (current height - 1) + * = 2 * (new height - 1) + * = 2 * new height - 2 + */ + /* space needed by-bno freespace btree */ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, - mp->m_alloc_maxlevels); + mp->m_alloc_maxlevels) * 2 - 2; /* space needed by-size freespace btree */ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, - mp->m_alloc_maxlevels); + mp->m_alloc_maxlevels) * 2 - 2; /* space needed reverse mapping used space btree */ if (xfs_has_rmapbt(mp)) min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, - mp->m_rmap_maxlevels); + mp->m_rmap_maxlevels) * 2 - 2; return min_free; } diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index bcfb6a4203cd..f71679ce23b9 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -245,21 +245,18 @@ xfs_defer_create_intents( return ret; } -/* Abort all the intents that were committed. */ STATIC void -xfs_defer_trans_abort( - struct xfs_trans *tp, - struct list_head *dop_pending) +xfs_defer_pending_abort( + struct xfs_mount *mp, + struct list_head *dop_list) { struct xfs_defer_pending *dfp; const struct xfs_defer_op_type *ops; - trace_xfs_defer_trans_abort(tp, _RET_IP_); - /* Abort intent items that don't have a done item. */ - list_for_each_entry(dfp, dop_pending, dfp_list) { + list_for_each_entry(dfp, dop_list, dfp_list) { ops = defer_op_types[dfp->dfp_type]; - trace_xfs_defer_pending_abort(tp->t_mountp, dfp); + trace_xfs_defer_pending_abort(mp, dfp); if (dfp->dfp_intent && !dfp->dfp_done) { ops->abort_intent(dfp->dfp_intent); dfp->dfp_intent = NULL; @@ -267,6 +264,16 @@ xfs_defer_trans_abort( } } +/* Abort all the intents that were committed. */ +STATIC void +xfs_defer_trans_abort( + struct xfs_trans *tp, + struct list_head *dop_pending) +{ + trace_xfs_defer_trans_abort(tp, _RET_IP_); + xfs_defer_pending_abort(tp->t_mountp, dop_pending); +} + /* * Capture resources that the caller said not to release ("held") when the * transaction commits. Caller is responsible for zero-initializing @dres. @@ -756,12 +763,13 @@ xfs_defer_ops_capture( /* Release all resources that we used to capture deferred ops. */ void -xfs_defer_ops_capture_free( +xfs_defer_ops_capture_abort( struct xfs_mount *mp, struct xfs_defer_capture *dfc) { unsigned short i; + xfs_defer_pending_abort(mp, &dfc->dfc_dfops); xfs_defer_cancel_list(mp, &dfc->dfc_dfops); for (i = 0; i < dfc->dfc_held.dr_bufs; i++) @@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit( /* Commit the transaction and add the capture structure to the list. */ error = xfs_trans_commit(tp); if (error) { - xfs_defer_ops_capture_free(mp, dfc); + xfs_defer_ops_capture_abort(mp, dfc); return error; } diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 114a3a4930a3..8788ad5f6a73 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp, struct list_head *capture_list); void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp, struct xfs_defer_resources *dres); -void xfs_defer_ops_capture_free(struct xfs_mount *mp, +void xfs_defer_ops_capture_abort(struct xfs_mount *mp, struct xfs_defer_capture *d); void xfs_defer_resources_rele(struct xfs_defer_resources *dres); diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 543f3748c2a3..137a65bda95d 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -510,6 +510,9 @@ xfs_dinode_verify( if (mode && nextents + naextents > nblocks) return __this_address; + if (nextents + naextents == 0 && nblocks != 0) + return __this_address; + if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents) return __this_address; diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index ac6ba646624d..a013b87ab8d5 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -562,7 +562,8 @@ xfs_dquot_from_disk( struct xfs_dquot *dqp, struct xfs_buf *bp) { - struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset; + struct xfs_dqblk *dqb = xfs_buf_offset(bp, dqp->q_bufoffset); + struct xfs_disk_dquot *ddqp = &dqb->dd_diskdq; /* * Ensure that we got the type and ID we were looking for. @@ -1250,7 +1251,7 @@ xfs_qm_dqflush( } /* Flush the incore dquot to the ondisk buffer. */ - dqblk = bp->b_addr + dqp->q_bufoffset; + dqblk = xfs_buf_offset(bp, dqp->q_bufoffset); xfs_dquot_to_disk(&dqblk->dd_diskdq, dqp); /* diff --git a/fs/xfs/xfs_dquot_item_recover.c b/fs/xfs/xfs_dquot_item_recover.c index 8966ba842395..2c2720ce6923 100644 --- a/fs/xfs/xfs_dquot_item_recover.c +++ b/fs/xfs/xfs_dquot_item_recover.c @@ -19,6 +19,7 @@ #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" +#include "xfs_error.h" STATIC void xlog_recover_dquot_ra_pass2( @@ -65,6 +66,7 @@ xlog_recover_dquot_commit_pass2( { struct xfs_mount *mp = log->l_mp; struct xfs_buf *bp; + struct xfs_dqblk *dqb; struct xfs_disk_dquot *ddq, *recddq; struct xfs_dq_logformat *dq_f; xfs_failaddr_t fa; @@ -130,14 +132,14 @@ xlog_recover_dquot_commit_pass2( return error; ASSERT(bp); - ddq = xfs_buf_offset(bp, dq_f->qlf_boffset); + dqb = xfs_buf_offset(bp, dq_f->qlf_boffset); + ddq = &dqb->dd_diskdq; /* * If the dquot has an LSN in it, recover the dquot only if it's less * than the lsn of the transaction we are replaying. */ if (xfs_has_crc(mp)) { - struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddq; xfs_lsn_t lsn = be64_to_cpu(dqb->dd_lsn); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { @@ -147,10 +149,23 @@ xlog_recover_dquot_commit_pass2( memcpy(ddq, recddq, item->ri_buf[1].i_len); if (xfs_has_crc(mp)) { - xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), + xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), XFS_DQUOT_CRC_OFF); } + /* Validate the recovered dquot. */ + fa = xfs_dqblk_verify(log->l_mp, dqb, dq_f->qlf_id); + if (fa) { + XFS_CORRUPTION_ERROR("Bad dquot after recovery", + XFS_ERRLEVEL_LOW, mp, dqb, + sizeof(struct xfs_dqblk)); + xfs_alert(mp, + "Metadata corruption detected at %pS, dquot 0x%x", + fa, dq_f->qlf_id); + error = -EFSCORRUPTED; + goto out_release; + } + ASSERT(dq_f->qlf_size == 2); ASSERT(bp->b_mount == mp); bp->b_flags |= _XBF_LOGRECOVERY; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 7cb75cb6b8e9..57076a25f17d 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -482,9 +482,9 @@ xfs_fs_goingdown( { switch (inflags) { case XFS_FSOP_GOING_FLAGS_DEFAULT: { - if (!freeze_bdev(mp->m_super->s_bdev)) { + if (!bdev_freeze(mp->m_super->s_bdev)) { xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); - thaw_bdev(mp->m_super->s_bdev); + bdev_thaw(mp->m_super->s_bdev); } break; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 3dc47937da5d..3beb470f1892 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -569,6 +569,14 @@ extern void xfs_setup_inode(struct xfs_inode *ip); extern void xfs_setup_iops(struct xfs_inode *ip); extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init); +static inline void xfs_update_stable_writes(struct xfs_inode *ip) +{ + if (bdev_stable_writes(xfs_inode_buftarg(ip)->bt_bdev)) + mapping_set_stable_writes(VFS_I(ip)->i_mapping); + else + mapping_clear_stable_writes(VFS_I(ip)->i_mapping); +} + /* * When setting up a newly allocated inode, we need to call * xfs_finish_inode_setup() once the inode is fully instantiated at diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 0e5dba2343ea..144198a6b270 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2( struct xfs_log_dinode *ldip; uint isize; int need_free = 0; + xfs_failaddr_t fa; if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) { in_f = item->ri_buf[0].i_addr; @@ -369,24 +370,26 @@ xlog_recover_inode_commit_pass2( * superblock flag to determine whether we need to look at di_flushiter * to skip replay when the on disk inode is newer than the log one */ - if (!xfs_has_v3inodes(mp) && - ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { - /* - * Deal with the wrap case, DI_MAX_FLUSH is less - * than smaller numbers - */ - if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && - ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { - /* do nothing */ - } else { - trace_xfs_log_recover_inode_skip(log, in_f); - error = 0; - goto out_release; + if (!xfs_has_v3inodes(mp)) { + if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) { + /* + * Deal with the wrap case, DI_MAX_FLUSH is less + * than smaller numbers + */ + if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH && + ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) { + /* do nothing */ + } else { + trace_xfs_log_recover_inode_skip(log, in_f); + error = 0; + goto out_release; + } } + + /* Take the opportunity to reset the flush iteration count */ + ldip->di_flushiter = 0; } - /* Take the opportunity to reset the flush iteration count */ - ldip->di_flushiter = 0; if (unlikely(S_ISREG(ldip->di_mode))) { if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) && @@ -528,8 +531,19 @@ out_owner_change: (dip->di_mode != 0)) error = xfs_recover_inode_owner_change(mp, dip, in_f, buffer_list); - /* re-generate the checksum. */ + /* re-generate the checksum and validate the recovered inode. */ xfs_dinode_calc_crc(log->l_mp, dip); + fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip); + if (fa) { + XFS_CORRUPTION_ERROR( + "Bad dinode after recovery", + XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip)); + xfs_alert(mp, + "Metadata corruption detected at %pS, inode 0x%llx", + fa, in_f->ilf_ino); + error = -EFSCORRUPTED; + goto out_release; + } ASSERT(bp->b_mount == mp); bp->b_flags |= _XBF_LOGRECOVERY; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index a82470e027f7..6c3919687ea6 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1121,23 +1121,25 @@ xfs_ioctl_setattr_xflags( struct fileattr *fa) { struct xfs_mount *mp = ip->i_mount; + bool rtflag = (fa->fsx_xflags & FS_XFLAG_REALTIME); uint64_t i_flags2; - /* Can't change realtime flag if any extents are allocated. */ - if ((ip->i_df.if_nextents || ip->i_delayed_blks) && - XFS_IS_REALTIME_INODE(ip) != (fa->fsx_xflags & FS_XFLAG_REALTIME)) - return -EINVAL; + if (rtflag != XFS_IS_REALTIME_INODE(ip)) { + /* Can't change realtime flag if any extents are allocated. */ + if (ip->i_df.if_nextents || ip->i_delayed_blks) + return -EINVAL; + } - /* If realtime flag is set then must have realtime device */ - if (fa->fsx_xflags & FS_XFLAG_REALTIME) { + if (rtflag) { + /* If realtime flag is set then must have realtime device */ if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 || xfs_extlen_to_rtxmod(mp, ip->i_extsize)) return -EINVAL; - } - /* Clear reflink if we are actually able to set the rt flag. */ - if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip)) - ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + /* Clear reflink if we are actually able to set the rt flag. */ + if (xfs_is_reflink_inode(ip)) + ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; + } /* diflags2 only valid for v3 inodes. */ i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); @@ -1148,6 +1150,14 @@ xfs_ioctl_setattr_xflags( ip->i_diflags2 = i_flags2; xfs_diflags_to_iflags(ip, false); + + /* + * Make the stable writes flag match that of the device the inode + * resides on when flipping the RT flag. + */ + if (rtflag != XFS_IS_REALTIME_INODE(ip) && S_ISREG(VFS_I(ip)->i_mode)) + xfs_update_stable_writes(ip); + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index fdfda4fba12b..a0d77f5f512e 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1299,6 +1299,13 @@ xfs_setup_inode( mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); /* + * For real-time inodes update the stable write flags to that of the RT + * device instead of the data device. + */ + if (S_ISREG(inode->i_mode) && XFS_IS_REALTIME_INODE(ip)) + xfs_update_stable_writes(ip); + + /* * If there is no attribute fork no ACL can exist on this inode, * and it can't have any file capabilities attached to it either. */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 51c100c86177..ee206facf0dc 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1893,9 +1893,7 @@ xlog_write_iclog( * the buffer manually, the code needs to be kept in sync * with the I/O completion path. */ - xlog_state_done_syncing(iclog); - up(&iclog->ic_sema); - return; + goto sync; } /* @@ -1925,20 +1923,17 @@ xlog_write_iclog( * avoid shutdown re-entering this path and erroring out again. */ if (log->l_targ != log->l_mp->m_ddev_targp && - blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) { - xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); - return; - } + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) + goto shutdown; } if (iclog->ic_flags & XLOG_ICL_NEED_FUA) iclog->ic_bio.bi_opf |= REQ_FUA; iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { - xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); - return; - } + if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) + goto shutdown; + if (is_vmalloc_addr(iclog->ic_data)) flush_kernel_vmap_range(iclog->ic_data, count); @@ -1959,6 +1954,12 @@ xlog_write_iclog( } submit_bio(&iclog->ic_bio); + return; +shutdown: + xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); +sync: + xlog_state_done_syncing(iclog); + up(&iclog->ic_sema); } /* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 13b94d2e605b..a1e18b24971a 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2511,7 +2511,7 @@ xlog_abort_defer_ops( list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { list_del_init(&dfc->dfc_list); - xfs_defer_ops_capture_free(mp, dfc); + xfs_defer_ops_capture_abort(mp, dfc); } } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 658edee8381d..e5b62dc28466 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent( } } del = got; + xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb); /* Grab the corresponding mapping in the data fork. */ nmaps = 1; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 764304595e8b..07857d967ee8 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -366,8 +366,9 @@ xfs_blkdev_get( { int error = 0; - *handlep = bdev_open_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE, - mp->m_super, &fs_holder_ops); + *handlep = bdev_open_by_path(name, + BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES, + mp->m_super, &fs_holder_ops); if (IS_ERR(*handlep)) { error = PTR_ERR(*handlep); *handlep = NULL; @@ -439,18 +440,12 @@ xfs_open_devices( int error; /* - * blkdev_put() can't be called under s_umount, see the comment - * in get_tree_bdev() for more details - */ - up_write(&sb->s_umount); - - /* * Open real time and log devices - order is important. */ if (mp->m_logname) { error = xfs_blkdev_get(mp, mp->m_logname, &logdev_handle); if (error) - goto out_relock; + return error; } if (mp->m_rtname) { @@ -493,10 +488,7 @@ xfs_open_devices( bdev_release(logdev_handle); } - error = 0; -out_relock: - down_write(&sb->s_umount); - return error; + return 0; out_free_rtdev_targ: if (mp->m_rtdev_targp) @@ -509,7 +501,7 @@ out_relock: out_close_logdev: if (logdev_handle) bdev_release(logdev_handle); - goto out_relock; + return error; } /* @@ -759,10 +751,6 @@ static void xfs_mount_free( struct xfs_mount *mp) { - /* - * Free the buftargs here because blkdev_put needs to be called outside - * of sb->s_umount, which is held around the call to ->put_super. - */ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) xfs_free_buftarg(mp->m_logdev_targp); if (mp->m_rtdev_targp) |