diff options
38 files changed, 811 insertions, 450 deletions
@@ -267,8 +267,13 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) inode_unlock(inode); - if ((retval > 0) && end_io) - end_io(iocb, pos, retval, bh.b_private); + if (end_io) { + int err; + + err = end_io(iocb, pos, retval, bh.b_private); + if (err) + retval = err; + } if (!(flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(inode); diff --git a/fs/direct-io.c b/fs/direct-io.c index 1b2f7ffc8b84..9c6f885cc518 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -253,8 +253,13 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, if (ret == 0) ret = transferred; - if (dio->end_io && dio->result) - dio->end_io(dio->iocb, offset, transferred, dio->private); + if (dio->end_io) { + int err; + + err = dio->end_io(dio->iocb, offset, ret, dio->private); + if (err) + ret = err; + } if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0662b285dc8a..56c12df107ab 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1504,15 +1504,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } -static inline void ext4_set_io_unwritten_flag(struct inode *inode, - struct ext4_io_end *io_end) -{ - if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { - io_end->flag |= EXT4_IO_END_UNWRITTEN; - atomic_inc(&EXT4_I(inode)->i_unwritten); - } -} - static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode) { return inode->i_private; @@ -3293,6 +3284,27 @@ extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; extern int ext4_resize_begin(struct super_block *sb); extern void ext4_resize_end(struct super_block *sb); +static inline void ext4_set_io_unwritten_flag(struct inode *inode, + struct ext4_io_end *io_end) +{ + if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; + atomic_inc(&EXT4_I(inode)->i_unwritten); + } +} + +static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) +{ + struct inode *inode = io_end->inode; + + if (io_end->flag & EXT4_IO_END_UNWRITTEN) { + io_end->flag &= ~EXT4_IO_END_UNWRITTEN; + /* Wake up anyone waiting on unwritten extent conversion */ + if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) + wake_up_all(ext4_ioend_wq(inode)); + } +} + #endif /* __KERNEL__ */ #define EFSBADCRC EBADMSG /* Bad CRC detected */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 83bc8bfb3bea..2b98171a9432 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3161,14 +3161,14 @@ out: } #endif -static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, +static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private) { ext4_io_end_t *io_end = iocb->private; /* if not async direct IO just return */ if (!io_end) - return; + return 0; ext_debug("ext4_end_io_dio(): io_end 0x%p " "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", @@ -3176,9 +3176,19 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, size); iocb->private = NULL; + /* + * Error during AIO DIO. We cannot convert unwritten extents as the + * data was not written. Just clear the unwritten flag and drop io_end. + */ + if (size <= 0) { + ext4_clear_io_unwritten_flag(io_end); + size = 0; + } io_end->offset = offset; io_end->size = size; ext4_put_io_end(io_end); + + return 0; } /* @@ -3301,16 +3311,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (io_end) { ext4_inode_aio_set(inode, NULL); ext4_put_io_end(io_end); - /* - * When no IO was submitted ext4_end_io_dio() was not - * called so we have to put iocb's reference. - */ - if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { - WARN_ON(iocb->private != io_end); - WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); - ext4_put_io_end(io_end); - iocb->private = NULL; - } } if (ret > 0 && !overwrite && ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN)) { diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 090b3498638e..f49a87c4fb63 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -139,16 +139,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) kmem_cache_free(io_end_cachep, io_end); } -static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) -{ - struct inode *inode = io_end->inode; - - io_end->flag &= ~EXT4_IO_END_UNWRITTEN; - /* Wake up anyone waiting on unwritten extent conversion */ - if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) - wake_up_all(ext4_ioend_wq(inode)); -} - /* * Check a range of space and convert unwritten extents to written. Note that * we are protected from truncate touching same part of extent tree by the diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 794fd1587f34..5dcc5f5a842e 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -620,7 +620,7 @@ bail: * particularly interested in the aio/dio case. We use the rw_lock DLM lock * to protect io on one node from truncation on another. */ -static void ocfs2_dio_end_io(struct kiocb *iocb, +static int ocfs2_dio_end_io(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private) @@ -628,6 +628,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, struct inode *inode = file_inode(iocb->ki_filp); int level; + if (bytes <= 0) + return 0; + /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); @@ -644,6 +647,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, level = ocfs2_iocb_rw_locked_level(iocb); ocfs2_rw_unlock(inode, level); } + + return 0; } static int ocfs2_releasepage(struct page *page, gfp_t wait) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 3746367098fd..0ebc90496525 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -79,7 +79,7 @@ unsigned int qtype_enforce_flag(int type) return 0; } -static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, +static int quota_quotaon(struct super_block *sb, int type, qid_t id, struct path *path) { if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable) @@ -222,6 +222,34 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id, return 0; } +/* + * Return quota for next active quota >= this id, if any exists, + * otherwise return -ESRCH via ->get_nextdqblk + */ +static int quota_getnextquota(struct super_block *sb, int type, qid_t id, + void __user *addr) +{ + struct kqid qid; + struct qc_dqblk fdq; + struct if_nextdqblk idq; + int ret; + + if (!sb->s_qcop->get_nextdqblk) + return -ENOSYS; + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; + ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq); + if (ret) + return ret; + /* struct if_nextdqblk is a superset of struct if_dqblk */ + copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq); + idq.dqb_id = from_kqid(current_user_ns(), qid); + if (copy_to_user(addr, &idq, sizeof(idq))) + return -EFAULT; + return 0; +} + static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src) { dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit); @@ -625,6 +653,34 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, return ret; } +/* + * Return quota for next active quota >= this id, if any exists, + * otherwise return -ESRCH via ->get_nextdqblk. + */ +static int quota_getnextxquota(struct super_block *sb, int type, qid_t id, + void __user *addr) +{ + struct fs_disk_quota fdq; + struct qc_dqblk qdq; + struct kqid qid; + qid_t id_out; + int ret; + + if (!sb->s_qcop->get_nextdqblk) + return -ENOSYS; + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; + ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq); + if (ret) + return ret; + id_out = from_kqid(current_user_ns(), qid); + copy_to_xfs_dqblk(&fdq, &qdq, type, id_out); + if (copy_to_user(addr, &fdq, sizeof(fdq))) + return -EFAULT; + return ret; +} + static int quota_rmxquota(struct super_block *sb, void __user *addr) { __u32 flags; @@ -659,7 +715,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, switch (cmd) { case Q_QUOTAON: - return quota_quotaon(sb, type, cmd, id, path); + return quota_quotaon(sb, type, id, path); case Q_QUOTAOFF: return quota_quotaoff(sb, type); case Q_GETFMT: @@ -670,6 +726,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return quota_setinfo(sb, type, addr); case Q_GETQUOTA: return quota_getquota(sb, type, id, addr); + case Q_GETNEXTQUOTA: + return quota_getnextquota(sb, type, id, addr); case Q_SETQUOTA: return quota_setquota(sb, type, id, addr); case Q_SYNC: @@ -690,6 +748,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return quota_setxquota(sb, type, id, addr); case Q_XGETQUOTA: return quota_getxquota(sb, type, id, addr); + case Q_XGETNEXTQUOTA: + return quota_getnextxquota(sb, type, id, addr); case Q_XQUOTASYNC: if (sb->s_flags & MS_RDONLY) return -EROFS; @@ -708,10 +768,12 @@ static int quotactl_cmd_write(int cmd) switch (cmd) { case Q_GETFMT: case Q_GETINFO: + case Q_GETNEXTQUOTA: case Q_SYNC: case Q_XGETQSTAT: case Q_XGETQSTATV: case Q_XGETQUOTA: + case Q_XGETNEXTQUOTA: case Q_XQUOTASYNC: return 0; } diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 444626ddbd1b..d9b42425291e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -118,8 +118,6 @@ xfs_allocbt_free_block( xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); - - xfs_trans_binval(cur->bc_tp, bp); return 0; } diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index 919756e3ba53..90928bbe693c 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -24,22 +24,6 @@ * Small attribute lists are packed as tightly as possible so as * to fit into the literal area of the inode. */ - -/* - * Entries are packed toward the top as tight as possible. - */ -typedef struct xfs_attr_shortform { - struct xfs_attr_sf_hdr { /* constant-structure header block */ - __be16 totsize; /* total bytes in shortform list */ - __u8 count; /* count of active entries */ - } hdr; - struct xfs_attr_sf_entry { - __uint8_t namelen; /* actual length of name (no NULL) */ - __uint8_t valuelen; /* actual length of value (no NULL) */ - __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ - __uint8_t nameval[1]; /* name & value bytes concatenated */ - } list[1]; /* variable sized array */ -} xfs_attr_shortform_t; typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t; typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 1637c37bfbaa..e37508ae589b 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -531,7 +531,6 @@ xfs_bmbt_free_block( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); - xfs_trans_binval(tp, bp); return 0; } diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index a0eb18ce3ad3..1f88e1ce770f 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -294,6 +294,21 @@ xfs_btree_sblock_verify_crc( return true; } +static int +xfs_btree_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + int error; + + error = cur->bc_ops->free_block(cur, bp); + if (!error) { + xfs_trans_binval(cur->bc_tp, bp); + XFS_BTREE_STATS_INC(cur, free); + } + return error; +} + /* * Delete the btree cursor. */ @@ -3209,6 +3224,7 @@ xfs_btree_kill_iroot( int level; int index; int numrecs; + int error; #ifdef DEBUG union xfs_btree_ptr ptr; int i; @@ -3272,8 +3288,6 @@ xfs_btree_kill_iroot( cpp = xfs_btree_ptr_addr(cur, 1, cblock); #ifdef DEBUG for (i = 0; i < numrecs; i++) { - int error; - error = xfs_btree_check_ptr(cur, cpp, i, level - 1); if (error) { XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); @@ -3283,8 +3297,11 @@ xfs_btree_kill_iroot( #endif xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); - cur->bc_ops->free_block(cur, cbp); - XFS_BTREE_STATS_INC(cur, free); + error = xfs_btree_free_block(cur, cbp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } cur->bc_bufs[level - 1] = NULL; be16_add_cpu(&block->bb_level, -1); @@ -3317,14 +3334,12 @@ xfs_btree_kill_root( */ cur->bc_ops->set_root(cur, newroot, -1); - error = cur->bc_ops->free_block(cur, bp); + error = xfs_btree_free_block(cur, bp); if (error) { XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); return error; } - XFS_BTREE_STATS_INC(cur, free); - cur->bc_bufs[level] = NULL; cur->bc_ra[level] = 0; cur->bc_nlevels--; @@ -3830,10 +3845,9 @@ xfs_btree_delrec( } /* Free the deleted block. */ - error = cur->bc_ops->free_block(cur, rbp); + error = xfs_btree_free_block(cur, rbp); if (error) goto error0; - XFS_BTREE_STATS_INC(cur, free); /* * If we joined with the left neighbor, set the buffer in the diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index b14bbd6bb05f..8d4d8bce41bf 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -641,6 +641,22 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) */ #define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ +/* + * Entries are packed toward the top as tight as possible. + */ +typedef struct xfs_attr_shortform { + struct xfs_attr_sf_hdr { /* constant-structure header block */ + __be16 totsize; /* total bytes in shortform list */ + __u8 count; /* count of active entries */ + } hdr; + struct xfs_attr_sf_entry { + __uint8_t namelen; /* actual length of name (no NULL) */ + __uint8_t valuelen; /* actual length of value (no NULL) */ + __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ + __uint8_t nameval[1]; /* name & value bytes concatenated */ + } list[1]; /* variable sized array */ +} xfs_attr_shortform_t; + typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ __be16 base; /* base of free region */ __be16 size; /* length of free region */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index c679f3c05b63..89c21d771e35 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -125,16 +125,8 @@ xfs_inobt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) { - xfs_fsblock_t fsbno; - int error; - - fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)); - error = xfs_free_extent(cur->bc_tp, fsbno, 1); - if (error) - return error; - - xfs_trans_binval(cur->bc_tp, bp); - return error; + return xfs_free_extent(cur->bc_tp, + XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1); } STATIC int diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 0bf1c747439d..11faf7df14c8 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -31,6 +31,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_attr_sf.h" +#include "xfs_da_format.h" kmem_zone_t *xfs_ifork_zone; diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 03f90b99b8c8..d54a8018b079 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -496,6 +496,8 @@ enum xfs_blft { XFS_BLFT_ATTR_LEAF_BUF, XFS_BLFT_ATTR_RMT_BUF, XFS_BLFT_SB_BUF, + XFS_BLFT_RTBITMAP_BUF, + XFS_BLFT_RTSUMMARY_BUF, XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS), }; diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index f51078f1e92a..8eed51275bb3 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -37,7 +37,7 @@ typedef __uint16_t xfs_qwarncnt_t; #define XFS_DQ_PROJ 0x0002 /* project quota */ #define XFS_DQ_GROUP 0x0004 /* a group quota */ #define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ -#define XFS_DQ_FREEING 0x0010 /* dquot is beeing torn down */ +#define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */ #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) @@ -116,6 +116,7 @@ typedef __uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ +#define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ /* * flags to xfs_trans_mod_dquot to indicate which field needs to be diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index acc71dd36a2b..951c044e24e4 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -42,6 +42,31 @@ */ /* + * Real time buffers need verifiers to avoid runtime warnings during IO. + * We don't have anything to verify, however, so these are just dummy + * operations. + */ +static void +xfs_rtbuf_verify_read( + struct xfs_buf *bp) +{ + return; +} + +static void +xfs_rtbuf_verify_write( + struct xfs_buf *bp) +{ + return; +} + +const struct xfs_buf_ops xfs_rtbuf_ops = { + .name = "rtbuf", + .verify_read = xfs_rtbuf_verify_read, + .verify_write = xfs_rtbuf_verify_write, +}; + +/* * Get a buffer for the bitmap or summary file block specified. * The buffer is returned read and locked. */ @@ -68,9 +93,12 @@ xfs_rtbuf_get( ASSERT(map.br_startblock != NULLFSBLOCK); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map.br_startblock), - mp->m_bsize, 0, &bp, NULL); + mp->m_bsize, 0, &bp, &xfs_rtbuf_ops); if (error) return error; + + xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF + : XFS_BLFT_RTBITMAP_BUF); *bpp = bp; return 0; } diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index b25bb9a343f3..961e6475a309 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -27,7 +27,6 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t, extern void xfs_perag_put(struct xfs_perag *pag); extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t); -extern void xfs_sb_calc_crc(struct xfs_buf *bp); extern void xfs_log_sb(struct xfs_trans *tp); extern int xfs_sync_sb(struct xfs_mount *mp, bool wait); extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 15c3ceb845b9..81ac870834da 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -53,6 +53,7 @@ extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops; extern const struct xfs_buf_ops xfs_sb_buf_ops; extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; extern const struct xfs_buf_ops xfs_symlink_buf_ops; +extern const struct xfs_buf_ops xfs_rtbuf_ops; /* * Transaction types. Used to distinguish types of buffers. These never reach diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 379c089fb051..14ac9822b303 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -36,6 +36,10 @@ #include <linux/pagevec.h> #include <linux/writeback.h> +/* flags for direct write completions */ +#define XFS_DIO_FLAG_UNWRITTEN (1 << 0) +#define XFS_DIO_FLAG_APPEND (1 << 1) + void xfs_count_page_state( struct page *page, @@ -214,10 +218,12 @@ xfs_end_io( struct xfs_inode *ip = XFS_I(ioend->io_inode); int error = 0; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + /* + * Set an error if the mount has shut down and proceed with end I/O + * processing so it can perform whatever cleanups are necessary. + */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) ioend->io_error = -EIO; - goto done; - } /* * For unwritten extents we need to issue transactions to convert a @@ -1238,27 +1244,8 @@ xfs_vm_releasepage( } /* - * When we map a DIO buffer, we may need to attach an ioend that describes the - * type of write IO we are doing. This passes to the completion function the - * operations it needs to perform. If the mapping is for an overwrite wholly - * within the EOF then we don't need an ioend and so we don't allocate one. - * This avoids the unnecessary overhead of allocating and freeing ioends for - * workloads that don't require transactions on IO completion. - * - * If we get multiple mappings in a single IO, we might be mapping different - * types. But because the direct IO can only have a single private pointer, we - * need to ensure that: - * - * a) i) the ioend spans the entire region of unwritten mappings; or - * ii) the ioend spans all the mappings that cross or are beyond EOF; and - * b) if it contains unwritten extents, it is *permanently* marked as such - * - * We could do this by chaining ioends like buffered IO does, but we only - * actually get one IO completion callback from the direct IO, and that spans - * the entire IO regardless of how many mappings and IOs are needed to complete - * the DIO. There is only going to be one reference to the ioend and its life - * cycle is constrained by the DIO completion code. hence we don't need - * reference counting here. + * When we map a DIO buffer, we may need to pass flags to + * xfs_end_io_direct_write to tell it what kind of write IO we are doing. * * Note that for DIO, an IO to the highest supported file block offset (i.e. * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 @@ -1266,68 +1253,26 @@ xfs_vm_releasepage( * extending the file size. We won't know for sure until IO completion is run * and the actual max write offset is communicated to the IO completion * routine. - * - * For DAX page faults, we are preparing to never see unwritten extents here, - * nor should we ever extend the inode size. Hence we will soon have nothing to - * do here for this case, ensuring we don't have to provide an IO completion - * callback to free an ioend that we don't actually need for a fault into the - * page at offset (2^63 - 1FSB) bytes. */ - static void xfs_map_direct( struct inode *inode, struct buffer_head *bh_result, struct xfs_bmbt_irec *imap, - xfs_off_t offset, - bool dax_fault) + xfs_off_t offset) { - struct xfs_ioend *ioend; + uintptr_t *flags = (uintptr_t *)&bh_result->b_private; xfs_off_t size = bh_result->b_size; - int type; - if (ISUNWRITTEN(imap)) - type = XFS_IO_UNWRITTEN; - else - type = XFS_IO_OVERWRITE; - - trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); - - if (dax_fault) { - ASSERT(type == XFS_IO_OVERWRITE); - trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, - imap); - return; - } - - if (bh_result->b_private) { - ioend = bh_result->b_private; - ASSERT(ioend->io_size > 0); - ASSERT(offset >= ioend->io_offset); - if (offset + size > ioend->io_offset + ioend->io_size) - ioend->io_size = offset - ioend->io_offset + size; - - if (type == XFS_IO_UNWRITTEN && type != ioend->io_type) - ioend->io_type = XFS_IO_UNWRITTEN; - - trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset, - ioend->io_size, ioend->io_type, - imap); - } else if (type == XFS_IO_UNWRITTEN || - offset + size > i_size_read(inode) || - offset + size < 0) { - ioend = xfs_alloc_ioend(inode, type); - ioend->io_offset = offset; - ioend->io_size = size; + trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size, + ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap); - bh_result->b_private = ioend; + if (ISUNWRITTEN(imap)) { + *flags |= XFS_DIO_FLAG_UNWRITTEN; + set_buffer_defer_completion(bh_result); + } else if (offset + size > i_size_read(inode) || offset + size < 0) { + *flags |= XFS_DIO_FLAG_APPEND; set_buffer_defer_completion(bh_result); - - trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type, - imap); - } else { - trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, - imap); } } @@ -1498,9 +1443,12 @@ __xfs_get_blocks( if (ISUNWRITTEN(&imap)) set_buffer_unwritten(bh_result); /* direct IO needs special help */ - if (create && direct) - xfs_map_direct(inode, bh_result, &imap, offset, - dax_fault); + if (create && direct) { + if (dax_fault) + ASSERT(!ISUNWRITTEN(&imap)); + else + xfs_map_direct(inode, bh_result, &imap, offset); + } } /* @@ -1570,42 +1518,50 @@ xfs_get_blocks_dax_fault( return __xfs_get_blocks(inode, iblock, bh_result, create, true, true); } -static void -__xfs_end_io_direct_write( - struct inode *inode, - struct xfs_ioend *ioend, +/* + * Complete a direct I/O write request. + * + * xfs_map_direct passes us some flags in the private data to tell us what to + * do. If no flags are set, then the write IO is an overwrite wholly within + * the existing allocated file size and so there is nothing for us to do. + * + * Note that in this case the completion can be called in interrupt context, + * whereas if we have flags set we will always be called in task context + * (i.e. from a workqueue). + */ +STATIC int +xfs_end_io_direct_write( + struct kiocb *iocb, loff_t offset, - ssize_t size) + ssize_t size, + void *private) { - struct xfs_mount *mp = XFS_I(inode)->i_mount; + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + uintptr_t flags = (uintptr_t)private; + int error = 0; - if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error) - goto out_end_io; + trace_xfs_end_io_direct_write(ip, offset, size); - /* - * dio completion end_io functions are only called on writes if more - * than 0 bytes was written. - */ - ASSERT(size > 0); + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; - /* - * The ioend only maps whole blocks, while the IO may be sector aligned. - * Hence the ioend offset/size may not match the IO offset/size exactly. - * Because we don't map overwrites within EOF into the ioend, the offset - * may not match, but only if the endio spans EOF. Either way, write - * the IO sizes into the ioend so that completion processing does the - * right thing. - */ - ASSERT(offset + size <= ioend->io_offset + ioend->io_size); - ioend->io_size = size; - ioend->io_offset = offset; + if (size <= 0) + return size; /* - * The ioend tells us whether we are doing unwritten extent conversion + * The flags tell us whether we are doing unwritten extent conversions * or an append transaction that updates the on-disk file size. These * cases are the only cases where we should *potentially* be needing * to update the VFS inode size. - * + */ + if (flags == 0) { + ASSERT(offset + size <= i_size_read(inode)); + return 0; + } + + /* * We need to update the in-core inode size here so that we don't end up * with the on-disk inode size being outside the in-core inode size. We * have no other method of updating EOF for AIO, so always do it here @@ -1616,91 +1572,56 @@ __xfs_end_io_direct_write( * here can result in EOF moving backwards and Bad Things Happen when * that occurs. */ - spin_lock(&XFS_I(inode)->i_flags_lock); + spin_lock(&ip->i_flags_lock); if (offset + size > i_size_read(inode)) i_size_write(inode, offset + size); - spin_unlock(&XFS_I(inode)->i_flags_lock); + spin_unlock(&ip->i_flags_lock); - /* - * If we are doing an append IO that needs to update the EOF on disk, - * do the transaction reserve now so we can use common end io - * processing. Stashing the error (if there is one) in the ioend will - * result in the ioend processing passing on the error if it is - * possible as we can't return it from here. - */ - if (ioend->io_type == XFS_IO_OVERWRITE) - ioend->io_error = xfs_setfilesize_trans_alloc(ioend); + if (flags & XFS_DIO_FLAG_UNWRITTEN) { + trace_xfs_end_io_direct_write_unwritten(ip, offset, size); -out_end_io: - xfs_end_io(&ioend->io_work); - return; -} + error = xfs_iomap_write_unwritten(ip, offset, size); + } else if (flags & XFS_DIO_FLAG_APPEND) { + struct xfs_trans *tp; -/* - * Complete a direct I/O write request. - * - * The ioend structure is passed from __xfs_get_blocks() to tell us what to do. - * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite - * wholly within the EOF and so there is nothing for us to do. Note that in this - * case the completion can be called in interrupt context, whereas if we have an - * ioend we will always be called in task context (i.e. from a workqueue). - */ -STATIC void -xfs_end_io_direct_write( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private) -{ - struct inode *inode = file_inode(iocb->ki_filp); - struct xfs_ioend *ioend = private; + trace_xfs_end_io_direct_write_append(ip, offset, size); - trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size, - ioend ? ioend->io_type : 0, NULL); - - if (!ioend) { - ASSERT(offset + size <= i_size_read(inode)); - return; + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); + if (error) { + xfs_trans_cancel(tp); + return error; + } + error = xfs_setfilesize(ip, tp, offset, size); } - __xfs_end_io_direct_write(inode, ioend, offset, size); + return error; } -static inline ssize_t -xfs_vm_do_dio( - struct inode *inode, +STATIC ssize_t +xfs_vm_direct_IO( struct kiocb *iocb, struct iov_iter *iter, - loff_t offset, - void (*endio)(struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private), - int flags) + loff_t offset) { + struct inode *inode = iocb->ki_filp->f_mapping->host; + dio_iodone_t *endio = NULL; + int flags = 0; struct block_device *bdev; - if (IS_DAX(inode)) + if (iov_iter_rw(iter) == WRITE) { + endio = xfs_end_io_direct_write; + flags = DIO_ASYNC_EXTEND; + } + + if (IS_DAX(inode)) { return dax_do_io(iocb, inode, iter, offset, xfs_get_blocks_direct, endio, 0); + } bdev = xfs_find_bdev_for_inode(inode); return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, - xfs_get_blocks_direct, endio, NULL, flags); -} - -STATIC ssize_t -xfs_vm_direct_IO( - struct kiocb *iocb, - struct iov_iter *iter, - loff_t offset) -{ - struct inode *inode = iocb->ki_filp->f_mapping->host; - - if (iov_iter_rw(iter) == WRITE) - return xfs_vm_do_dio(inode, iocb, iter, offset, - xfs_end_io_direct_write, DIO_ASYNC_EXTEND); - return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0); + xfs_get_blocks_direct, endio, NULL, flags); } /* @@ -1783,14 +1704,22 @@ xfs_vm_write_failed( if (block_start >= to) break; - if (!buffer_delay(bh)) + /* + * Process delalloc and unwritten buffers beyond EOF. We can + * encounter unwritten buffers in the event that a file has + * post-EOF unwritten extents and an extending write happens to + * fail (e.g., an unaligned write that also involves a delalloc + * to the same page). + */ + if (!buffer_delay(bh) && !buffer_unwritten(bh)) continue; if (!buffer_new(bh) && block_offset < i_size_read(inode)) continue; - xfs_vm_kill_delalloc_range(inode, block_offset, - block_offset + bh->b_size); + if (buffer_delay(bh)) + xfs_vm_kill_delalloc_range(inode, block_offset, + block_offset + bh->b_size); /* * This buffer does not contain data anymore. make sure anyone @@ -1801,6 +1730,7 @@ xfs_vm_write_failed( clear_buffer_mapped(bh); clear_buffer_new(bh); clear_buffer_dirty(bh); + clear_buffer_unwritten(bh); } } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 708775613e55..fd7f51c39b3f 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -202,10 +202,12 @@ xfs_bmap_rtalloc( ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; /* - * Lock out other modifications to the RT bitmap inode. + * Lock out modifications to both the RT bitmap and summary inodes */ xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); /* * If it's an allocation to an empty file at offset 0, diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 9c44d38dcd1f..316b2a1bdba5 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -92,26 +92,28 @@ xfs_qm_adjust_dqlimits( { struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_disk_dquot *d = &dq->q_core; + struct xfs_def_quota *defq; int prealloc = 0; ASSERT(d->d_id); + defq = xfs_get_defquota(dq, q); - if (q->qi_bsoftlimit && !d->d_blk_softlimit) { - d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); + if (defq->bsoftlimit && !d->d_blk_softlimit) { + d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit); prealloc = 1; } - if (q->qi_bhardlimit && !d->d_blk_hardlimit) { - d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); + if (defq->bhardlimit && !d->d_blk_hardlimit) { + d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit); prealloc = 1; } - if (q->qi_isoftlimit && !d->d_ino_softlimit) - d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); - if (q->qi_ihardlimit && !d->d_ino_hardlimit) - d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); - if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) - d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); - if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) - d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); + if (defq->isoftlimit && !d->d_ino_softlimit) + d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit); + if (defq->ihardlimit && !d->d_ino_hardlimit) + d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit); + if (defq->rtbsoftlimit && !d->d_rtb_softlimit) + d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit); + if (defq->rtbhardlimit && !d->d_rtb_hardlimit) + d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit); if (prealloc) xfs_dquot_set_prealloc_limits(dq); @@ -232,7 +234,8 @@ xfs_qm_init_dquot_blk( { struct xfs_quotainfo *q = mp->m_quotainfo; xfs_dqblk_t *d; - int curid, i; + xfs_dqid_t curid; + int i; ASSERT(tp); ASSERT(xfs_buf_islocked(bp)); @@ -243,7 +246,6 @@ xfs_qm_init_dquot_blk( * ID of the first dquot in the block - id's are zero based. */ curid = id - (id % q->qi_dqperchunk); - ASSERT(curid >= 0); memset(d, 0, BBTOB(q->qi_dqchunklen)); for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) { d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); @@ -464,12 +466,13 @@ xfs_qm_dqtobp( struct xfs_bmbt_irec map; int nmaps = 1, error; struct xfs_buf *bp; - struct xfs_inode *quotip = xfs_dq_to_quota_inode(dqp); + struct xfs_inode *quotip; struct xfs_mount *mp = dqp->q_mount; xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); struct xfs_trans *tp = (tpp ? *tpp : NULL); uint lock_mode; + quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags); dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; lock_mode = xfs_ilock_data_map_shared(quotip); @@ -685,6 +688,56 @@ error0: } /* + * Advance to the next id in the current chunk, or if at the + * end of the chunk, skip ahead to first id in next allocated chunk + * using the SEEK_DATA interface. + */ +int +xfs_dq_get_next_id( + xfs_mount_t *mp, + uint type, + xfs_dqid_t *id, + loff_t eof) +{ + struct xfs_inode *quotip; + xfs_fsblock_t start; + loff_t offset; + uint lock; + xfs_dqid_t next_id; + int error = 0; + + /* Simple advance */ + next_id = *id + 1; + + /* If new ID is within the current chunk, advancing it sufficed */ + if (next_id % mp->m_quotainfo->qi_dqperchunk) { + *id = next_id; + return 0; + } + + /* Nope, next_id is now past the current chunk, so find the next one */ + start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk; + + quotip = xfs_quota_inode(mp, type); + lock = xfs_ilock_data_map_shared(quotip); + + offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start), + eof, SEEK_DATA); + if (offset < 0) + error = offset; + + xfs_iunlock(quotip, lock); + + /* -ENXIO is essentially "no more data" */ + if (error) + return (error == -ENXIO ? -ENOENT: error); + + /* Convert next data offset back to a quota id */ + *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk; + return 0; +} + +/* * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a * a locked dquot, doing an allocation (if requested) as needed. * When both an inode and an id are given, the inode's id takes precedence. @@ -704,6 +757,7 @@ xfs_qm_dqget( struct xfs_quotainfo *qi = mp->m_quotainfo; struct radix_tree_root *tree = xfs_dquot_tree(qi, type); struct xfs_dquot *dqp; + loff_t eof = 0; int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); @@ -731,6 +785,21 @@ xfs_qm_dqget( } #endif + /* Get the end of the quota file if we need it */ + if (flags & XFS_QMOPT_DQNEXT) { + struct xfs_inode *quotip; + xfs_fileoff_t last; + uint lock_mode; + + quotip = xfs_quota_inode(mp, type); + lock_mode = xfs_ilock_data_map_shared(quotip); + error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK); + xfs_iunlock(quotip, lock_mode); + if (error) + return error; + eof = XFS_FSB_TO_B(mp, last); + } + restart: mutex_lock(&qi->qi_tree_lock); dqp = radix_tree_lookup(tree, id); @@ -744,6 +813,18 @@ restart: goto restart; } + /* uninit / unused quota found in radix tree, keep looking */ + if (flags & XFS_QMOPT_DQNEXT) { + if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { + xfs_dqunlock(dqp); + mutex_unlock(&qi->qi_tree_lock); + error = xfs_dq_get_next_id(mp, type, &id, eof); + if (error) + return error; + goto restart; + } + } + dqp->q_nrefs++; mutex_unlock(&qi->qi_tree_lock); @@ -770,6 +851,13 @@ restart: if (ip) xfs_ilock(ip, XFS_ILOCK_EXCL); + /* If we are asked to find next active id, keep looking */ + if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) { + error = xfs_dq_get_next_id(mp, type, &id, eof); + if (!error) + goto restart; + } + if (error) return error; @@ -820,6 +908,17 @@ restart: qi->qi_dquots++; mutex_unlock(&qi->qi_tree_lock); + /* If we are asked to find next active id, keep looking */ + if (flags & XFS_QMOPT_DQNEXT) { + if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { + xfs_qm_dqput(dqp); + error = xfs_dq_get_next_id(mp, type, &id, eof); + if (error) + return error; + goto restart; + } + } + dqret: ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); trace_xfs_dqget_miss(dqp); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f7333fbba5c2..ac0fd32de31e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1337,31 +1337,31 @@ out: return found; } -STATIC loff_t -xfs_seek_hole_data( - struct file *file, +/* + * caller must lock inode with xfs_ilock_data_map_shared, + * can we craft an appropriate ASSERT? + * + * end is because the VFS-level lseek interface is defined such that any + * offset past i_size shall return -ENXIO, but we use this for quota code + * which does not maintain i_size, and we want to SEEK_DATA past i_size. + */ +loff_t +__xfs_seek_hole_data( + struct inode *inode, loff_t start, + loff_t end, int whence) { - struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; loff_t uninitialized_var(offset); - xfs_fsize_t isize; xfs_fileoff_t fsbno; - xfs_filblks_t end; - uint lock; + xfs_filblks_t lastbno; int error; - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - lock = xfs_ilock_data_map_shared(ip); - - isize = i_size_read(inode); - if (start >= isize) { + if (start >= end) { error = -ENXIO; - goto out_unlock; + goto out_error; } /* @@ -1369,22 +1369,22 @@ xfs_seek_hole_data( * by fsbno to the end block of the file. */ fsbno = XFS_B_TO_FSBT(mp, start); - end = XFS_B_TO_FSB(mp, isize); + lastbno = XFS_B_TO_FSB(mp, end); for (;;) { struct xfs_bmbt_irec map[2]; int nmap = 2; unsigned int i; - error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, + error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap, XFS_BMAPI_ENTIRE); if (error) - goto out_unlock; + goto out_error; /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { error = -ENXIO; - goto out_unlock; + goto out_error; } for (i = 0; i < nmap; i++) { @@ -1426,7 +1426,7 @@ xfs_seek_hole_data( * hole at the end of any file). */ if (whence == SEEK_HOLE) { - offset = isize; + offset = end; break; } /* @@ -1434,7 +1434,7 @@ xfs_seek_hole_data( */ ASSERT(whence == SEEK_DATA); error = -ENXIO; - goto out_unlock; + goto out_error; } ASSERT(i > 1); @@ -1445,14 +1445,14 @@ xfs_seek_hole_data( */ fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; start = XFS_FSB_TO_B(mp, fsbno); - if (start >= isize) { + if (start >= end) { if (whence == SEEK_HOLE) { - offset = isize; + offset = end; break; } ASSERT(whence == SEEK_DATA); error = -ENXIO; - goto out_unlock; + goto out_error; } } @@ -1464,7 +1464,39 @@ out: * situation in particular. */ if (whence == SEEK_HOLE) - offset = min_t(loff_t, offset, isize); + offset = min_t(loff_t, offset, end); + + return offset; + +out_error: + return error; +} + +STATIC loff_t +xfs_seek_hole_data( + struct file *file, + loff_t start, + int whence) +{ + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + uint lock; + loff_t offset, end; + int error = 0; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + lock = xfs_ilock_data_map_shared(ip); + + end = i_size_read(inode); + offset = __xfs_seek_hole_data(inode, start, end, whence); + if (offset < 0) { + error = offset; + goto out_unlock; + } + offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h index 1b6a98b66886..f32713f14f9a 100644 --- a/fs/xfs/xfs_fsops.h +++ b/fs/xfs/xfs_fsops.h @@ -25,6 +25,5 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt); extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval, xfs_fsop_resblks_t *outval); extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags); -extern int xfs_fs_log_dummy(struct xfs_mount *mp); #endif /* __XFS_FSOPS_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index d6277494e606..43e1d51b15eb 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -435,6 +435,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip, int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, xfs_fsize_t isize, bool *did_zeroing); int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); +loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start, + loff_t eof, int whence); /* from xfs_iops.c */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index bd6f23b952a5..e776594889c3 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1109,27 +1109,10 @@ xlog_verify_head( bool tmp_wrapped; /* - * Search backwards through the log looking for the log record header - * block. This wraps all the way back around to the head so something is - * seriously wrong if we can't find it. - */ - found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk, - rhead, wrapped); - if (found < 0) - return found; - if (!found) { - xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); - return -EIO; - } - - *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn)); - - /* - * Now that we have a tail block, check the head of the log for torn - * writes. Search again until we hit the tail or the maximum number of - * log record I/Os that could have been in flight at one time. Use a - * temporary buffer so we don't trash the rhead/bp pointer from the - * call above. + * Check the head of the log for torn writes. Search backwards from the + * head until we hit the tail or the maximum number of log record I/Os + * that could have been in flight at one time. Use a temporary buffer so + * we don't trash the rhead/bp pointers from the caller. */ tmp_bp = xlog_get_bp(log, 1); if (!tmp_bp) @@ -1216,6 +1199,115 @@ xlog_verify_head( } /* + * Check whether the head of the log points to an unmount record. In other + * words, determine whether the log is clean. If so, update the in-core state + * appropriately. + */ +static int +xlog_check_unmount_rec( + struct xlog *log, + xfs_daddr_t *head_blk, + xfs_daddr_t *tail_blk, + struct xlog_rec_header *rhead, + xfs_daddr_t rhead_blk, + struct xfs_buf *bp, + bool *clean) +{ + struct xlog_op_header *op_head; + xfs_daddr_t umount_data_blk; + xfs_daddr_t after_umount_blk; + int hblks; + int error; + char *offset; + + *clean = false; + + /* + * Look for unmount record. If we find it, then we know there was a + * clean unmount. Since 'i' could be the last block in the physical + * log, we convert to a log block before comparing to the head_blk. + * + * Save the current tail lsn to use to pass to xlog_clear_stale_blocks() + * below. We won't want to clear the unmount record if there is one, so + * we pass the lsn of the unmount record rather than the block after it. + */ + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + int h_size = be32_to_cpu(rhead->h_size); + int h_version = be32_to_cpu(rhead->h_version); + + if ((h_version & XLOG_VERSION_2) && + (h_size > XLOG_HEADER_CYCLE_SIZE)) { + hblks = h_size / XLOG_HEADER_CYCLE_SIZE; + if (h_size % XLOG_HEADER_CYCLE_SIZE) + hblks++; + } else { + hblks = 1; + } + } else { + hblks = 1; + } + after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)); + after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize); + if (*head_blk == after_umount_blk && + be32_to_cpu(rhead->h_num_logops) == 1) { + umount_data_blk = rhead_blk + hblks; + umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize); + error = xlog_bread(log, umount_data_blk, 1, bp, &offset); + if (error) + return error; + + op_head = (struct xlog_op_header *)offset; + if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { + /* + * Set tail and last sync so that newly written log + * records will point recovery to after the current + * unmount record. + */ + xlog_assign_atomic_lsn(&log->l_tail_lsn, + log->l_curr_cycle, after_umount_blk); + xlog_assign_atomic_lsn(&log->l_last_sync_lsn, + log->l_curr_cycle, after_umount_blk); + *tail_blk = after_umount_blk; + + *clean = true; + } + } + + return 0; +} + +static void +xlog_set_state( + struct xlog *log, + xfs_daddr_t head_blk, + struct xlog_rec_header *rhead, + xfs_daddr_t rhead_blk, + bool bump_cycle) +{ + /* + * Reset log values according to the state of the log when we + * crashed. In the case where head_blk == 0, we bump curr_cycle + * one because the next write starts a new cycle rather than + * continuing the cycle of the last good log record. At this + * point we have guaranteed that all partial log records have been + * accounted for. Therefore, we know that the last good log record + * written was complete and ended exactly on the end boundary + * of the physical log. + */ + log->l_prev_block = rhead_blk; + log->l_curr_block = (int)head_blk; + log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); + if (bump_cycle) + log->l_curr_cycle++; + atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); + atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); + xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, + BBTOB(log->l_curr_block)); + xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, + BBTOB(log->l_curr_block)); +} + +/* * Find the sync block number or the tail of the log. * * This will be the block number of the last record to have its @@ -1238,22 +1330,20 @@ xlog_find_tail( xfs_daddr_t *tail_blk) { xlog_rec_header_t *rhead; - xlog_op_header_t *op_head; char *offset = NULL; xfs_buf_t *bp; int error; - xfs_daddr_t umount_data_blk; - xfs_daddr_t after_umount_blk; xfs_daddr_t rhead_blk; xfs_lsn_t tail_lsn; - int hblks; bool wrapped = false; + bool clean = false; /* * Find previous log record */ if ((error = xlog_find_head(log, head_blk))) return error; + ASSERT(*head_blk < INT_MAX); bp = xlog_get_bp(log, 1); if (!bp) @@ -1271,100 +1361,75 @@ xlog_find_tail( } /* - * Trim the head block back to skip over torn records. We can have - * multiple log I/Os in flight at any time, so we assume CRC failures - * back through the previous several records are torn writes and skip - * them. + * Search backwards through the log looking for the log record header + * block. This wraps all the way back around to the head so something is + * seriously wrong if we can't find it. */ - ASSERT(*head_blk < INT_MAX); - error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk, - &rhead, &wrapped); - if (error) - goto done; + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, + &rhead_blk, &rhead, &wrapped); + if (error < 0) + return error; + if (!error) { + xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); + return -EIO; + } + *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); /* - * Reset log values according to the state of the log when we - * crashed. In the case where head_blk == 0, we bump curr_cycle - * one because the next write starts a new cycle rather than - * continuing the cycle of the last good log record. At this - * point we have guaranteed that all partial log records have been - * accounted for. Therefore, we know that the last good log record - * written was complete and ended exactly on the end boundary - * of the physical log. + * Set the log state based on the current head record. */ - log->l_prev_block = rhead_blk; - log->l_curr_block = (int)*head_blk; - log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); - if (wrapped) - log->l_curr_cycle++; - atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); - atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); - xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, - BBTOB(log->l_curr_block)); - xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, - BBTOB(log->l_curr_block)); + xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped); + tail_lsn = atomic64_read(&log->l_tail_lsn); /* - * Look for unmount record. If we find it, then we know there - * was a clean unmount. Since 'i' could be the last block in - * the physical log, we convert to a log block before comparing - * to the head_blk. + * Look for an unmount record at the head of the log. This sets the log + * state to determine whether recovery is necessary. + */ + error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, + rhead_blk, bp, &clean); + if (error) + goto done; + + /* + * Verify the log head if the log is not clean (e.g., we have anything + * but an unmount record at the head). This uses CRC verification to + * detect and trim torn writes. If discovered, CRC failures are + * considered torn writes and the log head is trimmed accordingly. * - * Save the current tail lsn to use to pass to - * xlog_clear_stale_blocks() below. We won't want to clear the - * unmount record if there is one, so we pass the lsn of the - * unmount record rather than the block after it. + * Note that we can only run CRC verification when the log is dirty + * because there's no guarantee that the log data behind an unmount + * record is compatible with the current architecture. */ - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - int h_size = be32_to_cpu(rhead->h_size); - int h_version = be32_to_cpu(rhead->h_version); + if (!clean) { + xfs_daddr_t orig_head = *head_blk; - if ((h_version & XLOG_VERSION_2) && - (h_size > XLOG_HEADER_CYCLE_SIZE)) { - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; - if (h_size % XLOG_HEADER_CYCLE_SIZE) - hblks++; - } else { - hblks = 1; - } - } else { - hblks = 1; - } - after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)); - after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize); - tail_lsn = atomic64_read(&log->l_tail_lsn); - if (*head_blk == after_umount_blk && - be32_to_cpu(rhead->h_num_logops) == 1) { - umount_data_blk = rhead_blk + hblks; - umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize); - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); + error = xlog_verify_head(log, head_blk, tail_blk, bp, + &rhead_blk, &rhead, &wrapped); if (error) goto done; - op_head = (xlog_op_header_t *)offset; - if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { - /* - * Set tail and last sync so that newly written - * log records will point recovery to after the - * current unmount record. - */ - xlog_assign_atomic_lsn(&log->l_tail_lsn, - log->l_curr_cycle, after_umount_blk); - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, - log->l_curr_cycle, after_umount_blk); - *tail_blk = after_umount_blk; - - /* - * Note that the unmount was clean. If the unmount - * was not clean, we need to know this to rebuild the - * superblock counters from the perag headers if we - * have a filesystem using non-persistent counters. - */ - log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; + /* update in-core state again if the head changed */ + if (*head_blk != orig_head) { + xlog_set_state(log, *head_blk, rhead, rhead_blk, + wrapped); + tail_lsn = atomic64_read(&log->l_tail_lsn); + error = xlog_check_unmount_rec(log, head_blk, tail_blk, + rhead, rhead_blk, bp, + &clean); + if (error) + goto done; } } /* + * Note that the unmount was clean. If the unmount was not clean, we + * need to know this to rebuild the superblock counters from the perag + * headers if we have a filesystem using non-persistent counters. + */ + if (clean) + log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; + + /* * Make sure that there are no blocks in front of the head * with the same cycle number as the head. This can happen * because we allow multiple outstanding log writes concurrently, @@ -2473,6 +2538,13 @@ xlog_recover_validate_buf_type( } bp->b_ops = &xfs_sb_buf_ops; break; +#ifdef CONFIG_XFS_RT + case XFS_BLFT_RTBITMAP_BUF: + case XFS_BLFT_RTSUMMARY_BUF: + /* no magic numbers for verification of RT buffers */ + bp->b_ops = &xfs_rtbuf_ops; + break; +#endif /* CONFIG_XFS_RT */ default: xfs_warn(mp, "Unknown buffer type %d!", xfs_blft_from_flags(buf_f)); @@ -4491,7 +4563,7 @@ xlog_recover_process( * know precisely what failed. */ if (pass == XLOG_RECOVER_CRCPASS) { - if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc)) + if (rhead->h_crc && crc != rhead->h_crc) return -EFSBADCRC; return 0; } @@ -4502,7 +4574,7 @@ xlog_recover_process( * zero CRC check prevents warnings from being emitted when upgrading * the kernel from one that does not add CRCs by default. */ - if (crc != le32_to_cpu(rhead->h_crc)) { + if (crc != rhead->h_crc) { if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { xfs_alert(log->l_mp, "log record CRC mismatch: found 0x%x, expected 0x%x.", diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b57098481c10..a4e03ab50342 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -327,7 +327,6 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved); extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); -extern int xfs_mount_log_sb(xfs_mount_t *); extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 532ab79d38fe..be125e1758c1 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -560,6 +560,37 @@ xfs_qm_shrink_count( return list_lru_shrink_count(&qi->qi_lru, sc); } +STATIC void +xfs_qm_set_defquota( + xfs_mount_t *mp, + uint type, + xfs_quotainfo_t *qinf) +{ + xfs_dquot_t *dqp; + struct xfs_def_quota *defq; + int error; + + error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp); + + if (!error) { + xfs_disk_dquot_t *ddqp = &dqp->q_core; + + defq = xfs_get_defquota(dqp, qinf); + + /* + * Timers and warnings have been already set, let's just set the + * default limits for this quota type + */ + defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); + defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); + defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); + defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); + defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); + defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); + xfs_qm_dqdestroy(dqp); + } +} + /* * This initializes all the quota information that's kept in the * mount structure @@ -606,19 +637,19 @@ xfs_qm_init_quotainfo( * We try to get the limits from the superuser's limits fields. * This is quite hacky, but it is standard quota practice. * - * We look at the USR dquot with id == 0 first, but if user quotas - * are not enabled we goto the GRP dquot with id == 0. - * We don't really care to keep separate default limits for user - * and group quotas, at least not at this point. - * * Since we may not have done a quotacheck by this point, just read * the dquot without attaching it to any hashtables or lists. + * + * Timers and warnings are globally set by the first timer found in + * user/group/proj quota types, otherwise a default value is used. + * This should be split into different fields per quota type. */ error = xfs_qm_dqread(mp, 0, XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : XFS_DQ_PROJ), XFS_QMOPT_DOWARN, &dqp); + if (!error) { xfs_disk_dquot_t *ddqp = &dqp->q_core; @@ -639,13 +670,6 @@ xfs_qm_init_quotainfo( be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; - qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); - qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); - qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); - qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); - qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); - qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); - xfs_qm_dqdestroy(dqp); } else { qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; @@ -656,6 +680,13 @@ xfs_qm_init_quotainfo( qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; } + if (XFS_IS_UQUOTA_RUNNING(mp)) + xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf); + if (XFS_IS_GQUOTA_RUNNING(mp)) + xfs_qm_set_defquota(mp, XFS_DQ_GROUP, qinf); + if (XFS_IS_PQUOTA_RUNNING(mp)) + xfs_qm_set_defquota(mp, XFS_DQ_PROJ, qinf); + qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 996a04064894..2975a822e9f0 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -53,6 +53,15 @@ extern struct kmem_zone *xfs_qm_dqtrxzone; */ #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 +struct xfs_def_quota { + xfs_qcnt_t bhardlimit; /* default data blk hard limit */ + xfs_qcnt_t bsoftlimit; /* default data blk soft limit */ + xfs_qcnt_t ihardlimit; /* default inode count hard limit */ + xfs_qcnt_t isoftlimit; /* default inode count soft limit */ + xfs_qcnt_t rtbhardlimit; /* default realtime blk hard limit */ + xfs_qcnt_t rtbsoftlimit; /* default realtime blk soft limit */ +}; + /* * Various quota information for individual filesystems. * The mount structure keeps a pointer to this. @@ -76,12 +85,9 @@ typedef struct xfs_quotainfo { struct mutex qi_quotaofflock;/* to serialize quotaoff */ xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ uint qi_dqperchunk; /* # ondisk dqs in above chunk */ - xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ - xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ - xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ - xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ - xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ - xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ + struct xfs_def_quota qi_usr_default; + struct xfs_def_quota qi_grp_default; + struct xfs_def_quota qi_prj_default; struct shrinker qi_shrinker; } xfs_quotainfo_t; @@ -104,15 +110,15 @@ xfs_dquot_tree( } static inline struct xfs_inode * -xfs_dq_to_quota_inode(struct xfs_dquot *dqp) +xfs_quota_inode(xfs_mount_t *mp, uint dq_flags) { - switch (dqp->dq_flags & XFS_DQ_ALLTYPES) { + switch (dq_flags & XFS_DQ_ALLTYPES) { case XFS_DQ_USER: - return dqp->q_mount->m_quotainfo->qi_uquotaip; + return mp->m_quotainfo->qi_uquotaip; case XFS_DQ_GROUP: - return dqp->q_mount->m_quotainfo->qi_gquotaip; + return mp->m_quotainfo->qi_gquotaip; case XFS_DQ_PROJ: - return dqp->q_mount->m_quotainfo->qi_pquotaip; + return mp->m_quotainfo->qi_pquotaip; default: ASSERT(0); } @@ -164,11 +170,27 @@ extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint); /* quota ops */ extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); -extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, - uint, struct qc_dqblk *); +extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t *, + uint, struct qc_dqblk *, uint); extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, struct qc_dqblk *); extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); +static inline struct xfs_def_quota * +xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi) +{ + struct xfs_def_quota *defq; + + if (XFS_QM_ISUDQ(dqp)) + defq = &qi->qi_usr_default; + else if (XFS_QM_ISGDQ(dqp)) + defq = &qi->qi_grp_default; + else { + ASSERT(XFS_QM_ISPDQ(dqp)); + defq = &qi->qi_prj_default; + } + return defq; +} + #endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 3640c6e896af..f4d0e0a8f517 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -404,6 +404,7 @@ xfs_qm_scall_setqlim( struct xfs_disk_dquot *ddq; struct xfs_dquot *dqp; struct xfs_trans *tp; + struct xfs_def_quota *defq; int error; xfs_qcnt_t hard, soft; @@ -431,6 +432,8 @@ xfs_qm_scall_setqlim( ASSERT(error != -ENOENT); goto out_unlock; } + + defq = xfs_get_defquota(dqp, q); xfs_dqunlock(dqp); tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); @@ -458,8 +461,8 @@ xfs_qm_scall_setqlim( ddq->d_blk_softlimit = cpu_to_be64(soft); xfs_dquot_set_prealloc_limits(dqp); if (id == 0) { - q->qi_bhardlimit = hard; - q->qi_bsoftlimit = soft; + defq->bhardlimit = hard; + defq->bsoftlimit = soft; } } else { xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); @@ -474,8 +477,8 @@ xfs_qm_scall_setqlim( ddq->d_rtb_hardlimit = cpu_to_be64(hard); ddq->d_rtb_softlimit = cpu_to_be64(soft); if (id == 0) { - q->qi_rtbhardlimit = hard; - q->qi_rtbsoftlimit = soft; + defq->rtbhardlimit = hard; + defq->rtbsoftlimit = soft; } } else { xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); @@ -491,8 +494,8 @@ xfs_qm_scall_setqlim( ddq->d_ino_hardlimit = cpu_to_be64(hard); ddq->d_ino_softlimit = cpu_to_be64(soft); if (id == 0) { - q->qi_ihardlimit = hard; - q->qi_isoftlimit = soft; + defq->ihardlimit = hard; + defq->isoftlimit = soft; } } else { xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft); @@ -635,9 +638,10 @@ out: int xfs_qm_scall_getquota( struct xfs_mount *mp, - xfs_dqid_t id, + xfs_dqid_t *id, uint type, - struct qc_dqblk *dst) + struct qc_dqblk *dst, + uint dqget_flags) { struct xfs_dquot *dqp; int error; @@ -647,7 +651,7 @@ xfs_qm_scall_getquota( * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't * exist, we'll get ENOENT back. */ - error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp); + error = xfs_qm_dqget(mp, NULL, *id, type, dqget_flags, &dqp); if (error) return error; @@ -660,6 +664,9 @@ xfs_qm_scall_getquota( goto out_put; } + /* Fill in the ID we actually read from disk */ + *id = be32_to_cpu(dqp->q_core.d_id); + memset(dst, 0, sizeof(*dst)); dst->d_spc_hardlimit = XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); @@ -701,7 +708,7 @@ xfs_qm_scall_getquota( if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) || (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) || (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) && - id != 0) { + *id != 0) { if ((dst->d_space > dst->d_spc_softlimit) && (dst->d_spc_softlimit > 0)) { ASSERT(dst->d_spc_timer != 0); diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 7795e0d01382..f82d79a8c694 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -231,14 +231,45 @@ xfs_fs_get_dqblk( struct qc_dqblk *qdq) { struct xfs_mount *mp = XFS_M(sb); + xfs_dqid_t id; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), - xfs_quota_type(qid.type), qdq); + id = from_kqid(&init_user_ns, qid); + return xfs_qm_scall_getquota(mp, &id, + xfs_quota_type(qid.type), qdq, 0); +} + +/* Return quota info for active quota >= this qid */ +STATIC int +xfs_fs_get_nextdqblk( + struct super_block *sb, + struct kqid *qid, + struct qc_dqblk *qdq) +{ + int ret; + struct xfs_mount *mp = XFS_M(sb); + xfs_dqid_t id; + + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + if (!XFS_IS_QUOTA_ON(mp)) + return -ESRCH; + + id = from_kqid(&init_user_ns, *qid); + ret = xfs_qm_scall_getquota(mp, &id, + xfs_quota_type(qid->type), qdq, + XFS_QMOPT_DQNEXT); + if (ret) + return ret; + + /* ID may be different, so convert back what we got */ + *qid = make_kqid(current_user_ns(), qid->type, id); + return 0; + } STATIC int @@ -267,5 +298,6 @@ const struct quotactl_ops xfs_quotactl_operations = { .quota_disable = xfs_quota_disable, .rm_xquota = xfs_fs_rm_xquota, .get_dqblk = xfs_fs_get_dqblk, + .get_nextdqblk = xfs_fs_get_nextdqblk, .set_dqblk = xfs_fs_set_dqblk, }; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 391d797cb53f..c8d58426008e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1296,11 +1296,7 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found); DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); DEFINE_IOMAP_EVENT(xfs_get_blocks_found); DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio); +DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct); DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), @@ -1340,6 +1336,9 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); +DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write); +DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten); +DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 4f18fd92ca13..d6c9c3e9e02b 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -497,6 +497,7 @@ xfsaild( long tout = 0; /* milliseconds */ current->flags |= PF_MEMALLOC; + set_freezable(); while (!kthread_should_stop()) { if (tout && tout <= 20) @@ -519,14 +520,14 @@ xfsaild( if (!xfs_ail_min(ailp) && ailp->xa_target == ailp->xa_target_prev) { spin_unlock(&ailp->xa_lock); - schedule(); + freezable_schedule(); tout = 0; continue; } spin_unlock(&ailp->xa_lock); if (tout) - schedule_timeout(msecs_to_jiffies(tout)); + freezable_schedule_timeout(msecs_to_jiffies(tout)); __set_current_state(TASK_RUNNING); diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 995170194df0..c3d547211d16 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -609,17 +609,20 @@ xfs_trans_dqresv( xfs_qcnt_t total_count; xfs_qcnt_t *resbcountp; xfs_quotainfo_t *q = mp->m_quotainfo; + struct xfs_def_quota *defq; xfs_dqlock(dqp); + defq = xfs_get_defquota(dqp, q); + if (flags & XFS_TRANS_DQ_RES_BLKS) { hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); if (!hardlimit) - hardlimit = q->qi_bhardlimit; + hardlimit = defq->bhardlimit; softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); if (!softlimit) - softlimit = q->qi_bsoftlimit; + softlimit = defq->bsoftlimit; timer = be32_to_cpu(dqp->q_core.d_btimer); warns = be16_to_cpu(dqp->q_core.d_bwarns); warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; @@ -628,10 +631,10 @@ xfs_trans_dqresv( ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); if (!hardlimit) - hardlimit = q->qi_rtbhardlimit; + hardlimit = defq->rtbhardlimit; softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); if (!softlimit) - softlimit = q->qi_rtbsoftlimit; + softlimit = defq->rtbsoftlimit; timer = be32_to_cpu(dqp->q_core.d_rtbtimer); warns = be16_to_cpu(dqp->q_core.d_rtbwarns); warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; @@ -672,10 +675,10 @@ xfs_trans_dqresv( warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); if (!hardlimit) - hardlimit = q->qi_ihardlimit; + hardlimit = defq->ihardlimit; softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); if (!softlimit) - softlimit = q->qi_isoftlimit; + softlimit = defq->isoftlimit; if (hardlimit && total_count > hardlimit) { xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1a2046275cdf..d7f37bfcbdce 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -70,7 +70,7 @@ extern int sysctl_protected_hardlinks; struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); -typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, +typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private); typedef void (dax_iodone_t)(struct buffer_head *bh_map, int uptodate); diff --git a/include/linux/quota.h b/include/linux/quota.h index b2505acfd3c0..fba92f5c1a63 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -425,6 +425,8 @@ struct quotactl_ops { int (*quota_sync)(struct super_block *, int); int (*set_info)(struct super_block *, int, struct qc_info *); int (*get_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); + int (*get_nextdqblk)(struct super_block *, struct kqid *, + struct qc_dqblk *); int (*set_dqblk)(struct super_block *, struct kqid, struct qc_dqblk *); int (*get_state)(struct super_block *, struct qc_state *); int (*rm_xquota)(struct super_block *, unsigned int); diff --git a/include/uapi/linux/dqblk_xfs.h b/include/uapi/linux/dqblk_xfs.h index dcd75cc26196..11b3b31faf14 100644 --- a/include/uapi/linux/dqblk_xfs.h +++ b/include/uapi/linux/dqblk_xfs.h @@ -39,6 +39,7 @@ #define Q_XQUOTARM XQM_CMD(6) /* free disk space used by dquots */ #define Q_XQUOTASYNC XQM_CMD(7) /* delalloc flush, updates dquots */ #define Q_XGETQSTATV XQM_CMD(8) /* newer version of get quota */ +#define Q_XGETNEXTQUOTA XQM_CMD(9) /* get disk limits and usage >= ID */ /* * fs_disk_quota structure: diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h index 9c95b2c1c88a..38baddb807f5 100644 --- a/include/uapi/linux/quota.h +++ b/include/uapi/linux/quota.h @@ -71,6 +71,7 @@ #define Q_SETINFO 0x800006 /* set information about quota files */ #define Q_GETQUOTA 0x800007 /* get user quota structure */ #define Q_SETQUOTA 0x800008 /* set user quota structure */ +#define Q_GETNEXTQUOTA 0x800009 /* get disk limits and usage >= ID */ /* Quota format type IDs */ #define QFMT_VFS_OLD 1 @@ -119,6 +120,19 @@ struct if_dqblk { __u32 dqb_valid; }; +struct if_nextdqblk { + __u64 dqb_bhardlimit; + __u64 dqb_bsoftlimit; + __u64 dqb_curspace; + __u64 dqb_ihardlimit; + __u64 dqb_isoftlimit; + __u64 dqb_curinodes; + __u64 dqb_btime; + __u64 dqb_itime; + __u32 dqb_valid; + __u32 dqb_id; +}; + /* * Structure used for setting quota information about file via quotactl * Following flags are used to specify which fields are valid |