From 04abeb699ddce800837c4039ea1cc7d4d139bb36 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Fri, 5 May 2023 13:40:00 -0700 Subject: f2fs: close unused open zones while mounting Zoned UFS allows only 6 open zones at the same time, so we need to take care of the count of open zones while mounting. Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 53 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 22 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6db410f1bb8c..43d537d29b52 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4810,40 +4810,49 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, break; } - /* - * If last valid block is beyond the write pointer, report the - * inconsistency. This inconsistency does not cause write error - * because the zone will not be selected for write operation until - * it get discarded. Just report it. - */ - if (last_valid_block >= wp_block) { - f2fs_notice(sbi, "Valid block beyond write pointer: " - "valid block[0x%x,0x%x] wp[0x%x,0x%x]", - GET_SEGNO(sbi, last_valid_block), - GET_BLKOFF_FROM_SEG0(sbi, last_valid_block), - wp_segno, wp_blkoff); + // The write pointer matches with the valid blocks + if (last_valid_block + 1 == wp_block) return 0; - } - /* - * If there is no valid block in the zone and if write pointer is - * not at zone start, reset the write pointer. - */ - if (last_valid_block + 1 == zone_block && zone->wp != zone->start) { + if (last_valid_block + 1 == zone_block) { + /* + * If there is no valid block in the zone and if write pointer + * is not at zone start, reset the write pointer. + */ f2fs_notice(sbi, "Zone without valid block has non-zero write " "pointer. Reset the write pointer: wp[0x%x,0x%x]", wp_segno, wp_blkoff); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, zone->len >> log_sectors_per_block); - if (ret) { + if (ret) f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", fdev->path, ret); - return ret; - } + + return ret; } - return 0; + /* + * If there are valid blocks and the write pointer doesn't + * match with them, we need to report the inconsistency and + * fill the zone till the end to close the zone. This inconsistency + * does not cause write error because the zone will not be selected + * for write operation until it get discarded. + */ + f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: " + "valid block[0x%x,0x%x] wp[0x%x,0x%x]", + GET_SEGNO(sbi, last_valid_block), + GET_BLKOFF_FROM_SEG0(sbi, last_valid_block), + wp_segno, wp_blkoff); + + ret = blkdev_issue_zeroout(fdev->bdev, zone->wp, + zone->len - (zone->wp - zone->start), + GFP_NOFS, 0); + if (ret) + f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)", + fdev->path, ret); + + return ret; } static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi, -- cgit From bfd476623999118d9c509cb0fa9380f2912bc225 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 23 May 2023 20:35:21 +0800 Subject: f2fs: clean up w/ sbi->log_sectors_per_block Use sbi->log_sectors_per_block to clean up below calculated one: unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 43d537d29b52..9282399cc810 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4768,17 +4768,17 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, { unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno; block_t zone_block, wp_block, last_valid_block; - unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; int i, s, b, ret; struct seg_entry *se; if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; - wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block); + wp_block = fdev->start_blk + (zone->wp >> sbi->log_sectors_per_block); wp_segno = GET_SEGNO(sbi, wp_block); wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); - zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block); + zone_block = fdev->start_blk + (zone->start >> + sbi->log_sectors_per_block); zone_segno = GET_SEGNO(sbi, zone_block); zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno); @@ -4824,7 +4824,7 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, "pointer. Reset the write pointer: wp[0x%x,0x%x]", wp_segno, wp_blkoff); ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block, - zone->len >> log_sectors_per_block); + zone->len >> sbi->log_sectors_per_block); if (ret) f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", fdev->path, ret); @@ -4885,7 +4885,6 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) struct blk_zone zone; unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off; block_t cs_zone_block, wp_block; - unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; sector_t zone_sector; int err; @@ -4897,8 +4896,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) return 0; /* report zone for the sector the curseg points to */ - zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) - << log_sectors_per_block; + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << + sbi->log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { @@ -4910,10 +4909,10 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ) return 0; - wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block); + wp_block = zbd->start_blk + (zone.wp >> sbi->log_sectors_per_block); wp_segno = GET_SEGNO(sbi, wp_block); wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno); - wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0); + wp_sector_off = zone.wp & GENMASK(sbi->log_sectors_per_block - 1, 0); if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff && wp_sector_off == 0) @@ -4940,8 +4939,8 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) if (!zbd) return 0; - zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) - << log_sectors_per_block; + zone_sector = (sector_t)(cs_zone_block - zbd->start_blk) << + sbi->log_sectors_per_block; err = blkdev_report_zones(zbd->bdev, zone_sector, 1, report_one_zone_cb, &zone); if (err != 1) { @@ -4959,7 +4958,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) "Reset the zone: curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff); err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block, - zone.len >> log_sectors_per_block); + zone.len >> sbi->log_sectors_per_block); if (err) { f2fs_err(sbi, "Discard zone failed: %s (errno=%d)", zbd->path, err); -- cgit From 25f9080576b9549be9435123d7e45bfeebd2dc97 Mon Sep 17 00:00:00 2001 From: Daejun Park Date: Mon, 8 May 2023 17:10:42 +0900 Subject: f2fs: add async reset zone command support This patch enables submit reset zone command asynchornously. It helps decrease average latency of write IOs in high utilization scenario by faster checkpointing. Signed-off-by: Daejun Park Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/iostat.c | 1 + fs/f2fs/segment.c | 84 +++++++++++++++++++++++++++++++++++++++++++-- include/trace/events/f2fs.h | 24 +++++++++++-- 4 files changed, 104 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 86ac5f599575..4b249716ae7b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1176,6 +1176,7 @@ enum iostat_type { /* other */ FS_DISCARD_IO, /* discard */ FS_FLUSH_IO, /* flush */ + FS_ZONE_RESET_IO, /* zone reset */ NR_IO_TYPE, }; diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c index 3d5bfb1ad585..f8703038e1d8 100644 --- a/fs/f2fs/iostat.c +++ b/fs/f2fs/iostat.c @@ -80,6 +80,7 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset) seq_puts(seq, "[OTHER]\n"); IOSTAT_INFO_SHOW("fs discard", FS_DISCARD_IO); IOSTAT_INFO_SHOW("fs flush", FS_FLUSH_IO); + IOSTAT_INFO_SHOW("fs zone reset", FS_ZONE_RESET_IO); return 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9282399cc810..0c0c033c4bdd 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1196,6 +1196,45 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, static void __update_discard_tree_range(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len); + +#ifdef CONFIG_BLK_DEV_ZONED +static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc, blk_opf_t flag, + struct list_head *wait_list, + unsigned int *issued) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct block_device *bdev = dc->bdev; + struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS); + unsigned long flags; + + trace_f2fs_issue_reset_zone(bdev, dc->di.start); + + spin_lock_irqsave(&dc->lock, flags); + dc->state = D_SUBMIT; + dc->bio_ref++; + spin_unlock_irqrestore(&dc->lock, flags); + + if (issued) + (*issued)++; + + atomic_inc(&dcc->queued_discard); + dc->queued++; + list_move_tail(&dc->list, wait_list); + + /* sanity check on discard range */ + __check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len); + + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start); + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + submit_bio(bio); + + atomic_inc(&dcc->issued_discard); + f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE); +} +#endif + /* this function is copied from blkdev_issue_discard from block/blk-lib.c */ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, struct discard_policy *dpolicy, @@ -1217,6 +1256,13 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) return 0; +#ifdef CONFIG_BLK_DEV_ZONED + if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) { + __submit_zone_reset_cmd(sbi, dc, flag, wait_list, issued); + return 0; + } +#endif + trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len); lstart = dc->di.lstart; @@ -1461,6 +1507,19 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, } } +#ifdef CONFIG_BLK_DEV_ZONED +static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t lblkstart, + block_t blklen) +{ + trace_f2fs_queue_reset_zone(bdev, blkstart); + + mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock); + __insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen); + mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock); +} +#endif + static void __queue_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { @@ -1724,6 +1783,19 @@ static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) mutex_lock(&dcc->cmd_lock); dc = __lookup_discard_cmd(sbi, blkaddr); +#ifdef CONFIG_BLK_DEV_ZONED + if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) { + /* force submit zone reset */ + if (dc->state == D_PREP) + __submit_zone_reset_cmd(sbi, dc, REQ_SYNC, + &dcc->wait_list, NULL); + dc->ref++; + mutex_unlock(&dcc->cmd_lock); + /* wait zone reset */ + __wait_one_discard_bio(sbi, dc); + return; + } +#endif if (dc) { if (dc->state == D_PREP) { __punch_discard_cmd(sbi, dc, blkaddr); @@ -1876,9 +1948,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, blkstart, blklen); return -EIO; } - trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, - sector, nr_sects, GFP_NOFS); + + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) { + trace_f2fs_issue_reset_zone(bdev, blkstart); + return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + sector, nr_sects, GFP_NOFS); + } + + __queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen); + return 0; } /* For conventional zones, use regular discard if supported */ diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 99cbc5949e3c..793f82cc1515 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1512,7 +1512,7 @@ DEFINE_EVENT(f2fs_discard, f2fs_remove_discard, TP_ARGS(dev, blkstart, blklen) ); -TRACE_EVENT(f2fs_issue_reset_zone, +DECLARE_EVENT_CLASS(f2fs_reset_zone, TP_PROTO(struct block_device *dev, block_t blkstart), @@ -1528,11 +1528,25 @@ TRACE_EVENT(f2fs_issue_reset_zone, __entry->blkstart = blkstart; ), - TP_printk("dev = (%d,%d), reset zone at block = 0x%llx", + TP_printk("dev = (%d,%d), zone at block = 0x%llx", show_dev(__entry->dev), (unsigned long long)__entry->blkstart) ); +DEFINE_EVENT(f2fs_reset_zone, f2fs_queue_reset_zone, + + TP_PROTO(struct block_device *dev, block_t blkstart), + + TP_ARGS(dev, blkstart) +); + +DEFINE_EVENT(f2fs_reset_zone, f2fs_issue_reset_zone, + + TP_PROTO(struct block_device *dev, block_t blkstart), + + TP_ARGS(dev, blkstart) +); + TRACE_EVENT(f2fs_issue_flush, TP_PROTO(struct block_device *dev, unsigned int nobarrier, @@ -1979,6 +1993,7 @@ TRACE_EVENT(f2fs_iostat, __field(unsigned long long, fs_nrio) __field(unsigned long long, fs_mrio) __field(unsigned long long, fs_discard) + __field(unsigned long long, fs_reset_zone) ), TP_fast_assign( @@ -2010,12 +2025,14 @@ TRACE_EVENT(f2fs_iostat, __entry->fs_nrio = iostat[FS_NODE_READ_IO]; __entry->fs_mrio = iostat[FS_META_READ_IO]; __entry->fs_discard = iostat[FS_DISCARD_IO]; + __entry->fs_reset_zone = iostat[FS_ZONE_RESET_IO]; ), TP_printk("dev = (%d,%d), " "app [write=%llu (direct=%llu, buffered=%llu), mapped=%llu, " "compr(buffered=%llu, mapped=%llu)], " - "fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu], " + "fs [data=%llu, cdata=%llu, node=%llu, meta=%llu, discard=%llu, " + "reset_zone=%llu], " "gc [data=%llu, node=%llu], " "cp [data=%llu, node=%llu, meta=%llu], " "app [read=%llu (direct=%llu, buffered=%llu), mapped=%llu], " @@ -2026,6 +2043,7 @@ TRACE_EVENT(f2fs_iostat, __entry->app_bio, __entry->app_mio, __entry->app_bcdio, __entry->app_mcdio, __entry->fs_dio, __entry->fs_cdio, __entry->fs_nio, __entry->fs_mio, __entry->fs_discard, + __entry->fs_reset_zone, __entry->fs_gc_dio, __entry->fs_gc_nio, __entry->fs_cp_dio, __entry->fs_cp_nio, __entry->fs_cp_mio, __entry->app_rio, __entry->app_drio, __entry->app_brio, -- cgit From c9667b19e2cf13735fe2620f9d97b788897cd4af Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 12 Jun 2023 16:32:03 -0700 Subject: f2fs: check zone write pointer points to the end of zone We don't need to report an issue, when the zone write pointer already points to the end of the zone, since the zone mismatch is already taken care. Signed-off-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0c0c033c4bdd..8c7af8b4fc47 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4888,8 +4888,12 @@ static int check_zone_write_pointer(struct f2fs_sb_info *sbi, break; } - // The write pointer matches with the valid blocks - if (last_valid_block + 1 == wp_block) + /* + * The write pointer matches with the valid blocks or + * already points to the end of the zone. + */ + if ((last_valid_block + 1 == wp_block) || + (zone->wp == zone->start + zone->len)) return 0; if (last_valid_block + 1 == zone_block) { -- cgit From 9ac00e7cef106b66611e131f59f61f5ae35cf726 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 13 Jun 2023 13:35:31 -0700 Subject: f2fs: do not issue small discard commands during checkpoint If there're huge # of small discards, this will increase checkpoint latency insanely. Let's issue small discards only by trim. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs/segment.c') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8c7af8b4fc47..0457d620011f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2193,7 +2193,7 @@ find_next: len = next_pos - cur_pos; if (f2fs_sb_has_blkzoned(sbi) || - (force && len < cpc->trim_minlen)) + !force || len < cpc->trim_minlen) goto skip; f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, -- cgit