Diffstat (limited to 'drivers/lightnvm')
 -rw-r--r--  drivers/lightnvm/Kconfig  |   42
 -rw-r--r--  drivers/lightnvm/Makefile |    7
 -rw-r--r--  drivers/lightnvm/core.c   |  829
 -rw-r--r--  drivers/lightnvm/gennvm.c |  518
 -rw-r--r--  drivers/lightnvm/gennvm.h |   48
 -rw-r--r--  drivers/lightnvm/rrpc.c   | 1354
 -rw-r--r--  drivers/lightnvm/rrpc.h   |  239
7 files changed, 3037 insertions, 0 deletions
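The patch splits into a core (core.c), a generic media manager (gennvm.c/h) and one target (rrpc.c/h). For orientation, the sketch below shows roughly how a target module plugs into the core API added in this patch; the callbacks mirror the struct nvm_tgt_type fields that core.c dereferences (make_rq, capacity, init, exit), but the target name, the stub bodies and anything not visible in the diff are assumptions, not part of the commit.

	#include <linux/module.h>
	#include <linux/err.h>
	#include <linux/blkdev.h>
	#include <linux/lightnvm.h>

	/* Hypothetical do-nothing target, for illustration only. */
	static void *demo_init(struct nvm_dev *dev, struct gendisk *tdisk,
			       int lun_begin, int lun_end)
	{
		/* a real target allocates and returns its private data here */
		return ERR_PTR(-ENOSYS);
	}

	static void demo_exit(void *private)
	{
	}

	static sector_t demo_capacity(void *private)
	{
		return 0;	/* sectors exposed through the created gendisk */
	}

	static blk_qc_t demo_make_rq(struct request_queue *q, struct bio *bio)
	{
		/* a real target maps the bio and calls nvm_submit_io() */
		bio_io_error(bio);
		return BLK_QC_T_NONE;
	}

	static struct nvm_tgt_type tt_demo = {
		.name		= "demo",
		.version	= {0, 0, 1},
		.make_rq	= demo_make_rq,
		.capacity	= demo_capacity,
		.init		= demo_init,
		.exit		= demo_exit,
	};

	static int __init demo_module_init(void)
	{
		/* adds the type to the nvm_targets list kept by core.c */
		return nvm_register_target(&tt_demo);
	}

	static void __exit demo_module_exit(void)
	{
		nvm_unregister_target(&tt_demo);
	}

	module_init(demo_module_init);
	module_exit(demo_module_exit);
	MODULE_LICENSE("GPL v2");

A real target additionally provides an end_io callback (the media manager invokes it on completion, as gennvm_end_io() below shows) and derives its capacity from the LUN range handed to init(), as rrpc does further down in the patch.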
| diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig new file mode 100644 index 000000000000..a16bf56d3f28 --- /dev/null +++ b/drivers/lightnvm/Kconfig @@ -0,0 +1,42 @@ +# +# Open-Channel SSD NVM configuration +# + +menuconfig NVM +	bool "Open-Channel SSD target support" +	depends on BLOCK +	help +	  Say Y here to get to enable Open-channel SSDs. + +	  Open-Channel SSDs implement a set of extension to SSDs, that +	  exposes direct access to the underlying non-volatile memory. + +	  If you say N, all options in this submenu will be skipped and disabled +	  only do this if you know what you are doing. + +if NVM + +config NVM_DEBUG +	bool "Open-Channel SSD debugging support" +	---help--- +	Exposes a debug management interface to create/remove targets at: + +	  /sys/module/lnvm/parameters/configure_debug + +	It is required to create/remove targets without IOCTLs. + +config NVM_GENNVM +	tristate "Generic NVM manager for Open-Channel SSDs" +	---help--- +	NVM media manager for Open-Channel SSDs that offload management +	functionality to device, while keeping data placement and garbage +	collection decisions on the host. + +config NVM_RRPC +	tristate "Round-robin Hybrid Open-Channel SSD target" +	---help--- +	Allows an open-channel SSD to be exposed as a block device to the +	host. The target is implemented using a linear mapping table and +	cost-based garbage collection. It is optimized for 4K IO sizes. + +endif # NVM diff --git a/drivers/lightnvm/Makefile b/drivers/lightnvm/Makefile new file mode 100644 index 000000000000..7e0f42acb737 --- /dev/null +++ b/drivers/lightnvm/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for Open-Channel SSDs. +# + +obj-$(CONFIG_NVM)		:= core.o +obj-$(CONFIG_NVM_GENNVM) 	+= gennvm.o +obj-$(CONFIG_NVM_RRPC)		+= rrpc.o diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c new file mode 100644 index 000000000000..5178645ac42b --- /dev/null +++ b/drivers/lightnvm/core.c @@ -0,0 +1,829 @@ +/* + * Copyright (C) 2015 IT University of Copenhagen. All rights reserved. + * Initial release: Matias Bjorling <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING.  If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, + * USA. 
+ * + */ + +#include <linux/blkdev.h> +#include <linux/blk-mq.h> +#include <linux/list.h> +#include <linux/types.h> +#include <linux/sem.h> +#include <linux/bitmap.h> +#include <linux/module.h> +#include <linux/miscdevice.h> +#include <linux/lightnvm.h> +#include <uapi/linux/lightnvm.h> + +static LIST_HEAD(nvm_targets); +static LIST_HEAD(nvm_mgrs); +static LIST_HEAD(nvm_devices); +static DECLARE_RWSEM(nvm_lock); + +static struct nvm_tgt_type *nvm_find_target_type(const char *name) +{ +	struct nvm_tgt_type *tt; + +	list_for_each_entry(tt, &nvm_targets, list) +		if (!strcmp(name, tt->name)) +			return tt; + +	return NULL; +} + +int nvm_register_target(struct nvm_tgt_type *tt) +{ +	int ret = 0; + +	down_write(&nvm_lock); +	if (nvm_find_target_type(tt->name)) +		ret = -EEXIST; +	else +		list_add(&tt->list, &nvm_targets); +	up_write(&nvm_lock); + +	return ret; +} +EXPORT_SYMBOL(nvm_register_target); + +void nvm_unregister_target(struct nvm_tgt_type *tt) +{ +	if (!tt) +		return; + +	down_write(&nvm_lock); +	list_del(&tt->list); +	up_write(&nvm_lock); +} +EXPORT_SYMBOL(nvm_unregister_target); + +void *nvm_dev_dma_alloc(struct nvm_dev *dev, gfp_t mem_flags, +							dma_addr_t *dma_handler) +{ +	return dev->ops->dev_dma_alloc(dev->q, dev->ppalist_pool, mem_flags, +								dma_handler); +} +EXPORT_SYMBOL(nvm_dev_dma_alloc); + +void nvm_dev_dma_free(struct nvm_dev *dev, void *ppa_list, +							dma_addr_t dma_handler) +{ +	dev->ops->dev_dma_free(dev->ppalist_pool, ppa_list, dma_handler); +} +EXPORT_SYMBOL(nvm_dev_dma_free); + +static struct nvmm_type *nvm_find_mgr_type(const char *name) +{ +	struct nvmm_type *mt; + +	list_for_each_entry(mt, &nvm_mgrs, list) +		if (!strcmp(name, mt->name)) +			return mt; + +	return NULL; +} + +int nvm_register_mgr(struct nvmm_type *mt) +{ +	int ret = 0; + +	down_write(&nvm_lock); +	if (nvm_find_mgr_type(mt->name)) +		ret = -EEXIST; +	else +		list_add(&mt->list, &nvm_mgrs); +	up_write(&nvm_lock); + +	return ret; +} +EXPORT_SYMBOL(nvm_register_mgr); + +void nvm_unregister_mgr(struct nvmm_type *mt) +{ +	if (!mt) +		return; + +	down_write(&nvm_lock); +	list_del(&mt->list); +	up_write(&nvm_lock); +} +EXPORT_SYMBOL(nvm_unregister_mgr); + +static struct nvm_dev *nvm_find_nvm_dev(const char *name) +{ +	struct nvm_dev *dev; + +	list_for_each_entry(dev, &nvm_devices, devices) +		if (!strcmp(name, dev->name)) +			return dev; + +	return NULL; +} + +struct nvm_block *nvm_get_blk(struct nvm_dev *dev, struct nvm_lun *lun, +							unsigned long flags) +{ +	return dev->mt->get_blk(dev, lun, flags); +} +EXPORT_SYMBOL(nvm_get_blk); + +/* Assumes that all valid pages have already been moved on release to bm */ +void nvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk) +{ +	return dev->mt->put_blk(dev, blk); +} +EXPORT_SYMBOL(nvm_put_blk); + +int nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) +{ +	return dev->mt->submit_io(dev, rqd); +} +EXPORT_SYMBOL(nvm_submit_io); + +int nvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk) +{ +	return dev->mt->erase_blk(dev, blk, 0); +} +EXPORT_SYMBOL(nvm_erase_blk); + +static int nvm_core_init(struct nvm_dev *dev) +{ +	struct nvm_id *id = &dev->identity; +	struct nvm_id_group *grp = &id->groups[0]; + +	/* device values */ +	dev->nr_chnls = grp->num_ch; +	dev->luns_per_chnl = grp->num_lun; +	dev->pgs_per_blk = grp->num_pg; +	dev->blks_per_lun = grp->num_blk; +	dev->nr_planes = grp->num_pln; +	dev->sec_size = grp->csecs; +	dev->oob_size = grp->sos; +	dev->sec_per_pg = grp->fpg_sz / grp->csecs; +	memcpy(&dev->ppaf, &id->ppaf, sizeof(struct 
nvm_addr_format)); + +	dev->plane_mode = NVM_PLANE_SINGLE; +	dev->max_rq_size = dev->ops->max_phys_sect * dev->sec_size; + +	if (grp->mtype != 0) { +		pr_err("nvm: memory type not supported\n"); +		return -EINVAL; +	} + +	if (grp->fmtype != 0 && grp->fmtype != 1) { +		pr_err("nvm: flash type not supported\n"); +		return -EINVAL; +	} + +	if (grp->mpos & 0x020202) +		dev->plane_mode = NVM_PLANE_DOUBLE; +	if (grp->mpos & 0x040404) +		dev->plane_mode = NVM_PLANE_QUAD; + +	/* calculated values */ +	dev->sec_per_pl = dev->sec_per_pg * dev->nr_planes; +	dev->sec_per_blk = dev->sec_per_pl * dev->pgs_per_blk; +	dev->sec_per_lun = dev->sec_per_blk * dev->blks_per_lun; +	dev->nr_luns = dev->luns_per_chnl * dev->nr_chnls; + +	dev->total_blocks = dev->nr_planes * +				dev->blks_per_lun * +				dev->luns_per_chnl * +				dev->nr_chnls; +	dev->total_pages = dev->total_blocks * dev->pgs_per_blk; +	INIT_LIST_HEAD(&dev->online_targets); + +	return 0; +} + +static void nvm_free(struct nvm_dev *dev) +{ +	if (!dev) +		return; + +	if (dev->mt) +		dev->mt->unregister_mgr(dev); +} + +static int nvm_init(struct nvm_dev *dev) +{ +	struct nvmm_type *mt; +	int ret = -EINVAL; + +	if (!dev->q || !dev->ops) +		return ret; + +	if (dev->ops->identity(dev->q, &dev->identity)) { +		pr_err("nvm: device could not be identified\n"); +		goto err; +	} + +	pr_debug("nvm: ver:%x nvm_vendor:%x groups:%u\n", +			dev->identity.ver_id, dev->identity.vmnt, +							dev->identity.cgrps); + +	if (dev->identity.ver_id != 1) { +		pr_err("nvm: device not supported by kernel."); +		goto err; +	} + +	if (dev->identity.cgrps != 1) { +		pr_err("nvm: only one group configuration supported."); +		goto err; +	} + +	ret = nvm_core_init(dev); +	if (ret) { +		pr_err("nvm: could not initialize core structures.\n"); +		goto err; +	} + +	/* register with device with a supported manager */ +	list_for_each_entry(mt, &nvm_mgrs, list) { +		ret = mt->register_mgr(dev); +		if (ret < 0) +			goto err; /* initialization failed */ +		if (ret > 0) { +			dev->mt = mt; +			break; /* successfully initialized */ +		} +	} + +	if (!ret) { +		pr_info("nvm: no compatible manager found.\n"); +		return 0; +	} + +	pr_info("nvm: registered %s [%u/%u/%u/%u/%u/%u]\n", +			dev->name, dev->sec_per_pg, dev->nr_planes, +			dev->pgs_per_blk, dev->blks_per_lun, dev->nr_luns, +			dev->nr_chnls); +	return 0; +err: +	pr_err("nvm: failed to initialize nvm\n"); +	return ret; +} + +static void nvm_exit(struct nvm_dev *dev) +{ +	if (dev->ppalist_pool) +		dev->ops->destroy_dma_pool(dev->ppalist_pool); +	nvm_free(dev); + +	pr_info("nvm: successfully unloaded\n"); +} + +int nvm_register(struct request_queue *q, char *disk_name, +							struct nvm_dev_ops *ops) +{ +	struct nvm_dev *dev; +	int ret; + +	if (!ops->identity) +		return -EINVAL; + +	dev = kzalloc(sizeof(struct nvm_dev), GFP_KERNEL); +	if (!dev) +		return -ENOMEM; + +	dev->q = q; +	dev->ops = ops; +	strncpy(dev->name, disk_name, DISK_NAME_LEN); + +	ret = nvm_init(dev); +	if (ret) +		goto err_init; + +	if (dev->ops->max_phys_sect > 1) { +		dev->ppalist_pool = dev->ops->create_dma_pool(dev->q, +								"ppalist"); +		if (!dev->ppalist_pool) { +			pr_err("nvm: could not create ppa pool\n"); +			ret = -ENOMEM; +			goto err_init; +		} +	} else if (dev->ops->max_phys_sect > 256) { +		pr_info("nvm: max sectors supported is 256.\n"); +		ret = -EINVAL; +		goto err_init; +	} + +	down_write(&nvm_lock); +	list_add(&dev->devices, &nvm_devices); +	up_write(&nvm_lock); + +	return 0; +err_init: +	kfree(dev); +	return ret; +} 
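/*
 * Editorial sketch, not part of the patch: a low-level driver (for example an
 * NVMe host driver) is expected to hand its request queue, disk name and an
 * ops table to nvm_register() above, roughly:
 *
 *	static struct nvm_dev_ops demo_nvm_ops = {
 *		.identity	= demo_nvm_identity,
 *		.max_phys_sect	= 64,
 *		...
 *	};
 *
 *	nvm_register(q, disk_name, &demo_nvm_ops);
 *
 * The names and the max_phys_sect value are made up. Only .identity is
 * checked by nvm_register() itself; the remaining callbacks (submit_io,
 * erase_block, get_l2p_tbl, get_bb_tbl, set_bb_tbl and the DMA pool helpers)
 * are invoked later by the media manager and by targets.
 */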
+EXPORT_SYMBOL(nvm_register); + +void nvm_unregister(char *disk_name) +{ +	struct nvm_dev *dev = nvm_find_nvm_dev(disk_name); + +	if (!dev) { +		pr_err("nvm: could not find device %s to unregister\n", +								disk_name); +		return; +	} + +	down_write(&nvm_lock); +	list_del(&dev->devices); +	up_write(&nvm_lock); + +	nvm_exit(dev); +	kfree(dev); +} +EXPORT_SYMBOL(nvm_unregister); + +static const struct block_device_operations nvm_fops = { +	.owner		= THIS_MODULE, +}; + +static int nvm_create_target(struct nvm_dev *dev, +						struct nvm_ioctl_create *create) +{ +	struct nvm_ioctl_create_simple *s = &create->conf.s; +	struct request_queue *tqueue; +	struct nvmm_type *mt; +	struct gendisk *tdisk; +	struct nvm_tgt_type *tt; +	struct nvm_target *t; +	void *targetdata; +	int ret = 0; + +	if (!dev->mt) { +		/* register with device with a supported NVM manager */ +		list_for_each_entry(mt, &nvm_mgrs, list) { +			ret = mt->register_mgr(dev); +			if (ret < 0) +				return ret; /* initialization failed */ +			if (ret > 0) { +				dev->mt = mt; +				break; /* successfully initialized */ +			} +		} + +		if (!ret) { +			pr_info("nvm: no compatible nvm manager found.\n"); +			return -ENODEV; +		} +	} + +	tt = nvm_find_target_type(create->tgttype); +	if (!tt) { +		pr_err("nvm: target type %s not found\n", create->tgttype); +		return -EINVAL; +	} + +	down_write(&nvm_lock); +	list_for_each_entry(t, &dev->online_targets, list) { +		if (!strcmp(create->tgtname, t->disk->disk_name)) { +			pr_err("nvm: target name already exists.\n"); +			up_write(&nvm_lock); +			return -EINVAL; +		} +	} +	up_write(&nvm_lock); + +	t = kmalloc(sizeof(struct nvm_target), GFP_KERNEL); +	if (!t) +		return -ENOMEM; + +	tqueue = blk_alloc_queue_node(GFP_KERNEL, dev->q->node); +	if (!tqueue) +		goto err_t; +	blk_queue_make_request(tqueue, tt->make_rq); + +	tdisk = alloc_disk(0); +	if (!tdisk) +		goto err_queue; + +	sprintf(tdisk->disk_name, "%s", create->tgtname); +	tdisk->flags = GENHD_FL_EXT_DEVT; +	tdisk->major = 0; +	tdisk->first_minor = 0; +	tdisk->fops = &nvm_fops; +	tdisk->queue = tqueue; + +	targetdata = tt->init(dev, tdisk, s->lun_begin, s->lun_end); +	if (IS_ERR(targetdata)) +		goto err_init; + +	tdisk->private_data = targetdata; +	tqueue->queuedata = targetdata; + +	blk_queue_max_hw_sectors(tqueue, 8 * dev->ops->max_phys_sect); + +	set_capacity(tdisk, tt->capacity(targetdata)); +	add_disk(tdisk); + +	t->type = tt; +	t->disk = tdisk; + +	down_write(&nvm_lock); +	list_add_tail(&t->list, &dev->online_targets); +	up_write(&nvm_lock); + +	return 0; +err_init: +	put_disk(tdisk); +err_queue: +	blk_cleanup_queue(tqueue); +err_t: +	kfree(t); +	return -ENOMEM; +} + +static void nvm_remove_target(struct nvm_target *t) +{ +	struct nvm_tgt_type *tt = t->type; +	struct gendisk *tdisk = t->disk; +	struct request_queue *q = tdisk->queue; + +	lockdep_assert_held(&nvm_lock); + +	del_gendisk(tdisk); +	blk_cleanup_queue(q); + +	if (tt->exit) +		tt->exit(tdisk->private_data); + +	put_disk(tdisk); + +	list_del(&t->list); +	kfree(t); +} + +static int __nvm_configure_create(struct nvm_ioctl_create *create) +{ +	struct nvm_dev *dev; +	struct nvm_ioctl_create_simple *s; + +	dev = nvm_find_nvm_dev(create->dev); +	if (!dev) { +		pr_err("nvm: device not found\n"); +		return -EINVAL; +	} + +	if (create->conf.type != NVM_CONFIG_TYPE_SIMPLE) { +		pr_err("nvm: config type not valid\n"); +		return -EINVAL; +	} +	s = &create->conf.s; + +	if (s->lun_begin > s->lun_end || s->lun_end > dev->nr_luns) { +		pr_err("nvm: lun out of bound (%u:%u > %u)\n", +			
s->lun_begin, s->lun_end, dev->nr_luns); +		return -EINVAL; +	} + +	return nvm_create_target(dev, create); +} + +static int __nvm_configure_remove(struct nvm_ioctl_remove *remove) +{ +	struct nvm_target *t = NULL; +	struct nvm_dev *dev; +	int ret = -1; + +	down_write(&nvm_lock); +	list_for_each_entry(dev, &nvm_devices, devices) +		list_for_each_entry(t, &dev->online_targets, list) { +			if (!strcmp(remove->tgtname, t->disk->disk_name)) { +				nvm_remove_target(t); +				ret = 0; +				break; +			} +		} +	up_write(&nvm_lock); + +	if (ret) { +		pr_err("nvm: target \"%s\" doesn't exist.\n", remove->tgtname); +		return -EINVAL; +	} + +	return 0; +} + +#ifdef CONFIG_NVM_DEBUG +static int nvm_configure_show(const char *val) +{ +	struct nvm_dev *dev; +	char opcode, devname[DISK_NAME_LEN]; +	int ret; + +	ret = sscanf(val, "%c %32s", &opcode, devname); +	if (ret != 2) { +		pr_err("nvm: invalid command. Use \"opcode devicename\".\n"); +		return -EINVAL; +	} + +	dev = nvm_find_nvm_dev(devname); +	if (!dev) { +		pr_err("nvm: device not found\n"); +		return -EINVAL; +	} + +	if (!dev->mt) +		return 0; + +	dev->mt->lun_info_print(dev); + +	return 0; +} + +static int nvm_configure_remove(const char *val) +{ +	struct nvm_ioctl_remove remove; +	char opcode; +	int ret; + +	ret = sscanf(val, "%c %256s", &opcode, remove.tgtname); +	if (ret != 2) { +		pr_err("nvm: invalid command. Use \"d targetname\".\n"); +		return -EINVAL; +	} + +	remove.flags = 0; + +	return __nvm_configure_remove(&remove); +} + +static int nvm_configure_create(const char *val) +{ +	struct nvm_ioctl_create create; +	char opcode; +	int lun_begin, lun_end, ret; + +	ret = sscanf(val, "%c %256s %256s %48s %u:%u", &opcode, create.dev, +						create.tgtname, create.tgttype, +						&lun_begin, &lun_end); +	if (ret != 6) { +		pr_err("nvm: invalid command. Use \"opcode device name tgttype lun_begin:lun_end\".\n"); +		return -EINVAL; +	} + +	create.flags = 0; +	create.conf.type = NVM_CONFIG_TYPE_SIMPLE; +	create.conf.s.lun_begin = lun_begin; +	create.conf.s.lun_end = lun_end; + +	return __nvm_configure_create(&create); +} + + +/* Exposes administrative interface through /sys/module/lnvm/configure_by_str */ +static int nvm_configure_by_str_event(const char *val, +					const struct kernel_param *kp) +{ +	char opcode; +	int ret; + +	ret = sscanf(val, "%c", &opcode); +	if (ret != 1) { +		pr_err("nvm: string must have the format of \"cmd ...\"\n"); +		return -EINVAL; +	} + +	switch (opcode) { +	case 'a': +		return nvm_configure_create(val); +	case 'd': +		return nvm_configure_remove(val); +	case 's': +		return nvm_configure_show(val); +	default: +		pr_err("nvm: invalid command\n"); +		return -EINVAL; +	} + +	return 0; +} + +static int nvm_configure_get(char *buf, const struct kernel_param *kp) +{ +	int sz = 0; +	char *buf_start = buf; +	struct nvm_dev *dev; + +	buf += sprintf(buf, "available devices:\n"); +	down_write(&nvm_lock); +	list_for_each_entry(dev, &nvm_devices, devices) { +		if (sz > 4095 - DISK_NAME_LEN) +			break; +		buf += sprintf(buf, " %32s\n", dev->name); +	} +	up_write(&nvm_lock); + +	return buf - buf_start - 1; +} + +static const struct kernel_param_ops nvm_configure_by_str_event_param_ops = { +	.set	= nvm_configure_by_str_event, +	.get	= nvm_configure_get, +}; + +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX	"lnvm." 
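/*
 * Editorial note, not part of the patch: with CONFIG_NVM_DEBUG set, the
 * parameter registered just below accepts the plain-text commands parsed by
 * the sscanf() calls above. Based on those formats, a session could look
 * like this (the device and target names are made up):
 *
 *	echo "a nvme0n1 tgt0 rrpc 0:3" > /sys/module/lnvm/parameters/configure_debug
 *	echo "s nvme0n1"               > /sys/module/lnvm/parameters/configure_debug
 *	echo "d tgt0"                  > /sys/module/lnvm/parameters/configure_debug
 *
 * 'a' creates a target on a device over LUNs lun_begin:lun_end, 's' prints
 * per-LUN info through the media manager, and 'd' removes a target by name.
 */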
+ +module_param_cb(configure_debug, &nvm_configure_by_str_event_param_ops, NULL, +									0644); + +#endif /* CONFIG_NVM_DEBUG */ + +static long nvm_ioctl_info(struct file *file, void __user *arg) +{ +	struct nvm_ioctl_info *info; +	struct nvm_tgt_type *tt; +	int tgt_iter = 0; + +	if (!capable(CAP_SYS_ADMIN)) +		return -EPERM; + +	info = memdup_user(arg, sizeof(struct nvm_ioctl_info)); +	if (IS_ERR(info)) +		return -EFAULT; + +	info->version[0] = NVM_VERSION_MAJOR; +	info->version[1] = NVM_VERSION_MINOR; +	info->version[2] = NVM_VERSION_PATCH; + +	down_write(&nvm_lock); +	list_for_each_entry(tt, &nvm_targets, list) { +		struct nvm_ioctl_info_tgt *tgt = &info->tgts[tgt_iter]; + +		tgt->version[0] = tt->version[0]; +		tgt->version[1] = tt->version[1]; +		tgt->version[2] = tt->version[2]; +		strncpy(tgt->tgtname, tt->name, NVM_TTYPE_NAME_MAX); + +		tgt_iter++; +	} + +	info->tgtsize = tgt_iter; +	up_write(&nvm_lock); + +	if (copy_to_user(arg, info, sizeof(struct nvm_ioctl_info))) +		return -EFAULT; + +	kfree(info); +	return 0; +} + +static long nvm_ioctl_get_devices(struct file *file, void __user *arg) +{ +	struct nvm_ioctl_get_devices *devices; +	struct nvm_dev *dev; +	int i = 0; + +	if (!capable(CAP_SYS_ADMIN)) +		return -EPERM; + +	devices = kzalloc(sizeof(struct nvm_ioctl_get_devices), GFP_KERNEL); +	if (!devices) +		return -ENOMEM; + +	down_write(&nvm_lock); +	list_for_each_entry(dev, &nvm_devices, devices) { +		struct nvm_ioctl_device_info *info = &devices->info[i]; + +		sprintf(info->devname, "%s", dev->name); +		if (dev->mt) { +			info->bmversion[0] = dev->mt->version[0]; +			info->bmversion[1] = dev->mt->version[1]; +			info->bmversion[2] = dev->mt->version[2]; +			sprintf(info->bmname, "%s", dev->mt->name); +		} else { +			sprintf(info->bmname, "none"); +		} + +		i++; +		if (i > 31) { +			pr_err("nvm: max 31 devices can be reported.\n"); +			break; +		} +	} +	up_write(&nvm_lock); + +	devices->nr_devices = i; + +	if (copy_to_user(arg, devices, sizeof(struct nvm_ioctl_get_devices))) +		return -EFAULT; + +	kfree(devices); +	return 0; +} + +static long nvm_ioctl_dev_create(struct file *file, void __user *arg) +{ +	struct nvm_ioctl_create create; + +	if (!capable(CAP_SYS_ADMIN)) +		return -EPERM; + +	if (copy_from_user(&create, arg, sizeof(struct nvm_ioctl_create))) +		return -EFAULT; + +	create.dev[DISK_NAME_LEN - 1] = '\0'; +	create.tgttype[NVM_TTYPE_NAME_MAX - 1] = '\0'; +	create.tgtname[DISK_NAME_LEN - 1] = '\0'; + +	if (create.flags != 0) { +		pr_err("nvm: no flags supported\n"); +		return -EINVAL; +	} + +	return __nvm_configure_create(&create); +} + +static long nvm_ioctl_dev_remove(struct file *file, void __user *arg) +{ +	struct nvm_ioctl_remove remove; + +	if (!capable(CAP_SYS_ADMIN)) +		return -EPERM; + +	if (copy_from_user(&remove, arg, sizeof(struct nvm_ioctl_remove))) +		return -EFAULT; + +	remove.tgtname[DISK_NAME_LEN - 1] = '\0'; + +	if (remove.flags != 0) { +		pr_err("nvm: no flags supported\n"); +		return -EINVAL; +	} + +	return __nvm_configure_remove(&remove); +} + +static long nvm_ctl_ioctl(struct file *file, uint cmd, unsigned long arg) +{ +	void __user *argp = (void __user *)arg; + +	switch (cmd) { +	case NVM_INFO: +		return nvm_ioctl_info(file, argp); +	case NVM_GET_DEVICES: +		return nvm_ioctl_get_devices(file, argp); +	case NVM_DEV_CREATE: +		return nvm_ioctl_dev_create(file, argp); +	case NVM_DEV_REMOVE: +		return nvm_ioctl_dev_remove(file, argp); +	} +	return 0; +} + +static const struct file_operations _ctl_fops = { +	.open = nonseekable_open, +	.unlocked_ioctl 
= nvm_ctl_ioctl, +	.owner = THIS_MODULE, +	.llseek  = noop_llseek, +}; + +static struct miscdevice _nvm_misc = { +	.minor		= MISC_DYNAMIC_MINOR, +	.name		= "lightnvm", +	.nodename	= "lightnvm/control", +	.fops		= &_ctl_fops, +}; + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); + +static int __init nvm_mod_init(void) +{ +	int ret; + +	ret = misc_register(&_nvm_misc); +	if (ret) +		pr_err("nvm: misc_register failed for control device"); + +	return ret; +} + +static void __exit nvm_mod_exit(void) +{ +	misc_deregister(&_nvm_misc); +} + +MODULE_AUTHOR("Matias Bjorling <[email protected]>"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION("0.1"); +module_init(nvm_mod_init); +module_exit(nvm_mod_exit); diff --git a/drivers/lightnvm/gennvm.c b/drivers/lightnvm/gennvm.c new file mode 100644 index 000000000000..e20e74ec6b91 --- /dev/null +++ b/drivers/lightnvm/gennvm.c @@ -0,0 +1,518 @@ +/* + * Copyright (C) 2015 Matias Bjorling <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING.  If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, + * USA. + * + * Implementation of a generic nvm manager for Open-Channel SSDs. + */ + +#include "gennvm.h" + +static void gennvm_blocks_free(struct nvm_dev *dev) +{ +	struct gen_nvm *gn = dev->mp; +	struct gen_lun *lun; +	int i; + +	gennvm_for_each_lun(gn, lun, i) { +		if (!lun->vlun.blocks) +			break; +		vfree(lun->vlun.blocks); +	} +} + +static void gennvm_luns_free(struct nvm_dev *dev) +{ +	struct gen_nvm *gn = dev->mp; + +	kfree(gn->luns); +} + +static int gennvm_luns_init(struct nvm_dev *dev, struct gen_nvm *gn) +{ +	struct gen_lun *lun; +	int i; + +	gn->luns = kcalloc(dev->nr_luns, sizeof(struct gen_lun), GFP_KERNEL); +	if (!gn->luns) +		return -ENOMEM; + +	gennvm_for_each_lun(gn, lun, i) { +		spin_lock_init(&lun->vlun.lock); +		INIT_LIST_HEAD(&lun->free_list); +		INIT_LIST_HEAD(&lun->used_list); +		INIT_LIST_HEAD(&lun->bb_list); + +		lun->reserved_blocks = 2; /* for GC only */ +		lun->vlun.id = i; +		lun->vlun.lun_id = i % dev->luns_per_chnl; +		lun->vlun.chnl_id = i / dev->luns_per_chnl; +		lun->vlun.nr_free_blocks = dev->blks_per_lun; +		lun->vlun.nr_inuse_blocks = 0; +		lun->vlun.nr_bad_blocks = 0; +	} +	return 0; +} + +static int gennvm_block_bb(struct ppa_addr ppa, int nr_blocks, u8 *blks, +								void *private) +{ +	struct gen_nvm *gn = private; +	struct nvm_dev *dev = gn->dev; +	struct gen_lun *lun; +	struct nvm_block *blk; +	int i; + +	ppa = dev_to_generic_addr(gn->dev, ppa); +	lun = &gn->luns[(dev->nr_luns * ppa.g.ch) + ppa.g.lun]; + +	for (i = 0; i < nr_blocks; i++) { +		if (blks[i] == 0) +			continue; + +		blk = &lun->vlun.blocks[i]; +		if (!blk) { +			pr_err("gennvm: BB data is out of bounds.\n"); +			return -EINVAL; +		} + +		list_move_tail(&blk->list, &lun->bb_list); +		lun->vlun.nr_bad_blocks++; +	} + +	return 0; +} + +static int gennvm_block_map(u64 slba, u32 nlb, __le64 *entries, void *private) +{ +	struct nvm_dev *dev = private; +	struct gen_nvm *gn = dev->mp; +	sector_t max_pages = 
dev->total_pages * (dev->sec_size >> 9); +	u64 elba = slba + nlb; +	struct gen_lun *lun; +	struct nvm_block *blk; +	u64 i; +	int lun_id; + +	if (unlikely(elba > dev->total_pages)) { +		pr_err("gennvm: L2P data from device is out of bounds!\n"); +		return -EINVAL; +	} + +	for (i = 0; i < nlb; i++) { +		u64 pba = le64_to_cpu(entries[i]); + +		if (unlikely(pba >= max_pages && pba != U64_MAX)) { +			pr_err("gennvm: L2P data entry is out of bounds!\n"); +			return -EINVAL; +		} + +		/* Address zero is a special one. The first page on a disk is +		 * protected. It often holds internal device boot +		 * information. +		 */ +		if (!pba) +			continue; + +		/* resolve block from physical address */ +		lun_id = div_u64(pba, dev->sec_per_lun); +		lun = &gn->luns[lun_id]; + +		/* Calculate block offset into lun */ +		pba = pba - (dev->sec_per_lun * lun_id); +		blk = &lun->vlun.blocks[div_u64(pba, dev->sec_per_blk)]; + +		if (!blk->type) { +			/* at this point, we don't know anything about the +			 * block. It's up to the FTL on top to re-etablish the +			 * block state +			 */ +			list_move_tail(&blk->list, &lun->used_list); +			blk->type = 1; +			lun->vlun.nr_free_blocks--; +			lun->vlun.nr_inuse_blocks++; +		} +	} + +	return 0; +} + +static int gennvm_blocks_init(struct nvm_dev *dev, struct gen_nvm *gn) +{ +	struct gen_lun *lun; +	struct nvm_block *block; +	sector_t lun_iter, blk_iter, cur_block_id = 0; +	int ret; + +	gennvm_for_each_lun(gn, lun, lun_iter) { +		lun->vlun.blocks = vzalloc(sizeof(struct nvm_block) * +							dev->blks_per_lun); +		if (!lun->vlun.blocks) +			return -ENOMEM; + +		for (blk_iter = 0; blk_iter < dev->blks_per_lun; blk_iter++) { +			block = &lun->vlun.blocks[blk_iter]; + +			INIT_LIST_HEAD(&block->list); + +			block->lun = &lun->vlun; +			block->id = cur_block_id++; + +			/* First block is reserved for device */ +			if (unlikely(lun_iter == 0 && blk_iter == 0)) { +				lun->vlun.nr_free_blocks--; +				continue; +			} + +			list_add_tail(&block->list, &lun->free_list); +		} + +		if (dev->ops->get_bb_tbl) { +			struct ppa_addr ppa; + +			ppa.ppa = 0; +			ppa.g.ch = lun->vlun.chnl_id; +			ppa.g.lun = lun->vlun.id; +			ppa = generic_to_dev_addr(dev, ppa); + +			ret = dev->ops->get_bb_tbl(dev->q, ppa, +						dev->blks_per_lun, +						gennvm_block_bb, gn); +			if (ret) +				pr_err("gennvm: could not read BB table\n"); +		} +	} + +	if (dev->ops->get_l2p_tbl) { +		ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages, +							gennvm_block_map, dev); +		if (ret) { +			pr_err("gennvm: could not read L2P table.\n"); +			pr_warn("gennvm: default block initialization"); +		} +	} + +	return 0; +} + +static int gennvm_register(struct nvm_dev *dev) +{ +	struct gen_nvm *gn; +	int ret; + +	gn = kzalloc(sizeof(struct gen_nvm), GFP_KERNEL); +	if (!gn) +		return -ENOMEM; + +	gn->dev = dev; +	gn->nr_luns = dev->nr_luns; +	dev->mp = gn; + +	ret = gennvm_luns_init(dev, gn); +	if (ret) { +		pr_err("gennvm: could not initialize luns\n"); +		goto err; +	} + +	ret = gennvm_blocks_init(dev, gn); +	if (ret) { +		pr_err("gennvm: could not initialize blocks\n"); +		goto err; +	} + +	return 1; +err: +	kfree(gn); +	return ret; +} + +static void gennvm_unregister(struct nvm_dev *dev) +{ +	gennvm_blocks_free(dev); +	gennvm_luns_free(dev); +	kfree(dev->mp); +	dev->mp = NULL; +} + +static struct nvm_block *gennvm_get_blk(struct nvm_dev *dev, +				struct nvm_lun *vlun, unsigned long flags) +{ +	struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun); +	struct nvm_block *blk = NULL; +	int is_gc = flags & 
NVM_IOTYPE_GC; + +	spin_lock(&vlun->lock); + +	if (list_empty(&lun->free_list)) { +		pr_err_ratelimited("gennvm: lun %u have no free pages available", +								lun->vlun.id); +		spin_unlock(&vlun->lock); +		goto out; +	} + +	while (!is_gc && lun->vlun.nr_free_blocks < lun->reserved_blocks) { +		spin_unlock(&vlun->lock); +		goto out; +	} + +	blk = list_first_entry(&lun->free_list, struct nvm_block, list); +	list_move_tail(&blk->list, &lun->used_list); +	blk->type = 1; + +	lun->vlun.nr_free_blocks--; +	lun->vlun.nr_inuse_blocks++; + +	spin_unlock(&vlun->lock); +out: +	return blk; +} + +static void gennvm_put_blk(struct nvm_dev *dev, struct nvm_block *blk) +{ +	struct nvm_lun *vlun = blk->lun; +	struct gen_lun *lun = container_of(vlun, struct gen_lun, vlun); + +	spin_lock(&vlun->lock); + +	switch (blk->type) { +	case 1: +		list_move_tail(&blk->list, &lun->free_list); +		lun->vlun.nr_free_blocks++; +		lun->vlun.nr_inuse_blocks--; +		blk->type = 0; +		break; +	case 2: +		list_move_tail(&blk->list, &lun->bb_list); +		lun->vlun.nr_bad_blocks++; +		lun->vlun.nr_inuse_blocks--; +		break; +	default: +		WARN_ON_ONCE(1); +		pr_err("gennvm: erroneous block type (%lu -> %u)\n", +							blk->id, blk->type); +		list_move_tail(&blk->list, &lun->bb_list); +		lun->vlun.nr_bad_blocks++; +		lun->vlun.nr_inuse_blocks--; +	} + +	spin_unlock(&vlun->lock); +} + +static void gennvm_addr_to_generic_mode(struct nvm_dev *dev, struct nvm_rq *rqd) +{ +	int i; + +	if (rqd->nr_pages > 1) { +		for (i = 0; i < rqd->nr_pages; i++) +			rqd->ppa_list[i] = dev_to_generic_addr(dev, +							rqd->ppa_list[i]); +	} else { +		rqd->ppa_addr = dev_to_generic_addr(dev, rqd->ppa_addr); +	} +} + +static void gennvm_generic_to_addr_mode(struct nvm_dev *dev, struct nvm_rq *rqd) +{ +	int i; + +	if (rqd->nr_pages > 1) { +		for (i = 0; i < rqd->nr_pages; i++) +			rqd->ppa_list[i] = generic_to_dev_addr(dev, +							rqd->ppa_list[i]); +	} else { +		rqd->ppa_addr = generic_to_dev_addr(dev, rqd->ppa_addr); +	} +} + +static int gennvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd) +{ +	if (!dev->ops->submit_io) +		return 0; + +	/* Convert address space */ +	gennvm_generic_to_addr_mode(dev, rqd); + +	rqd->dev = dev; +	return dev->ops->submit_io(dev->q, rqd); +} + +static void gennvm_blk_set_type(struct nvm_dev *dev, struct ppa_addr *ppa, +								int type) +{ +	struct gen_nvm *gn = dev->mp; +	struct gen_lun *lun; +	struct nvm_block *blk; + +	if (unlikely(ppa->g.ch > dev->nr_chnls || +					ppa->g.lun > dev->luns_per_chnl || +					ppa->g.blk > dev->blks_per_lun)) { +		WARN_ON_ONCE(1); +		pr_err("gennvm: ppa broken (ch: %u > %u lun: %u > %u blk: %u > %u", +				ppa->g.ch, dev->nr_chnls, +				ppa->g.lun, dev->luns_per_chnl, +				ppa->g.blk, dev->blks_per_lun); +		return; +	} + +	lun = &gn->luns[ppa->g.lun * ppa->g.ch]; +	blk = &lun->vlun.blocks[ppa->g.blk]; + +	/* will be moved to bb list on put_blk from target */ +	blk->type = type; +} + +/* mark block bad. It is expected the target recover from the error. 
*/ +static void gennvm_mark_blk_bad(struct nvm_dev *dev, struct nvm_rq *rqd) +{ +	int i; + +	if (!dev->ops->set_bb_tbl) +		return; + +	if (dev->ops->set_bb_tbl(dev->q, rqd, 1)) +		return; + +	gennvm_addr_to_generic_mode(dev, rqd); + +	/* look up blocks and mark them as bad */ +	if (rqd->nr_pages > 1) +		for (i = 0; i < rqd->nr_pages; i++) +			gennvm_blk_set_type(dev, &rqd->ppa_list[i], 2); +	else +		gennvm_blk_set_type(dev, &rqd->ppa_addr, 2); +} + +static int gennvm_end_io(struct nvm_rq *rqd, int error) +{ +	struct nvm_tgt_instance *ins = rqd->ins; +	int ret = 0; + +	switch (error) { +	case NVM_RSP_SUCCESS: +		break; +	case NVM_RSP_ERR_EMPTYPAGE: +		break; +	case NVM_RSP_ERR_FAILWRITE: +		gennvm_mark_blk_bad(rqd->dev, rqd); +	default: +		ret++; +	} + +	ret += ins->tt->end_io(rqd, error); + +	return ret; +} + +static int gennvm_erase_blk(struct nvm_dev *dev, struct nvm_block *blk, +							unsigned long flags) +{ +	int plane_cnt = 0, pl_idx, ret; +	struct ppa_addr addr; +	struct nvm_rq rqd; + +	if (!dev->ops->erase_block) +		return 0; + +	addr = block_to_ppa(dev, blk); + +	if (dev->plane_mode == NVM_PLANE_SINGLE) { +		rqd.nr_pages = 1; +		rqd.ppa_addr = addr; +	} else { +		plane_cnt = (1 << dev->plane_mode); +		rqd.nr_pages = plane_cnt; + +		rqd.ppa_list = nvm_dev_dma_alloc(dev, GFP_KERNEL, +							&rqd.dma_ppa_list); +		if (!rqd.ppa_list) { +			pr_err("gennvm: failed to allocate dma memory\n"); +			return -ENOMEM; +		} + +		for (pl_idx = 0; pl_idx < plane_cnt; pl_idx++) { +			addr.g.pl = pl_idx; +			rqd.ppa_list[pl_idx] = addr; +		} +	} + +	gennvm_generic_to_addr_mode(dev, &rqd); + +	ret = dev->ops->erase_block(dev->q, &rqd); + +	if (plane_cnt) +		nvm_dev_dma_free(dev, rqd.ppa_list, rqd.dma_ppa_list); + +	return ret; +} + +static struct nvm_lun *gennvm_get_lun(struct nvm_dev *dev, int lunid) +{ +	struct gen_nvm *gn = dev->mp; + +	return &gn->luns[lunid].vlun; +} + +static void gennvm_lun_info_print(struct nvm_dev *dev) +{ +	struct gen_nvm *gn = dev->mp; +	struct gen_lun *lun; +	unsigned int i; + + +	gennvm_for_each_lun(gn, lun, i) { +		spin_lock(&lun->vlun.lock); + +		pr_info("%s: lun%8u\t%u\t%u\t%u\n", +				dev->name, i, +				lun->vlun.nr_free_blocks, +				lun->vlun.nr_inuse_blocks, +				lun->vlun.nr_bad_blocks); + +		spin_unlock(&lun->vlun.lock); +	} +} + +static struct nvmm_type gennvm = { +	.name		= "gennvm", +	.version	= {0, 1, 0}, + +	.register_mgr	= gennvm_register, +	.unregister_mgr	= gennvm_unregister, + +	.get_blk	= gennvm_get_blk, +	.put_blk	= gennvm_put_blk, + +	.submit_io	= gennvm_submit_io, +	.end_io		= gennvm_end_io, +	.erase_blk	= gennvm_erase_blk, + +	.get_lun	= gennvm_get_lun, +	.lun_info_print = gennvm_lun_info_print, +}; + +static int __init gennvm_module_init(void) +{ +	return nvm_register_mgr(&gennvm); +} + +static void gennvm_module_exit(void) +{ +	nvm_unregister_mgr(&gennvm); +} + +module_init(gennvm_module_init); +module_exit(gennvm_module_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Generic media manager for Open-Channel SSDs"); diff --git a/drivers/lightnvm/gennvm.h b/drivers/lightnvm/gennvm.h new file mode 100644 index 000000000000..9c24b5b32dac --- /dev/null +++ b/drivers/lightnvm/gennvm.h @@ -0,0 +1,48 @@ +/* + * Copyright: Matias Bjorling <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + */ + +#ifndef GENNVM_H_ +#define GENNVM_H_ + +#include <linux/module.h> +#include <linux/vmalloc.h> + +#include <linux/lightnvm.h> + +struct gen_lun { +	struct nvm_lun vlun; + +	int reserved_blocks; +	/* lun block lists */ +	struct list_head used_list;	/* In-use blocks */ +	struct list_head free_list;	/* Not used blocks i.e. released +					 * and ready for use +					 */ +	struct list_head bb_list;	/* Bad blocks. Mutually exclusive with +					 * free_list and used_list +					 */ +}; + +struct gen_nvm { +	struct nvm_dev *dev; + +	int nr_luns; +	struct gen_lun *luns; +}; + +#define gennvm_for_each_lun(bm, lun, i) \ +		for ((i) = 0, lun = &(bm)->luns[0]; \ +			(i) < (bm)->nr_luns; (i)++, lun = &(bm)->luns[(i)]) + +#endif /* GENNVM_H_ */ diff --git a/drivers/lightnvm/rrpc.c b/drivers/lightnvm/rrpc.c new file mode 100644 index 000000000000..75e59c3a3f96 --- /dev/null +++ b/drivers/lightnvm/rrpc.c @@ -0,0 +1,1354 @@ +/* + * Copyright (C) 2015 IT University of Copenhagen + * Initial release: Matias Bjorling <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + * Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs. 
+ */ + +#include "rrpc.h" + +static struct kmem_cache *rrpc_gcb_cache, *rrpc_rq_cache; +static DECLARE_RWSEM(rrpc_lock); + +static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, +				struct nvm_rq *rqd, unsigned long flags); + +#define rrpc_for_each_lun(rrpc, rlun, i) \ +		for ((i) = 0, rlun = &(rrpc)->luns[0]; \ +			(i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)]) + +static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a) +{ +	struct rrpc_block *rblk = a->rblk; +	unsigned int pg_offset; + +	lockdep_assert_held(&rrpc->rev_lock); + +	if (a->addr == ADDR_EMPTY || !rblk) +		return; + +	spin_lock(&rblk->lock); + +	div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset); +	WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages)); +	rblk->nr_invalid_pages++; + +	spin_unlock(&rblk->lock); + +	rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY; +} + +static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba, +								unsigned len) +{ +	sector_t i; + +	spin_lock(&rrpc->rev_lock); +	for (i = slba; i < slba + len; i++) { +		struct rrpc_addr *gp = &rrpc->trans_map[i]; + +		rrpc_page_invalidate(rrpc, gp); +		gp->rblk = NULL; +	} +	spin_unlock(&rrpc->rev_lock); +} + +static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc, +					sector_t laddr, unsigned int pages) +{ +	struct nvm_rq *rqd; +	struct rrpc_inflight_rq *inf; + +	rqd = mempool_alloc(rrpc->rq_pool, GFP_ATOMIC); +	if (!rqd) +		return ERR_PTR(-ENOMEM); + +	inf = rrpc_get_inflight_rq(rqd); +	if (rrpc_lock_laddr(rrpc, laddr, pages, inf)) { +		mempool_free(rqd, rrpc->rq_pool); +		return NULL; +	} + +	return rqd; +} + +static void rrpc_inflight_laddr_release(struct rrpc *rrpc, struct nvm_rq *rqd) +{ +	struct rrpc_inflight_rq *inf = rrpc_get_inflight_rq(rqd); + +	rrpc_unlock_laddr(rrpc, inf); + +	mempool_free(rqd, rrpc->rq_pool); +} + +static void rrpc_discard(struct rrpc *rrpc, struct bio *bio) +{ +	sector_t slba = bio->bi_iter.bi_sector / NR_PHY_IN_LOG; +	sector_t len = bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE; +	struct nvm_rq *rqd; + +	do { +		rqd = rrpc_inflight_laddr_acquire(rrpc, slba, len); +		schedule(); +	} while (!rqd); + +	if (IS_ERR(rqd)) { +		pr_err("rrpc: unable to acquire inflight IO\n"); +		bio_io_error(bio); +		return; +	} + +	rrpc_invalidate_range(rrpc, slba, len); +	rrpc_inflight_laddr_release(rrpc, rqd); +} + +static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk) +{ +	return (rblk->next_page == rrpc->dev->pgs_per_blk); +} + +static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk) +{ +	struct nvm_block *blk = rblk->parent; + +	return blk->id * rrpc->dev->pgs_per_blk; +} + +static struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev, +							struct ppa_addr r) +{ +	struct ppa_addr l; +	int secs, pgs, blks, luns; +	sector_t ppa = r.ppa; + +	l.ppa = 0; + +	div_u64_rem(ppa, dev->sec_per_pg, &secs); +	l.g.sec = secs; + +	sector_div(ppa, dev->sec_per_pg); +	div_u64_rem(ppa, dev->sec_per_blk, &pgs); +	l.g.pg = pgs; + +	sector_div(ppa, dev->pgs_per_blk); +	div_u64_rem(ppa, dev->blks_per_lun, &blks); +	l.g.blk = blks; + +	sector_div(ppa, dev->blks_per_lun); +	div_u64_rem(ppa, dev->luns_per_chnl, &luns); +	l.g.lun = luns; + +	sector_div(ppa, dev->luns_per_chnl); +	l.g.ch = ppa; + +	return l; +} + +static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr) +{ +	struct ppa_addr paddr; + +	paddr.ppa = addr; +	return linear_to_generic_addr(dev, paddr); +} + +/* requires lun->lock taken */ +static void rrpc_set_lun_cur(struct 
rrpc_lun *rlun, struct rrpc_block *rblk) +{ +	struct rrpc *rrpc = rlun->rrpc; + +	BUG_ON(!rblk); + +	if (rlun->cur) { +		spin_lock(&rlun->cur->lock); +		WARN_ON(!block_is_full(rrpc, rlun->cur)); +		spin_unlock(&rlun->cur->lock); +	} +	rlun->cur = rblk; +} + +static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun, +							unsigned long flags) +{ +	struct nvm_block *blk; +	struct rrpc_block *rblk; + +	blk = nvm_get_blk(rrpc->dev, rlun->parent, 0); +	if (!blk) +		return NULL; + +	rblk = &rlun->blocks[blk->id]; +	blk->priv = rblk; + +	bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk); +	rblk->next_page = 0; +	rblk->nr_invalid_pages = 0; +	atomic_set(&rblk->data_cmnt_size, 0); + +	return rblk; +} + +static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk) +{ +	nvm_put_blk(rrpc->dev, rblk->parent); +} + +static struct rrpc_lun *get_next_lun(struct rrpc *rrpc) +{ +	int next = atomic_inc_return(&rrpc->next_lun); + +	return &rrpc->luns[next % rrpc->nr_luns]; +} + +static void rrpc_gc_kick(struct rrpc *rrpc) +{ +	struct rrpc_lun *rlun; +	unsigned int i; + +	for (i = 0; i < rrpc->nr_luns; i++) { +		rlun = &rrpc->luns[i]; +		queue_work(rrpc->krqd_wq, &rlun->ws_gc); +	} +} + +/* + * timed GC every interval. + */ +static void rrpc_gc_timer(unsigned long data) +{ +	struct rrpc *rrpc = (struct rrpc *)data; + +	rrpc_gc_kick(rrpc); +	mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10)); +} + +static void rrpc_end_sync_bio(struct bio *bio) +{ +	struct completion *waiting = bio->bi_private; + +	if (bio->bi_error) +		pr_err("nvm: gc request failed (%u).\n", bio->bi_error); + +	complete(waiting); +} + +/* + * rrpc_move_valid_pages -- migrate live data off the block + * @rrpc: the 'rrpc' structure + * @block: the block from which to migrate live pages + * + * Description: + *   GC algorithms may call this function to migrate remaining live + *   pages off the block prior to erasing it. This function blocks + *   further execution until the operation is complete. 
+ */ +static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk) +{ +	struct request_queue *q = rrpc->dev->q; +	struct rrpc_rev_addr *rev; +	struct nvm_rq *rqd; +	struct bio *bio; +	struct page *page; +	int slot; +	int nr_pgs_per_blk = rrpc->dev->pgs_per_blk; +	u64 phys_addr; +	DECLARE_COMPLETION_ONSTACK(wait); + +	if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) +		return 0; + +	bio = bio_alloc(GFP_NOIO, 1); +	if (!bio) { +		pr_err("nvm: could not alloc bio to gc\n"); +		return -ENOMEM; +	} + +	page = mempool_alloc(rrpc->page_pool, GFP_NOIO); + +	while ((slot = find_first_zero_bit(rblk->invalid_pages, +					    nr_pgs_per_blk)) < nr_pgs_per_blk) { + +		/* Lock laddr */ +		phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot; + +try: +		spin_lock(&rrpc->rev_lock); +		/* Get logical address from physical to logical table */ +		rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset]; +		/* already updated by previous regular write */ +		if (rev->addr == ADDR_EMPTY) { +			spin_unlock(&rrpc->rev_lock); +			continue; +		} + +		rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1); +		if (IS_ERR_OR_NULL(rqd)) { +			spin_unlock(&rrpc->rev_lock); +			schedule(); +			goto try; +		} + +		spin_unlock(&rrpc->rev_lock); + +		/* Perform read to do GC */ +		bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr); +		bio->bi_rw = READ; +		bio->bi_private = &wait; +		bio->bi_end_io = rrpc_end_sync_bio; + +		/* TODO: may fail when EXP_PG_SIZE > PAGE_SIZE */ +		bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0); + +		if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) { +			pr_err("rrpc: gc read failed.\n"); +			rrpc_inflight_laddr_release(rrpc, rqd); +			goto finished; +		} +		wait_for_completion_io(&wait); + +		bio_reset(bio); +		reinit_completion(&wait); + +		bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr); +		bio->bi_rw = WRITE; +		bio->bi_private = &wait; +		bio->bi_end_io = rrpc_end_sync_bio; + +		bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0); + +		/* turn the command around and write the data back to a new +		 * address +		 */ +		if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) { +			pr_err("rrpc: gc write failed.\n"); +			rrpc_inflight_laddr_release(rrpc, rqd); +			goto finished; +		} +		wait_for_completion_io(&wait); + +		rrpc_inflight_laddr_release(rrpc, rqd); + +		bio_reset(bio); +	} + +finished: +	mempool_free(page, rrpc->page_pool); +	bio_put(bio); + +	if (!bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) { +		pr_err("nvm: failed to garbage collect block\n"); +		return -EIO; +	} + +	return 0; +} + +static void rrpc_block_gc(struct work_struct *work) +{ +	struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc, +									ws_gc); +	struct rrpc *rrpc = gcb->rrpc; +	struct rrpc_block *rblk = gcb->rblk; +	struct nvm_dev *dev = rrpc->dev; + +	pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id); + +	if (rrpc_move_valid_pages(rrpc, rblk)) +		goto done; + +	nvm_erase_blk(dev, rblk->parent); +	rrpc_put_blk(rrpc, rblk); +done: +	mempool_free(gcb, rrpc->gcb_pool); +} + +/* the block with highest number of invalid pages, will be in the beginning + * of the list + */ +static struct rrpc_block *rblock_max_invalid(struct rrpc_block *ra, +							struct rrpc_block *rb) +{ +	if (ra->nr_invalid_pages == rb->nr_invalid_pages) +		return ra; + +	return (ra->nr_invalid_pages < rb->nr_invalid_pages) ? 
rb : ra; +} + +/* linearly find the block with highest number of invalid pages + * requires lun->lock + */ +static struct rrpc_block *block_prio_find_max(struct rrpc_lun *rlun) +{ +	struct list_head *prio_list = &rlun->prio_list; +	struct rrpc_block *rblock, *max; + +	BUG_ON(list_empty(prio_list)); + +	max = list_first_entry(prio_list, struct rrpc_block, prio); +	list_for_each_entry(rblock, prio_list, prio) +		max = rblock_max_invalid(max, rblock); + +	return max; +} + +static void rrpc_lun_gc(struct work_struct *work) +{ +	struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc); +	struct rrpc *rrpc = rlun->rrpc; +	struct nvm_lun *lun = rlun->parent; +	struct rrpc_block_gc *gcb; +	unsigned int nr_blocks_need; + +	nr_blocks_need = rrpc->dev->blks_per_lun / GC_LIMIT_INVERSE; + +	if (nr_blocks_need < rrpc->nr_luns) +		nr_blocks_need = rrpc->nr_luns; + +	spin_lock(&lun->lock); +	while (nr_blocks_need > lun->nr_free_blocks && +					!list_empty(&rlun->prio_list)) { +		struct rrpc_block *rblock = block_prio_find_max(rlun); +		struct nvm_block *block = rblock->parent; + +		if (!rblock->nr_invalid_pages) +			break; + +		list_del_init(&rblock->prio); + +		BUG_ON(!block_is_full(rrpc, rblock)); + +		pr_debug("rrpc: selected block '%lu' for GC\n", block->id); + +		gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC); +		if (!gcb) +			break; + +		gcb->rrpc = rrpc; +		gcb->rblk = rblock; +		INIT_WORK(&gcb->ws_gc, rrpc_block_gc); + +		queue_work(rrpc->kgc_wq, &gcb->ws_gc); + +		nr_blocks_need--; +	} +	spin_unlock(&lun->lock); + +	/* TODO: Hint that request queue can be started again */ +} + +static void rrpc_gc_queue(struct work_struct *work) +{ +	struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc, +									ws_gc); +	struct rrpc *rrpc = gcb->rrpc; +	struct rrpc_block *rblk = gcb->rblk; +	struct nvm_lun *lun = rblk->parent->lun; +	struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset]; + +	spin_lock(&rlun->lock); +	list_add_tail(&rblk->prio, &rlun->prio_list); +	spin_unlock(&rlun->lock); + +	mempool_free(gcb, rrpc->gcb_pool); +	pr_debug("nvm: block '%lu' is full, allow GC (sched)\n", +							rblk->parent->id); +} + +static const struct block_device_operations rrpc_fops = { +	.owner		= THIS_MODULE, +}; + +static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc) +{ +	unsigned int i; +	struct rrpc_lun *rlun, *max_free; + +	if (!is_gc) +		return get_next_lun(rrpc); + +	/* during GC, we don't care about RR, instead we want to make +	 * sure that we maintain evenness between the block luns. +	 */ +	max_free = &rrpc->luns[0]; +	/* prevent GC-ing lun from devouring pages of a lun with +	 * little free blocks. We don't take the lock as we only need an +	 * estimate. 
+	 */ +	rrpc_for_each_lun(rrpc, rlun, i) { +		if (rlun->parent->nr_free_blocks > +					max_free->parent->nr_free_blocks) +			max_free = rlun; +	} + +	return max_free; +} + +static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr, +					struct rrpc_block *rblk, u64 paddr) +{ +	struct rrpc_addr *gp; +	struct rrpc_rev_addr *rev; + +	BUG_ON(laddr >= rrpc->nr_pages); + +	gp = &rrpc->trans_map[laddr]; +	spin_lock(&rrpc->rev_lock); +	if (gp->rblk) +		rrpc_page_invalidate(rrpc, gp); + +	gp->addr = paddr; +	gp->rblk = rblk; + +	rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset]; +	rev->addr = laddr; +	spin_unlock(&rrpc->rev_lock); + +	return gp; +} + +static u64 rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk) +{ +	u64 addr = ADDR_EMPTY; + +	spin_lock(&rblk->lock); +	if (block_is_full(rrpc, rblk)) +		goto out; + +	addr = block_to_addr(rrpc, rblk) + rblk->next_page; + +	rblk->next_page++; +out: +	spin_unlock(&rblk->lock); +	return addr; +} + +/* Simple round-robin Logical to physical address translation. + * + * Retrieve the mapping using the active append point. Then update the ap for + * the next write to the disk. + * + * Returns rrpc_addr with the physical address and block. Remember to return to + * rrpc->addr_cache when request is finished. + */ +static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr, +								int is_gc) +{ +	struct rrpc_lun *rlun; +	struct rrpc_block *rblk; +	struct nvm_lun *lun; +	u64 paddr; + +	rlun = rrpc_get_lun_rr(rrpc, is_gc); +	lun = rlun->parent; + +	if (!is_gc && lun->nr_free_blocks < rrpc->nr_luns * 4) +		return NULL; + +	spin_lock(&rlun->lock); + +	rblk = rlun->cur; +retry: +	paddr = rrpc_alloc_addr(rrpc, rblk); + +	if (paddr == ADDR_EMPTY) { +		rblk = rrpc_get_blk(rrpc, rlun, 0); +		if (rblk) { +			rrpc_set_lun_cur(rlun, rblk); +			goto retry; +		} + +		if (is_gc) { +			/* retry from emergency gc block */ +			paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur); +			if (paddr == ADDR_EMPTY) { +				rblk = rrpc_get_blk(rrpc, rlun, 1); +				if (!rblk) { +					pr_err("rrpc: no more blocks"); +					goto err; +				} + +				rlun->gc_cur = rblk; +				paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur); +			} +			rblk = rlun->gc_cur; +		} +	} + +	spin_unlock(&rlun->lock); +	return rrpc_update_map(rrpc, laddr, rblk, paddr); +err: +	spin_unlock(&rlun->lock); +	return NULL; +} + +static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk) +{ +	struct rrpc_block_gc *gcb; + +	gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC); +	if (!gcb) { +		pr_err("rrpc: unable to queue block for gc."); +		return; +	} + +	gcb->rrpc = rrpc; +	gcb->rblk = rblk; + +	INIT_WORK(&gcb->ws_gc, rrpc_gc_queue); +	queue_work(rrpc->kgc_wq, &gcb->ws_gc); +} + +static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd, +						sector_t laddr, uint8_t npages) +{ +	struct rrpc_addr *p; +	struct rrpc_block *rblk; +	struct nvm_lun *lun; +	int cmnt_size, i; + +	for (i = 0; i < npages; i++) { +		p = &rrpc->trans_map[laddr + i]; +		rblk = p->rblk; +		lun = rblk->parent->lun; + +		cmnt_size = atomic_inc_return(&rblk->data_cmnt_size); +		if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk)) +			rrpc_run_gc(rrpc, rblk); +	} +} + +static int rrpc_end_io(struct nvm_rq *rqd, int error) +{ +	struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance); +	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); +	uint8_t npages = rqd->nr_pages; +	sector_t laddr = rrpc_get_laddr(rqd->bio) - npages; + +	if (bio_data_dir(rqd->bio) == WRITE) +		rrpc_end_io_write(rrpc, rrqd, laddr, 
npages); + +	if (rrqd->flags & NVM_IOTYPE_GC) +		return 0; + +	rrpc_unlock_rq(rrpc, rqd); +	bio_put(rqd->bio); + +	if (npages > 1) +		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); +	if (rqd->metadata) +		nvm_dev_dma_free(rrpc->dev, rqd->metadata, rqd->dma_metadata); + +	mempool_free(rqd, rrpc->rq_pool); + +	return 0; +} + +static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio, +			struct nvm_rq *rqd, unsigned long flags, int npages) +{ +	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); +	struct rrpc_addr *gp; +	sector_t laddr = rrpc_get_laddr(bio); +	int is_gc = flags & NVM_IOTYPE_GC; +	int i; + +	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) { +		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); +		return NVM_IO_REQUEUE; +	} + +	for (i = 0; i < npages; i++) { +		/* We assume that mapping occurs at 4KB granularity */ +		BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_pages)); +		gp = &rrpc->trans_map[laddr + i]; + +		if (gp->rblk) { +			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev, +								gp->addr); +		} else { +			BUG_ON(is_gc); +			rrpc_unlock_laddr(rrpc, r); +			nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, +							rqd->dma_ppa_list); +			return NVM_IO_DONE; +		} +	} + +	rqd->opcode = NVM_OP_HBREAD; + +	return NVM_IO_OK; +} + +static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd, +							unsigned long flags) +{ +	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); +	int is_gc = flags & NVM_IOTYPE_GC; +	sector_t laddr = rrpc_get_laddr(bio); +	struct rrpc_addr *gp; + +	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) +		return NVM_IO_REQUEUE; + +	BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_pages)); +	gp = &rrpc->trans_map[laddr]; + +	if (gp->rblk) { +		rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr); +	} else { +		BUG_ON(is_gc); +		rrpc_unlock_rq(rrpc, rqd); +		return NVM_IO_DONE; +	} + +	rqd->opcode = NVM_OP_HBREAD; +	rrqd->addr = gp; + +	return NVM_IO_OK; +} + +static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio, +			struct nvm_rq *rqd, unsigned long flags, int npages) +{ +	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); +	struct rrpc_addr *p; +	sector_t laddr = rrpc_get_laddr(bio); +	int is_gc = flags & NVM_IOTYPE_GC; +	int i; + +	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) { +		nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list); +		return NVM_IO_REQUEUE; +	} + +	for (i = 0; i < npages; i++) { +		/* We assume that mapping occurs at 4KB granularity */ +		p = rrpc_map_page(rrpc, laddr + i, is_gc); +		if (!p) { +			BUG_ON(is_gc); +			rrpc_unlock_laddr(rrpc, r); +			nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, +							rqd->dma_ppa_list); +			rrpc_gc_kick(rrpc); +			return NVM_IO_REQUEUE; +		} + +		rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev, +								p->addr); +	} + +	rqd->opcode = NVM_OP_HBWRITE; + +	return NVM_IO_OK; +} + +static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio, +				struct nvm_rq *rqd, unsigned long flags) +{ +	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); +	struct rrpc_addr *p; +	int is_gc = flags & NVM_IOTYPE_GC; +	sector_t laddr = rrpc_get_laddr(bio); + +	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) +		return NVM_IO_REQUEUE; + +	p = rrpc_map_page(rrpc, laddr, is_gc); +	if (!p) { +		BUG_ON(is_gc); +		rrpc_unlock_rq(rrpc, rqd); +		rrpc_gc_kick(rrpc); +		return NVM_IO_REQUEUE; +	} + +	rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, p->addr); +	rqd->opcode = NVM_OP_HBWRITE; +	rrqd->addr = p; + +	return NVM_IO_OK; +} + +static int rrpc_setup_rq(struct rrpc *rrpc, struct bio 
*bio, +			struct nvm_rq *rqd, unsigned long flags, uint8_t npages) +{ +	if (npages > 1) { +		rqd->ppa_list = nvm_dev_dma_alloc(rrpc->dev, GFP_KERNEL, +							&rqd->dma_ppa_list); +		if (!rqd->ppa_list) { +			pr_err("rrpc: not able to allocate ppa list\n"); +			return NVM_IO_ERR; +		} + +		if (bio_rw(bio) == WRITE) +			return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags, +									npages); + +		return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages); +	} + +	if (bio_rw(bio) == WRITE) +		return rrpc_write_rq(rrpc, bio, rqd, flags); + +	return rrpc_read_rq(rrpc, bio, rqd, flags); +} + +static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio, +				struct nvm_rq *rqd, unsigned long flags) +{ +	int err; +	struct rrpc_rq *rrq = nvm_rq_to_pdu(rqd); +	uint8_t nr_pages = rrpc_get_pages(bio); +	int bio_size = bio_sectors(bio) << 9; + +	if (bio_size < rrpc->dev->sec_size) +		return NVM_IO_ERR; +	else if (bio_size > rrpc->dev->max_rq_size) +		return NVM_IO_ERR; + +	err = rrpc_setup_rq(rrpc, bio, rqd, flags, nr_pages); +	if (err) +		return err; + +	bio_get(bio); +	rqd->bio = bio; +	rqd->ins = &rrpc->instance; +	rqd->nr_pages = nr_pages; +	rrq->flags = flags; + +	err = nvm_submit_io(rrpc->dev, rqd); +	if (err) { +		pr_err("rrpc: I/O submission failed: %d\n", err); +		return NVM_IO_ERR; +	} + +	return NVM_IO_OK; +} + +static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio) +{ +	struct rrpc *rrpc = q->queuedata; +	struct nvm_rq *rqd; +	int err; + +	if (bio->bi_rw & REQ_DISCARD) { +		rrpc_discard(rrpc, bio); +		return BLK_QC_T_NONE; +	} + +	rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL); +	if (!rqd) { +		pr_err_ratelimited("rrpc: not able to queue bio."); +		bio_io_error(bio); +		return BLK_QC_T_NONE; +	} +	memset(rqd, 0, sizeof(struct nvm_rq)); + +	err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE); +	switch (err) { +	case NVM_IO_OK: +		return BLK_QC_T_NONE; +	case NVM_IO_ERR: +		bio_io_error(bio); +		break; +	case NVM_IO_DONE: +		bio_endio(bio); +		break; +	case NVM_IO_REQUEUE: +		spin_lock(&rrpc->bio_lock); +		bio_list_add(&rrpc->requeue_bios, bio); +		spin_unlock(&rrpc->bio_lock); +		queue_work(rrpc->kgc_wq, &rrpc->ws_requeue); +		break; +	} + +	mempool_free(rqd, rrpc->rq_pool); +	return BLK_QC_T_NONE; +} + +static void rrpc_requeue(struct work_struct *work) +{ +	struct rrpc *rrpc = container_of(work, struct rrpc, ws_requeue); +	struct bio_list bios; +	struct bio *bio; + +	bio_list_init(&bios); + +	spin_lock(&rrpc->bio_lock); +	bio_list_merge(&bios, &rrpc->requeue_bios); +	bio_list_init(&rrpc->requeue_bios); +	spin_unlock(&rrpc->bio_lock); + +	while ((bio = bio_list_pop(&bios))) +		rrpc_make_rq(rrpc->disk->queue, bio); +} + +static void rrpc_gc_free(struct rrpc *rrpc) +{ +	struct rrpc_lun *rlun; +	int i; + +	if (rrpc->krqd_wq) +		destroy_workqueue(rrpc->krqd_wq); + +	if (rrpc->kgc_wq) +		destroy_workqueue(rrpc->kgc_wq); + +	if (!rrpc->luns) +		return; + +	for (i = 0; i < rrpc->nr_luns; i++) { +		rlun = &rrpc->luns[i]; + +		if (!rlun->blocks) +			break; +		vfree(rlun->blocks); +	} +} + +static int rrpc_gc_init(struct rrpc *rrpc) +{ +	rrpc->krqd_wq = alloc_workqueue("rrpc-lun", WQ_MEM_RECLAIM|WQ_UNBOUND, +								rrpc->nr_luns); +	if (!rrpc->krqd_wq) +		return -ENOMEM; + +	rrpc->kgc_wq = alloc_workqueue("rrpc-bg", WQ_MEM_RECLAIM, 1); +	if (!rrpc->kgc_wq) +		return -ENOMEM; + +	setup_timer(&rrpc->gc_timer, rrpc_gc_timer, (unsigned long)rrpc); + +	return 0; +} + +static void rrpc_map_free(struct rrpc *rrpc) +{ +	vfree(rrpc->rev_trans_map); +	vfree(rrpc->trans_map); +} + +static int 
rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private) +{ +	struct rrpc *rrpc = (struct rrpc *)private; +	struct nvm_dev *dev = rrpc->dev; +	struct rrpc_addr *addr = rrpc->trans_map + slba; +	struct rrpc_rev_addr *raddr = rrpc->rev_trans_map; +	sector_t max_pages = dev->total_pages * (dev->sec_size >> 9); +	u64 elba = slba + nlb; +	u64 i; + +	if (unlikely(elba > dev->total_pages)) { +		pr_err("nvm: L2P data from device is out of bounds!\n"); +		return -EINVAL; +	} + +	for (i = 0; i < nlb; i++) { +		u64 pba = le64_to_cpu(entries[i]); +		/* LNVM treats address-spaces as silos, LBA and PBA are +		 * equally large and zero-indexed. +		 */ +		if (unlikely(pba >= max_pages && pba != U64_MAX)) { +			pr_err("nvm: L2P data entry is out of bounds!\n"); +			return -EINVAL; +		} + +		/* Address zero is a special one. The first page on a disk is +		 * protected. As it often holds internal device boot +		 * information. +		 */ +		if (!pba) +			continue; + +		addr[i].addr = pba; +		raddr[pba].addr = slba + i; +	} + +	return 0; +} + +static int rrpc_map_init(struct rrpc *rrpc) +{ +	struct nvm_dev *dev = rrpc->dev; +	sector_t i; +	int ret; + +	rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_pages); +	if (!rrpc->trans_map) +		return -ENOMEM; + +	rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr) +							* rrpc->nr_pages); +	if (!rrpc->rev_trans_map) +		return -ENOMEM; + +	for (i = 0; i < rrpc->nr_pages; i++) { +		struct rrpc_addr *p = &rrpc->trans_map[i]; +		struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i]; + +		p->addr = ADDR_EMPTY; +		r->addr = ADDR_EMPTY; +	} + +	if (!dev->ops->get_l2p_tbl) +		return 0; + +	/* Bring up the mapping table from device */ +	ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages, +							rrpc_l2p_update, rrpc); +	if (ret) { +		pr_err("nvm: rrpc: could not read L2P table.\n"); +		return -EINVAL; +	} + +	return 0; +} + + +/* Minimum pages needed within a lun */ +#define PAGE_POOL_SIZE 16 +#define ADDR_POOL_SIZE 64 + +static int rrpc_core_init(struct rrpc *rrpc) +{ +	down_write(&rrpc_lock); +	if (!rrpc_gcb_cache) { +		rrpc_gcb_cache = kmem_cache_create("rrpc_gcb", +				sizeof(struct rrpc_block_gc), 0, 0, NULL); +		if (!rrpc_gcb_cache) { +			up_write(&rrpc_lock); +			return -ENOMEM; +		} + +		rrpc_rq_cache = kmem_cache_create("rrpc_rq", +				sizeof(struct nvm_rq) + sizeof(struct rrpc_rq), +				0, 0, NULL); +		if (!rrpc_rq_cache) { +			kmem_cache_destroy(rrpc_gcb_cache); +			up_write(&rrpc_lock); +			return -ENOMEM; +		} +	} +	up_write(&rrpc_lock); + +	rrpc->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); +	if (!rrpc->page_pool) +		return -ENOMEM; + +	rrpc->gcb_pool = mempool_create_slab_pool(rrpc->dev->nr_luns, +								rrpc_gcb_cache); +	if (!rrpc->gcb_pool) +		return -ENOMEM; + +	rrpc->rq_pool = mempool_create_slab_pool(64, rrpc_rq_cache); +	if (!rrpc->rq_pool) +		return -ENOMEM; + +	spin_lock_init(&rrpc->inflights.lock); +	INIT_LIST_HEAD(&rrpc->inflights.reqs); + +	return 0; +} + +static void rrpc_core_free(struct rrpc *rrpc) +{ +	mempool_destroy(rrpc->page_pool); +	mempool_destroy(rrpc->gcb_pool); +	mempool_destroy(rrpc->rq_pool); +} + +static void rrpc_luns_free(struct rrpc *rrpc) +{ +	kfree(rrpc->luns); +} + +static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end) +{ +	struct nvm_dev *dev = rrpc->dev; +	struct rrpc_lun *rlun; +	int i, j; + +	spin_lock_init(&rrpc->rev_lock); + +	rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun), +								GFP_KERNEL); +	if (!rrpc->luns) +		return -ENOMEM; + +	/* 1:1 
mapping */ +	for (i = 0; i < rrpc->nr_luns; i++) { +		struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i); + +		if (dev->pgs_per_blk > +				MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) { +			pr_err("rrpc: number of pages per block too high."); +			goto err; +		} + +		rlun = &rrpc->luns[i]; +		rlun->rrpc = rrpc; +		rlun->parent = lun; +		INIT_LIST_HEAD(&rlun->prio_list); +		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc); +		spin_lock_init(&rlun->lock); + +		rrpc->total_blocks += dev->blks_per_lun; +		rrpc->nr_pages += dev->sec_per_lun; + +		rlun->blocks = vzalloc(sizeof(struct rrpc_block) * +						rrpc->dev->blks_per_lun); +		if (!rlun->blocks) +			goto err; + +		for (j = 0; j < rrpc->dev->blks_per_lun; j++) { +			struct rrpc_block *rblk = &rlun->blocks[j]; +			struct nvm_block *blk = &lun->blocks[j]; + +			rblk->parent = blk; +			INIT_LIST_HEAD(&rblk->prio); +			spin_lock_init(&rblk->lock); +		} +	} + +	return 0; +err: +	return -ENOMEM; +} + +static void rrpc_free(struct rrpc *rrpc) +{ +	rrpc_gc_free(rrpc); +	rrpc_map_free(rrpc); +	rrpc_core_free(rrpc); +	rrpc_luns_free(rrpc); + +	kfree(rrpc); +} + +static void rrpc_exit(void *private) +{ +	struct rrpc *rrpc = private; + +	del_timer(&rrpc->gc_timer); + +	flush_workqueue(rrpc->krqd_wq); +	flush_workqueue(rrpc->kgc_wq); + +	rrpc_free(rrpc); +} + +static sector_t rrpc_capacity(void *private) +{ +	struct rrpc *rrpc = private; +	struct nvm_dev *dev = rrpc->dev; +	sector_t reserved, provisioned; + +	/* cur, gc, and two emergency blocks for each lun */ +	reserved = rrpc->nr_luns * dev->max_pages_per_blk * 4; +	provisioned = rrpc->nr_pages - reserved; + +	if (reserved > rrpc->nr_pages) { +		pr_err("rrpc: not enough space available to expose storage.\n"); +		return 0; +	} + +	sector_div(provisioned, 10); +	return provisioned * 9 * NR_PHY_IN_LOG; +} + +/* + * Looks up the logical address from reverse trans map and check if its valid by + * comparing the logical to physical address with the physical address. 
+ * Returns 0 on free, otherwise 1 if in use + */ +static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk) +{ +	struct nvm_dev *dev = rrpc->dev; +	int offset; +	struct rrpc_addr *laddr; +	u64 paddr, pladdr; + +	for (offset = 0; offset < dev->pgs_per_blk; offset++) { +		paddr = block_to_addr(rrpc, rblk) + offset; + +		pladdr = rrpc->rev_trans_map[paddr].addr; +		if (pladdr == ADDR_EMPTY) +			continue; + +		laddr = &rrpc->trans_map[pladdr]; + +		if (paddr == laddr->addr) { +			laddr->rblk = rblk; +		} else { +			set_bit(offset, rblk->invalid_pages); +			rblk->nr_invalid_pages++; +		} +	} +} + +static int rrpc_blocks_init(struct rrpc *rrpc) +{ +	struct rrpc_lun *rlun; +	struct rrpc_block *rblk; +	int lun_iter, blk_iter; + +	for (lun_iter = 0; lun_iter < rrpc->nr_luns; lun_iter++) { +		rlun = &rrpc->luns[lun_iter]; + +		for (blk_iter = 0; blk_iter < rrpc->dev->blks_per_lun; +								blk_iter++) { +			rblk = &rlun->blocks[blk_iter]; +			rrpc_block_map_update(rrpc, rblk); +		} +	} + +	return 0; +} + +static int rrpc_luns_configure(struct rrpc *rrpc) +{ +	struct rrpc_lun *rlun; +	struct rrpc_block *rblk; +	int i; + +	for (i = 0; i < rrpc->nr_luns; i++) { +		rlun = &rrpc->luns[i]; + +		rblk = rrpc_get_blk(rrpc, rlun, 0); +		if (!rblk) +			return -EINVAL; + +		rrpc_set_lun_cur(rlun, rblk); + +		/* Emergency gc block */ +		rblk = rrpc_get_blk(rrpc, rlun, 1); +		if (!rblk) +			return -EINVAL; +		rlun->gc_cur = rblk; +	} + +	return 0; +} + +static struct nvm_tgt_type tt_rrpc; + +static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk, +						int lun_begin, int lun_end) +{ +	struct request_queue *bqueue = dev->q; +	struct request_queue *tqueue = tdisk->queue; +	struct rrpc *rrpc; +	int ret; + +	if (!(dev->identity.dom & NVM_RSP_L2P)) { +		pr_err("nvm: rrpc: device does not support l2p (%x)\n", +							dev->identity.dom); +		return ERR_PTR(-EINVAL); +	} + +	rrpc = kzalloc(sizeof(struct rrpc), GFP_KERNEL); +	if (!rrpc) +		return ERR_PTR(-ENOMEM); + +	rrpc->instance.tt = &tt_rrpc; +	rrpc->dev = dev; +	rrpc->disk = tdisk; + +	bio_list_init(&rrpc->requeue_bios); +	spin_lock_init(&rrpc->bio_lock); +	INIT_WORK(&rrpc->ws_requeue, rrpc_requeue); + +	rrpc->nr_luns = lun_end - lun_begin + 1; + +	/* simple round-robin strategy */ +	atomic_set(&rrpc->next_lun, -1); + +	ret = rrpc_luns_init(rrpc, lun_begin, lun_end); +	if (ret) { +		pr_err("nvm: rrpc: could not initialize luns\n"); +		goto err; +	} + +	rrpc->poffset = dev->sec_per_lun * lun_begin; +	rrpc->lun_offset = lun_begin; + +	ret = rrpc_core_init(rrpc); +	if (ret) { +		pr_err("nvm: rrpc: could not initialize core\n"); +		goto err; +	} + +	ret = rrpc_map_init(rrpc); +	if (ret) { +		pr_err("nvm: rrpc: could not initialize maps\n"); +		goto err; +	} + +	ret = rrpc_blocks_init(rrpc); +	if (ret) { +		pr_err("nvm: rrpc: could not initialize state for blocks\n"); +		goto err; +	} + +	ret = rrpc_luns_configure(rrpc); +	if (ret) { +		pr_err("nvm: rrpc: not enough blocks available in LUNs.\n"); +		goto err; +	} + +	ret = rrpc_gc_init(rrpc); +	if (ret) { +		pr_err("nvm: rrpc: could not initialize gc\n"); +		goto err; +	} + +	/* inherit the size from the underlying device */ +	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue)); +	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); + +	pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n", +			rrpc->nr_luns, (unsigned long long)rrpc->nr_pages); + +	mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10)); + +	return rrpc; +err: +	rrpc_free(rrpc); 
+	return ERR_PTR(ret); +} + +/* round robin, page-based FTL, and cost-based GC */ +static struct nvm_tgt_type tt_rrpc = { +	.name		= "rrpc", +	.version	= {1, 0, 0}, + +	.make_rq	= rrpc_make_rq, +	.capacity	= rrpc_capacity, +	.end_io		= rrpc_end_io, + +	.init		= rrpc_init, +	.exit		= rrpc_exit, +}; + +static int __init rrpc_module_init(void) +{ +	return nvm_register_target(&tt_rrpc); +} + +static void rrpc_module_exit(void) +{ +	nvm_unregister_target(&tt_rrpc); +} + +module_init(rrpc_module_init); +module_exit(rrpc_module_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Block-Device Target for Open-Channel SSDs"); diff --git a/drivers/lightnvm/rrpc.h b/drivers/lightnvm/rrpc.h new file mode 100644 index 000000000000..a9696a06c38c --- /dev/null +++ b/drivers/lightnvm/rrpc.h @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2015 IT University of Copenhagen + * Initial release: Matias Bjorling <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + * General Public License for more details. + * + * Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs. + */ + +#ifndef RRPC_H_ +#define RRPC_H_ + +#include <linux/blkdev.h> +#include <linux/blk-mq.h> +#include <linux/bio.h> +#include <linux/module.h> +#include <linux/kthread.h> +#include <linux/vmalloc.h> + +#include <linux/lightnvm.h> + +/* Run only GC if less than 1/X blocks are free */ +#define GC_LIMIT_INVERSE 10 +#define GC_TIME_SECS 100 + +#define RRPC_SECTOR (512) +#define RRPC_EXPOSED_PAGE_SIZE (4096) + +#define NR_PHY_IN_LOG (RRPC_EXPOSED_PAGE_SIZE / RRPC_SECTOR) + +struct rrpc_inflight { +	struct list_head reqs; +	spinlock_t lock; +}; + +struct rrpc_inflight_rq { +	struct list_head list; +	sector_t l_start; +	sector_t l_end; +}; + +struct rrpc_rq { +	struct rrpc_inflight_rq inflight_rq; +	struct rrpc_addr *addr; +	unsigned long flags; +}; + +struct rrpc_block { +	struct nvm_block *parent; +	struct list_head prio; + +#define MAX_INVALID_PAGES_STORAGE 8 +	/* Bitmap for invalid page intries */ +	unsigned long invalid_pages[MAX_INVALID_PAGES_STORAGE]; +	/* points to the next writable page within a block */ +	unsigned int next_page; +	/* number of pages that are invalid, wrt host page size */ +	unsigned int nr_invalid_pages; + +	spinlock_t lock; +	atomic_t data_cmnt_size; /* data pages committed to stable storage */ +}; + +struct rrpc_lun { +	struct rrpc *rrpc; +	struct nvm_lun *parent; +	struct rrpc_block *cur, *gc_cur; +	struct rrpc_block *blocks;	/* Reference to block allocation */ +	struct list_head prio_list;		/* Blocks that may be GC'ed */ +	struct work_struct ws_gc; + +	spinlock_t lock; +}; + +struct rrpc { +	/* instance must be kept in top to resolve rrpc in unprep */ +	struct nvm_tgt_instance instance; + +	struct nvm_dev *dev; +	struct gendisk *disk; + +	u64 poffset; /* physical page offset */ +	int lun_offset; + +	int nr_luns; +	struct rrpc_lun *luns; + +	/* calculated values */ +	unsigned long long nr_pages; +	unsigned long total_blocks; + +	/* Write strategy variables. 
Move these into each for structure for each +	 * strategy +	 */ +	atomic_t next_lun; /* Whenever a page is written, this is updated +			    * to point to the next write lun +			    */ + +	spinlock_t bio_lock; +	struct bio_list requeue_bios; +	struct work_struct ws_requeue; + +	/* Simple translation map of logical addresses to physical addresses. +	 * The logical addresses is known by the host system, while the physical +	 * addresses are used when writing to the disk block device. +	 */ +	struct rrpc_addr *trans_map; +	/* also store a reverse map for garbage collection */ +	struct rrpc_rev_addr *rev_trans_map; +	spinlock_t rev_lock; + +	struct rrpc_inflight inflights; + +	mempool_t *addr_pool; +	mempool_t *page_pool; +	mempool_t *gcb_pool; +	mempool_t *rq_pool; + +	struct timer_list gc_timer; +	struct workqueue_struct *krqd_wq; +	struct workqueue_struct *kgc_wq; +}; + +struct rrpc_block_gc { +	struct rrpc *rrpc; +	struct rrpc_block *rblk; +	struct work_struct ws_gc; +}; + +/* Logical to physical mapping */ +struct rrpc_addr { +	u64 addr; +	struct rrpc_block *rblk; +}; + +/* Physical to logical mapping */ +struct rrpc_rev_addr { +	u64 addr; +}; + +static inline sector_t rrpc_get_laddr(struct bio *bio) +{ +	return bio->bi_iter.bi_sector / NR_PHY_IN_LOG; +} + +static inline unsigned int rrpc_get_pages(struct bio *bio) +{ +	return  bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE; +} + +static inline sector_t rrpc_get_sector(sector_t laddr) +{ +	return laddr * NR_PHY_IN_LOG; +} + +static inline int request_intersects(struct rrpc_inflight_rq *r, +				sector_t laddr_start, sector_t laddr_end) +{ +	return (laddr_end >= r->l_start && laddr_end <= r->l_end) && +		(laddr_start >= r->l_start && laddr_start <= r->l_end); +} + +static int __rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, +			     unsigned pages, struct rrpc_inflight_rq *r) +{ +	sector_t laddr_end = laddr + pages - 1; +	struct rrpc_inflight_rq *rtmp; + +	spin_lock_irq(&rrpc->inflights.lock); +	list_for_each_entry(rtmp, &rrpc->inflights.reqs, list) { +		if (unlikely(request_intersects(rtmp, laddr, laddr_end))) { +			/* existing, overlapping request, come back later */ +			spin_unlock_irq(&rrpc->inflights.lock); +			return 1; +		} +	} + +	r->l_start = laddr; +	r->l_end = laddr_end; + +	list_add_tail(&r->list, &rrpc->inflights.reqs); +	spin_unlock_irq(&rrpc->inflights.lock); +	return 0; +} + +static inline int rrpc_lock_laddr(struct rrpc *rrpc, sector_t laddr, +				 unsigned pages, +				 struct rrpc_inflight_rq *r) +{ +	BUG_ON((laddr + pages) > rrpc->nr_pages); + +	return __rrpc_lock_laddr(rrpc, laddr, pages, r); +} + +static inline struct rrpc_inflight_rq *rrpc_get_inflight_rq(struct nvm_rq *rqd) +{ +	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd); + +	return &rrqd->inflight_rq; +} + +static inline int rrpc_lock_rq(struct rrpc *rrpc, struct bio *bio, +							struct nvm_rq *rqd) +{ +	sector_t laddr = rrpc_get_laddr(bio); +	unsigned int pages = rrpc_get_pages(bio); +	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); + +	return rrpc_lock_laddr(rrpc, laddr, pages, r); +} + +static inline void rrpc_unlock_laddr(struct rrpc *rrpc, +						struct rrpc_inflight_rq *r) +{ +	unsigned long flags; + +	spin_lock_irqsave(&rrpc->inflights.lock, flags); +	list_del_init(&r->list); +	spin_unlock_irqrestore(&rrpc->inflights.lock, flags); +} + +static inline void rrpc_unlock_rq(struct rrpc *rrpc, struct nvm_rq *rqd) +{ +	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd); +	uint8_t pages = rqd->nr_pages; + +	BUG_ON((r->l_start + pages) > rrpc->nr_pages); 
+
+	rrpc_unlock_laddr(rrpc, r);
+}
+
+#endif /* RRPC_H_ */
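The inflight helpers at the end of rrpc.h serialize I/Os by inclusive logical-address range. As a rough illustration (a standalone userspace sketch, not part of the patch), the program below reproduces the request_intersects() predicate as written: an incoming range is treated as conflicting only when both of its endpoints lie inside an existing inflight range, which is how the NVM_IO_REQUEUE case handled in rrpc_make_rq() typically arises.

/* Userspace sketch of the inflight-range check in rrpc.h (illustrative only).
 * Ranges are inclusive logical-page addresses, as in struct rrpc_inflight_rq.
 */
#include <stdint.h>
#include <stdio.h>

struct inflight_rq {
	uint64_t l_start;
	uint64_t l_end;
};

/* Mirrors request_intersects(): the new range [laddr_start, laddr_end]
 * conflicts only if both endpoints fall inside the existing inflight range.
 */
static int intersects(const struct inflight_rq *r,
		      uint64_t laddr_start, uint64_t laddr_end)
{
	return (laddr_end >= r->l_start && laddr_end <= r->l_end) &&
	       (laddr_start >= r->l_start && laddr_start <= r->l_end);
}

int main(void)
{
	struct inflight_rq inflight = { .l_start = 16, .l_end = 19 };

	/* Fully contained in the inflight range: conflict, caller requeues */
	printf("[17,18] conflicts: %d\n", intersects(&inflight, 17, 18));
	/* Disjoint range: no conflict, the request proceeds */
	printf("[32,35] conflicts: %d\n", intersects(&inflight, 32, 35));
	return 0;
}

Note that, as written, the predicate only reports a conflict when the incoming range is wholly contained within an existing one.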
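Similarly, for the sector-to-page constants defined earlier in rrpc.h, the following sketch (again illustrative, not part of the patch) shows the arithmetic behind rrpc_get_laddr() and rrpc_get_pages(): with NR_PHY_IN_LOG = 4096 / 512 = 8 physical sectors per exposed 4 KB page, a bio's start sector divides by 8 to give its logical address and its byte size divides by 4096 to give its page count. The struct fake_bio stand-in is hypothetical and only carries the two fields rrpc reads from struct bio.

/* Standalone userspace sketch of rrpc's sector-to-logical-page math.
 * Constants mirror rrpc.h; struct fake_bio and main() are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define RRPC_SECTOR		512
#define RRPC_EXPOSED_PAGE_SIZE	4096
#define NR_PHY_IN_LOG		(RRPC_EXPOSED_PAGE_SIZE / RRPC_SECTOR)	/* 8 */

/* Stand-in for the bio fields rrpc consults */
struct fake_bio {
	uint64_t bi_sector;	/* start, in 512-byte sectors */
	uint32_t bi_size;	/* length, in bytes */
};

static uint64_t get_laddr(const struct fake_bio *bio)
{
	return bio->bi_sector / NR_PHY_IN_LOG;		/* as rrpc_get_laddr() */
}

static unsigned int get_pages(const struct fake_bio *bio)
{
	return bio->bi_size / RRPC_EXPOSED_PAGE_SIZE;	/* as rrpc_get_pages() */
}

int main(void)
{
	/* A 16 KB request starting at sector 128 */
	struct fake_bio bio = { .bi_sector = 128, .bi_size = 16384 };

	/* Expected: laddr 16, 4 logical pages, covering sectors 128..159 */
	printf("laddr=%llu pages=%u\n",
		(unsigned long long)get_laddr(&bio), get_pages(&bio));
	return 0;
}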