diff options
Diffstat (limited to 'fs/nfs/flexfilelayout/flexfilelayout.c')
| -rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 200 | 
1 files changed, 139 insertions, 61 deletions
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 0cb1abd535e3..0e8018bc9880 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -26,6 +26,8 @@  #define FF_LAYOUT_POLL_RETRY_MAX     (15*HZ) +static struct group_info	*ff_zero_group; +  static struct pnfs_layout_hdr *  ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)  { @@ -53,14 +55,15 @@ ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo)  	kfree(FF_LAYOUT_FROM_HDR(lo));  } -static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +static int decode_pnfs_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)  {  	__be32 *p;  	p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);  	if (unlikely(p == NULL))  		return -ENOBUFS; -	memcpy(stateid, p, NFS4_STATEID_SIZE); +	stateid->type = NFS4_PNFS_DS_STATEID_TYPE; +	memcpy(stateid->data, p, NFS4_STATEID_SIZE);  	dprintk("%s: stateid id= [%x%x%x%x]\n", __func__,  		p[0], p[1], p[2], p[3]);  	return 0; @@ -211,10 +214,16 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)  static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror)  { +	struct rpc_cred	*cred; +  	ff_layout_remove_mirror(mirror);  	kfree(mirror->fh_versions); -	if (mirror->cred) -		put_rpccred(mirror->cred); +	cred = rcu_access_pointer(mirror->ro_cred); +	if (cred) +		put_rpccred(cred); +	cred = rcu_access_pointer(mirror->rw_cred); +	if (cred) +		put_rpccred(cred);  	nfs4_ff_layout_put_deviceid(mirror->mirror_ds);  	kfree(mirror);  } @@ -290,6 +299,8 @@ ff_lseg_merge(struct pnfs_layout_segment *new,  {  	u64 new_end, old_end; +	if (test_bit(NFS_LSEG_LAYOUTRETURN, &old->pls_flags)) +		return false;  	if (new->pls_range.iomode != old->pls_range.iomode)  		return false;  	old_end = pnfs_calc_offset_end(old->pls_range.offset, @@ -310,8 +321,6 @@ ff_lseg_merge(struct pnfs_layout_segment *new,  			new_end);  	if (test_bit(NFS_LSEG_ROC, &old->pls_flags))  		set_bit(NFS_LSEG_ROC, &new->pls_flags); -	if (test_bit(NFS_LSEG_LAYOUTRETURN, &old->pls_flags)) -		set_bit(NFS_LSEG_LAYOUTRETURN, &new->pls_flags);  	return true;  } @@ -407,8 +416,9 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  		struct nfs4_ff_layout_mirror *mirror;  		struct nfs4_deviceid devid;  		struct nfs4_deviceid_node *idnode; -		u32 ds_count; -		u32 fh_count; +		struct auth_cred acred = { .group_info = ff_zero_group }; +		struct rpc_cred	__rcu *cred; +		u32 ds_count, fh_count, id;  		int j;  		rc = -EIO; @@ -456,7 +466,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  		fls->mirror_array[i]->efficiency = be32_to_cpup(p);  		/* stateid */ -		rc = decode_stateid(&stream, &fls->mirror_array[i]->stateid); +		rc = decode_pnfs_stateid(&stream, &fls->mirror_array[i]->stateid);  		if (rc)  			goto out_err_free; @@ -484,24 +494,49 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,  		fls->mirror_array[i]->fh_versions_cnt = fh_count;  		/* user */ -		rc = decode_name(&stream, &fls->mirror_array[i]->uid); +		rc = decode_name(&stream, &id);  		if (rc)  			goto out_err_free; +		acred.uid = make_kuid(&init_user_ns, id); +  		/* group */ -		rc = decode_name(&stream, &fls->mirror_array[i]->gid); +		rc = decode_name(&stream, &id);  		if (rc)  			goto out_err_free; +		acred.gid = make_kgid(&init_user_ns, id); + +		/* find the cred for it */ +		rcu_assign_pointer(cred, rpc_lookup_generic_cred(&acred, 0, gfp_flags)); +		if (IS_ERR(cred)) { +			rc = PTR_ERR(cred); +			goto out_err_free; +		} + +		if (lgr->range.iomode == IOMODE_READ) +			rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); +		else +			rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred); +  		mirror = ff_layout_add_mirror(lh, fls->mirror_array[i]);  		if (mirror != fls->mirror_array[i]) { +			/* swap cred ptrs so free_mirror will clean up old */ +			if (lgr->range.iomode == IOMODE_READ) { +				cred = xchg(&mirror->ro_cred, cred); +				rcu_assign_pointer(fls->mirror_array[i]->ro_cred, cred); +			} else { +				cred = xchg(&mirror->rw_cred, cred); +				rcu_assign_pointer(fls->mirror_array[i]->rw_cred, cred); +			}  			ff_layout_free_mirror(fls->mirror_array[i]);  			fls->mirror_array[i] = mirror;  		} -		dprintk("%s: uid %d gid %d\n", __func__, -			fls->mirror_array[i]->uid, -			fls->mirror_array[i]->gid); +		dprintk("%s: iomode %s uid %u gid %u\n", __func__, +			lgr->range.iomode == IOMODE_READ ? "READ" : "RW", +			from_kuid(&init_user_ns, acred.uid), +			from_kgid(&init_user_ns, acred.gid));  	}  	p = xdr_inline_decode(&stream, 4); @@ -745,7 +780,7 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,  	else {  		int i; -		spin_lock(cinfo->lock); +		spin_lock(&cinfo->inode->i_lock);  		if (cinfo->ds->nbuckets != 0)  			kfree(buckets);  		else { @@ -759,7 +794,7 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,  					NFS_INVALID_STABLE_HOW;  			}  		} -		spin_unlock(cinfo->lock); +		spin_unlock(&cinfo->inode->i_lock);  		return 0;  	}  } @@ -786,6 +821,36 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,  }  static void +ff_layout_pg_get_read(struct nfs_pageio_descriptor *pgio, +		      struct nfs_page *req, +		      bool strict_iomode) +{ +retry_strict: +	pnfs_put_lseg(pgio->pg_lseg); +	pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, +					   req->wb_context, +					   0, +					   NFS4_MAX_UINT64, +					   IOMODE_READ, +					   strict_iomode, +					   GFP_KERNEL); +	if (IS_ERR(pgio->pg_lseg)) { +		pgio->pg_error = PTR_ERR(pgio->pg_lseg); +		pgio->pg_lseg = NULL; +	} + +	/* If we don't have checking, do get a IOMODE_RW +	 * segment, and the server wants to avoid READs +	 * there, then retry! +	 */ +	if (pgio->pg_lseg && !strict_iomode && +	    ff_layout_avoid_read_on_rw(pgio->pg_lseg)) { +		strict_iomode = true; +		goto retry_strict; +	} +} + +static void  ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,  			struct nfs_page *req)  { @@ -795,26 +860,23 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,  	int ds_idx;  	/* Use full layout for now */ -	if (!pgio->pg_lseg) { -		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, -						   req->wb_context, -						   0, -						   NFS4_MAX_UINT64, -						   IOMODE_READ, -						   GFP_KERNEL); -		if (IS_ERR(pgio->pg_lseg)) { -			pgio->pg_error = PTR_ERR(pgio->pg_lseg); -			pgio->pg_lseg = NULL; -			return; -		} -	} +	if (!pgio->pg_lseg) +		ff_layout_pg_get_read(pgio, req, false); +	else if (ff_layout_avoid_read_on_rw(pgio->pg_lseg)) +		ff_layout_pg_get_read(pgio, req, true); +  	/* If no lseg, fall back to read through mds */  	if (pgio->pg_lseg == NULL)  		goto out_mds;  	ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx); -	if (!ds) -		goto out_mds; +	if (!ds) { +		if (ff_layout_no_fallback_to_mds(pgio->pg_lseg)) +			goto out_pnfs; +		else +			goto out_mds; +	} +  	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);  	pgio->pg_mirror_idx = ds_idx; @@ -828,6 +890,12 @@ out_mds:  	pnfs_put_lseg(pgio->pg_lseg);  	pgio->pg_lseg = NULL;  	nfs_pageio_reset_read_mds(pgio); +	return; + +out_pnfs: +	pnfs_set_lo_fail(pgio->pg_lseg); +	pnfs_put_lseg(pgio->pg_lseg); +	pgio->pg_lseg = NULL;  }  static void @@ -847,6 +915,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,  						   0,  						   NFS4_MAX_UINT64,  						   IOMODE_RW, +						   false,  						   GFP_NOFS);  		if (IS_ERR(pgio->pg_lseg)) {  			pgio->pg_error = PTR_ERR(pgio->pg_lseg); @@ -870,8 +939,12 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,  	for (i = 0; i < pgio->pg_mirror_count; i++) {  		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); -		if (!ds) -			goto out_mds; +		if (!ds) { +			if (ff_layout_no_fallback_to_mds(pgio->pg_lseg)) +				goto out_pnfs; +			else +				goto out_mds; +		}  		pgm = &pgio->pg_mirrors[i];  		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);  		pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; @@ -883,6 +956,12 @@ out_mds:  	pnfs_put_lseg(pgio->pg_lseg);  	pgio->pg_lseg = NULL;  	nfs_pageio_reset_write_mds(pgio); +	return; + +out_pnfs: +	pnfs_set_lo_fail(pgio->pg_lseg); +	pnfs_put_lseg(pgio->pg_lseg); +	pgio->pg_lseg = NULL;  }  static unsigned int @@ -895,6 +974,7 @@ ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio,  						   0,  						   NFS4_MAX_UINT64,  						   IOMODE_RW, +						   false,  						   GFP_NOFS);  		if (IS_ERR(pgio->pg_lseg)) {  			pgio->pg_error = PTR_ERR(pgio->pg_lseg); @@ -1067,8 +1147,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,  		rpc_wake_up(&tbl->slot_tbl_waitq);  		/* fall through */  	default: -		if (ff_layout_no_fallback_to_mds(lseg) || -		    ff_layout_has_available_ds(lseg)) +		if (ff_layout_avoid_mds_available_ds(lseg))  			return -NFS4ERR_RESET_TO_PNFS;  reset:  		dprintk("%s Retry through MDS. Error %d\n", __func__, @@ -1215,8 +1294,6 @@ static int ff_layout_read_done_cb(struct rpc_task *task,  					hdr->pgio_mirror_idx + 1,  					&hdr->pgio_mirror_idx))  			goto out_eagain; -		set_bit(NFS_LAYOUT_RETURN_REQUESTED, -			&hdr->lseg->pls_layout->plh_flags);  		pnfs_read_resend_pnfs(hdr);  		return task->tk_status;  	case -NFS4ERR_RESET_TO_MDS: @@ -1260,7 +1337,7 @@ ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)  }  static bool -ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) +ff_layout_device_unavailable(struct pnfs_layout_segment *lseg, int idx)  {  	/* No mirroring for now */  	struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -1297,16 +1374,10 @@ static int ff_layout_read_prepare_common(struct rpc_task *task,  		rpc_exit(task, -EIO);  		return -EIO;  	} -	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) { -		dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); -		if (ff_layout_has_available_ds(hdr->lseg)) -			pnfs_read_resend_pnfs(hdr); -		else -			ff_layout_reset_read(hdr); -		rpc_exit(task, 0); +	if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) { +		rpc_exit(task, -EHOSTDOWN);  		return -EAGAIN;  	} -	hdr->pgio_done_cb = ff_layout_read_done_cb;  	ff_layout_read_record_layoutstats_start(task, hdr);  	return 0; @@ -1496,14 +1567,8 @@ static int ff_layout_write_prepare_common(struct rpc_task *task,  		return -EIO;  	} -	if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) { -		bool retry_pnfs; - -		retry_pnfs = ff_layout_has_available_ds(hdr->lseg); -		dprintk("%s task %u reset io to %s\n", __func__, -			task->tk_pid, retry_pnfs ? "pNFS" : "MDS"); -		ff_layout_reset_write(hdr, retry_pnfs); -		rpc_exit(task, 0); +	if (ff_layout_device_unavailable(hdr->lseg, hdr->pgio_mirror_idx)) { +		rpc_exit(task, -EHOSTDOWN);  		return -EAGAIN;  	} @@ -1712,7 +1777,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)  		goto out_failed;  	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); -	if (IS_ERR(ds_cred)) +	if (!ds_cred)  		goto out_failed;  	vers = nfs4_ff_layout_ds_version(lseg, idx); @@ -1720,6 +1785,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)  	dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__,  		ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); +	hdr->pgio_done_cb = ff_layout_read_done_cb;  	atomic_inc(&ds->ds_clp->cl_count);  	hdr->ds_clp = ds->ds_clp;  	fh = nfs4_ff_layout_select_ds_fh(lseg, idx); @@ -1737,11 +1803,11 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)  			  vers == 3 ? &ff_layout_read_call_ops_v3 :  				      &ff_layout_read_call_ops_v4,  			  0, RPC_TASK_SOFTCONN); - +	put_rpccred(ds_cred);  	return PNFS_ATTEMPTED;  out_failed: -	if (ff_layout_has_available_ds(lseg)) +	if (ff_layout_avoid_mds_available_ds(lseg))  		return PNFS_TRY_AGAIN;  	return PNFS_NOT_ATTEMPTED;  } @@ -1769,7 +1835,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)  		return PNFS_NOT_ATTEMPTED;  	ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); -	if (IS_ERR(ds_cred)) +	if (!ds_cred)  		return PNFS_NOT_ATTEMPTED;  	vers = nfs4_ff_layout_ds_version(lseg, idx); @@ -1798,6 +1864,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)  			  vers == 3 ? &ff_layout_write_call_ops_v3 :  				      &ff_layout_write_call_ops_v4,  			  sync, RPC_TASK_SOFTCONN); +	put_rpccred(ds_cred);  	return PNFS_ATTEMPTED;  } @@ -1824,7 +1891,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)  	struct rpc_clnt *ds_clnt;  	struct rpc_cred *ds_cred;  	u32 idx; -	int vers; +	int vers, ret;  	struct nfs_fh *fh;  	idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); @@ -1838,7 +1905,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)  		goto out_err;  	ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred); -	if (IS_ERR(ds_cred)) +	if (!ds_cred)  		goto out_err;  	vers = nfs4_ff_layout_ds_version(lseg, idx); @@ -1854,10 +1921,12 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)  	if (fh)  		data->args.fh = fh; -	return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, +	ret = nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops,  				   vers == 3 ? &ff_layout_commit_call_ops_v3 :  					       &ff_layout_commit_call_ops_v4,  				   how, RPC_TASK_SOFTCONN); +	put_rpccred(ds_cred); +	return ret;  out_err:  	pnfs_generic_prepare_to_resend_writes(data);  	pnfs_generic_commit_release(data); @@ -2223,6 +2292,11 @@ static int __init nfs4flexfilelayout_init(void)  {  	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n",  	       __func__); +	if (!ff_zero_group) { +		ff_zero_group = groups_alloc(0); +		if (!ff_zero_group) +			return -ENOMEM; +	}  	return pnfs_register_layoutdriver(&flexfilelayout_type);  } @@ -2231,6 +2305,10 @@ static void __exit nfs4flexfilelayout_exit(void)  	printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n",  	       __func__);  	pnfs_unregister_layoutdriver(&flexfilelayout_type); +	if (ff_zero_group) { +		put_group_info(ff_zero_group); +		ff_zero_group = NULL; +	}  }  MODULE_ALIAS("nfs-layouttype4-4");  |