diff options
Diffstat (limited to 'lib/iov_iter.c')
| -rw-r--r-- | lib/iov_iter.c | 318 | 
1 files changed, 295 insertions, 23 deletions
diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f66c62aa7154..61228a6c69f8 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -76,7 +76,44 @@  	}						\  } -#define iterate_all_kinds(i, n, v, I, B, K) {			\ +#define iterate_xarray(i, n, __v, skip, STEP) {		\ +	struct page *head = NULL;				\ +	size_t wanted = n, seg, offset;				\ +	loff_t start = i->xarray_start + skip;			\ +	pgoff_t index = start >> PAGE_SHIFT;			\ +	int j;							\ +								\ +	XA_STATE(xas, i->xarray, index);			\ +								\ +	rcu_read_lock();						\ +	xas_for_each(&xas, head, ULONG_MAX) {				\ +		if (xas_retry(&xas, head))				\ +			continue;					\ +		if (WARN_ON(xa_is_value(head)))				\ +			break;						\ +		if (WARN_ON(PageHuge(head)))				\ +			break;						\ +		for (j = (head->index < index) ? index - head->index : 0; \ +		     j < thp_nr_pages(head); j++) {			\ +			__v.bv_page = head + j;				\ +			offset = (i->xarray_start + skip) & ~PAGE_MASK;	\ +			seg = PAGE_SIZE - offset;			\ +			__v.bv_offset = offset;				\ +			__v.bv_len = min(n, seg);			\ +			(void)(STEP);					\ +			n -= __v.bv_len;				\ +			skip += __v.bv_len;				\ +			if (n == 0)					\ +				break;					\ +		}							\ +		if (n == 0)						\ +			break;						\ +	}							\ +	rcu_read_unlock();					\ +	n = wanted - n;						\ +} + +#define iterate_all_kinds(i, n, v, I, B, K, X) {		\  	if (likely(n)) {					\  		size_t skip = i->iov_offset;			\  		if (unlikely(i->type & ITER_BVEC)) {		\ @@ -88,6 +125,9 @@  			struct kvec v;				\  			iterate_kvec(i, n, v, kvec, skip, (K))	\  		} else if (unlikely(i->type & ITER_DISCARD)) {	\ +		} else if (unlikely(i->type & ITER_XARRAY)) {	\ +			struct bio_vec v;			\ +			iterate_xarray(i, n, v, skip, (X));	\  		} else {					\  			const struct iovec *iov;		\  			struct iovec v;				\ @@ -96,7 +136,7 @@  	}							\  } -#define iterate_and_advance(i, n, v, I, B, K) {			\ +#define iterate_and_advance(i, n, v, I, B, K, X) {		\  	if (unlikely(i->count < n))				\  		n = i->count;					\  	if (i->count) {						\ @@ -121,6 +161,9 @@  			i->kvec = kvec;				\  		} else if (unlikely(i->type & ITER_DISCARD)) {	\  			skip += n;				\ +		} else if (unlikely(i->type & ITER_XARRAY)) {	\ +			struct bio_vec v;			\ +			iterate_xarray(i, n, v, skip, (X))	\  		} else {					\  			const struct iovec *iov;		\  			struct iovec v;				\ @@ -622,7 +665,9 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)  		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),  		memcpy_to_page(v.bv_page, v.bv_offset,  			       (from += v.bv_len) - v.bv_len, v.bv_len), -		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) +		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), +		memcpy_to_page(v.bv_page, v.bv_offset, +			       (from += v.bv_len) - v.bv_len, v.bv_len)  	)  	return bytes; @@ -738,6 +783,18 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)  			bytes = curr_addr - s_addr - rem;  			return bytes;  		} +		}), +		({ +		rem = copy_mc_to_page(v.bv_page, v.bv_offset, +				      (from += v.bv_len) - v.bv_len, v.bv_len); +		if (rem) { +			curr_addr = (unsigned long) from; +			bytes = curr_addr - s_addr - rem; +			rcu_read_unlock(); +			i->iov_offset += bytes; +			i->count -= bytes; +			return bytes; +		}  		})  	) @@ -759,7 +816,9 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)  		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),  		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,  				 v.bv_offset, v.bv_len), -		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) +		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), +		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, +				 v.bv_offset, v.bv_len)  	)  	return bytes; @@ -785,7 +844,9 @@ bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)  		0;}),  		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,  				 v.bv_offset, v.bv_len), -		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) +		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), +		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, +				 v.bv_offset, v.bv_len)  	)  	iov_iter_advance(i, bytes); @@ -805,7 +866,9 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)  					 v.iov_base, v.iov_len),  		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,  				 v.bv_offset, v.bv_len), -		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) +		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), +		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, +				 v.bv_offset, v.bv_len)  	)  	return bytes; @@ -840,7 +903,9 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)  		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,  				 v.bv_offset, v.bv_len),  		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base, -			v.iov_len) +			v.iov_len), +		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page, +				 v.bv_offset, v.bv_len)  	)  	return bytes; @@ -864,7 +929,9 @@ bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)  		0;}),  		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,  				 v.bv_offset, v.bv_len), -		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) +		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), +		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, +				 v.bv_offset, v.bv_len)  	)  	iov_iter_advance(i, bytes); @@ -901,7 +968,7 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,  {  	if (unlikely(!page_copy_sane(page, offset, bytes)))  		return 0; -	if (i->type & (ITER_BVEC|ITER_KVEC)) { +	if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {  		void *kaddr = kmap_atomic(page);  		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);  		kunmap_atomic(kaddr); @@ -924,7 +991,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,  		WARN_ON(1);  		return 0;  	} -	if (i->type & (ITER_BVEC|ITER_KVEC)) { +	if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {  		void *kaddr = kmap_atomic(page);  		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);  		kunmap_atomic(kaddr); @@ -968,7 +1035,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)  	iterate_and_advance(i, bytes, v,  		clear_user(v.iov_base, v.iov_len),  		memzero_page(v.bv_page, v.bv_offset, v.bv_len), -		memset(v.iov_base, 0, v.iov_len) +		memset(v.iov_base, 0, v.iov_len), +		memzero_page(v.bv_page, v.bv_offset, v.bv_len)  	)  	return bytes; @@ -992,7 +1060,9 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,  		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),  		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,  				 v.bv_offset, v.bv_len), -		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) +		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), +		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, +				 v.bv_offset, v.bv_len)  	)  	kunmap_atomic(kaddr);  	return bytes; @@ -1078,11 +1148,17 @@ void iov_iter_advance(struct iov_iter *i, size_t size)  		i->count -= size;  		return;  	} +	if (unlikely(iov_iter_is_xarray(i))) { +		size = min(size, i->count); +		i->iov_offset += size; +		i->count -= size; +		return; +	}  	if (iov_iter_is_bvec(i)) {  		iov_iter_bvec_advance(i, size);  		return;  	} -	iterate_and_advance(i, size, v, 0, 0, 0) +	iterate_and_advance(i, size, v, 0, 0, 0, 0)  }  EXPORT_SYMBOL(iov_iter_advance); @@ -1126,7 +1202,12 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)  		return;  	}  	unroll -= i->iov_offset; -	if (iov_iter_is_bvec(i)) { +	if (iov_iter_is_xarray(i)) { +		BUG(); /* We should never go beyond the start of the specified +			* range since we might then be straying into pages that +			* aren't pinned. +			*/ +	} else if (iov_iter_is_bvec(i)) {  		const struct bio_vec *bvec = i->bvec;  		while (1) {  			size_t n = (--bvec)->bv_len; @@ -1163,9 +1244,9 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)  		return i->count;	// it is a silly place, anyway  	if (i->nr_segs == 1)  		return i->count; -	if (unlikely(iov_iter_is_discard(i))) +	if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))  		return i->count; -	else if (iov_iter_is_bvec(i)) +	if (iov_iter_is_bvec(i))  		return min(i->count, i->bvec->bv_len - i->iov_offset);  	else  		return min(i->count, i->iov->iov_len - i->iov_offset); @@ -1214,6 +1295,31 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,  EXPORT_SYMBOL(iov_iter_pipe);  /** + * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray + * @i: The iterator to initialise. + * @direction: The direction of the transfer. + * @xarray: The xarray to access. + * @start: The start file position. + * @count: The size of the I/O buffer in bytes. + * + * Set up an I/O iterator to either draw data out of the pages attached to an + * inode or to inject data into those pages.  The pages *must* be prevented + * from evaporation, either by taking a ref on them or locking them by the + * caller. + */ +void iov_iter_xarray(struct iov_iter *i, unsigned int direction, +		     struct xarray *xarray, loff_t start, size_t count) +{ +	BUG_ON(direction & ~1); +	i->type = ITER_XARRAY | (direction & (READ | WRITE)); +	i->xarray = xarray; +	i->xarray_start = start; +	i->count = count; +	i->iov_offset = 0; +} +EXPORT_SYMBOL(iov_iter_xarray); + +/**   * iov_iter_discard - Initialise an I/O iterator that discards data   * @i: The iterator to initialise.   * @direction: The direction of the transfer. @@ -1243,10 +1349,13 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)  			return size | i->iov_offset;  		return size;  	} +	if (unlikely(iov_iter_is_xarray(i))) +		return (i->xarray_start + i->iov_offset) | i->count;  	iterate_all_kinds(i, size, v,  		(res |= (unsigned long)v.iov_base | v.iov_len, 0),  		res |= v.bv_offset | v.bv_len, -		res |= (unsigned long)v.iov_base | v.iov_len +		res |= (unsigned long)v.iov_base | v.iov_len, +		res |= v.bv_offset | v.bv_len  	)  	return res;  } @@ -1268,7 +1377,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)  		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |  			(size != v.bv_len ? size : 0)),  		(res |= (!res ? 0 : (unsigned long)v.iov_base) | -			(size != v.iov_len ? size : 0)) +			(size != v.iov_len ? size : 0)), +		(res |= (!res ? 0 : (unsigned long)v.bv_offset) | +			(size != v.bv_len ? size : 0))  		);  	return res;  } @@ -1318,6 +1429,75 @@ static ssize_t pipe_get_pages(struct iov_iter *i,  	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);  } +static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa, +					  pgoff_t index, unsigned int nr_pages) +{ +	XA_STATE(xas, xa, index); +	struct page *page; +	unsigned int ret = 0; + +	rcu_read_lock(); +	for (page = xas_load(&xas); page; page = xas_next(&xas)) { +		if (xas_retry(&xas, page)) +			continue; + +		/* Has the page moved or been split? */ +		if (unlikely(page != xas_reload(&xas))) { +			xas_reset(&xas); +			continue; +		} + +		pages[ret] = find_subpage(page, xas.xa_index); +		get_page(pages[ret]); +		if (++ret == nr_pages) +			break; +	} +	rcu_read_unlock(); +	return ret; +} + +static ssize_t iter_xarray_get_pages(struct iov_iter *i, +				     struct page **pages, size_t maxsize, +				     unsigned maxpages, size_t *_start_offset) +{ +	unsigned nr, offset; +	pgoff_t index, count; +	size_t size = maxsize, actual; +	loff_t pos; + +	if (!size || !maxpages) +		return 0; + +	pos = i->xarray_start + i->iov_offset; +	index = pos >> PAGE_SHIFT; +	offset = pos & ~PAGE_MASK; +	*_start_offset = offset; + +	count = 1; +	if (size > PAGE_SIZE - offset) { +		size -= PAGE_SIZE - offset; +		count += size >> PAGE_SHIFT; +		size &= ~PAGE_MASK; +		if (size) +			count++; +	} + +	if (count > maxpages) +		count = maxpages; + +	nr = iter_xarray_populate_pages(pages, i->xarray, index, count); +	if (nr == 0) +		return 0; + +	actual = PAGE_SIZE * nr; +	actual -= offset; +	if (nr == count && size > 0) { +		unsigned last_offset = (nr > 1) ? 0 : offset; +		actual -= PAGE_SIZE - (last_offset + size); +	} +	return actual; +} +  ssize_t iov_iter_get_pages(struct iov_iter *i,  		   struct page **pages, size_t maxsize, unsigned maxpages,  		   size_t *start) @@ -1327,6 +1507,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,  	if (unlikely(iov_iter_is_pipe(i)))  		return pipe_get_pages(i, pages, maxsize, maxpages, start); +	if (unlikely(iov_iter_is_xarray(i))) +		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);  	if (unlikely(iov_iter_is_discard(i)))  		return -EFAULT; @@ -1353,7 +1535,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,  		return v.bv_len;  	}),({  		return -EFAULT; -	}) +	}), +	0  	)  	return 0;  } @@ -1397,6 +1580,51 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,  	return n;  } +static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i, +					   struct page ***pages, size_t maxsize, +					   size_t *_start_offset) +{ +	struct page **p; +	unsigned nr, offset; +	pgoff_t index, count; +	size_t size = maxsize, actual; +	loff_t pos; + +	if (!size) +		return 0; + +	pos = i->xarray_start + i->iov_offset; +	index = pos >> PAGE_SHIFT; +	offset = pos & ~PAGE_MASK; +	*_start_offset = offset; + +	count = 1; +	if (size > PAGE_SIZE - offset) { +		size -= PAGE_SIZE - offset; +		count += size >> PAGE_SHIFT; +		size &= ~PAGE_MASK; +		if (size) +			count++; +	} + +	p = get_pages_array(count); +	if (!p) +		return -ENOMEM; +	*pages = p; + +	nr = iter_xarray_populate_pages(p, i->xarray, index, count); +	if (nr == 0) +		return 0; + +	actual = PAGE_SIZE * nr; +	actual -= offset; +	if (nr == count && size > 0) { +		unsigned last_offset = (nr > 1) ? 0 : offset; +		actual -= PAGE_SIZE - (last_offset + size); +	} +	return actual; +} +  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,  		   struct page ***pages, size_t maxsize,  		   size_t *start) @@ -1408,6 +1636,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,  	if (unlikely(iov_iter_is_pipe(i)))  		return pipe_get_pages_alloc(i, pages, maxsize, start); +	if (unlikely(iov_iter_is_xarray(i))) +		return iter_xarray_get_pages_alloc(i, pages, maxsize, start);  	if (unlikely(iov_iter_is_discard(i)))  		return -EFAULT; @@ -1440,7 +1670,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,  		return v.bv_len;  	}),({  		return -EFAULT; -	}) +	}), 0  	)  	return 0;  } @@ -1478,6 +1708,13 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,  				      v.iov_base, v.iov_len,  				      sum, off);  		off += v.iov_len; +	}), ({ +		char *p = kmap_atomic(v.bv_page); +		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, +				      p + v.bv_offset, v.bv_len, +				      sum, off); +		kunmap_atomic(p); +		off += v.bv_len;  	})  	)  	*csum = sum; @@ -1519,6 +1756,13 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,  				      v.iov_base, v.iov_len,  				      sum, off);  		off += v.iov_len; +	}), ({ +		char *p = kmap_atomic(v.bv_page); +		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len, +				      p + v.bv_offset, v.bv_len, +				      sum, off); +		kunmap_atomic(p); +		off += v.bv_len;  	})  	)  	*csum = sum; @@ -1565,6 +1809,13 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,  				     (from += v.iov_len) - v.iov_len,  				     v.iov_len, sum, off);  		off += v.iov_len; +	}), ({ +		char *p = kmap_atomic(v.bv_page); +		sum = csum_and_memcpy(p + v.bv_offset, +				      (from += v.bv_len) - v.bv_len, +				      v.bv_len, sum, off); +		kunmap_atomic(p); +		off += v.bv_len;  	})  	)  	csstate->csum = sum; @@ -1615,6 +1866,21 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)  		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);  		if (npages >= maxpages)  			return maxpages; +	} else if (unlikely(iov_iter_is_xarray(i))) { +		unsigned offset; + +		offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK; + +		npages = 1; +		if (size > PAGE_SIZE - offset) { +			size -= PAGE_SIZE - offset; +			npages += size >> PAGE_SHIFT; +			size &= ~PAGE_MASK; +			if (size) +				npages++; +		} +		if (npages >= maxpages) +			return maxpages;  	} else iterate_all_kinds(i, size, v, ({  		unsigned long p = (unsigned long)v.iov_base;  		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) @@ -1631,7 +1897,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)  			- p / PAGE_SIZE;  		if (npages >= maxpages)  			return maxpages; -	}) +	}), +	0  	)  	return npages;  } @@ -1644,7 +1911,7 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)  		WARN_ON(1);  		return NULL;  	} -	if (unlikely(iov_iter_is_discard(new))) +	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))  		return NULL;  	if (iov_iter_is_bvec(new))  		return new->bvec = kmemdup(new->bvec, @@ -1849,7 +2116,12 @@ int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,  		kunmap(v.bv_page);  		err;}), ({  		w = v; -		err = f(&w, context);}) +		err = f(&w, context);}), ({ +		w.iov_base = kmap(v.bv_page) + v.bv_offset; +		w.iov_len = v.bv_len; +		err = f(&w, context); +		kunmap(v.bv_page); +		err;})  	)  	return err;  }  |