From 46e678c96bbd775abd05d3ddbe2fd334794f9157 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 30 Apr 2006 16:36:32 +0200 Subject: [PATCH] splice: fix bugs with stealing regular pipe pages - Check that page has suitable count for stealing in the regular pipes. - pipe_to_file() assumes that the page is locked on succesful steal, so do that in the pipe steal hook - Missing unlock_page() in add_to_page_cache() failure. Signed-off-by: Jens Axboe --- fs/pipe.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 7fefb10db8d9..5a369273c51b 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -127,8 +127,15 @@ static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe, static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { - buf->flags |= PIPE_BUF_FLAG_STOLEN; - return 0; + struct page *page = buf->page; + + if (page_count(page) == 1) { + buf->flags |= PIPE_BUF_FLAG_STOLEN; + lock_page(page); + return 0; + } + + return 1; } static void anon_pipe_buf_get(struct pipe_inode_info *info, -- cgit From 0568b409c74f7a125d92a09a3f386785700ef688 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 May 2006 19:50:48 +0200 Subject: [PATCH] splice: fix bugs in pipe_to_file() Found by Oleg Nesterov , fixed by me. - Only allow full pages to go to the page cache. - Check page != buf->page instead of using PIPE_BUF_FLAG_STOLEN. - Remember to clear 'stolen' if add_to_page_cache() fails. And as a cleanup on that: - Make the bottom fall-through logic a little less convoluted. Also make the steal path hold an extra reference to the page, so we don't have to differentiate between stolen and non-stolen at the end. Signed-off-by: Jens Axboe --- fs/pipe.c | 3 --- fs/splice.c | 37 +++++++++++++++++++------------------ include/linux/pipe_fs_i.h | 3 +-- 3 files changed, 20 insertions(+), 23 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 5a369273c51b..888f265011bf 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -99,8 +99,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, { struct page *page = buf->page; - buf->flags &= ~PIPE_BUF_FLAG_STOLEN; - /* * If nobody else uses this page, and we don't already have a * temporary page, let's keep track of it as a one-deep @@ -130,7 +128,6 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, struct page *page = buf->page; if (page_count(page) == 1) { - buf->flags |= PIPE_BUF_FLAG_STOLEN; lock_page(page); return 0; } diff --git a/fs/splice.c b/fs/splice.c index 9df28d30efa0..1633778f3652 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -78,7 +78,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info, return 1; } - buf->flags |= PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU; + buf->flags |= PIPE_BUF_FLAG_LRU; return 0; } @@ -87,7 +87,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, { page_cache_release(buf->page); buf->page = NULL; - buf->flags &= ~(PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU); + buf->flags &= ~PIPE_BUF_FLAG_LRU; } static void *page_cache_pipe_buf_map(struct file *file, @@ -587,9 +587,10 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, this_len = PAGE_CACHE_SIZE - offset; /* - * Reuse buf page, if SPLICE_F_MOVE is set. + * Reuse buf page, if SPLICE_F_MOVE is set and we are doing a full + * page. */ - if (sd->flags & SPLICE_F_MOVE) { + if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) { /* * If steal succeeds, buf->page is now pruned from the vm * side (LRU and page cache) and we can reuse it. The page @@ -604,6 +605,8 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, goto find_page; } + page_cache_get(page); + if (!(buf->flags & PIPE_BUF_FLAG_LRU)) lru_cache_add(page); } else { @@ -662,7 +665,7 @@ find_page: } else if (ret) goto out; - if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) { + if (buf->page != page) { char *dst = kmap_atomic(page, KM_USER0); memcpy(dst + offset, src + buf->offset, this_len); @@ -671,22 +674,20 @@ find_page: } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); - if (ret == AOP_TRUNCATED_PAGE) { + if (!ret) { + /* + * Return the number of bytes written and mark page as + * accessed, we are now done! + */ + ret = this_len; + mark_page_accessed(page); + balance_dirty_pages_ratelimited(mapping); + } else if (ret == AOP_TRUNCATED_PAGE) { page_cache_release(page); goto find_page; - } else if (ret) - goto out; - - /* - * Return the number of bytes written. - */ - ret = this_len; - mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); + } out: - if (!(buf->flags & PIPE_BUF_FLAG_STOLEN)) - page_cache_release(page); - + page_cache_release(page); unlock_page(page); out_nomem: buf->ops->unmap(info, buf); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 0008d4bd4059..3130977fc6ab 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -5,8 +5,7 @@ #define PIPE_BUFFERS (16) -#define PIPE_BUF_FLAG_STOLEN 0x01 -#define PIPE_BUF_FLAG_LRU 0x02 +#define PIPE_BUF_FLAG_LRU 0x01 struct pipe_buffer { struct page *page; -- cgit From f84d751994441292593523c7069ed147176f6cab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 May 2006 19:59:03 +0200 Subject: [PATCH] pipe: introduce ->pin() buffer operation The ->map() function is really expensive on highmem machines right now, since it has to use the slower kmap() instead of kmap_atomic(). Splice rarely needs to access the virtual address of a page, so it's a waste of time doing it. Introduce ->pin() to take over the responsibility of making sure the page data is valid. ->map() is then reduced to just kmap(). That way we can also share a most of the pipe buffer ops between pipe.c and splice.c Signed-off-by: Jens Axboe --- fs/pipe.c | 39 +++++++++++--------- fs/splice.c | 91 ++++++++++++++++------------------------------- include/linux/pipe_fs_i.h | 21 ++++++++++- 3 files changed, 73 insertions(+), 78 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index 888f265011bf..d9644fd9cc0d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -110,14 +110,14 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, page_cache_release(page); } -static void * anon_pipe_buf_map(struct file *file, struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +void *generic_pipe_buf_map(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { return kmap(buf->page); } -static void anon_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) +void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) { kunmap(buf->page); } @@ -135,19 +135,24 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, return 1; } -static void anon_pipe_buf_get(struct pipe_inode_info *info, - struct pipe_buffer *buf) +void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf) { page_cache_get(buf->page); } +int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf) +{ + return 0; +} + static struct pipe_buf_operations anon_pipe_buf_ops = { .can_merge = 1, - .map = anon_pipe_buf_map, - .unmap = anon_pipe_buf_unmap, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = generic_pipe_buf_pin, .release = anon_pipe_buf_release, .steal = anon_pipe_buf_steal, - .get = anon_pipe_buf_get, + .get = generic_pipe_buf_get, }; static ssize_t @@ -183,12 +188,14 @@ pipe_readv(struct file *filp, const struct iovec *_iov, if (chars > total_len) chars = total_len; - addr = ops->map(filp, pipe, buf); - if (IS_ERR(addr)) { + error = ops->pin(pipe, buf); + if (error) { if (!ret) - ret = PTR_ERR(addr); + error = ret; break; } + + addr = ops->map(pipe, buf); error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); ops->unmap(pipe, buf); if (unlikely(error)) { @@ -300,11 +307,11 @@ pipe_writev(struct file *filp, const struct iovec *_iov, void *addr; int error; - addr = ops->map(filp, pipe, buf); - if (IS_ERR(addr)) { - error = PTR_ERR(addr); + error = ops->pin(pipe, buf); + if (error) goto out; - } + + addr = ops->map(pipe, buf); error = pipe_iov_copy_from_user(offset + addr, iov, chars); ops->unmap(pipe, buf); diff --git a/fs/splice.c b/fs/splice.c index 1633778f3652..d7538d83c367 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -90,9 +90,8 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info, buf->flags &= ~PIPE_BUF_FLAG_LRU; } -static void *page_cache_pipe_buf_map(struct file *file, - struct pipe_inode_info *info, - struct pipe_buffer *buf) +static int page_cache_pipe_buf_pin(struct pipe_inode_info *info, + struct pipe_buffer *buf) { struct page *page = buf->page; int err; @@ -118,49 +117,25 @@ static void *page_cache_pipe_buf_map(struct file *file, } /* - * Page is ok afterall, fall through to mapping. + * Page is ok afterall, we are done. */ unlock_page(page); } - return kmap(page); + return 0; error: unlock_page(page); - return ERR_PTR(err); -} - -static void page_cache_pipe_buf_unmap(struct pipe_inode_info *info, - struct pipe_buffer *buf) -{ - kunmap(buf->page); -} - -static void *user_page_pipe_buf_map(struct file *file, - struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return kmap(buf->page); -} - -static void user_page_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - kunmap(buf->page); -} - -static void page_cache_pipe_buf_get(struct pipe_inode_info *info, - struct pipe_buffer *buf) -{ - page_cache_get(buf->page); + return err; } static struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, - .map = page_cache_pipe_buf_map, - .unmap = page_cache_pipe_buf_unmap, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = page_cache_pipe_buf_pin, .release = page_cache_pipe_buf_release, .steal = page_cache_pipe_buf_steal, - .get = page_cache_pipe_buf_get, + .get = generic_pipe_buf_get, }; static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -171,11 +146,12 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, static struct pipe_buf_operations user_page_pipe_buf_ops = { .can_merge = 0, - .map = user_page_pipe_buf_map, - .unmap = user_page_pipe_buf_unmap, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .pin = generic_pipe_buf_pin, .release = page_cache_pipe_buf_release, .steal = user_page_pipe_buf_steal, - .get = page_cache_pipe_buf_get, + .get = generic_pipe_buf_get, }; /* @@ -517,26 +493,16 @@ static int pipe_to_sendpage(struct pipe_inode_info *info, { struct file *file = sd->file; loff_t pos = sd->pos; - ssize_t ret; - void *ptr; - int more; + int ret, more; - /* - * Sub-optimal, but we are limited by the pipe ->map. We don't - * need a kmap'ed buffer here, we just want to make sure we - * have the page pinned if the pipe page originates from the - * page cache. - */ - ptr = buf->ops->map(file, info, buf); - if (IS_ERR(ptr)) - return PTR_ERR(ptr); - - more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; + ret = buf->ops->pin(info, buf); + if (!ret) { + more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; - ret = file->f_op->sendpage(file, buf->page, buf->offset, sd->len, - &pos, more); + ret = file->f_op->sendpage(file, buf->page, buf->offset, + sd->len, &pos, more); + } - buf->ops->unmap(info, buf); return ret; } @@ -569,15 +535,14 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf, unsigned int offset, this_len; struct page *page; pgoff_t index; - char *src; int ret; /* * make sure the data in this buffer is uptodate */ - src = buf->ops->map(file, info, buf); - if (IS_ERR(src)) - return PTR_ERR(src); + ret = buf->ops->pin(info, buf); + if (unlikely(ret)) + return ret; index = sd->pos >> PAGE_CACHE_SHIFT; offset = sd->pos & ~PAGE_CACHE_MASK; @@ -666,11 +631,16 @@ find_page: goto out; if (buf->page != page) { - char *dst = kmap_atomic(page, KM_USER0); + /* + * Careful, ->map() uses KM_USER0! + */ + char *src = buf->ops->map(info, buf); + char *dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); - kunmap_atomic(dst, KM_USER0); + kunmap_atomic(dst, KM_USER1); + buf->ops->unmap(info, buf); } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); @@ -690,7 +660,6 @@ out: page_cache_release(page); unlock_page(page); out_nomem: - buf->ops->unmap(info, buf); return ret; } diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 3130977fc6ab..b8aae1fc5185 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -14,10 +14,23 @@ struct pipe_buffer { unsigned int flags; }; +/* + * Note on the nesting of these functions: + * + * ->pin() + * ->steal() + * ... + * ->map() + * ... + * ->unmap() + * + * That is, ->map() must be called on a pinned buffer, same goes for ->steal(). + */ struct pipe_buf_operations { int can_merge; - void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); + void * (*map)(struct pipe_inode_info *, struct pipe_buffer *); void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); + int (*pin)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); void (*get)(struct pipe_inode_info *, struct pipe_buffer *); @@ -50,6 +63,12 @@ struct pipe_inode_info * alloc_pipe_info(struct inode * inode); void free_pipe_info(struct inode * inode); void __free_pipe_info(struct pipe_inode_info *); +/* Generic pipe buffer ops functions */ +void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *); +void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *); +void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); +int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); + /* * splice is tied to pipes as a transport (at least for now), so we'll just * add the splice flags here. -- cgit From f6762b7ad8edd6abc802542ce845d3bc8adcb92f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 1 May 2006 20:02:05 +0200 Subject: [PATCH] pipe: enable atomic copying of pipe data to/from user space The pipe ->map() method uses kmap() to virtually map the pages, which is both slow and has known scalability issues on SMP. This patch enables atomic copying of pipe pages, by pre-faulting data and using kmap_atomic() instead. lmbench bw_pipe and lat_pipe measurements agree this is a Good Thing. Here are results from that on a UP machine with highmem (1.5GiB of RAM), running first a UP kernel, SMP kernel, and SMP kernel patched. Vanilla-UP: Pipe bandwidth: 1622.28 MB/sec Pipe bandwidth: 1610.59 MB/sec Pipe bandwidth: 1608.30 MB/sec Pipe latency: 7.3275 microseconds Pipe latency: 7.2995 microseconds Pipe latency: 7.3097 microseconds Vanilla-SMP: Pipe bandwidth: 1382.19 MB/sec Pipe bandwidth: 1317.27 MB/sec Pipe bandwidth: 1355.61 MB/sec Pipe latency: 9.6402 microseconds Pipe latency: 9.6696 microseconds Pipe latency: 9.6153 microseconds Patched-SMP: Pipe bandwidth: 1578.70 MB/sec Pipe bandwidth: 1579.95 MB/sec Pipe bandwidth: 1578.63 MB/sec Pipe latency: 9.1654 microseconds Pipe latency: 9.2266 microseconds Pipe latency: 9.1527 microseconds Signed-off-by: Jens Axboe --- fs/pipe.c | 141 ++++++++++++++++++++++++++++++++++++++-------- fs/splice.c | 4 +- include/linux/pipe_fs_i.h | 11 ++-- 3 files changed, 126 insertions(+), 30 deletions(-) (limited to 'fs/pipe.c') diff --git a/fs/pipe.c b/fs/pipe.c index d9644fd9cc0d..3941a7f78b5d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -55,7 +55,8 @@ void pipe_wait(struct pipe_inode_info *pipe) } static int -pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) +pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, + int atomic) { unsigned long copy; @@ -64,8 +65,13 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) iov++; copy = min_t(unsigned long, len, iov->iov_len); - if (copy_from_user(to, iov->iov_base, copy)) - return -EFAULT; + if (atomic) { + if (__copy_from_user_inatomic(to, iov->iov_base, copy)) + return -EFAULT; + } else { + if (copy_from_user(to, iov->iov_base, copy)) + return -EFAULT; + } to += copy; len -= copy; iov->iov_base += copy; @@ -75,7 +81,8 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len) } static int -pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) +pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, + int atomic) { unsigned long copy; @@ -84,8 +91,13 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) iov++; copy = min_t(unsigned long, len, iov->iov_len); - if (copy_to_user(iov->iov_base, from, copy)) - return -EFAULT; + if (atomic) { + if (__copy_to_user_inatomic(iov->iov_base, from, copy)) + return -EFAULT; + } else { + if (copy_to_user(iov->iov_base, from, copy)) + return -EFAULT; + } from += copy; len -= copy; iov->iov_base += copy; @@ -94,6 +106,47 @@ pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len) return 0; } +/* + * Attempt to pre-fault in the user memory, so we can use atomic copies. + * Returns the number of bytes not faulted in. + */ +static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) +{ + while (!iov->iov_len) + iov++; + + while (len > 0) { + unsigned long this_len; + + this_len = min_t(unsigned long, len, iov->iov_len); + if (fault_in_pages_writeable(iov->iov_base, this_len)) + break; + + len -= this_len; + iov++; + } + + return len; +} + +/* + * Pre-fault in the user memory, so we can use atomic copies. + */ +static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) +{ + while (!iov->iov_len) + iov++; + + while (len > 0) { + unsigned long this_len; + + this_len = min_t(unsigned long, len, iov->iov_len); + fault_in_pages_readable(iov->iov_base, this_len); + len -= this_len; + iov++; + } +} + static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -111,15 +164,24 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, } void *generic_pipe_buf_map(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) + struct pipe_buffer *buf, int atomic) { + if (atomic) { + buf->flags |= PIPE_BUF_FLAG_ATOMIC; + return kmap_atomic(buf->page, KM_USER0); + } + return kmap(buf->page); } void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) + struct pipe_buffer *buf, void *map_data) { - kunmap(buf->page); + if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { + buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; + kunmap_atomic(map_data, KM_USER0); + } else + kunmap(buf->page); } static int anon_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -183,7 +245,7 @@ pipe_readv(struct file *filp, const struct iovec *_iov, struct pipe_buf_operations *ops = buf->ops; void *addr; size_t chars = buf->len; - int error; + int error, atomic; if (chars > total_len) chars = total_len; @@ -195,12 +257,21 @@ pipe_readv(struct file *filp, const struct iovec *_iov, break; } - addr = ops->map(pipe, buf); - error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars); - ops->unmap(pipe, buf); + atomic = !iov_fault_in_pages_write(iov, chars); +redo: + addr = ops->map(pipe, buf, atomic); + error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); + ops->unmap(pipe, buf, addr); if (unlikely(error)) { + /* + * Just retry with the slow path if we failed. + */ + if (atomic) { + atomic = 0; + goto redo; + } if (!ret) - ret = -EFAULT; + ret = error; break; } ret += chars; @@ -304,21 +375,28 @@ pipe_writev(struct file *filp, const struct iovec *_iov, int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { + int error, atomic = 1; void *addr; - int error; error = ops->pin(pipe, buf); if (error) goto out; - addr = ops->map(pipe, buf); + iov_fault_in_pages_read(iov, chars); +redo1: + addr = ops->map(pipe, buf, atomic); error = pipe_iov_copy_from_user(offset + addr, iov, - chars); - ops->unmap(pipe, buf); + chars, atomic); + ops->unmap(pipe, buf, addr); ret = error; do_wakeup = 1; - if (error) + if (error) { + if (atomic) { + atomic = 0; + goto redo1; + } goto out; + } buf->len += chars; total_len -= chars; ret = chars; @@ -341,7 +419,8 @@ pipe_writev(struct file *filp, const struct iovec *_iov, int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1); struct pipe_buffer *buf = pipe->bufs + newbuf; struct page *page = pipe->tmp_page; - int error; + char *src; + int error, atomic = 1; if (!page) { page = alloc_page(GFP_HIGHUSER); @@ -361,11 +440,27 @@ pipe_writev(struct file *filp, const struct iovec *_iov, if (chars > total_len) chars = total_len; - error = pipe_iov_copy_from_user(kmap(page), iov, chars); - kunmap(page); + iov_fault_in_pages_read(iov, chars); +redo2: + if (atomic) + src = kmap_atomic(page, KM_USER0); + else + src = kmap(page); + + error = pipe_iov_copy_from_user(src, iov, chars, + atomic); + if (atomic) + kunmap_atomic(src, KM_USER0); + else + kunmap(page); + if (unlikely(error)) { + if (atomic) { + atomic = 0; + goto redo2; + } if (!ret) - ret = -EFAULT; + ret = error; break; } ret += chars; diff --git a/fs/splice.c b/fs/splice.c index 0a6916423e7d..d4664a297bab 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -640,13 +640,13 @@ find_page: /* * Careful, ->map() uses KM_USER0! */ - char *src = buf->ops->map(info, buf); + char *src = buf->ops->map(info, buf, 1); char *dst = kmap_atomic(page, KM_USER1); memcpy(dst + offset, src + buf->offset, this_len); flush_dcache_page(page); kunmap_atomic(dst, KM_USER1); - buf->ops->unmap(info, buf); + buf->ops->unmap(info, buf, src); } ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index b8aae1fc5185..4c054491e38e 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -5,7 +5,8 @@ #define PIPE_BUFFERS (16) -#define PIPE_BUF_FLAG_LRU 0x01 +#define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ +#define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ struct pipe_buffer { struct page *page; @@ -28,8 +29,8 @@ struct pipe_buffer { */ struct pipe_buf_operations { int can_merge; - void * (*map)(struct pipe_inode_info *, struct pipe_buffer *); - void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); + void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); + void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); int (*pin)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); @@ -64,8 +65,8 @@ void free_pipe_info(struct inode * inode); void __free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *); -void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *); +void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); +void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_pin(struct pipe_inode_info *, struct pipe_buffer *); -- cgit