diff options
Diffstat (limited to 'fs/netfs/io.c')
-rw-r--r-- | fs/netfs/io.c | 361 |
1 files changed, 174 insertions, 187 deletions
diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 7f753380e047..c93851b98368 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -21,12 +21,7 @@ */ static void netfs_clear_unread(struct netfs_io_subrequest *subreq) { - struct iov_iter iter; - - iov_iter_xarray(&iter, ITER_DEST, &subreq->rreq->mapping->i_pages, - subreq->start + subreq->transferred, - subreq->len - subreq->transferred); - iov_iter_zero(iov_iter_count(&iter), &iter); + iov_iter_zero(iov_iter_count(&subreq->io_iter), &subreq->io_iter); } static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, @@ -46,14 +41,9 @@ static void netfs_read_from_cache(struct netfs_io_request *rreq, enum netfs_read_from_hole read_hole) { struct netfs_cache_resources *cres = &rreq->cache_resources; - struct iov_iter iter; netfs_stat(&netfs_n_rh_read); - iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, - subreq->start + subreq->transferred, - subreq->len - subreq->transferred); - - cres->ops->read(cres, subreq->start, &iter, read_hole, + cres->ops->read(cres, subreq->start, &subreq->io_iter, read_hole, netfs_cache_read_terminated, subreq); } @@ -88,6 +78,13 @@ static void netfs_read_from_server(struct netfs_io_request *rreq, struct netfs_io_subrequest *subreq) { netfs_stat(&netfs_n_rh_download); + + if (rreq->origin != NETFS_DIO_READ && + iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred) + pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n", + rreq->debug_id, subreq->debug_index, + iov_iter_count(&subreq->io_iter), subreq->len, + subreq->transferred, subreq->flags); rreq->netfs_ops->issue_read(subreq); } @@ -102,144 +99,6 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async) } /* - * Deal with the completion of writing the data to the cache. We have to clear - * the PG_fscache bits on the folios involved and release the caller's ref. - * - * May be called in softirq mode and we inherit a ref from the caller. - */ -static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, - bool was_async) -{ - struct netfs_io_subrequest *subreq; - struct folio *folio; - pgoff_t unlocked = 0; - bool have_unlocked = false; - - rcu_read_lock(); - - list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { - XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); - - xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { - if (xas_retry(&xas, folio)) - continue; - - /* We might have multiple writes from the same huge - * folio, but we mustn't unlock a folio more than once. - */ - if (have_unlocked && folio_index(folio) <= unlocked) - continue; - unlocked = folio_index(folio); - folio_end_fscache(folio); - have_unlocked = true; - } - } - - rcu_read_unlock(); - netfs_rreq_completed(rreq, was_async); -} - -static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, - bool was_async) -{ - struct netfs_io_subrequest *subreq = priv; - struct netfs_io_request *rreq = subreq->rreq; - - if (IS_ERR_VALUE(transferred_or_error)) { - netfs_stat(&netfs_n_rh_write_failed); - trace_netfs_failure(rreq, subreq, transferred_or_error, - netfs_fail_copy_to_cache); - } else { - netfs_stat(&netfs_n_rh_write_done); - } - - trace_netfs_sreq(subreq, netfs_sreq_trace_write_term); - - /* If we decrement nr_copy_ops to 0, the ref belongs to us. */ - if (atomic_dec_and_test(&rreq->nr_copy_ops)) - netfs_rreq_unmark_after_write(rreq, was_async); - - netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); -} - -/* - * Perform any outstanding writes to the cache. We inherit a ref from the - * caller. - */ -static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) -{ - struct netfs_cache_resources *cres = &rreq->cache_resources; - struct netfs_io_subrequest *subreq, *next, *p; - struct iov_iter iter; - int ret; - - trace_netfs_rreq(rreq, netfs_rreq_trace_copy); - - /* We don't want terminating writes trying to wake us up whilst we're - * still going through the list. - */ - atomic_inc(&rreq->nr_copy_ops); - - list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { - if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { - list_del_init(&subreq->rreq_link); - netfs_put_subrequest(subreq, false, - netfs_sreq_trace_put_no_copy); - } - } - - list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { - /* Amalgamate adjacent writes */ - while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { - next = list_next_entry(subreq, rreq_link); - if (next->start != subreq->start + subreq->len) - break; - subreq->len += next->len; - list_del_init(&next->rreq_link); - netfs_put_subrequest(next, false, - netfs_sreq_trace_put_merged); - } - - ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, - rreq->i_size, true); - if (ret < 0) { - trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); - trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); - continue; - } - - iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages, - subreq->start, subreq->len); - - atomic_inc(&rreq->nr_copy_ops); - netfs_stat(&netfs_n_rh_write); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache); - trace_netfs_sreq(subreq, netfs_sreq_trace_write); - cres->ops->write(cres, subreq->start, &iter, - netfs_rreq_copy_terminated, subreq); - } - - /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */ - if (atomic_dec_and_test(&rreq->nr_copy_ops)) - netfs_rreq_unmark_after_write(rreq, false); -} - -static void netfs_rreq_write_to_cache_work(struct work_struct *work) -{ - struct netfs_io_request *rreq = - container_of(work, struct netfs_io_request, work); - - netfs_rreq_do_write_to_cache(rreq); -} - -static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) -{ - rreq->work.func = netfs_rreq_write_to_cache_work; - if (!queue_work(system_unbound_wq, &rreq->work)) - BUG(); -} - -/* * Handle a short read. */ static void netfs_rreq_short_read(struct netfs_io_request *rreq, @@ -260,6 +119,30 @@ static void netfs_rreq_short_read(struct netfs_io_request *rreq, } /* + * Reset the subrequest iterator prior to resubmission. + */ +static void netfs_reset_subreq_iter(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq) +{ + size_t remaining = subreq->len - subreq->transferred; + size_t count = iov_iter_count(&subreq->io_iter); + + if (count == remaining) + return; + + _debug("R=%08x[%u] ITER RESUB-MISMATCH %zx != %zx-%zx-%llx %x\n", + rreq->debug_id, subreq->debug_index, + iov_iter_count(&subreq->io_iter), subreq->transferred, + subreq->len, rreq->i_size, + subreq->io_iter.iter_type); + + if (count < remaining) + iov_iter_revert(&subreq->io_iter, remaining - count); + else + iov_iter_advance(&subreq->io_iter, count - remaining); +} + +/* * Resubmit any short or failed operations. Returns true if we got the rreq * ref back. */ @@ -287,6 +170,7 @@ static bool netfs_rreq_perform_resubmissions(struct netfs_io_request *rreq) trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead); netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); atomic_inc(&rreq->nr_outstanding); + netfs_reset_subreq_iter(rreq, subreq); netfs_read_from_server(rreq, subreq); } else if (test_bit(NETFS_SREQ_SHORT_IO, &subreq->flags)) { netfs_rreq_short_read(rreq, subreq); @@ -321,6 +205,48 @@ static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq) } /* + * Determine how much we can admit to having read from a DIO read. + */ +static void netfs_rreq_assess_dio(struct netfs_io_request *rreq) +{ + struct netfs_io_subrequest *subreq; + unsigned int i; + size_t transferred = 0; + + for (i = 0; i < rreq->direct_bv_count; i++) { + flush_dcache_page(rreq->direct_bv[i].bv_page); + // TODO: cifs marks pages in the destination buffer + // dirty under some circumstances after a read. Do we + // need to do that too? + set_page_dirty(rreq->direct_bv[i].bv_page); + } + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + if (subreq->error || subreq->transferred == 0) + break; + transferred += subreq->transferred; + if (subreq->transferred < subreq->len) + break; + } + + for (i = 0; i < rreq->direct_bv_count; i++) + flush_dcache_page(rreq->direct_bv[i].bv_page); + + rreq->transferred = transferred; + task_io_account_read(transferred); + + if (rreq->iocb) { + rreq->iocb->ki_pos += transferred; + if (rreq->iocb->ki_complete) + rreq->iocb->ki_complete( + rreq->iocb, rreq->error ? rreq->error : transferred); + } + if (rreq->netfs_ops->done) + rreq->netfs_ops->done(rreq); + inode_dio_end(rreq->inode); +} + +/* * Assess the state of a read request and decide what to do next. * * Note that we could be in an ordinary kernel thread, on a workqueue or in @@ -340,14 +266,15 @@ again: return; } - netfs_rreq_unlock_folios(rreq); + if (rreq->origin != NETFS_DIO_READ) + netfs_rreq_unlock_folios(rreq); + else + netfs_rreq_assess_dio(rreq); + trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip); clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); - if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) - return netfs_rreq_write_to_cache(rreq); - netfs_rreq_completed(rreq, was_async); } @@ -399,9 +326,9 @@ void netfs_subreq_terminated(struct netfs_io_subrequest *subreq, struct netfs_io_request *rreq = subreq->rreq; int u; - _enter("[%u]{%llx,%lx},%zd", - subreq->debug_index, subreq->start, subreq->flags, - transferred_or_error); + _enter("R=%x[%x]{%llx,%lx},%zd", + rreq->debug_id, subreq->debug_index, + subreq->start, subreq->flags, transferred_or_error); switch (subreq->source) { case NETFS_READ_FROM_CACHE: @@ -501,15 +428,20 @@ static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_subrequest */ static enum netfs_io_source netfs_rreq_prepare_read(struct netfs_io_request *rreq, - struct netfs_io_subrequest *subreq) + struct netfs_io_subrequest *subreq, + struct iov_iter *io_iter) { - enum netfs_io_source source; + enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER; + struct netfs_inode *ictx = netfs_inode(rreq->inode); + size_t lsize; _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); - source = netfs_cache_prepare_read(subreq, rreq->i_size); - if (source == NETFS_INVALID_READ) - goto out; + if (rreq->origin != NETFS_DIO_READ) { + source = netfs_cache_prepare_read(subreq, rreq->i_size); + if (source == NETFS_INVALID_READ) + goto out; + } if (source == NETFS_DOWNLOAD_FROM_SERVER) { /* Call out to the netfs to let it shrink the request to fit @@ -518,19 +450,52 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, * to make serial calls, it can indicate a short read and then * we will call it again. */ + if (rreq->origin != NETFS_DIO_READ) { + if (subreq->start >= ictx->zero_point) { + source = NETFS_FILL_WITH_ZEROES; + goto set; + } + if (subreq->len > ictx->zero_point - subreq->start) + subreq->len = ictx->zero_point - subreq->start; + } if (subreq->len > rreq->i_size - subreq->start) subreq->len = rreq->i_size - subreq->start; + if (rreq->rsize && subreq->len > rreq->rsize) + subreq->len = rreq->rsize; if (rreq->netfs_ops->clamp_length && !rreq->netfs_ops->clamp_length(subreq)) { source = NETFS_INVALID_READ; goto out; } + + if (subreq->max_nr_segs) { + lsize = netfs_limit_iter(io_iter, 0, subreq->len, + subreq->max_nr_segs); + if (subreq->len > lsize) { + subreq->len = lsize; + trace_netfs_sreq(subreq, netfs_sreq_trace_limited); + } + } } - if (WARN_ON(subreq->len == 0)) +set: + if (subreq->len > rreq->len) + pr_warn("R=%08x[%u] SREQ>RREQ %zx > %llx\n", + rreq->debug_id, subreq->debug_index, + subreq->len, rreq->len); + + if (WARN_ON(subreq->len == 0)) { source = NETFS_INVALID_READ; + goto out; + } + subreq->source = source; + trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); + + subreq->io_iter = *io_iter; + iov_iter_truncate(&subreq->io_iter, subreq->len); + iov_iter_advance(io_iter, subreq->len); out: subreq->source = source; trace_netfs_sreq(subreq, netfs_sreq_trace_prepare); @@ -541,7 +506,7 @@ out: * Slice off a piece of a read request and submit an I/O request for it. */ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, - unsigned int *_debug_index) + struct iov_iter *io_iter) { struct netfs_io_subrequest *subreq; enum netfs_io_source source; @@ -550,11 +515,10 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, if (!subreq) return false; - subreq->debug_index = (*_debug_index)++; subreq->start = rreq->start + rreq->submitted; - subreq->len = rreq->len - rreq->submitted; + subreq->len = io_iter->count; - _debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted); + _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted); list_add_tail(&subreq->rreq_link, &rreq->subrequests); /* Call out to the cache to find out what it can do with the remaining @@ -565,7 +529,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq, * (the starts must coincide), in which case, we go around the loop * again and ask it to download the next piece. */ - source = netfs_rreq_prepare_read(rreq, subreq); + source = netfs_rreq_prepare_read(rreq, subreq, io_iter); if (source == NETFS_INVALID_READ) goto subreq_failed; @@ -603,7 +567,7 @@ subreq_failed: */ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) { - unsigned int debug_index = 0; + struct iov_iter io_iter; int ret; _enter("R=%x %llx-%llx", @@ -611,50 +575,73 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) if (rreq->len == 0) { pr_err("Zero-sized read [R=%x]\n", rreq->debug_id); - netfs_put_request(rreq, false, netfs_rreq_trace_put_zero_len); return -EIO; } - INIT_WORK(&rreq->work, netfs_rreq_work); + if (rreq->origin == NETFS_DIO_READ) + inode_dio_begin(rreq->inode); - if (sync) - netfs_get_request(rreq, netfs_rreq_trace_get_hold); + // TODO: Use bounce buffer if requested + rreq->io_iter = rreq->iter; + + INIT_WORK(&rreq->work, netfs_rreq_work); /* Chop the read into slices according to what the cache and the netfs * want and submit each one. */ + netfs_get_request(rreq, netfs_rreq_trace_get_for_outstanding); atomic_set(&rreq->nr_outstanding, 1); + io_iter = rreq->io_iter; do { - if (!netfs_rreq_submit_slice(rreq, &debug_index)) + _debug("submit %llx + %llx >= %llx", + rreq->start, rreq->submitted, rreq->i_size); + if (rreq->origin == NETFS_DIO_READ && + rreq->start + rreq->submitted >= rreq->i_size) + break; + if (!netfs_rreq_submit_slice(rreq, &io_iter)) + break; + if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && + test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) break; } while (rreq->submitted < rreq->len); + if (!rreq->submitted) { + netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit); + if (rreq->origin == NETFS_DIO_READ) + inode_dio_end(rreq->inode); + ret = 0; + goto out; + } + if (sync) { - /* Keep nr_outstanding incremented so that the ref always belongs to - * us, and the service code isn't punted off to a random thread pool to - * process. + /* Keep nr_outstanding incremented so that the ref always + * belongs to us, and the service code isn't punted off to a + * random thread pool to process. Note that this might start + * further work, such as writing to the cache. */ - for (;;) { - wait_var_event(&rreq->nr_outstanding, - atomic_read(&rreq->nr_outstanding) == 1); + wait_var_event(&rreq->nr_outstanding, + atomic_read(&rreq->nr_outstanding) == 1); + if (atomic_dec_and_test(&rreq->nr_outstanding)) netfs_rreq_assess(rreq, false); - if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) - break; - cond_resched(); - } + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip); + wait_on_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS, + TASK_UNINTERRUPTIBLE); ret = rreq->error; - if (ret == 0 && rreq->submitted < rreq->len) { + if (ret == 0 && rreq->submitted < rreq->len && + rreq->origin != NETFS_DIO_READ) { trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read); ret = -EIO; } - netfs_put_request(rreq, false, netfs_rreq_trace_put_hold); } else { /* If we decrement nr_outstanding to 0, the ref belongs to us. */ if (atomic_dec_and_test(&rreq->nr_outstanding)) netfs_rreq_assess(rreq, false); - ret = 0; + ret = -EIOCBQUEUED; } + +out: return ret; } |