diff options
Diffstat (limited to 'drivers/hv/ring_buffer.c')
| -rw-r--r-- | drivers/hv/ring_buffer.c | 65 | 
1 files changed, 54 insertions, 11 deletions
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index be3c8b10b84a..3e90eb91db45 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -431,7 +431,24 @@ static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi,  }  /* - * Update host ring buffer after iterating over packets. + * Update host ring buffer after iterating over packets. If the host has + * stopped queuing new entries because it found the ring buffer full, and + * sufficient space is being freed up, signal the host. But be careful to + * only signal the host when necessary, both for performance reasons and + * because Hyper-V protects itself by throttling guests that signal + * inappropriately. + * + * Determining when to signal is tricky. There are three key data inputs + * that must be handled in this order to avoid race conditions: + * + * 1. Update the read_index + * 2. Read the pending_send_sz + * 3. Read the current write_index + * + * The interrupt_mask is not used to determine when to signal. The + * interrupt_mask is used only on the guest->host ring buffer when + * sending requests to the host. The host does not use it on the host-> + * guest ring buffer to indicate whether it should be signaled.   */  void hv_pkt_iter_close(struct vmbus_channel *channel)  { @@ -447,22 +464,30 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)  	start_read_index = rbi->ring_buffer->read_index;  	rbi->ring_buffer->read_index = rbi->priv_read_index; +	/* +	 * Older versions of Hyper-V (before WS2102 and Win8) do not +	 * implement pending_send_sz and simply poll if the host->guest +	 * ring buffer is full.  No signaling is needed or expected. +	 */  	if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)  		return;  	/*  	 * Issue a full memory barrier before making the signaling decision. -	 * Here is the reason for having this barrier: -	 * If the reading of the pend_sz (in this function) -	 * were to be reordered and read before we commit the new read -	 * index (in the calling function)  we could -	 * have a problem. If the host were to set the pending_sz after we -	 * have sampled pending_sz and go to sleep before we commit the +	 * If reading pending_send_sz were to be reordered and happen +	 * before we commit the new read_index, a race could occur.  If the +	 * host were to set the pending_send_sz after we have sampled +	 * pending_send_sz, and the ring buffer blocks before we commit the  	 * read index, we could miss sending the interrupt. Issue a full  	 * memory barrier to address this.  	 */  	virt_mb(); +	/* +	 * If the pending_send_sz is zero, then the ring buffer is not +	 * blocked and there is no need to signal.  This is far by the +	 * most common case, so exit quickly for best performance. +	 */  	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);  	if (!pending_sz)  		return; @@ -476,14 +501,32 @@ void hv_pkt_iter_close(struct vmbus_channel *channel)  	bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);  	/* -	 * If there was space before we began iteration, -	 * then host was not blocked. +	 * We want to signal the host only if we're transitioning +	 * from a "not enough free space" state to a "enough free +	 * space" state.  For example, it's possible that this function +	 * could run and free up enough space to signal the host, and then +	 * run again and free up additional space before the host has a +	 * chance to clear the pending_send_sz.  The 2nd invocation would +	 * be a null transition from "enough free space" to "enough free +	 * space", which doesn't warrant a signal. +	 * +	 * Exactly filling the ring buffer is treated as "not enough +	 * space". The ring buffer always must have at least one byte +	 * empty so the empty and full conditions are distinguishable. +	 * hv_get_bytes_to_write() doesn't fully tell the truth in +	 * this regard. +	 * +	 * So first check if we were in the "enough free space" state +	 * before we began the iteration. If so, the host was not +	 * blocked, and there's no need to signal.  	 */ -  	if (curr_write_sz - bytes_read > pending_sz)  		return; -	/* If pending write will not fit, don't give false hope. */ +	/* +	 * Similarly, if the new state is "not enough space", then +	 * there's no need to signal. +	 */  	if (curr_write_sz <= pending_sz)  		return;  |