diff options
Diffstat (limited to 'kernel/futex.c')
| -rw-r--r-- | kernel/futex.c | 194 | 
1 file changed, 60 insertions, 134 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 0cf84c8664f2..b59532862bc0 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -135,8 +135,7 @@   *   * Where (A) orders the waiters increment and the futex value read through   * atomic operations (see hb_waiters_inc) and where (B) orders the write - * to futex and the waiters read -- this is done by the barriers for both - * shared and private futexes in get_futex_key_refs(). + * to futex and the waiters read (see hb_waiters_pending()).   *   * This yields the following case (where X:=waiters, Y:=futex):   * @@ -331,17 +330,6 @@ static void compat_exit_robust_list(struct task_struct *curr);  static inline void compat_exit_robust_list(struct task_struct *curr) { }  #endif -static inline void futex_get_mm(union futex_key *key) -{ -	mmgrab(key->private.mm); -	/* -	 * Ensure futex_get_mm() implies a full barrier such that -	 * get_futex_key() implies a full barrier. This is relied upon -	 * as smp_mb(); (B), see the ordering comment above. -	 */ -	smp_mb__after_atomic(); -} -  /*   * Reflects a new waiter being added to the waitqueue.   */ @@ -370,6 +358,10 @@ static inline void hb_waiters_dec(struct futex_hash_bucket *hb)  static inline int hb_waiters_pending(struct futex_hash_bucket *hb)  {  #ifdef CONFIG_SMP +	/* +	 * Full barrier (B), see the ordering comment above. 
+	 */ +	smp_mb();  	return atomic_read(&hb->waiters);  #else  	return 1; @@ -385,9 +377,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb)   */  static struct futex_hash_bucket *hash_futex(union futex_key *key)  { -	u32 hash = jhash2((u32*)&key->both.word, -			  (sizeof(key->both.word)+sizeof(key->both.ptr))/4, +	u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,  			  key->both.offset); +  	return &futex_queues[hash & (futex_hashsize - 1)];  } @@ -407,70 +399,6 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)  		&& key1->both.offset == key2->both.offset);  } -/* - * Take a reference to the resource addressed by a key. - * Can be called while holding spinlocks. - * - */ -static void get_futex_key_refs(union futex_key *key) -{ -	if (!key->both.ptr) -		return; - -	/* -	 * On MMU less systems futexes are always "private" as there is no per -	 * process address space. We need the smp wmb nevertheless - yes, -	 * arch/blackfin has MMU less SMP ... -	 */ -	if (!IS_ENABLED(CONFIG_MMU)) { -		smp_mb(); /* explicit smp_mb(); (B) */ -		return; -	} - -	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { -	case FUT_OFF_INODE: -		ihold(key->shared.inode); /* implies smp_mb(); (B) */ -		break; -	case FUT_OFF_MMSHARED: -		futex_get_mm(key); /* implies smp_mb(); (B) */ -		break; -	default: -		/* -		 * Private futexes do not hold reference on an inode or -		 * mm, therefore the only purpose of calling get_futex_key_refs -		 * is because we need the barrier for the lockless waiter check. -		 */ -		smp_mb(); /* explicit smp_mb(); (B) */ -	} -} - -/* - * Drop a reference to the resource addressed by a key. - * The hash bucket spinlock must not be held. This is - * a no-op for private futexes, see comment in the get - * counterpart. 
- */ -static void drop_futex_key_refs(union futex_key *key) -{ -	if (!key->both.ptr) { -		/* If we're here then we tried to put a key we failed to get */ -		WARN_ON_ONCE(1); -		return; -	} - -	if (!IS_ENABLED(CONFIG_MMU)) -		return; - -	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { -	case FUT_OFF_INODE: -		iput(key->shared.inode); -		break; -	case FUT_OFF_MMSHARED: -		mmdrop(key->private.mm); -		break; -	} -} -  enum futex_access {  	FUTEX_READ,  	FUTEX_WRITE @@ -505,6 +433,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,  	return timeout;  } +/* + * Generate a machine wide unique identifier for this inode. + * + * This relies on u64 not wrapping in the life-time of the machine; which with + * 1ns resolution means almost 585 years. + * + * This further relies on the fact that a well formed program will not unmap + * the file while it has a (shared) futex waiting on it. This mapping will have + * a file reference which pins the mount and inode. + * + * If for some reason an inode gets evicted and read back in again, it will get + * a new sequence number and will _NOT_ match, even though it is the exact same + * file. + * + * It is important that match_futex() will never have a false-positive, esp. + * for PI futexes that can mess up the state. The above argues that false-negatives + * are only possible for malformed programs. + */ +static u64 get_inode_sequence_number(struct inode *inode) +{ +	static atomic64_t i_seq; +	u64 old; + +	/* Does the inode already have a sequence number? 
*/ +	old = atomic64_read(&inode->i_sequence); +	if (likely(old)) +		return old; + +	for (;;) { +		u64 new = atomic64_add_return(1, &i_seq); +		if (WARN_ON_ONCE(!new)) +			continue; + +		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); +		if (old) +			return old; +		return new; +	} +} +  /**   * get_futex_key() - Get parameters which are the keys for a futex   * @uaddr:	virtual address of the futex @@ -517,9 +485,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,   *   * The key words are stored in @key on success.   * - * For shared mappings, it's (page->index, file_inode(vma->vm_file), - * offset_within_page).  For private mappings, it's (uaddr, current->mm). - * We can usually work out the index without swapping in the page. + * For shared mappings (when @fshared), the key is: + *   ( inode->i_sequence, page->index, offset_within_page ) + * [ also see get_inode_sequence_number() ] + * + * For private mappings (or when !@fshared), the key is: + *   ( current->mm, address, 0 ) + * + * This allows (cross process, where applicable) identification of the futex + * without keeping the page pinned for the duration of the FUTEX_WAIT.   *   * lock_page() might sleep, the caller should not hold a spinlock.   */ @@ -556,7 +530,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a  	if (!fshared) {  		key->private.mm = mm;  		key->private.address = address; -		get_futex_key_refs(key);  /* implies smp_mb(); (B) */  		return 0;  	} @@ -659,8 +632,6 @@ again:  		key->private.mm = mm;  		key->private.address = address; -		get_futex_key_refs(key); /* implies smp_mb(); (B) */ -  	} else {  		struct inode *inode; @@ -692,36 +663,8 @@ again:  			goto again;  		} -		/* -		 * Take a reference unless it is about to be freed. Previously -		 * this reference was taken by ihold under the page lock -		 * pinning the inode in place so i_lock was unnecessary. 
The -		 * only way for this check to fail is if the inode was -		 * truncated in parallel which is almost certainly an -		 * application bug. In such a case, just retry. -		 * -		 * We are not calling into get_futex_key_refs() in file-backed -		 * cases, therefore a successful atomic_inc return below will -		 * guarantee that get_futex_key() will still imply smp_mb(); (B). -		 */ -		if (!atomic_inc_not_zero(&inode->i_count)) { -			rcu_read_unlock(); -			put_page(page); - -			goto again; -		} - -		/* Should be impossible but lets be paranoid for now */ -		if (WARN_ON_ONCE(inode->i_mapping != mapping)) { -			err = -EFAULT; -			rcu_read_unlock(); -			iput(inode); - -			goto out; -		} -  		key->both.offset |= FUT_OFF_INODE; /* inode-based key */ -		key->shared.inode = inode; +		key->shared.i_seq = get_inode_sequence_number(inode);  		key->shared.pgoff = basepage_index(tail);  		rcu_read_unlock();  	} @@ -733,7 +676,6 @@ out:  static inline void put_futex_key(union futex_key *key)  { -	drop_futex_key_refs(key);  }  /** @@ -1723,10 +1665,9 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)  		oparg = 1 << oparg;  	} -	if (!access_ok(uaddr, sizeof(u32))) -		return -EFAULT; - +	pagefault_disable();  	ret = arch_futex_atomic_op_inuser(op, oparg, &oldval, uaddr); +	pagefault_enable();  	if (ret)  		return ret; @@ -1868,7 +1809,6 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,  		plist_add(&q->list, &hb2->chain);  		q->lock_ptr = &hb2->lock;  	} -	get_futex_key_refs(key2);  	q->key = *key2;  } @@ -1890,7 +1830,6 @@ static inline  void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,  			   struct futex_hash_bucket *hb)  { -	get_futex_key_refs(key);  	q->key = *key;  	__unqueue_futex(q); @@ -2001,7 +1940,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,  			 u32 *cmpval, int requeue_pi)  {  	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; -	int drop_count = 0, task_count = 0, 
ret; +	int task_count = 0, ret;  	struct futex_pi_state *pi_state = NULL;  	struct futex_hash_bucket *hb1, *hb2;  	struct futex_q *this, *next; @@ -2122,7 +2061,6 @@ retry_private:  		 */  		if (ret > 0) {  			WARN_ON(pi_state); -			drop_count++;  			task_count++;  			/*  			 * If we acquired the lock, then the user space value @@ -2242,7 +2180,6 @@ retry_private:  				 * doing so.  				 */  				requeue_pi_wake_futex(this, &key2, hb2); -				drop_count++;  				continue;  			} else if (ret) {  				/* @@ -2263,7 +2200,6 @@ retry_private:  			}  		}  		requeue_futex(this, hb1, hb2, &key2); -		drop_count++;  	}  	/* @@ -2278,15 +2214,6 @@ out_unlock:  	wake_up_q(&wake_q);  	hb_waiters_dec(hb2); -	/* -	 * drop_futex_key_refs() must be called outside the spinlocks. During -	 * the requeue we moved futex_q's from the hash bucket at key1 to the -	 * one at key2 and updated their key pointer.  We no longer need to -	 * hold the references to key1. -	 */ -	while (--drop_count >= 0) -		drop_futex_key_refs(&key1); -  out_put_keys:  	put_futex_key(&key2);  out_put_key1: @@ -2416,7 +2343,6 @@ retry:  		ret = 1;  	} -	drop_futex_key_refs(&q->key);  	return ret;  }  |