diff options
Diffstat (limited to 'fs/jbd2/commit.c')
| -rw-r--r-- | fs/jbd2/commit.c | 184 | 
1 files changed, 77 insertions, 107 deletions
| diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0f53946f13c1..559bec1a37b4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -30,15 +30,22 @@  #include <trace/events/jbd2.h>  /* - * Default IO end handler for temporary BJ_IO buffer_heads. + * IO end handler for temporary buffer_heads handling writes to the journal.   */  static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)  { +	struct buffer_head *orig_bh = bh->b_private; +  	BUFFER_TRACE(bh, "");  	if (uptodate)  		set_buffer_uptodate(bh);  	else  		clear_buffer_uptodate(bh); +	if (orig_bh) { +		clear_bit_unlock(BH_Shadow, &orig_bh->b_state); +		smp_mb__after_clear_bit(); +		wake_up_bit(&orig_bh->b_state, BH_Shadow); +	}  	unlock_buffer(bh);  } @@ -85,8 +92,7 @@ nope:  	__brelse(bh);  } -static void jbd2_commit_block_csum_set(journal_t *j, -				       struct journal_head *descriptor) +static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)  {  	struct commit_header *h;  	__u32 csum; @@ -94,12 +100,11 @@ static void jbd2_commit_block_csum_set(journal_t *j,  	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))  		return; -	h = (struct commit_header *)(jh2bh(descriptor)->b_data); +	h = (struct commit_header *)(bh->b_data);  	h->h_chksum_type = 0;  	h->h_chksum_size = 0;  	h->h_chksum[0] = 0; -	csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, -			   j->j_blocksize); +	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);  	h->h_chksum[0] = cpu_to_be32(csum);  } @@ -116,7 +121,6 @@ static int journal_submit_commit_record(journal_t *journal,  					struct buffer_head **cbh,  					__u32 crc32_sum)  { -	struct journal_head *descriptor;  	struct commit_header *tmp;  	struct buffer_head *bh;  	int ret; @@ -127,12 +131,10 @@ static int journal_submit_commit_record(journal_t *journal,  	if (is_journal_aborted(journal))  		return 0; -	descriptor = jbd2_journal_get_descriptor_buffer(journal); -	if (!descriptor) +	bh = jbd2_journal_get_descriptor_buffer(journal); +	if (!bh)  		return 1; -	bh = jh2bh(descriptor); -  	tmp = (struct commit_header *)bh->b_data;  	tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);  	tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); @@ -146,9 +148,9 @@ static int journal_submit_commit_record(journal_t *journal,  		tmp->h_chksum_size 	= JBD2_CRC32_CHKSUM_SIZE;  		tmp->h_chksum[0] 	= cpu_to_be32(crc32_sum);  	} -	jbd2_commit_block_csum_set(journal, descriptor); +	jbd2_commit_block_csum_set(journal, bh); -	JBUFFER_TRACE(descriptor, "submit commit block"); +	BUFFER_TRACE(bh, "submit commit block");  	lock_buffer(bh);  	clear_buffer_dirty(bh);  	set_buffer_uptodate(bh); @@ -180,7 +182,6 @@ static int journal_wait_on_commit_record(journal_t *journal,  	if (unlikely(!buffer_uptodate(bh)))  		ret = -EIO;  	put_bh(bh);            /* One for getblk() */ -	jbd2_journal_put_journal_head(bh2jh(bh));  	return ret;  } @@ -321,7 +322,7 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,  }  static void jbd2_descr_block_csum_set(journal_t *j, -				      struct journal_head *descriptor) +				      struct buffer_head *bh)  {  	struct jbd2_journal_block_tail *tail;  	__u32 csum; @@ -329,12 +330,10 @@ static void jbd2_descr_block_csum_set(journal_t *j,  	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))  		return; -	tail = (struct jbd2_journal_block_tail *) -			(jh2bh(descriptor)->b_data + j->j_blocksize - +	tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -  			sizeof(struct jbd2_journal_block_tail));  	tail->t_checksum = 0; -	csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, -			   j->j_blocksize); +	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);  	tail->t_checksum = cpu_to_be32(csum);  } @@ -343,20 +342,21 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,  {  	struct page *page = bh->b_page;  	__u8 *addr; -	__u32 csum; +	__u32 csum32;  	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))  		return;  	sequence = cpu_to_be32(sequence);  	addr = kmap_atomic(page); -	csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, -			  sizeof(sequence)); -	csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), -			  bh->b_size); +	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, +			     sizeof(sequence)); +	csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), +			     bh->b_size);  	kunmap_atomic(addr); -	tag->t_checksum = cpu_to_be32(csum); +	/* We only have space to store the lower 16 bits of the crc32c. */ +	tag->t_checksum = cpu_to_be16(csum32);  }  /*   * jbd2_journal_commit_transaction @@ -368,7 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)  {  	struct transaction_stats_s stats;  	transaction_t *commit_transaction; -	struct journal_head *jh, *new_jh, *descriptor; +	struct journal_head *jh; +	struct buffer_head *descriptor;  	struct buffer_head **wbuf = journal->j_wbuf;  	int bufs;  	int flags; @@ -392,6 +393,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)  	tid_t first_tid;  	int update_tail;  	int csum_size = 0; +	LIST_HEAD(io_bufs); +	LIST_HEAD(log_bufs);  	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))  		csum_size = sizeof(struct jbd2_journal_block_tail); @@ -424,13 +427,13 @@ void jbd2_journal_commit_transaction(journal_t *journal)  	J_ASSERT(journal->j_committing_transaction == NULL);  	commit_transaction = journal->j_running_transaction; -	J_ASSERT(commit_transaction->t_state == T_RUNNING);  	trace_jbd2_start_commit(journal, commit_transaction);  	jbd_debug(1, "JBD2: starting commit of transaction %d\n",  			commit_transaction->t_tid);  	write_lock(&journal->j_state_lock); +	J_ASSERT(commit_transaction->t_state == T_RUNNING);  	commit_transaction->t_state = T_LOCKED;  	trace_jbd2_commit_locking(journal, commit_transaction); @@ -520,6 +523,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)  	 */  	jbd2_journal_switch_revoke_table(journal); +	/* +	 * Reserved credits cannot be claimed anymore, free them +	 */ +	atomic_sub(atomic_read(&journal->j_reserved_credits), +		   &commit_transaction->t_outstanding_credits); +  	trace_jbd2_commit_flushing(journal, commit_transaction);  	stats.run.rs_flushing = jiffies;  	stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, @@ -533,7 +542,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)  	wake_up(&journal->j_wait_transaction_locked);  	write_unlock(&journal->j_state_lock); -	jbd_debug(3, "JBD2: commit phase 2\n"); +	jbd_debug(3, "JBD2: commit phase 2a\n");  	/*  	 * Now start flushing things to disk, in the order they appear @@ -545,10 +554,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)  	blk_start_plug(&plug);  	jbd2_journal_write_revoke_records(journal, commit_transaction, -					  WRITE_SYNC); +					  &log_bufs, WRITE_SYNC);  	blk_finish_plug(&plug); -	jbd_debug(3, "JBD2: commit phase 2\n"); +	jbd_debug(3, "JBD2: commit phase 2b\n");  	/*  	 * Way to go: we have now written out all of the data for a @@ -571,8 +580,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)  		 atomic_read(&commit_transaction->t_outstanding_credits));  	err = 0; -	descriptor = NULL;  	bufs = 0; +	descriptor = NULL;  	blk_start_plug(&plug);  	while (commit_transaction->t_buffers) { @@ -604,8 +613,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)  		   record the metadata buffer. */  		if (!descriptor) { -			struct buffer_head *bh; -  			J_ASSERT (bufs == 0);  			jbd_debug(4, "JBD2: get descriptor\n"); @@ -616,26 +623,26 @@ void jbd2_journal_commit_transaction(journal_t *journal)  				continue;  			} -			bh = jh2bh(descriptor);  			jbd_debug(4, "JBD2: got buffer %llu (%p)\n", -				(unsigned long long)bh->b_blocknr, bh->b_data); -			header = (journal_header_t *)&bh->b_data[0]; +				(unsigned long long)descriptor->b_blocknr, +				descriptor->b_data); +			header = (journal_header_t *)descriptor->b_data;  			header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);  			header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);  			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid); -			tagp = &bh->b_data[sizeof(journal_header_t)]; -			space_left = bh->b_size - sizeof(journal_header_t); +			tagp = &descriptor->b_data[sizeof(journal_header_t)]; +			space_left = descriptor->b_size - +						sizeof(journal_header_t);  			first_tag = 1; -			set_buffer_jwrite(bh); -			set_buffer_dirty(bh); -			wbuf[bufs++] = bh; +			set_buffer_jwrite(descriptor); +			set_buffer_dirty(descriptor); +			wbuf[bufs++] = descriptor;  			/* Record it so that we can wait for IO                             completion later */ -			BUFFER_TRACE(bh, "ph3: file as descriptor"); -			jbd2_journal_file_buffer(descriptor, commit_transaction, -					BJ_LogCtl); +			BUFFER_TRACE(descriptor, "ph3: file as descriptor"); +			jbd2_file_log_bh(&log_bufs, descriptor);  		}  		/* Where is the buffer to be written? */ @@ -658,29 +665,22 @@ void jbd2_journal_commit_transaction(journal_t *journal)  		/* Bump b_count to prevent truncate from stumbling over                     the shadowed buffer!  @@@ This can go if we ever get -                   rid of the BJ_IO/BJ_Shadow pairing of buffers. */ +                   rid of the shadow pairing of buffers. */  		atomic_inc(&jh2bh(jh)->b_count); -		/* Make a temporary IO buffer with which to write it out -                   (this will requeue both the metadata buffer and the -                   temporary IO buffer). new_bh goes on BJ_IO*/ - -		set_bit(BH_JWrite, &jh2bh(jh)->b_state);  		/* -		 * akpm: jbd2_journal_write_metadata_buffer() sets -		 * new_bh->b_transaction to commit_transaction. -		 * We need to clean this up before we release new_bh -		 * (which is of type BJ_IO) +		 * Make a temporary IO buffer with which to write it out +		 * (this will requeue the metadata buffer to BJ_Shadow).  		 */ +		set_bit(BH_JWrite, &jh2bh(jh)->b_state);  		JBUFFER_TRACE(jh, "ph3: write metadata");  		flags = jbd2_journal_write_metadata_buffer(commit_transaction, -						      jh, &new_jh, blocknr); +						jh, &wbuf[bufs], blocknr);  		if (flags < 0) {  			jbd2_journal_abort(journal, flags);  			continue;  		} -		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); -		wbuf[bufs++] = jh2bh(new_jh); +		jbd2_file_log_bh(&io_bufs, wbuf[bufs]);  		/* Record the new block's tag in the current descriptor                     buffer */ @@ -694,10 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)  		tag = (journal_block_tag_t *) tagp;  		write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);  		tag->t_flags = cpu_to_be16(tag_flag); -		jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), +		jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],  					commit_transaction->t_tid);  		tagp += tag_bytes;  		space_left -= tag_bytes; +		bufs++;  		if (first_tag) {  			memcpy (tagp, journal->j_uuid, 16); @@ -809,7 +810,7 @@ start_journal_io:             the log.  Before we can commit it, wait for the IO so far to             complete.  Control buffers being written are on the             transaction's t_log_list queue, and metadata buffers are on -           the t_iobuf_list queue. +           the io_bufs list.  	   Wait for the buffers in reverse order.  That way we are  	   less likely to be woken up until all IOs have completed, and @@ -818,47 +819,33 @@ start_journal_io:  	jbd_debug(3, "JBD2: commit phase 3\n"); -	/* -	 * akpm: these are BJ_IO, and j_list_lock is not needed. -	 * See __journal_try_to_free_buffer. -	 */ -wait_for_iobuf: -	while (commit_transaction->t_iobuf_list != NULL) { -		struct buffer_head *bh; +	while (!list_empty(&io_bufs)) { +		struct buffer_head *bh = list_entry(io_bufs.prev, +						    struct buffer_head, +						    b_assoc_buffers); -		jh = commit_transaction->t_iobuf_list->b_tprev; -		bh = jh2bh(jh); -		if (buffer_locked(bh)) { -			wait_on_buffer(bh); -			goto wait_for_iobuf; -		} -		if (cond_resched()) -			goto wait_for_iobuf; +		wait_on_buffer(bh); +		cond_resched();  		if (unlikely(!buffer_uptodate(bh)))  			err = -EIO; - -		clear_buffer_jwrite(bh); - -		JBUFFER_TRACE(jh, "ph4: unfile after journal write"); -		jbd2_journal_unfile_buffer(journal, jh); +		jbd2_unfile_log_bh(bh);  		/* -		 * ->t_iobuf_list should contain only dummy buffer_heads -		 * which were created by jbd2_journal_write_metadata_buffer(). +		 * The list contains temporary buffer heads created by +		 * jbd2_journal_write_metadata_buffer().  		 */  		BUFFER_TRACE(bh, "dumping temporary bh"); -		jbd2_journal_put_journal_head(jh);  		__brelse(bh);  		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);  		free_buffer_head(bh); -		/* We also have to unlock and free the corresponding -                   shadowed buffer */ +		/* We also have to refile the corresponding shadowed buffer */  		jh = commit_transaction->t_shadow_list->b_tprev;  		bh = jh2bh(jh); -		clear_bit(BH_JWrite, &bh->b_state); +		clear_buffer_jwrite(bh);  		J_ASSERT_BH(bh, buffer_jbddirty(bh)); +		J_ASSERT_BH(bh, !buffer_shadow(bh));  		/* The metadata is now released for reuse, but we need                     to remember it against this transaction so that when @@ -866,14 +853,6 @@ wait_for_iobuf:                     required. */  		JBUFFER_TRACE(jh, "file as BJ_Forget");  		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); -		/* -		 * Wake up any transactions which were waiting for this IO to -		 * complete. The barrier must be here so that changes by -		 * jbd2_journal_file_buffer() take effect before wake_up_bit() -		 * does the waitqueue check. -		 */ -		smp_mb(); -		wake_up_bit(&bh->b_state, BH_Unshadow);  		JBUFFER_TRACE(jh, "brelse shadowed buffer");  		__brelse(bh);  	} @@ -883,26 +862,19 @@ wait_for_iobuf:  	jbd_debug(3, "JBD2: commit phase 4\n");  	/* Here we wait for the revoke record and descriptor record buffers */ - wait_for_ctlbuf: -	while (commit_transaction->t_log_list != NULL) { +	while (!list_empty(&log_bufs)) {  		struct buffer_head *bh; -		jh = commit_transaction->t_log_list->b_tprev; -		bh = jh2bh(jh); -		if (buffer_locked(bh)) { -			wait_on_buffer(bh); -			goto wait_for_ctlbuf; -		} -		if (cond_resched()) -			goto wait_for_ctlbuf; +		bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers); +		wait_on_buffer(bh); +		cond_resched();  		if (unlikely(!buffer_uptodate(bh)))  			err = -EIO;  		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");  		clear_buffer_jwrite(bh); -		jbd2_journal_unfile_buffer(journal, jh); -		jbd2_journal_put_journal_head(jh); +		jbd2_unfile_log_bh(bh);  		__brelse(bh);		/* One for getblk */  		/* AKPM: bforget here */  	} @@ -952,9 +924,7 @@ wait_for_iobuf:  	J_ASSERT(list_empty(&commit_transaction->t_inode_list));  	J_ASSERT(commit_transaction->t_buffers == NULL);  	J_ASSERT(commit_transaction->t_checkpoint_list == NULL); -	J_ASSERT(commit_transaction->t_iobuf_list == NULL);  	J_ASSERT(commit_transaction->t_shadow_list == NULL); -	J_ASSERT(commit_transaction->t_log_list == NULL);  restart_loop:  	/* |