diff options
-rw-r--r-- | Documentation/DocBook/kernel-locking.tmpl | 25 | ||||
-rw-r--r-- | arch/x86/boot/printf.c | 2 | ||||
-rw-r--r-- | drivers/acpi/dispatcher/dsmethod.c | 2 | ||||
-rw-r--r-- | drivers/acpi/executer/exmutex.c | 4 | ||||
-rw-r--r-- | drivers/isdn/hysdn/hycapi.c | 6 | ||||
-rw-r--r-- | drivers/scsi/3w-9xxx.c | 6 | ||||
-rw-r--r-- | drivers/scsi/aha152x.c | 4 | ||||
-rw-r--r-- | drivers/scsi/atp870u.c | 2 | ||||
-rw-r--r-- | drivers/scsi/hptiop.c | 12 | ||||
-rw-r--r-- | drivers/scsi/qla1280.c | 2 | ||||
-rw-r--r-- | drivers/video/aty/atyfb_base.c | 2 | ||||
-rw-r--r-- | drivers/video/aty/radeon_base.c | 4 | ||||
-rw-r--r-- | drivers/video/matrox/matroxfb_base.h | 2 | ||||
-rw-r--r-- | drivers/video/sis/sis_main.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 24 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.h | 19 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_file.c | 17 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_vnode.h | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 112 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.h | 3 | ||||
-rw-r--r-- | kernel/module.c | 18 | ||||
-rw-r--r-- | kernel/stop_machine.c | 7 |
23 files changed, 167 insertions, 125 deletions
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 77c42f40be5d..2510763295d0 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -703,6 +703,31 @@ </sect1> </chapter> +<chapter id="trylock-functions"> + <title>The trylock Functions</title> + <para> + There are functions that try to acquire a lock only once and immediately + return a value telling about success or failure to acquire the lock. + They can be used if you need no access to the data protected with the lock + when some other thread is holding the lock. You should acquire the lock + later if you then need access to the data protected with the lock. + </para> + + <para> + <function>spin_trylock()</function> does not spin but returns non-zero if + it acquires the spinlock on the first try or 0 if not. This function can + be used in all contexts like <function>spin_lock</function>: you must have + disabled the contexts that might interrupt you and acquire the spin lock. + </para> + + <para> + <function>mutex_trylock()</function> does not suspend your task + but returns non-zero if it could lock the mutex on the first try + or 0 if not. This function cannot be safely used in hardware or software + interrupt contexts despite not sleeping. + </para> +</chapter> + <chapter id="Examples"> <title>Common Examples</title> <para> diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index c1d00c0274c4..50e47cdbdddd 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c @@ -56,7 +56,7 @@ static char *number(char *str, long num, int base, int size, int precision, if (type & LEFT) type &= ~ZEROPAD; if (base < 2 || base > 36) - return 0; + return NULL; c = (type & ZEROPAD) ? '0' : ' '; sign = 0; if (type & SIGN) { diff --git a/drivers/acpi/dispatcher/dsmethod.c b/drivers/acpi/dispatcher/dsmethod.c index e48a3ea03117..2509809a36cf 100644 --- a/drivers/acpi/dispatcher/dsmethod.c +++ b/drivers/acpi/dispatcher/dsmethod.c @@ -565,7 +565,7 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc, acpi_os_release_mutex(method_desc->method. mutex->mutex.os_mutex); - method_desc->method.mutex->mutex.thread_id = 0; + method_desc->method.mutex->mutex.thread_id = NULL; } } diff --git a/drivers/acpi/executer/exmutex.c b/drivers/acpi/executer/exmutex.c index c873ab40cd0e..a8bf3d713e28 100644 --- a/drivers/acpi/executer/exmutex.c +++ b/drivers/acpi/executer/exmutex.c @@ -326,7 +326,7 @@ acpi_status acpi_ex_release_mutex_object(union acpi_operand_object *obj_desc) /* Clear mutex info */ - obj_desc->mutex.thread_id = 0; + obj_desc->mutex.thread_id = NULL; return_ACPI_STATUS(status); } @@ -463,7 +463,7 @@ void acpi_ex_release_all_mutexes(struct acpi_thread_state *thread) /* Mark mutex unowned */ obj_desc->mutex.owner_thread = NULL; - obj_desc->mutex.thread_id = 0; + obj_desc->mutex.thread_id = NULL; /* Update Thread sync_level (Last mutex is the important one) */ diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c index d3999a8e9f88..53f6ad1235db 100644 --- a/drivers/isdn/hysdn/hycapi.c +++ b/drivers/isdn/hysdn/hycapi.c @@ -462,11 +462,11 @@ static int hycapi_read_proc(char *page, char **start, off_t off, default: s = "???"; break; } len += sprintf(page+len, "%-16s %s\n", "type", s); - if ((s = cinfo->version[VER_DRIVER]) != 0) + if ((s = cinfo->version[VER_DRIVER]) != NULL) len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); - if ((s = cinfo->version[VER_CARDTYPE]) != 0) + if ((s = cinfo->version[VER_CARDTYPE]) != NULL) len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); - if ((s = cinfo->version[VER_SERIAL]) != 0) + if ((s = cinfo->version[VER_SERIAL]) != NULL) len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index b31faeccb9cd..867f6fd5c2c0 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -1278,7 +1278,7 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance) error = 0; /* Check for command packet errors */ if (full_command_packet->command.newcommand.status != 0) { - if (tw_dev->srb[request_id] != 0) { + if (tw_dev->srb[request_id] != NULL) { error = twa_fill_sense(tw_dev, request_id, 1, 1); } else { /* Skip ioctl error prints */ @@ -1290,7 +1290,7 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance) /* Check for correct state */ if (tw_dev->state[request_id] != TW_S_POSTED) { - if (tw_dev->srb[request_id] != 0) { + if (tw_dev->srb[request_id] != NULL) { TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1a, "Received a request id that wasn't posted"); TW_CLEAR_ALL_INTERRUPTS(tw_dev); goto twa_interrupt_bail; @@ -1298,7 +1298,7 @@ static irqreturn_t twa_interrupt(int irq, void *dev_instance) } /* Check for internal command completion */ - if (tw_dev->srb[request_id] == 0) { + if (tw_dev->srb[request_id] == NULL) { if (request_id != tw_dev->chrdev_request_id) { if (twa_aen_complete(tw_dev, request_id)) TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1b, "Error completing AEN during attention interrupt"); diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index 1dca1775f4b1..0899cb61e3dd 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -3582,7 +3582,7 @@ static int checksetup(struct aha152x_setup *setup) if (i == ARRAY_SIZE(ports)) return 0; - if ( request_region(setup->io_port, IO_RANGE, "aha152x")==0 ) { + if (!request_region(setup->io_port, IO_RANGE, "aha152x")) { printk(KERN_ERR "aha152x: io port 0x%x busy.\n", setup->io_port); return 0; } @@ -3842,7 +3842,7 @@ static int __init aha152x_init(void) if ((setup_count == 1) && (setup[0].io_port == ports[i])) continue; - if ( request_region(ports[i], IO_RANGE, "aha152x")==0 ) { + if (!request_region(ports[i], IO_RANGE, "aha152x")) { printk(KERN_ERR "aha152x: io port 0x%x busy.\n", ports[i]); continue; } diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c index db6de5e6afb3..7d311541c76c 100644 --- a/drivers/scsi/atp870u.c +++ b/drivers/scsi/atp870u.c @@ -747,7 +747,7 @@ static void send_s870(struct atp_unit *dev,unsigned char c) dev->quhd[c] = 0; } workreq = dev->quereq[c][dev->quhd[c]]; - if (dev->id[c][scmd_id(workreq)].curr_req == 0) { + if (dev->id[c][scmd_id(workreq)].curr_req == NULL) { dev->id[c][scmd_id(workreq)].curr_req = workreq; dev->last_cmd[c] = scmd_id(workreq); goto cmd_subp; diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index aaa48e0c8ed0..da876d3924be 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -444,7 +444,7 @@ static void __iomem *hptiop_map_pci_bar(struct hptiop_hba *hba, int index) if (!(pci_resource_flags(pcidev, index) & IORESOURCE_MEM)) { printk(KERN_ERR "scsi%d: pci resource invalid\n", hba->host->host_no); - return 0; + return NULL; } mem_base_phy = pci_resource_start(pcidev, index); @@ -454,7 +454,7 @@ static void __iomem *hptiop_map_pci_bar(struct hptiop_hba *hba, int index) if (!mem_base_virt) { printk(KERN_ERR "scsi%d: Fail to ioremap memory space\n", hba->host->host_no); - return 0; + return NULL; } return mem_base_virt; } @@ -476,11 +476,11 @@ static void hptiop_unmap_pci_bar_itl(struct hptiop_hba *hba) static int hptiop_map_pci_bar_mv(struct hptiop_hba *hba) { hba->u.mv.regs = hptiop_map_pci_bar(hba, 0); - if (hba->u.mv.regs == 0) + if (hba->u.mv.regs == NULL) return -1; hba->u.mv.mu = hptiop_map_pci_bar(hba, 2); - if (hba->u.mv.mu == 0) { + if (hba->u.mv.mu == NULL) { iounmap(hba->u.mv.regs); return -1; } @@ -1210,8 +1210,8 @@ static void hptiop_remove(struct pci_dev *pcidev) static struct hptiop_adapter_ops hptiop_itl_ops = { .iop_wait_ready = iop_wait_ready_itl, - .internal_memalloc = 0, - .internal_memfree = 0, + .internal_memalloc = NULL, + .internal_memfree = NULL, .map_pci_bar = hptiop_map_pci_bar_itl, .unmap_pci_bar = hptiop_unmap_pci_bar_itl, .enable_intr = hptiop_enable_intr_itl, diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 51e2f299dbbb..3754ab87f89a 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2811,7 +2811,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) /* Check for room in outstanding command list. */ for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS && - ha->outstanding_cmds[cnt] != 0; cnt++); + ha->outstanding_cmds[cnt] != NULL; cnt++); if (cnt >= MAX_OUTSTANDING_COMMANDS) { status = 1; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index e4bcf5376a99..bd4ac0bafecb 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -3356,7 +3356,7 @@ static int __devinit atyfb_setup_generic(struct pci_dev *pdev, struct fb_info *i info->fix.mmio_start = raddr; par->ati_regbase = ioremap(info->fix.mmio_start, 0x1000); - if (par->ati_regbase == 0) + if (par->ati_regbase == NULL) return -ENOMEM; info->fix.mmio_start += par->aux_start ? 0x400 : 0xc00; diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index 72cd0d2f14ec..400e9264e456 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -2277,8 +2277,8 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev, do { rinfo->fb_base = ioremap (rinfo->fb_base_phys, rinfo->mapped_vram); - } while ( rinfo->fb_base == 0 && - ((rinfo->mapped_vram /=2) >= MIN_MAPPED_VRAM) ); + } while (rinfo->fb_base == NULL && + ((rinfo->mapped_vram /= 2) >= MIN_MAPPED_VRAM)); if (rinfo->fb_base == NULL) { printk (KERN_ERR "radeonfb (%s): cannot map FB\n", diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h index f3107ad7e545..95883236c0cd 100644 --- a/drivers/video/matrox/matroxfb_base.h +++ b/drivers/video/matrox/matroxfb_base.h @@ -200,7 +200,7 @@ static inline int mga_ioremap(unsigned long phys, unsigned long size, int flags, virt->vaddr = ioremap_nocache(phys, size); else virt->vaddr = ioremap(phys, size); - return (virt->vaddr == 0); /* 0, !0... 0, error_code in future */ + return (virt->vaddr == NULL); /* 0, !0... 0, error_code in future */ } static inline void mga_iounmap(vaddr_t va) { diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c index 73803624c131..b9343844cd1f 100644 --- a/drivers/video/sis/sis_main.c +++ b/drivers/video/sis/sis_main.c @@ -5787,7 +5787,7 @@ sisfb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } else { struct sis_video_info *countvideo = card_list; ivideo->cardnumber = 1; - while((countvideo = countvideo->next) != 0) + while((countvideo = countvideo->next) != NULL) ivideo->cardnumber++; } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 5105015a75ad..98e0e86093b4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -387,6 +387,8 @@ _xfs_buf_lookup_pages( if (unlikely(page == NULL)) { if (flags & XBF_READ_AHEAD) { bp->b_page_count = i; + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); return -ENOMEM; } @@ -416,17 +418,24 @@ _xfs_buf_lookup_pages( ASSERT(!PagePrivate(page)); if (!PageUptodate(page)) { page_count--; - if (blocksize < PAGE_CACHE_SIZE && !PagePrivate(page)) { + if (blocksize >= PAGE_CACHE_SIZE) { + if (flags & XBF_READ) + bp->b_flags |= _XBF_PAGE_LOCKED; + } else if (!PagePrivate(page)) { if (test_page_region(page, offset, nbytes)) page_count++; } } - unlock_page(page); bp->b_pages[i] = page; offset = 0; } + if (!(bp->b_flags & _XBF_PAGE_LOCKED)) { + for (i = 0; i < bp->b_page_count; i++) + unlock_page(bp->b_pages[i]); + } + if (page_count == bp->b_page_count) bp->b_flags |= XBF_DONE; @@ -746,6 +755,7 @@ xfs_buf_associate_memory( bp->b_count_desired = len; bp->b_buffer_length = buflen; bp->b_flags |= XBF_MAPPED; + bp->b_flags &= ~_XBF_PAGE_LOCKED; return 0; } @@ -1093,8 +1103,10 @@ _xfs_buf_ioend( xfs_buf_t *bp, int schedule) { - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) { + bp->b_flags &= ~_XBF_PAGE_LOCKED; xfs_buf_ioend(bp, schedule); + } } STATIC void @@ -1125,6 +1137,9 @@ xfs_buf_bio_end_io( if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); + + if (bp->b_flags & _XBF_PAGE_LOCKED) + unlock_page(page); } while (bvec >= bio->bi_io_vec); _xfs_buf_ioend(bp, 1); @@ -1163,7 +1178,8 @@ _xfs_buf_ioapply( * filesystem block size is not smaller than the page size. */ if ((bp->b_buffer_length < PAGE_CACHE_SIZE) && - (bp->b_flags & XBF_READ) && + ((bp->b_flags & (XBF_READ|_XBF_PAGE_LOCKED)) == + (XBF_READ|_XBF_PAGE_LOCKED)) && (blocksize >= PAGE_CACHE_SIZE)) { bio = bio_alloc(GFP_NOIO, 1); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 841d7883528d..f948ec7ba9a4 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -66,6 +66,25 @@ typedef enum { _XBF_PAGES = (1 << 18), /* backed by refcounted pages */ _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */ + + /* + * Special flag for supporting metadata blocks smaller than a FSB. + * + * In this case we can have multiple xfs_buf_t on a single page and + * need to lock out concurrent xfs_buf_t readers as they only + * serialise access to the buffer. + * + * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation + * between reads of the page. Hence we can have one thread read the + * page and modify it, but then race with another thread that thinks + * the page is not up-to-date and hence reads it again. + * + * The result is that the first modifcation to the page is lost. + * This sort of AGF/AGI reading race can happen when unlinking inodes + * that require truncation and results in the AGI unlinked list + * modifications being lost. + */ + _XBF_PAGE_LOCKED = (1 << 22), } xfs_buf_flags_t; typedef enum { diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 65e78c13d4ae..5f60363b9343 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -184,19 +184,24 @@ xfs_file_release( return -xfs_release(XFS_I(inode)); } +/* + * We ignore the datasync flag here because a datasync is effectively + * identical to an fsync. That is, datasync implies that we need to write + * only the metadata needed to be able to access the data that is written + * if we crash after the call completes. Hence if we are writing beyond + * EOF we have to log the inode size change as well, which makes it a + * full fsync. If we don't write beyond EOF, the inode core will be + * clean in memory and so we don't need to log the inode, just like + * fsync. + */ STATIC int xfs_file_fsync( struct file *filp, struct dentry *dentry, int datasync) { - int flags = FSYNC_WAIT; - - if (datasync) - flags |= FSYNC_DATA; xfs_iflags_clear(XFS_I(dentry->d_inode), XFS_ITRUNCATED); - return -xfs_fsync(XFS_I(dentry->d_inode), flags, - (xfs_off_t)0, (xfs_off_t)-1); + return -xfs_fsync(XFS_I(dentry->d_inode)); } /* diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 9d73cb5c0fc7..25eb2a9e8d9b 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -230,14 +230,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) #define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ /* - * Flags to vop_fsync/reclaim. - */ -#define FSYNC_NOWAIT 0 /* asynchronous flush */ -#define FSYNC_WAIT 0x1 /* synchronous fsync or forced reclaim */ -#define FSYNC_INVAL 0x2 /* flush and invalidate cached data */ -#define FSYNC_DATA 0x4 /* synchronous fsync of data only */ - -/* * Tracking vnode activity. */ #if defined(XFS_INODE_TRACE) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index cf0bb9c1d621..e569bf5d6cf0 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -2974,6 +2974,7 @@ xfs_iflush_cluster( xfs_mount_t *mp = ip->i_mount; xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); unsigned long first_index, mask; + unsigned long inodes_per_cluster; int ilist_size; xfs_inode_t **ilist; xfs_inode_t *iq; @@ -2985,8 +2986,9 @@ xfs_iflush_cluster( ASSERT(pag->pagi_inodeok); ASSERT(pag->pag_ici_init); - ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); - ilist = kmem_alloc(ilist_size, KM_MAYFAIL); + inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; + ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); + ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS); if (!ilist) return 0; @@ -2995,8 +2997,7 @@ xfs_iflush_cluster( read_lock(&pag->pag_ici_lock); /* really need a gang lookup range call here */ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, - first_index, - XFS_INODE_CLUSTER_SIZE(mp)); + first_index, inodes_per_cluster); if (nr_found == 0) goto out_free; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 70702a60b4bb..e475e3717eb3 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -856,18 +856,14 @@ xfs_readlink( /* * xfs_fsync * - * This is called to sync the inode and its data out to disk. - * We need to hold the I/O lock while flushing the data, and - * the inode lock while flushing the inode. The inode lock CANNOT - * be held while flushing the data, so acquire after we're done - * with that. + * This is called to sync the inode and its data out to disk. We need to hold + * the I/O lock while flushing the data, and the inode lock while flushing the + * inode. The inode lock CANNOT be held while flushing the data, so acquire + * after we're done with that. */ int xfs_fsync( - xfs_inode_t *ip, - int flag, - xfs_off_t start, - xfs_off_t stop) + xfs_inode_t *ip) { xfs_trans_t *tp; int error; @@ -875,103 +871,79 @@ xfs_fsync( xfs_itrace_entry(ip); - ASSERT(start >= 0 && stop >= -1); - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); - if (flag & FSYNC_DATA) - filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + /* capture size updates in I/O completion before writing the inode. */ + error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + if (error) + return XFS_ERROR(error); /* - * We always need to make sure that the required inode state - * is safe on disk. The vnode might be clean but because - * of committed transactions that haven't hit the disk yet. - * Likewise, there could be unflushed non-transactional - * changes to the inode core that have to go to disk. + * We always need to make sure that the required inode state is safe on + * disk. The vnode might be clean but we still might need to force the + * log because of committed transactions that haven't hit the disk yet. + * Likewise, there could be unflushed non-transactional changes to the + * inode core that have to go to disk and this requires us to issue + * a synchronous transaction to capture these changes correctly. * - * The following code depends on one assumption: that - * any transaction that changes an inode logs the core - * because it has to change some field in the inode core - * (typically nextents or nblocks). That assumption - * implies that any transactions against an inode will - * catch any non-transactional updates. If inode-altering - * transactions exist that violate this assumption, the - * code breaks. Right now, it figures that if the involved - * update_* field is clear and the inode is unpinned, the - * inode is clean. Either it's been flushed or it's been - * committed and the commit has hit the disk unpinning the inode. - * (Note that xfs_inode_item_format() called at commit clears - * the update_* fields.) + * This code relies on the assumption that if the update_* fields + * of the inode are clear and the inode is unpinned then it is clean + * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); - /* If we are flushing data then we care about update_size - * being set, otherwise we care about update_core - */ - if ((flag & FSYNC_DATA) ? - (ip->i_update_size == 0) : - (ip->i_update_core == 0)) { + if (!(ip->i_update_size || ip->i_update_core)) { /* - * Timestamps/size haven't changed since last inode - * flush or inode transaction commit. That means - * either nothing got written or a transaction - * committed which caught the updates. If the - * latter happened and the transaction hasn't - * hit the disk yet, the inode will be still - * be pinned. If it is, force the log. + * Timestamps/size haven't changed since last inode flush or + * inode transaction commit. That means either nothing got + * written or a transaction committed which caught the updates. + * If the latter happened and the transaction hasn't hit the + * disk yet, the inode will be still be pinned. If it is, + * force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { - _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, - XFS_LOG_FORCE | - ((flag & FSYNC_WAIT) - ? XFS_LOG_SYNC : 0), + error = _xfs_log_force(ip->i_mount, (xfs_lsn_t)0, + XFS_LOG_FORCE | XFS_LOG_SYNC, &log_flushed); } else { /* - * If the inode is not pinned and nothing - * has changed we don't need to flush the - * cache. + * If the inode is not pinned and nothing has changed + * we don't need to flush the cache. */ changed = 0; } - error = 0; } else { /* - * Kick off a transaction to log the inode - * core to get the updates. Make it - * sync if FSYNC_WAIT is passed in (which - * is done by everybody but specfs). The - * sync transaction will also force the log. + * Kick off a transaction to log the inode core to get the + * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); - if ((error = xfs_trans_reserve(tp, 0, - XFS_FSYNC_TS_LOG_RES(ip->i_mount), - 0, 0, 0))) { + error = xfs_trans_reserve(tp, 0, + XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); + if (error) { xfs_trans_cancel(tp, 0); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* - * Note - it's possible that we might have pushed - * ourselves out of the way during trans_reserve - * which would flush the inode. But there's no - * guarantee that the inode buffer has actually - * gone out yet (it's delwri). Plus the buffer - * could be pinned anyway if it's part of an - * inode in another recent transaction. So we - * play it safe and fire off the transaction anyway. + * Note - it's possible that we might have pushed ourselves out + * of the way during trans_reserve which would flush the inode. + * But there's no guarantee that the inode buffer has actually + * gone out yet (it's delwri). Plus the buffer could be pinned + * anyway if it's part of an inode in another recent + * transaction. So we play it safe and fire off the + * transaction anyway. */ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (flag & FSYNC_WAIT) - xfs_trans_set_sync(tp); + xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 8abe8f186e20..57335ba4ce53 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -18,8 +18,7 @@ int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, struct cred *credp); int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_fsync(struct xfs_inode *ip, int flag, xfs_off_t start, - xfs_off_t stop); +int xfs_fsync(struct xfs_inode *ip); int xfs_release(struct xfs_inode *ip); int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, diff --git a/kernel/module.c b/kernel/module.c index f5e9491ef7ac..5f80478b746d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1337,7 +1337,19 @@ out_unreg: kobject_put(&mod->mkobj.kobj); return err; } -#endif + +static void mod_sysfs_fini(struct module *mod) +{ + kobject_put(&mod->mkobj.kobj); +} + +#else /* CONFIG_SYSFS */ + +static void mod_sysfs_fini(struct module *mod) +{ +} + +#endif /* CONFIG_SYSFS */ static void mod_kobject_remove(struct module *mod) { @@ -1345,7 +1357,7 @@ static void mod_kobject_remove(struct module *mod) module_param_sysfs_remove(mod); kobject_put(mod->mkobj.drivers_dir); kobject_put(mod->holders_dir); - kobject_put(&mod->mkobj.kobj); + mod_sysfs_fini(mod); } /* @@ -1780,7 +1792,7 @@ static struct module *load_module(void __user *umod, /* Sanity checks against insmoding binaries or wrong arch, weird elf version */ - if (memcmp(hdr->e_ident, ELFMAG, 4) != 0 + if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 || hdr->e_type != ET_REL || !elf_check_arch(hdr) || hdr->e_shentsize != sizeof(*sechdrs)) { diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 0101aeef7ed7..b7350bbfb076 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -62,8 +62,7 @@ static int stopmachine(void *cpu) * help our sisters onto their CPUs. */ if (!prepared && !irqs_disabled) yield(); - else - cpu_relax(); + cpu_relax(); } /* Ack: we are exiting. */ @@ -106,8 +105,10 @@ static int stop_machine(void) } /* Wait for them all to come to life. */ - while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) + while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads) { yield(); + cpu_relax(); + } /* If some failed, kill them all. */ if (ret < 0) { |