From 6e7a333eaa522ef73be01caec7a01521490aaf00 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Jul 2011 09:12:51 +0000 Subject: rtc: Limit RTC PIE frequency The RTC pie hrtimer is self rearming. We really need to limit the frequency to something sensible. Thus limit it to the 8192Hz max value from the rtc man documentation Cc: Willy Tarreau Cc: stable@kernel.org Signed-off-by: Thomas Gleixner [jstultz: slightly reworked to use RTC_MAX_FREQ value] Signed-off-by: John Stultz --- include/linux/rtc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b27ebea25660..93f4d035076b 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -97,6 +97,9 @@ struct rtc_pll_info { #define RTC_AF 0x20 /* Alarm interrupt */ #define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ + +#define RTC_MAX_FREQ 8192 + #ifdef __KERNEL__ #include -- cgit From b6873807a7143b7d6d8b06809295e559d07d7deb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Jul 2011 12:17:31 +0200 Subject: irq: Track the owner of irq descriptor Interrupt descriptors can be allocated from modules. The interrupts are used by other modules, but we have no refcount on the module which provides the interrupts and there is no way to establish one on the device level as the interrupt using module is agnostic to the fact that the interrupt is provided by a module rather than by some builtin interrupt controller. To prevent removal of the interrupt providing module, we can track the owner of the interrupt descriptor, which also provides the relevant irq chip functions in the irq descriptor. request/setup_irq() can now acquire a refcount on the owner module to prevent unloading. free_irq() drops the refcount. Signed-off-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/20110711101731.GA13804@Chamillionaire.breakpoint.cc Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 11 ++++++++++- include/linux/irqdesc.h | 1 + kernel/irq/irqdesc.c | 36 ++++++++++++++++++++++++------------ kernel/irq/manage.c | 17 +++++++++++++---- 4 files changed, 48 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index baa397eb9c33..16d6f54ef1dd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -546,7 +547,15 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d) return d->msi_desc; } -int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); +int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner); + +static inline int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, + int node) +{ + return __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE); +} + void irq_free_descs(unsigned int irq, unsigned int cnt); int irq_reserve_irqs(unsigned int from, unsigned int cnt); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 2d921b35212c..150134ac709a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -66,6 +66,7 @@ struct irq_desc { #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif + struct module *owner; const char *name; } ____cacheline_internodealigned_in_smp; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4c60a50e66b2..cb65d0360e31 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { } static inline int desc_node(struct irq_desc *desc) { return 0; } #endif -static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) +static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, + struct module *owner) { int cpu; @@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) desc->irq_count = 0; desc->irqs_unhandled = 0; desc->name = NULL; + desc->owner = owner; for_each_possible_cpu(cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; desc_smp_init(desc, node); @@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc) static inline void free_masks(struct irq_desc *desc) { } #endif -static struct irq_desc *alloc_desc(int irq, int node) +static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; gfp_t gfp = GFP_KERNEL; @@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node) raw_spin_lock_init(&desc->lock); lockdep_set_class(&desc->lock, &irq_desc_lock_class); - desc_set_defaults(irq, desc, node); + desc_set_defaults(irq, desc, node, owner); return desc; @@ -173,13 +175,14 @@ static void free_desc(unsigned int irq) kfree(desc); } -static int alloc_descs(unsigned int start, unsigned int cnt, int node) +static int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { struct irq_desc *desc; int i; for (i = 0; i < cnt; i++) { - desc = alloc_desc(start + i, node); + desc = alloc_desc(start + i, node, owner); if (!desc) goto err; mutex_lock(&sparse_irq_lock); @@ -227,7 +230,7 @@ int __init early_irq_init(void) nr_irqs = initcnt; for (i = 0; i < initcnt; i++) { - desc = alloc_desc(i, node); + desc = alloc_desc(i, node, NULL); set_bit(i, allocated_irqs); irq_insert_desc(i, desc); } @@ -261,7 +264,7 @@ int __init early_irq_init(void) alloc_masks(&desc[i], GFP_KERNEL, node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - desc_set_defaults(i, &desc[i], node); + desc_set_defaults(i, &desc[i], node, NULL); } return arch_early_irq_init(); } @@ -276,8 +279,16 @@ static void free_desc(unsigned int irq) dynamic_irq_cleanup(irq); } -static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) +static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { + u32 i; + + for (i = 0; i < cnt; i++) { + struct irq_desc *desc = irq_to_desc(start + i); + + desc->owner = owner; + } return start; } @@ -337,7 +348,8 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * Returns the first irq number or error code */ int __ref -irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) +__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner) { int start, ret; @@ -366,13 +378,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) bitmap_set(allocated_irqs, start, cnt); mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node); + return alloc_descs(start, cnt, node, owner); err: mutex_unlock(&sparse_irq_lock); return ret; } -EXPORT_SYMBOL_GPL(irq_alloc_descs); +EXPORT_SYMBOL_GPL(__irq_alloc_descs); /** * irq_reserve_irqs - mark irqs allocated @@ -440,7 +452,7 @@ void dynamic_irq_cleanup(unsigned int irq) unsigned long flags; raw_spin_lock_irqsave(&desc->lock, flags); - desc_set_defaults(irq, desc, desc_node(desc)); + desc_set_defaults(irq, desc, desc_node(desc), NULL); raw_spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 3f9cd4799da7..2e9425889fa8 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (desc->irq_data.chip == &no_irq_chip) return -ENOSYS; + if (!try_module_get(desc->owner)) + return -ENODEV; /* * Some drivers like serial.c use request_irq() heavily, * so we have to be careful not to interfere with a @@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ nested = irq_settings_is_nested_thread(desc); if (nested) { - if (!new->thread_fn) - return -EINVAL; + if (!new->thread_fn) { + ret = -EINVAL; + goto out_mput; + } /* * Replace the primary handler which was provided from * the driver for non nested interrupt handling by the @@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) t = kthread_create(irq_thread, new, "irq/%d-%s", irq, new->name); - if (IS_ERR(t)) - return PTR_ERR(t); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto out_mput; + } /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code @@ -1095,6 +1101,8 @@ out_thread: kthread_stop(t); put_task_struct(t); } +out_mput: + module_put(desc->owner); return ret; } @@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) put_task_struct(action->thread); } + module_put(desc->owner); return action; } -- cgit From aa387cc895672b00f807ad7c734a2defaf677712 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 31 Jul 2011 22:05:09 +0200 Subject: block: add bsg helper library This moves the FC classes bsg code to the block layer and makes it a lib so that other classes like iscsi and SAS can use it. It is helpful because working with the request queue, bios, creating scatterlists, etc are a pain that the LLD does not have to worry about with normal IOs and should not have to worry about for bsg requests. Signed-off-by: Mike Christie Signed-off-by: Jens Axboe --- block/Kconfig | 10 ++ block/Makefile | 1 + block/bsg-lib.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 4 + include/linux/bsg-lib.h | 73 ++++++++++++ 5 files changed, 385 insertions(+) create mode 100644 block/bsg-lib.c create mode 100644 include/linux/bsg-lib.h (limited to 'include/linux') diff --git a/block/Kconfig b/block/Kconfig index 60be1e0455da..e97934eececa 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -65,6 +65,16 @@ config BLK_DEV_BSG If unsure, say Y. +config BLK_DEV_BSGLIB + bool "Block layer SG support v4 helper lib" + default n + select BLK_DEV_BSG + help + Subsystems will normally enable this if needed. Users will not + normally need to manually enable this. + + If unsure, say N. + config BLK_DEV_INTEGRITY bool "Block layer data integrity support" ---help--- diff --git a/block/Makefile b/block/Makefile index 0fec4b3fab51..514c6e4f427a 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o +obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/bsg-lib.c b/block/bsg-lib.c new file mode 100644 index 000000000000..f8c0a61a529c --- /dev/null +++ b/block/bsg-lib.c @@ -0,0 +1,297 @@ +/* + * BSG helper library + * + * Copyright (C) 2008 James Smart, Emulex Corporation + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011 Mike Christie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include + +/** + * bsg_destroy_job - routine to teardown/delete a bsg job + * @job: bsg_job that is to be torn down + */ +static void bsg_destroy_job(struct bsg_job *job) +{ + put_device(job->dev); /* release reference for the request */ + + kfree(job->request_payload.sg_list); + kfree(job->reply_payload.sg_list); + kfree(job); +} + +/** + * bsg_job_done - completion routine for bsg requests + * @job: bsg_job that is complete + * @result: job reply result + * @reply_payload_rcv_len: length of payload recvd + * + * The LLD should call this when the bsg job has completed. + */ +void bsg_job_done(struct bsg_job *job, int result, + unsigned int reply_payload_rcv_len) +{ + struct request *req = job->req; + struct request *rsp = req->next_rq; + int err; + + err = job->req->errors = result; + if (err < 0) + /* we're only returning the result field in the reply */ + job->req->sense_len = sizeof(u32); + else + job->req->sense_len = job->reply_len; + /* we assume all request payload was transferred, residual == 0 */ + req->resid_len = 0; + + if (rsp) { + WARN_ON(reply_payload_rcv_len > rsp->resid_len); + + /* set reply (bidi) residual */ + rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); + } + blk_complete_request(req); +} +EXPORT_SYMBOL_GPL(bsg_job_done); + +/** + * bsg_softirq_done - softirq done routine for destroying the bsg requests + * @rq: BSG request that holds the job to be destroyed + */ +static void bsg_softirq_done(struct request *rq) +{ + struct bsg_job *job = rq->special; + + blk_end_request_all(rq, rq->errors); + bsg_destroy_job(job); +} + +static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) +{ + size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments); + + BUG_ON(!req->nr_phys_segments); + + buf->sg_list = kzalloc(sz, GFP_KERNEL); + if (!buf->sg_list) + return -ENOMEM; + sg_init_table(buf->sg_list, req->nr_phys_segments); + buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); + buf->payload_len = blk_rq_bytes(req); + return 0; +} + +/** + * bsg_create_job - create the bsg_job structure for the bsg request + * @dev: device that is being sent the bsg request + * @req: BSG request that needs a job structure + */ +static int bsg_create_job(struct device *dev, struct request *req) +{ + struct request *rsp = req->next_rq; + struct request_queue *q = req->q; + struct bsg_job *job; + int ret; + + BUG_ON(req->special); + + job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); + if (!job) + return -ENOMEM; + + req->special = job; + job->req = req; + if (q->bsg_job_size) + job->dd_data = (void *)&job[1]; + job->request = req->cmd; + job->request_len = req->cmd_len; + job->reply = req->sense; + job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer + * allocated */ + if (req->bio) { + ret = bsg_map_buffer(&job->request_payload, req); + if (ret) + goto failjob_rls_job; + } + if (rsp && rsp->bio) { + ret = bsg_map_buffer(&job->reply_payload, rsp); + if (ret) + goto failjob_rls_rqst_payload; + } + job->dev = dev; + /* take a reference for the request */ + get_device(job->dev); + return 0; + +failjob_rls_rqst_payload: + kfree(job->request_payload.sg_list); +failjob_rls_job: + kfree(job); + return -ENOMEM; +} + +/* + * bsg_goose_queue - restart queue in case it was stopped + * @q: request q to be restarted + */ +void bsg_goose_queue(struct request_queue *q) +{ + if (!q) + return; + + blk_run_queue_async(q); +} +EXPORT_SYMBOL_GPL(bsg_goose_queue); + +/** + * bsg_request_fn - generic handler for bsg requests + * @q: request queue to manage + * + * On error the create_bsg_job function should return a -Exyz error value + * that will be set to the req->errors. + * + * Drivers/subsys should pass this to the queue init function. + */ +void bsg_request_fn(struct request_queue *q) +{ + struct device *dev = q->queuedata; + struct request *req; + struct bsg_job *job; + int ret; + + if (!get_device(dev)) + return; + + while (1) { + req = blk_fetch_request(q); + if (!req) + break; + spin_unlock_irq(q->queue_lock); + + ret = bsg_create_job(dev, req); + if (ret) { + req->errors = ret; + blk_end_request_all(req, ret); + spin_lock_irq(q->queue_lock); + continue; + } + + job = req->special; + ret = q->bsg_job_fn(job); + spin_lock_irq(q->queue_lock); + if (ret) + break; + } + + spin_unlock_irq(q->queue_lock); + put_device(dev); + spin_lock_irq(q->queue_lock); +} +EXPORT_SYMBOL_GPL(bsg_request_fn); + +/** + * bsg_setup_queue - Create and add the bsg hooks so we can receive requests + * @dev: device to attach bsg device to + * @q: request queue setup by caller + * @name: device to give bsg device + * @job_fn: bsg job handler + * @dd_job_size: size of LLD data needed for each job + * + * The caller should have setup the reuqest queue with bsg_request_fn + * as the request_fn. + */ +int bsg_setup_queue(struct device *dev, struct request_queue *q, + char *name, bsg_job_fn *job_fn, int dd_job_size) +{ + int ret; + + q->queuedata = dev; + q->bsg_job_size = dd_job_size; + q->bsg_job_fn = job_fn; + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); + blk_queue_softirq_done(q, bsg_softirq_done); + blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); + + ret = bsg_register_queue(q, dev, name, NULL); + if (ret) { + printk(KERN_ERR "%s: bsg interface failed to " + "initialize - register queue\n", dev->kobj.name); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(bsg_setup_queue); + +/** + * bsg_remove_queue - Deletes the bsg dev from the q + * @q: the request_queue that is to be torn down. + * + * Notes: + * Before unregistering the queue empty any requests that are blocked + */ +void bsg_remove_queue(struct request_queue *q) +{ + struct request *req; /* block request */ + int counts; /* totals for request_list count and starved */ + + if (!q) + return; + + /* Stop taking in new requests */ + spin_lock_irq(q->queue_lock); + blk_stop_queue(q); + + /* drain all requests in the queue */ + while (1) { + /* need the lock to fetch a request + * this may fetch the same reqeust as the previous pass + */ + req = blk_fetch_request(q); + /* save requests in use and starved */ + counts = q->rq.count[0] + q->rq.count[1] + + q->rq.starved[0] + q->rq.starved[1]; + spin_unlock_irq(q->queue_lock); + /* any requests still outstanding? */ + if (counts == 0) + break; + + /* This may be the same req as the previous iteration, + * always send the blk_end_request_all after a prefetch. + * It is not okay to not end the request because the + * prefetch started the request. + */ + if (req) { + /* return -ENXIO to indicate that this queue is + * going away + */ + req->errors = -ENXIO; + blk_end_request_all(req, -ENXIO); + } + + msleep(200); /* allow bsg to possibly finish */ + spin_lock_irq(q->queue_lock); + } + bsg_unregister_queue(q); +} +EXPORT_SYMBOL_GPL(bsg_remove_queue); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e67c45b3bc9..847928546076 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -30,6 +30,7 @@ struct request_pm_state; struct blk_trace; struct request; struct sg_io_hdr; +struct bsg_job; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -209,6 +210,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); +typedef int (bsg_job_fn) (struct bsg_job *); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -375,6 +377,8 @@ struct request_queue { struct mutex sysfs_lock; #if defined(CONFIG_BLK_DEV_BSG) + bsg_job_fn *bsg_job_fn; + int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h new file mode 100644 index 000000000000..f55ab8cdc106 --- /dev/null +++ b/include/linux/bsg-lib.h @@ -0,0 +1,73 @@ +/* + * BSG helper library + * + * Copyright (C) 2008 James Smart, Emulex Corporation + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011 Mike Christie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _BLK_BSG_ +#define _BLK_BSG_ + +#include + +struct request; +struct device; +struct scatterlist; +struct request_queue; + +struct bsg_buffer { + unsigned int payload_len; + int sg_cnt; + struct scatterlist *sg_list; +}; + +struct bsg_job { + struct device *dev; + struct request *req; + + /* Transport/driver specific request/reply structs */ + void *request; + void *reply; + + unsigned int request_len; + unsigned int reply_len; + /* + * On entry : reply_len indicates the buffer size allocated for + * the reply. + * + * Upon completion : the message handler must set reply_len + * to indicates the size of the reply to be returned to the + * caller. + */ + + /* DMA payloads for the request/response */ + struct bsg_buffer request_payload; + struct bsg_buffer reply_payload; + + void *dd_data; /* Used for driver-specific storage */ +}; + +void bsg_job_done(struct bsg_job *job, int result, + unsigned int reply_payload_rcv_len); +int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, + bsg_job_fn *job_fn, int dd_job_size); +void bsg_request_fn(struct request_queue *q); +void bsg_remove_queue(struct request_queue *q); +void bsg_goose_queue(struct request_queue *q); + +#endif -- cgit From 34dd82afd27da2537199d7f71f1542501c6f96e7 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:08:04 +0200 Subject: loop: replace linked list of allocated devices with an idr index Replace the linked list, that keeps track of allocated devices, with an idr index to allow a more efficient lookup of devices. Cc: Tejun Heo Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- drivers/block/loop.c | 152 +++++++++++++++++++++++++++------------------------ include/linux/loop.h | 1 - 2 files changed, 80 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 76c8da78212b..f58532e77777 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -78,8 +78,8 @@ #include -static LIST_HEAD(loop_devices); -static DEFINE_MUTEX(loop_devices_mutex); +static DEFINE_IDR(loop_index_idr); +static DEFINE_MUTEX(loop_index_mutex); static int max_part; static int part_shift; @@ -722,17 +722,10 @@ static inline int is_loop_device(struct file *file) static ssize_t loop_attr_show(struct device *dev, char *page, ssize_t (*callback)(struct loop_device *, char *)) { - struct loop_device *l, *lo = NULL; - - mutex_lock(&loop_devices_mutex); - list_for_each_entry(l, &loop_devices, lo_list) - if (disk_to_dev(l->lo_disk) == dev) { - lo = l; - break; - } - mutex_unlock(&loop_devices_mutex); + struct gendisk *disk = dev_to_disk(dev); + struct loop_device *lo = disk->private_data; - return lo ? callback(lo, page) : -EIO; + return callback(lo, page); } #define LOOP_ATTR_RO(_name) \ @@ -1557,40 +1550,64 @@ int loop_register_transfer(struct loop_func_table *funcs) return 0; } +static int unregister_transfer_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; + struct loop_func_table *xfer = data; + + mutex_lock(&lo->lo_ctl_mutex); + if (lo->lo_encryption == xfer) + loop_release_xfer(lo); + mutex_unlock(&lo->lo_ctl_mutex); + return 0; +} + int loop_unregister_transfer(int number) { unsigned int n = number; - struct loop_device *lo; struct loop_func_table *xfer; if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) return -EINVAL; xfer_funcs[n] = NULL; - - list_for_each_entry(lo, &loop_devices, lo_list) { - mutex_lock(&lo->lo_ctl_mutex); - - if (lo->lo_encryption == xfer) - loop_release_xfer(lo); - - mutex_unlock(&lo->lo_ctl_mutex); - } - + idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer); return 0; } EXPORT_SYMBOL(loop_register_transfer); EXPORT_SYMBOL(loop_unregister_transfer); -static struct loop_device *loop_alloc(int i) +static int loop_add(struct loop_device **l, int i) { struct loop_device *lo; struct gendisk *disk; + int err; lo = kzalloc(sizeof(*lo), GFP_KERNEL); - if (!lo) + if (!lo) { + err = -ENOMEM; goto out; + } + + err = idr_pre_get(&loop_index_idr, GFP_KERNEL); + if (err < 0) + goto out_free_dev; + + if (i >= 0) { + int m; + + /* create specific i in the index */ + err = idr_get_new_above(&loop_index_idr, lo, i, &m); + if (err >= 0 && i != m) { + idr_remove(&loop_index_idr, m); + err = -EEXIST; + } + } else { + err = -EINVAL; + } + if (err < 0) + goto out_free_dev; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) @@ -1611,56 +1628,54 @@ static struct loop_device *loop_alloc(int i) disk->private_data = lo; disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); - return lo; + add_disk(disk); + *l = lo; + return lo->lo_number; out_free_queue: blk_cleanup_queue(lo->lo_queue); out_free_dev: kfree(lo); out: - return NULL; + return err; } -static void loop_free(struct loop_device *lo) +static void loop_remove(struct loop_device *lo) { + del_gendisk(lo->lo_disk); blk_cleanup_queue(lo->lo_queue); put_disk(lo->lo_disk); - list_del(&lo->lo_list); kfree(lo); } -static struct loop_device *loop_init_one(int i) +static int loop_lookup(struct loop_device **l, int i) { struct loop_device *lo; + int ret = -ENODEV; - list_for_each_entry(lo, &loop_devices, lo_list) { - if (lo->lo_number == i) - return lo; - } - - lo = loop_alloc(i); + lo = idr_find(&loop_index_idr, i); if (lo) { - add_disk(lo->lo_disk); - list_add_tail(&lo->lo_list, &loop_devices); + *l = lo; + ret = lo->lo_number; } - return lo; -} - -static void loop_del_one(struct loop_device *lo) -{ - del_gendisk(lo->lo_disk); - loop_free(lo); + return ret; } static struct kobject *loop_probe(dev_t dev, int *part, void *data) { struct loop_device *lo; struct kobject *kobj; + int err; - mutex_lock(&loop_devices_mutex); - lo = loop_init_one(MINOR(dev) >> part_shift); - kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); - mutex_unlock(&loop_devices_mutex); + mutex_lock(&loop_index_mutex); + err = loop_lookup(&lo, MINOR(dev) >> part_shift); + if (err < 0) + err = loop_add(&lo, MINOR(dev) >> part_shift); + if (err < 0) + kobj = ERR_PTR(err); + else + kobj = get_disk(lo->lo_disk); + mutex_unlock(&loop_index_mutex); *part = 0; return kobj; @@ -1670,7 +1685,7 @@ static int __init loop_init(void) { int i, nr; unsigned long range; - struct loop_device *lo, *next; + struct loop_device *lo; /* * loop module now has a feature to instantiate underlying device @@ -1719,43 +1734,36 @@ static int __init loop_init(void) if (register_blkdev(LOOP_MAJOR, "loop")) return -EIO; - for (i = 0; i < nr; i++) { - lo = loop_alloc(i); - if (!lo) - goto Enomem; - list_add_tail(&lo->lo_list, &loop_devices); - } - - /* point of no return */ - - list_for_each_entry(lo, &loop_devices, lo_list) - add_disk(lo->lo_disk); - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); + /* pre-create number devices of devices given by config or max_loop */ + mutex_lock(&loop_index_mutex); + for (i = 0; i < nr; i++) + loop_add(&lo, i); + mutex_unlock(&loop_index_mutex); + printk(KERN_INFO "loop: module loaded\n"); return 0; +} -Enomem: - printk(KERN_INFO "loop: out of memory\n"); - - list_for_each_entry_safe(lo, next, &loop_devices, lo_list) - loop_free(lo); +static int loop_exit_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; - unregister_blkdev(LOOP_MAJOR, "loop"); - return -ENOMEM; + loop_remove(lo); + return 0; } static void __exit loop_exit(void) { unsigned long range; - struct loop_device *lo, *next; range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; - list_for_each_entry_safe(lo, next, &loop_devices, lo_list) - loop_del_one(lo); + idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); + idr_remove_all(&loop_index_idr); + idr_destroy(&loop_index_idr); blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); diff --git a/include/linux/loop.h b/include/linux/loop.h index 66c194e2d9b9..5f08d18fa148 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -64,7 +64,6 @@ struct loop_device { struct request_queue *lo_queue; struct gendisk *lo_disk; - struct list_head lo_list; }; #endif /* __KERNEL__ */ -- cgit From 770fe30a46a12b6fb6b63fbe1737654d28e84844 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:08:04 +0200 Subject: loop: add management interface for on-demand device allocation Loop devices today have a fixed pre-allocated number of usually 8. The number can only be changed at module init time. To find a free device to use, /dev/loop%i needs to be scanned, and all devices need to be opened until a free one is possibly found. This adds a new /dev/loop-control device node, that allows to dynamically find or allocate a free device, and to add and remove loop devices from the running system: LOOP_CTL_ADD adds a specific device. Arg is the number of the device. It returns the device i or a negative error code. LOOP_CTL_REMOVE removes a specific device, Arg is the number the device. It returns the device i or a negative error code. LOOP_CTL_GET_FREE finds the next unbound device or allocates a new one. No arg is given. It returns the device i or a negative error code. The loop kernel module gets automatically loaded when /dev/loop-control is accessed the first time. The alias specified in the module, instructs udev to create this 'dead' device node, even when the module is not loaded. Example: cfd = open("/dev/loop-control", O_RDWR); # add a new specific loop device err = ioctl(cfd, LOOP_CTL_ADD, devnr); # remove a specific loop device err = ioctl(cfd, LOOP_CTL_REMOVE, devnr); # find or allocate a free loop device to use devnr = ioctl(cfd, LOOP_CTL_GET_FREE); sprintf(loopname, "/dev/loop%i", devnr); ffd = open("backing-file", O_RDWR); lfd = open(loopname, O_RDWR); err = ioctl(lfd, LOOP_SET_FD, ffd); Cc: Tejun Heo Cc: Karel Zak Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- drivers/block/loop.c | 120 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/loop.h | 4 ++ include/linux/miscdevice.h | 1 + 3 files changed, 121 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f58532e77777..5c9edf944879 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -75,7 +75,7 @@ #include #include #include - +#include #include static DEFINE_IDR(loop_index_idr); @@ -1478,13 +1478,22 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { - struct loop_device *lo = bdev->bd_disk->private_data; + struct loop_device *lo; + int err = 0; + + mutex_lock(&loop_index_mutex); + lo = bdev->bd_disk->private_data; + if (!lo) { + err = -ENXIO; + goto out; + } mutex_lock(&lo->lo_ctl_mutex); lo->lo_refcnt++; mutex_unlock(&lo->lo_ctl_mutex); - - return 0; +out: + mutex_unlock(&loop_index_mutex); + return err; } static int lo_release(struct gendisk *disk, fmode_t mode) @@ -1603,6 +1612,13 @@ static int loop_add(struct loop_device **l, int i) idr_remove(&loop_index_idr, m); err = -EEXIST; } + } else if (i == -1) { + int m; + + /* get next free nr */ + err = idr_get_new(&loop_index_idr, lo, &m); + if (err >= 0) + i = m; } else { err = -EINVAL; } @@ -1648,16 +1664,41 @@ static void loop_remove(struct loop_device *lo) kfree(lo); } +static int find_free_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; + struct loop_device **l = data; + + if (lo->lo_state == Lo_unbound) { + *l = lo; + return 1; + } + return 0; +} + static int loop_lookup(struct loop_device **l, int i) { struct loop_device *lo; int ret = -ENODEV; + if (i < 0) { + int err; + + err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); + if (err == 1) { + *l = lo; + ret = lo->lo_number; + } + goto out; + } + + /* lookup and return a specific i */ lo = idr_find(&loop_index_idr, i); if (lo) { *l = lo; ret = lo->lo_number; } +out: return ret; } @@ -1681,11 +1722,76 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) return kobj; } +static long loop_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct loop_device *lo; + int ret = -ENOSYS; + + mutex_lock(&loop_index_mutex); + switch (cmd) { + case LOOP_CTL_ADD: + ret = loop_lookup(&lo, parm); + if (ret >= 0) { + ret = -EEXIST; + break; + } + ret = loop_add(&lo, parm); + break; + case LOOP_CTL_REMOVE: + ret = loop_lookup(&lo, parm); + if (ret < 0) + break; + mutex_lock(&lo->lo_ctl_mutex); + if (lo->lo_state != Lo_unbound) { + ret = -EBUSY; + mutex_unlock(&lo->lo_ctl_mutex); + break; + } + if (lo->lo_refcnt > 0) { + ret = -EBUSY; + mutex_unlock(&lo->lo_ctl_mutex); + break; + } + lo->lo_disk->private_data = NULL; + mutex_unlock(&lo->lo_ctl_mutex); + idr_remove(&loop_index_idr, lo->lo_number); + loop_remove(lo); + break; + case LOOP_CTL_GET_FREE: + ret = loop_lookup(&lo, -1); + if (ret >= 0) + break; + ret = loop_add(&lo, -1); + } + mutex_unlock(&loop_index_mutex); + + return ret; +} + +static const struct file_operations loop_ctl_fops = { + .open = nonseekable_open, + .unlocked_ioctl = loop_control_ioctl, + .compat_ioctl = loop_control_ioctl, + .owner = THIS_MODULE, + .llseek = noop_llseek, +}; + +static struct miscdevice loop_misc = { + .minor = LOOP_CTRL_MINOR, + .name = "loop-control", + .fops = &loop_ctl_fops, +}; + +MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR); +MODULE_ALIAS("devname:loop-control"); + static int __init loop_init(void) { int i, nr; unsigned long range; struct loop_device *lo; + int err; /* * loop module now has a feature to instantiate underlying device @@ -1702,6 +1808,10 @@ static int __init loop_init(void) * device on-demand. */ + err = misc_register(&loop_misc); + if (err < 0) + return err; + part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); @@ -1767,6 +1877,8 @@ static void __exit loop_exit(void) blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); + + misc_deregister(&loop_misc); } module_init(loop_init); diff --git a/include/linux/loop.h b/include/linux/loop.h index 5f08d18fa148..683d69890119 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -160,4 +160,8 @@ int loop_unregister_transfer(int number); #define LOOP_CHANGE_FD 0x4C06 #define LOOP_SET_CAPACITY 0x4C07 +/* /dev/loop-control interface */ +#define LOOP_CTL_ADD 0x4C80 +#define LOOP_CTL_REMOVE 0x4C81 +#define LOOP_CTL_GET_FREE 0x4C82 #endif diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 18fd13028ba1..c309b1ecdc1c 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -40,6 +40,7 @@ #define BTRFS_MINOR 234 #define AUTOFS_MINOR 235 #define MAPPER_CTRL_MINOR 236 +#define LOOP_CTRL_MINOR 237 #define MISC_DYNAMIC_MINOR 255 struct device; -- cgit From b03e7495a862b028294f59fc87286d6d78ee7fa1 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 20 Jul 2011 15:20:54 -0500 Subject: PCI: Set PCI-E Max Payload Size on fabric On a given PCI-E fabric, each device, bridge, and root port can have a different PCI-E maximum payload size. There is a sizable performance boost for having the largest possible maximum payload size on each PCI-E device. However, if improperly configured, fatal bus errors can occur. Thus, it is important to ensure that PCI-E payloads sends by a device are never larger than the MPS setting of all devices on the way to the destination. This can be achieved two ways: - A conservative approach is to use the smallest common denominator of the entire tree below a root complex for every device on that fabric. This means for example that having a 128 bytes MPS USB controller on one leg of a switch will dramatically reduce performances of a video card or 10GE adapter on another leg of that same switch. It also means that any hierarchy supporting hotplug slots (including expresscard or thunderbolt I suppose, dbl check that) will have to be entirely clamped to 128 bytes since we cannot predict what will be plugged into those slots, and we cannot change the MPS on a "live" system. - A more optimal way is possible, if it falls within a couple of constraints: * The top-level host bridge will never generate packets larger than the smallest TLP (or if it can be controlled independently from its MPS at least) * The device will never generate packets larger than MPS (which can be configured via MRRS) * No support of direct PCI-E <-> PCI-E transfers between devices without some additional code to specifically deal with that case Then we can use an approach that basically ignores downstream requests and focuses exclusively on upstream requests. In that case, all we need to care about is that a device MPS is no larger than its parent MPS, which allows us to keep all switches/bridges to the max MPS supported by their parent and eventually the PHB. In this case, your USB controller would no longer "starve" your 10GE Ethernet and your hotplug slots won't affect your global MPS. Additionally, the hotplugged devices themselves can be configured to a larger MPS up to the value configured in the hotplug bridge. To choose between the two available options, two PCI kernel boot args have been added to the PCI calls. "pcie_bus_safe" will provide the former behavior, while "pcie_bus_perf" will perform the latter behavior. By default, the latter behavior is used. NOTE: due to the location of the enablement, each arch will need to add calls to this function. This patch only enables x86. This patch includes a number of changes recommended by Benjamin Herrenschmidt. Tested-by: Jordan_Hargrave@dell.com Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 9 +++ drivers/pci/hotplug/pcihp_slot.c | 45 +----------- drivers/pci/pci.c | 67 ++++++++++++++++++ drivers/pci/probe.c | 145 +++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 15 +++- 5 files changed, 236 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index ae3cb23cd89b..c95330267f08 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -360,6 +360,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) } } + /* After the PCI-E bus has been walked and all devices discovered, + * configure any settings of the fabric that might be necessary. + */ + if (bus) { + struct pci_bus *child; + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child, child->self->pcie_mpss); + } + if (!bus) kfree(sd); diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index 749fdf070319..753b21aaea61 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -158,47 +158,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ -static int pci_set_payload(struct pci_dev *dev) -{ - int pos, ppos; - u16 pctl, psz; - u16 dctl, dsz, dcap, dmax; - struct pci_dev *parent; - - parent = dev->bus->self; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (!pos) - return 0; - - /* Read Device MaxPayload capability and setting */ - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); - pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); - dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; - dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); - - /* Read Parent MaxPayload setting */ - ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (!ppos) - return 0; - pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); - psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; - - /* If parent payload > device max payload -> error - * If parent payload > device payload -> set speed - * If parent payload <= device payload -> do nothing - */ - if (psz > dmax) - return -1; - else if (psz > dsz) { - dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, - (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + - (psz << 5)); - } - return 0; -} - void pci_configure_slot(struct pci_dev *dev) { struct pci_dev *cdev; @@ -210,9 +169,7 @@ void pci_configure_slot(struct pci_dev *dev) (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) return; - ret = pci_set_payload(dev); - if (ret) - dev_warn(&dev->dev, "could not set device max payload\n"); + pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 08a95b369d85..466fad6e6ee2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; +enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE; + /* * The default CLS is used if arch didn't set CLS explicitly and not * all pci devices agree on the same value. Arch can override either @@ -3222,6 +3224,67 @@ out: } EXPORT_SYMBOL(pcie_set_readrq); +/** + * pcie_get_mps - get PCI Express maximum payload size + * @dev: PCI device to query + * + * Returns maximum payload size in bytes + * or appropriate error value. + */ +int pcie_get_mps(struct pci_dev *dev) +{ + int ret, cap; + u16 ctl; + + cap = pci_pcie_cap(dev); + if (!cap) + return -EINVAL; + + ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); + if (!ret) + ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5); + + return ret; +} + +/** + * pcie_set_mps - set PCI Express maximum payload size + * @dev: PCI device to query + * @rq: maximum payload size in bytes + * valid values are 128, 256, 512, 1024, 2048, 4096 + * + * If possible sets maximum payload size + */ +int pcie_set_mps(struct pci_dev *dev, int mps) +{ + int cap, err = -EINVAL; + u16 ctl, v; + + if (mps < 128 || mps > 4096 || !is_power_of_2(mps)) + goto out; + + v = ffs(mps) - 8; + if (v > dev->pcie_mpss) + goto out; + v <<= 5; + + cap = pci_pcie_cap(dev); + if (!cap) + goto out; + + err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); + if (err) + goto out; + + if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) { + ctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + ctl |= v; + err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl); + } +out: + return err; +} + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made @@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str) pci_hotplug_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "hpmemsize=", 10)) { pci_hotplug_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "pcie_bus_safe", 13)) { + pcie_bus_config = PCIE_BUS_SAFE; + } else if (!strncmp(str, "pcie_bus_perf", 13)) { + pcie_bus_config = PCIE_BUS_PERFORMANCE; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 795c9026d55f..5becf7cd50d8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -856,6 +856,8 @@ void set_pcie_port_type(struct pci_dev *pdev) pdev->pcie_cap = pos; pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; + pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, ®16); + pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; } void set_pcie_hotplug_bridge(struct pci_dev *pdev) @@ -1326,6 +1328,149 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) return nr; } +static int pcie_find_smpss(struct pci_dev *dev, void *data) +{ + u8 *smpss = data; + + if (!pci_is_pcie(dev)) + return 0; + + /* For PCIE hotplug enabled slots not connected directly to a + * PCI-E root port, there can be problems when hotplugging + * devices. This is due to the possibility of hotplugging a + * device into the fabric with a smaller MPS that the devices + * currently running have configured. Modifying the MPS on the + * running devices could cause a fatal bus error due to an + * incoming frame being larger than the newly configured MPS. + * To work around this, the MPS for the entire fabric must be + * set to the minimum size. Any devices hotplugged into this + * fabric will have the minimum MPS set. If the PCI hotplug + * slot is directly connected to the root port and there are not + * other devices on the fabric (which seems to be the most + * common case), then this is not an issue and MPS discovery + * will occur as normal. + */ + if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || + dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) + *smpss = 0; + + if (*smpss > dev->pcie_mpss) + *smpss = dev->pcie_mpss; + + return 0; +} + +static void pcie_write_mps(struct pci_dev *dev, int mps) +{ + int rc, dev_mpss; + + dev_mpss = 128 << dev->pcie_mpss; + + if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { + if (dev->bus->self) { + dev_dbg(&dev->bus->dev, "Bus MPSS %d\n", + 128 << dev->bus->self->pcie_mpss); + + /* For "MPS Force Max", the assumption is made that + * downstream communication will never be larger than + * the MRRS. So, the MPS only needs to be configured + * for the upstream communication. This being the case, + * walk from the top down and set the MPS of the child + * to that of the parent bus. + */ + mps = 128 << dev->bus->self->pcie_mpss; + if (mps > dev_mpss) + dev_warn(&dev->dev, "MPS configured higher than" + " maximum supported by the device. If" + " a bus issue occurs, try running with" + " pci=pcie_bus_safe.\n"); + } + + dev->pcie_mpss = ffs(mps) - 8; + } + + rc = pcie_set_mps(dev, mps); + if (rc) + dev_err(&dev->dev, "Failed attempting to set the MPS\n"); +} + +static void pcie_write_mrrs(struct pci_dev *dev, int mps) +{ + int rc, mrrs; + + if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { + int dev_mpss = 128 << dev->pcie_mpss; + + /* For Max performance, the MRRS must be set to the largest + * supported value. However, it cannot be configured larger + * than the MPS the device or the bus can support. This assumes + * that the largest MRRS available on the device cannot be + * smaller than the device MPSS. + */ + mrrs = mps < dev_mpss ? mps : dev_mpss; + } else + /* In the "safe" case, configure the MRRS for fairness on the + * bus by making all devices have the same size + */ + mrrs = mps; + + + /* MRRS is a R/W register. Invalid values can be written, but a + * subsiquent read will verify if the value is acceptable or not. + * If the MRRS value provided is not acceptable (e.g., too large), + * shrink the value until it is acceptable to the HW. + */ + while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { + rc = pcie_set_readrq(dev, mrrs); + if (rc) + dev_err(&dev->dev, "Failed attempting to set the MRRS\n"); + + mrrs /= 2; + } +} + +static int pcie_bus_configure_set(struct pci_dev *dev, void *data) +{ + int mps = 128 << *(u8 *)data; + + if (!pci_is_pcie(dev)) + return 0; + + dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); + + pcie_write_mps(dev, mps); + pcie_write_mrrs(dev, mps); + + dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); + + return 0; +} + +/* pcie_bus_configure_mps requires that pci_walk_bus work in a top-down, + * parents then children fashion. If this changes, then this code will not + * work as designed. + */ +void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) +{ + u8 smpss = mpss; + + if (!bus->self) + return; + + if (!pci_is_pcie(bus->self)) + return; + + if (pcie_bus_config == PCIE_BUS_SAFE) { + pcie_find_smpss(bus->self, &smpss); + pci_walk_bus(bus, pcie_find_smpss, &smpss); + } + + pcie_bus_configure_set(bus->self, &smpss); + pci_walk_bus(bus, pcie_bus_configure_set, &smpss); +} + unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) { unsigned int devfn, pass, max = bus->secondary; diff --git a/include/linux/pci.h b/include/linux/pci.h index f27893b3b724..1ff9bbafd932 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -251,7 +251,8 @@ struct pci_dev { u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ u8 pcie_cap; /* PCI-E capability offset */ - u8 pcie_type; /* PCI-E device/port type */ + u8 pcie_type:4; /* PCI-E device/port type */ + u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ @@ -617,6 +618,16 @@ struct pci_driver { /* these external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI +extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); + +enum pcie_bus_config_types { + PCIE_BUS_PERFORMANCE, + PCIE_BUS_SAFE, + PCIE_BUS_PEER2PEER, +}; + +extern enum pcie_bus_config_types pcie_bus_config; + extern struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch specific pci @@ -796,6 +807,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); +int pcie_get_mps(struct pci_dev *dev); +int pcie_set_mps(struct pci_dev *dev, int mps); int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); -- cgit From 2bbc6942273b5b3097bd265d82227bdd84b351b2 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 25 Jul 2011 13:08:39 -0700 Subject: PCI : ability to relocate assigned pci-resources Currently pci-bridges are allocated enough resources to satisfy their immediate requirements. Any additional resource-requests fail if additional free space, contiguous to the one already allocated, is not available. This behavior is not reasonable since sufficient contiguous resources, that can satisfy the request, are available at a different location. This patch provides the ability to expand and relocate a allocated resource. v2: Changelog: Fixed size calculation in pci_reassign_resource() v3: Changelog : Split this patch. The resource.c changes are already upstream. All the pci driver changes are in here. Signed-off-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 27 +++++---- drivers/pci/setup-res.c | 152 ++++++++++++++++++++++++++++++------------------ include/linux/pci.h | 1 + 3 files changed, 113 insertions(+), 67 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4409cd0e15fa..1796c6ffe91c 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -34,6 +34,7 @@ struct resource_list_x { resource_size_t start; resource_size_t end; resource_size_t add_size; + resource_size_t min_align; unsigned long flags; }; @@ -65,7 +66,7 @@ void pci_realloc(void) */ static void add_to_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res, - resource_size_t add_size) + resource_size_t add_size, resource_size_t min_align) { struct resource_list_x *list = head; struct resource_list_x *ln = list->next; @@ -84,13 +85,16 @@ static void add_to_list(struct resource_list_x *head, tmp->end = res->end; tmp->flags = res->flags; tmp->add_size = add_size; + tmp->min_align = min_align; list->next = tmp; } static void add_to_failed_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res) { - add_to_list(head, dev, res, 0); + add_to_list(head, dev, res, + 0 /* dont care */, + 0 /* dont care */); } static void __dev_sort_resources(struct pci_dev *dev, @@ -159,13 +163,16 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, idx = res - &list->dev->resource[0]; add_size=list->add_size; - if (!resource_size(res) && add_size) { - res->end = res->start + add_size - 1; - if(pci_assign_resource(list->dev, idx)) + if (!resource_size(res)) { + res->end = res->start + add_size - 1; + if(pci_assign_resource(list->dev, idx)) reset_resource(res); - } else if (add_size) { - adjust_resource(res, res->start, - resource_size(res) + add_size); + } else { + resource_size_t align = list->min_align; + res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); + if (pci_reassign_resource(list->dev, idx, add_size, align)) + dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n", + res); } out: tmp = list; @@ -619,7 +626,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, b_res->end = b_res->start + size0 - 1; b_res->flags |= IORESOURCE_STARTALIGN; if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0); + add_to_list(add_head, bus->self, b_res, size1-size0, 4096); } /** @@ -722,7 +729,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0); + add_to_list(add_head, bus->self, b_res, size1-size0, min_align); return 1; } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 319f359906e8..51a9095c7da4 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -128,16 +128,16 @@ void pci_disable_bridge_window(struct pci_dev *dev) } #endif /* CONFIG_PCI_QUIRKS */ + + static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, - int resno) + int resno, resource_size_t size, resource_size_t align) { struct resource *res = dev->resource + resno; - resource_size_t size, min, align; + resource_size_t min; int ret; - size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; - align = pci_resource_alignment(dev, res); /* First, try exact prefetching match.. */ ret = pci_bus_alloc_resource(bus, res, size, align, min, @@ -154,56 +154,101 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, ret = pci_bus_alloc_resource(bus, res, size, align, min, 0, pcibios_align_resource, dev); } + return ret; +} - if (ret < 0 && dev->fw_addr[resno]) { - struct resource *root, *conflict; - resource_size_t start, end; +static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, + int resno, resource_size_t size) +{ + struct resource *root, *conflict; + resource_size_t start, end; + int ret = 0; - /* - * If we failed to assign anything, let's try the address - * where firmware left it. That at least has a chance of - * working, which is better than just leaving it disabled. - */ + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + + start = res->start; + end = res->end; + res->start = dev->fw_addr[resno]; + res->end = res->start + size - 1; + dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", + resno, res); + conflict = request_resource_conflict(root, res); + if (conflict) { + dev_info(&dev->dev, + "BAR %d: %pR conflicts with %s %pR\n", resno, + res, conflict->name, conflict); + res->start = start; + res->end = end; + ret = 1; + } + return ret; +} + +static int _pci_assign_resource(struct pci_dev *dev, int resno, int size, resource_size_t min_align) +{ + struct resource *res = dev->resource + resno; + struct pci_bus *bus; + int ret; + char *type; - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; + bus = dev->bus; + while ((ret = __pci_assign_resource(bus, dev, resno, size, min_align))) { + if (!bus->parent || !bus->self->transparent) + break; + bus = bus->parent; + } + + if (ret) { + if (res->flags & IORESOURCE_MEM) + if (res->flags & IORESOURCE_PREFETCH) + type = "mem pref"; + else + type = "mem"; + else if (res->flags & IORESOURCE_IO) + type = "io"; else - root = &iomem_resource; - - start = res->start; - end = res->end; - res->start = dev->fw_addr[resno]; - res->end = res->start + size - 1; - dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", - resno, res); - conflict = request_resource_conflict(root, res); - if (conflict) { - dev_info(&dev->dev, - "BAR %d: %pR conflicts with %s %pR\n", resno, - res, conflict->name, conflict); - res->start = start; - res->end = end; - } else - ret = 0; + type = "unknown"; + dev_info(&dev->dev, + "BAR %d: can't assign %s (size %#llx)\n", + resno, type, (unsigned long long) resource_size(res)); } + return ret; +} + +int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize, + resource_size_t min_align) +{ + struct resource *res = dev->resource + resno; + resource_size_t new_size; + int ret; + + if (!res->parent) { + dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resouce %pR " + "\n", resno, res); + return -EINVAL; + } + + new_size = resource_size(res) + addsize + min_align; + ret = _pci_assign_resource(dev, resno, new_size, min_align); if (!ret) { res->flags &= ~IORESOURCE_STARTALIGN; dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); if (resno < PCI_BRIDGE_RESOURCES) pci_update_resource(dev, resno); } - return ret; } int pci_assign_resource(struct pci_dev *dev, int resno) { struct resource *res = dev->resource + resno; - resource_size_t align; + resource_size_t align, size; struct pci_bus *bus; int ret; - char *type; align = pci_resource_alignment(dev, res); if (!align) { @@ -213,34 +258,27 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } bus = dev->bus; - while ((ret = __pci_assign_resource(bus, dev, resno))) { - if (bus->parent && bus->self->transparent) - bus = bus->parent; - else - bus = NULL; - if (bus) - continue; - break; - } + size = resource_size(res); + ret = _pci_assign_resource(dev, resno, size, align); - if (ret) { - if (res->flags & IORESOURCE_MEM) - if (res->flags & IORESOURCE_PREFETCH) - type = "mem pref"; - else - type = "mem"; - else if (res->flags & IORESOURCE_IO) - type = "io"; - else - type = "unknown"; - dev_info(&dev->dev, - "BAR %d: can't assign %s (size %#llx)\n", - resno, type, (unsigned long long) resource_size(res)); - } + /* + * If we failed to assign anything, let's try the address + * where firmware left it. That at least has a chance of + * working, which is better than just leaving it disabled. + */ + if (ret < 0 && dev->fw_addr[resno]) + ret = pci_revert_fw_address(res, dev, resno, size); + if (!ret) { + res->flags &= ~IORESOURCE_STARTALIGN; + dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); + if (resno < PCI_BRIDGE_RESOURCES) + pci_update_resource(dev, resno); + } return ret; } + /* Sort resources by alignment */ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 1ff9bbafd932..8c230cbcbb48 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -813,6 +813,7 @@ int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); +int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_select_bars(struct pci_dev *dev, unsigned long flags); /* ROM control related routines */ -- cgit From 6602a4baf4d1a73cc4685a39ef859e1c5ddf654c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 7 Aug 2011 22:48:07 -0700 Subject: net: Make userland include of netlink.h more sane. Currently userland will barf when including linux/netlink.h unless it precisely includes sys/socket.h first. The issue is where the definition of "sa_family_t" comes from. We've been back and forth on how to fix this issue in the past, see: http://thread.gmane.org/gmane.linux.debian.devel.bugs.general/622621 http://thread.gmane.org/gmane.linux.network/143380 Ben Hutchings suggested we take a hint from how we handle the sockaddr_storage type. First we define a "__kernel_sa_family_t" to linux/socket.h that is always defined. Then if __KERNEL__ is defined, we also define "sa_family_t" as equal to "__kernel_sa_family_t". Then in places like linux/netlink.h we use __kernel_sa_family_t in user visible datastructures. Reported-by: Michel Machado Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- include/linux/socket.h | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 2e17c5dbdcb8..180540a84d37 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -29,7 +29,7 @@ #define MAX_LINKS 32 struct sockaddr_nl { - sa_family_t nl_family; /* AF_NETLINK */ + __kernel_sa_family_t nl_family; /* AF_NETLINK */ unsigned short nl_pad; /* zero */ __u32 nl_pid; /* port ID */ __u32 nl_groups; /* multicast groups mask */ diff --git a/include/linux/socket.h b/include/linux/socket.h index e17f82266639..d0e77f607a79 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -8,8 +8,10 @@ #define _K_SS_ALIGNSIZE (__alignof__ (struct sockaddr *)) /* Implementation specific desired alignment */ +typedef unsigned short __kernel_sa_family_t; + struct __kernel_sockaddr_storage { - unsigned short ss_family; /* address family */ + __kernel_sa_family_t ss_family; /* address family */ /* Following field(s) are implementation specific */ char __data[_K_SS_MAXSIZE - sizeof(unsigned short)]; /* space to achieve desired size, */ @@ -35,7 +37,7 @@ struct seq_file; extern void socket_seq_show(struct seq_file *seq); #endif -typedef unsigned short sa_family_t; +typedef __kernel_sa_family_t sa_family_t; /* * 1003.1g requires sa_family_t and that sa_data is char. -- cgit From 37fb3a30b46237f23cfdf7ee09d49f9888dd13bf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 8 Aug 2011 16:08:08 +0200 Subject: fuse: fix flock Commit a9ff4f87 "fuse: support BSD locking semantics" overlooked a number of issues with supporing flock locks over existing POSIX locking infrastructure: - it's not backward compatible, passing flock(2) calls to userspace unconditionally (if userspace sets FUSE_POSIX_LOCKS) - it doesn't cater for the fact that flock locks are automatically unlocked on file release - it doesn't take into account the fact that flock exclusive locks (write locks) don't need an fd opened for write. The last one invalidates the original premise of the patch that flock locks can be emulated with POSIX locks. This patch fixes the first two issues. The last one needs to be fixed in userspace if the filesystem assumed that a write lock will happen only on a file operned for write (as in the case of the current fuse library). Reported-by: Sebastian Pipping Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 11 ++++++++++- fs/fuse/fuse_i.h | 8 +++++++- fs/fuse/inode.c | 8 +++++++- include/linux/fuse.h | 9 ++++++++- 4 files changed, 32 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 82a66466a24c..e32784924355 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -245,6 +245,12 @@ void fuse_release_common(struct file *file, int opcode) req = ff->reserved_req; fuse_prepare_release(ff, file->f_flags, opcode); + if (ff->flock) { + struct fuse_release_in *inarg = &req->misc.release.in; + inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; + inarg->lock_owner = fuse_lock_owner_id(ff->fc, + (fl_owner_t) file); + } /* Hold vfsmount and dentry until release is finished */ path_get(&file->f_path); req->misc.release.path = file->f_path; @@ -1547,11 +1553,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (fc->no_lock) { + if (fc->no_flock) { err = flock_lock_file_wait(file, fl); } else { + struct fuse_file *ff = file->private_data; + /* emulate flock with POSIX locks */ fl->fl_owner = (fl_owner_t) file; + ff->flock = true; err = fuse_setlk(file, fl, 1); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b788becada76..eb8c6135fbbf 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -135,6 +135,9 @@ struct fuse_file { /** Wait queue head for poll */ wait_queue_head_t poll_wait; + + /** Has flock been performed on this file? */ + bool flock:1; }; /** One input argument of a request */ @@ -448,7 +451,7 @@ struct fuse_conn { /** Is removexattr not implemented by fs? */ unsigned no_removexattr:1; - /** Are file locking primitives not implemented by fs? */ + /** Are posix file locking primitives not implemented by fs? */ unsigned no_lock:1; /** Is access not implemented by fs? */ @@ -472,6 +475,9 @@ struct fuse_conn { /** Don't apply umask to creation modes */ unsigned dont_mask:1; + /** Are BSD file locking primitives not implemented by fs? */ + unsigned no_flock:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5354906e797c..f541d639844b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -809,6 +809,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_read = 1; if (!(arg->flags & FUSE_POSIX_LOCKS)) fc->no_lock = 1; + if (arg->minor >= 17) { + if (!(arg->flags & FUSE_FLOCK_LOCKS)) + fc->no_flock = 1; + } if (arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; if (arg->minor >= 9) { @@ -823,6 +827,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; + fc->no_flock = 1; } fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); @@ -843,7 +848,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | + FUSE_FLOCK_LOCKS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index d464de53db43..464cff526860 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -47,6 +47,9 @@ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' * - add FUSE_IOCTL_32BIT flag + * + * 7.17 + * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK */ #ifndef _LINUX_FUSE_H @@ -78,7 +81,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 16 +#define FUSE_KERNEL_MINOR_VERSION 17 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -153,8 +156,10 @@ struct fuse_file_lock { /** * INIT request/reply flags * + * FUSE_POSIX_LOCKS: remote locking for POSIX file locks * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." * FUSE_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -163,6 +168,7 @@ struct fuse_file_lock { #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) #define FUSE_DONT_MASK (1 << 6) +#define FUSE_FLOCK_LOCKS (1 << 10) /** * CUSE INIT request/reply flags @@ -175,6 +181,7 @@ struct fuse_file_lock { * Release flags */ #define FUSE_RELEASE_FLUSH (1 << 0) +#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) /** * Getattr flags -- cgit From 89272b8c0d427021bed70b1b83e1a16be375ccf5 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 5 Aug 2011 16:50:30 -0600 Subject: dt: add empty of_get_property for non-dt The patch adds empty function of_get_property for non-dt build, so that drivers migrating to dt can save some '#ifdef CONFIG_OF'. This also fixes the current Tegra compile problem in linux-next. Signed-off-by: Stephen Warren Signed-off-by: Grant Likely --- include/linux/of.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 0085bb01c041..9180dc5cb00b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -256,6 +256,13 @@ static inline int of_property_read_string(struct device_node *np, return -ENOSYS; } +static inline const void *of_get_property(const struct device_node *node, + const char *name, + int *lenp) +{ + return NULL; +} + #endif /* CONFIG_OF */ static inline int of_property_read_u32(const struct device_node *np, -- cgit From 8e4bf84474960e832b56293c9b0674c88b5b05ce Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 11 Aug 2011 10:36:03 +0200 Subject: Move some REQ flags to the common bio/request area REQ_SECURE, REQ_FLUSH and REQ_FUA may all be set on a bio as well as on a request, so relocate them to the shared part of the enum. Signed-off-by: Matthew Wilcox Signed-off-by: Namhyung Kim Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6395692b2e7a..32f0076e844b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -125,7 +125,11 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_DISCARD, /* request to discard sectors */ + __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ + __REQ_NOIDLE, /* don't anticipate more IO after this one */ + __REQ_FUA, /* forced unit access */ + __REQ_FLUSH, /* request for cache flush */ /* bio only flags */ __REQ_RAHEAD, /* read ahead, can fail anytime */ @@ -135,7 +139,6 @@ enum rq_flag_bits { /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_FUA, /* forced unit access */ __REQ_NOMERGE, /* don't touch this for merging */ __REQ_STARTED, /* drive already may have started this one */ __REQ_DONTPREP, /* don't call prep for this one */ @@ -146,11 +149,9 @@ enum rq_flag_bits { __REQ_PREEMPT, /* set for "ide_preempt" requests */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_FLUSH, /* request for cache flush */ __REQ_FLUSH_SEQ, /* request for flush sequence */ __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ __REQ_NR_BITS, /* stops here */ }; -- cgit From c09c47caedc9854d59378d6e34c989e51cfdd2b4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 11 Aug 2011 10:36:05 +0200 Subject: blktrace: add FLUSH/FUA support Add FLUSH/FUA support to blktrace. As FLUSH precedes WRITE and/or FUA follows WRITE, use the same 'F' flag for both cases and distinguish them by their (relative) position. The end results look like (other flags might be shown also): - WRITE: W - WRITE_FLUSH: FW - WRITE_FUA: WF - WRITE_FLUSH_FUA: FWF Note that we reuse TC_BARRIER due to lack of bit space of act_mask so that the older versions of blktrace tools will report flush requests as barriers from now on. Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Namhyung Kim Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 5 +++-- include/trace/events/block.h | 20 +++++++++++--------- kernel/trace/blktrace.c | 21 ++++++++++++++++----- 3 files changed, 30 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8c7c2de7631a..8e9e4bc6d73b 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -14,7 +14,7 @@ enum blktrace_cat { BLK_TC_READ = 1 << 0, /* reads */ BLK_TC_WRITE = 1 << 1, /* writes */ - BLK_TC_BARRIER = 1 << 2, /* barrier */ + BLK_TC_FLUSH = 1 << 2, /* flush */ BLK_TC_SYNC = 1 << 3, /* sync IO */ BLK_TC_SYNCIO = BLK_TC_SYNC, BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ @@ -28,8 +28,9 @@ enum blktrace_cat { BLK_TC_META = 1 << 12, /* metadata */ BLK_TC_DISCARD = 1 << 13, /* discard requests */ BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ + BLK_TC_FUA = 1 << 15, /* fua requests */ - BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ + BLK_TC_END = 1 << 15, /* we've run out of bits! */ }; #define BLK_TC_SHIFT (16) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index bf366547da25..05c5e61f0a7c 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,6 +8,8 @@ #include #include +#define RWBS_LEN 8 + DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -19,7 +21,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error, __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int, errors ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -104,7 +106,7 @@ DECLARE_EVENT_CLASS(block_rq, __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -183,7 +185,7 @@ TRACE_EVENT(block_bio_bounce, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -222,7 +224,7 @@ TRACE_EVENT(block_bio_complete, __field( sector_t, sector ) __field( unsigned, nr_sector ) __field( int, error ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( @@ -249,7 +251,7 @@ DECLARE_EVENT_CLASS(block_bio, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -321,7 +323,7 @@ DECLARE_EVENT_CLASS(block_get_rq, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -456,7 +458,7 @@ TRACE_EVENT(block_split, __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, new_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -498,7 +500,7 @@ TRACE_EVENT(block_bio_remap, __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( @@ -542,7 +544,7 @@ TRACE_EVENT(block_rq_remap, __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6957aa298dfa..7c910a5593a6 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); + what |= MASK_TC_BIT(rw, FLUSH); + what |= MASK_TC_BIT(rw, FUA); pid = tsk->pid; if (act_log_check(bt, what, sector, pid)) @@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) goto out; } + if (tc & BLK_TC_FLUSH) + rwbs[i++] = 'F'; + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; else if (tc & BLK_TC_WRITE) @@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) else rwbs[i++] = 'N'; + if (tc & BLK_TC_FUA) + rwbs[i++] = 'F'; if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (tc & BLK_TC_BARRIER) - rwbs[i++] = 'B'; if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; if (tc & BLK_TC_META) @@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); static int blk_log_action_classic(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; @@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) static int blk_log_action(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); @@ -1561,7 +1566,7 @@ static const struct { } mask_maps[] = { { BLK_TC_READ, "read" }, { BLK_TC_WRITE, "write" }, - { BLK_TC_BARRIER, "barrier" }, + { BLK_TC_FLUSH, "flush" }, { BLK_TC_SYNC, "sync" }, { BLK_TC_QUEUE, "queue" }, { BLK_TC_REQUEUE, "requeue" }, @@ -1573,6 +1578,7 @@ static const struct { { BLK_TC_META, "meta" }, { BLK_TC_DISCARD, "discard" }, { BLK_TC_DRV_DATA, "drv_data" }, + { BLK_TC_FUA, "fua" }, }; static int blk_trace_str2mask(const char *str) @@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) { int i = 0; + if (rw & REQ_FLUSH) + rwbs[i++] = 'F'; + if (rw & WRITE) rwbs[i++] = 'W'; else if (rw & REQ_DISCARD) @@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) else rwbs[i++] = 'N'; + if (rw & REQ_FUA) + rwbs[i++] = 'F'; if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; if (rw & REQ_SYNC) -- cgit From 72fa59970f8698023045ab0713d66f3f4f96945c Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Mon, 8 Aug 2011 19:02:04 +0400 Subject: move RLIMIT_NPROC check from set_user() to do_execve_common() The patch http://lkml.org/lkml/2003/7/13/226 introduced an RLIMIT_NPROC check in set_user() to check for NPROC exceeding via setuid() and similar functions. Before the check there was a possibility to greatly exceed the allowed number of processes by an unprivileged user if the program relied on rlimit only. But the check created new security threat: many poorly written programs simply don't check setuid() return code and believe it cannot fail if executed with root privileges. So, the check is removed in this patch because of too often privilege escalations related to buggy programs. The NPROC can still be enforced in the common code flow of daemons spawning user processes. Most of daemons do fork()+setuid()+execve(). The check introduced in execve() (1) enforces the same limit as in setuid() and (2) doesn't create similar security issues. Neil Brown suggested to track what specific process has exceeded the limit by setting PF_NPROC_EXCEEDED process flag. With the change only this process would fail on execve(), and other processes' execve() behaviour is not changed. Solar Designer suggested to re-check whether NPROC limit is still exceeded at the moment of execve(). If the process was sleeping for days between set*uid() and execve(), and the NPROC counter step down under the limit, the defered execve() failure because NPROC limit was exceeded days ago would be unexpected. If the limit is not exceeded anymore, we clear the flag on successful calls to execve() and fork(). The flag is also cleared on successful calls to set_user() as the limit was exceeded for the previous user, not the current one. Similar check was introduced in -ow patches (without the process flag). v3 - clear PF_NPROC_EXCEEDED on successful calls to set_user(). Reviewed-by: James Morris Signed-off-by: Vasiliy Kulikov Acked-by: NeilBrown Signed-off-by: Linus Torvalds --- fs/exec.c | 17 +++++++++++++++++ include/linux/sched.h | 1 + kernel/cred.c | 6 ++---- kernel/fork.c | 1 + kernel/sys.c | 15 +++++++++++---- 5 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index da80612a35f4..25dcbe5fc356 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1459,6 +1459,23 @@ static int do_execve_common(const char *filename, struct files_struct *displaced; bool clear_in_exec; int retval; + const struct cred *cred = current_cred(); + + /* + * We move the actual failure in case of RLIMIT_NPROC excess from + * set*uid() to execve() because too many poorly written programs + * don't check setuid() return code. Here we additionally recheck + * whether NPROC limit is still exceeded. + */ + if ((current->flags & PF_NPROC_EXCEEDED) && + atomic_read(&cred->user->processes) > rlimit(RLIMIT_NPROC)) { + retval = -EAGAIN; + goto out_ret; + } + + /* We're below the limit (still or again), so we don't want to make + * further execve() calls fail. */ + current->flags &= ~PF_NPROC_EXCEEDED; retval = unshare_files(&displaced); if (retval) diff --git a/include/linux/sched.h b/include/linux/sched.h index 20b03bf94748..4ac2c0578e0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1767,6 +1767,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_SIGNALED 0x00000400 /* killed by a signal */ #define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ #define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ diff --git a/kernel/cred.c b/kernel/cred.c index 174fa84eca30..8ef31f53c44c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -508,10 +508,8 @@ int commit_creds(struct cred *new) key_fsgid_changed(task); /* do it - * - What if a process setreuid()'s and this brings the - * new uid over his NPROC rlimit? We can check this now - * cheaply with the new uid cache, so if it matters - * we should be checking for it. -DaveM + * RLIMIT_NPROC limits on user->processes have already been checked + * in set_user(). */ alter_cred_subscribers(new, 2); if (new->user != old->user) diff --git a/kernel/fork.c b/kernel/fork.c index e7ceaca89609..8e6b6f4fb272 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1111,6 +1111,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_cred->user != INIT_USER) goto bad_fork_free; } + current->flags &= ~PF_NPROC_EXCEEDED; retval = copy_creds(p, clone_flags); if (retval < 0) diff --git a/kernel/sys.c b/kernel/sys.c index a101ba36c444..dd948a1fca4c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -621,11 +621,18 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + /* + * We don't fail in case of NPROC limit excess here because too many + * poorly written programs don't check set*uid() return code, assuming + * it never fails if called by root. We may still enforce NPROC limit + * for programs doing set*uid()+execve() by harmlessly deferring the + * failure to the execve() stage. + */ if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && - new_user != INIT_USER) { - free_uid(new_user); - return -EAGAIN; - } + new_user != INIT_USER) + current->flags |= PF_NPROC_EXCEEDED; + else + current->flags &= ~PF_NPROC_EXCEEDED; free_uid(new->user); new->user = new_user; -- cgit From 7fd781e8f9b72544a1c7f04456eb33d5ffaed592 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 8 Aug 2011 18:10:52 +0900 Subject: mmc: remove unused "ddr" parameter in struct mmc_ios "mmc: dw_mmc: Fix DDR mode support" removed the last user. Signed-off-by: Jaehoon Chung Signed-off-by: Kyungmin Park Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0f83858147a6..1d09562ccf73 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -56,8 +56,6 @@ struct mmc_ios { #define MMC_TIMING_UHS_SDR104 4 #define MMC_TIMING_UHS_DDR50 5 - unsigned char ddr; /* dual data rate used */ - #define MMC_SDR_MODE 0 #define MMC_1_2V_DDR_MODE 1 #define MMC_1_8V_DDR_MODE 2 -- cgit From 17f2ae7f677f023997e02fd2ebabd90ea2a0390d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 Aug 2011 13:34:31 +0200 Subject: PM / Domains: Fix build for CONFIG_PM_RUNTIME unset Function genpd_queue_power_off_work() is not defined for CONFIG_PM_RUNTIME, so pm_genpd_poweroff_unused() causes a build error to happen in that case. Fix the problem by making pm_genpd_poweroff_unused() depend on CONFIG_PM_RUNTIME too. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 30 +++++++++++++++--------------- include/linux/pm_domain.h | 10 +++++++--- kernel/power/Kconfig | 4 ++++ 3 files changed, 26 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e18566a0fedd..1c374579407c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -460,6 +460,21 @@ static int pm_genpd_runtime_resume(struct device *dev) return 0; } +/** + * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. + */ +void pm_genpd_poweroff_unused(void) +{ + struct generic_pm_domain *genpd; + + mutex_lock(&gpd_list_lock); + + list_for_each_entry(genpd, &gpd_list, gpd_list_node) + genpd_queue_power_off_work(genpd); + + mutex_unlock(&gpd_list_lock); +} + #else static inline void genpd_power_off_work_fn(struct work_struct *work) {} @@ -1255,18 +1270,3 @@ void pm_genpd_init(struct generic_pm_domain *genpd, list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); } - -/** - * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. - */ -void pm_genpd_poweroff_unused(void) -{ - struct generic_pm_domain *genpd; - - mutex_lock(&gpd_list_lock); - - list_for_each_entry(genpd, &gpd_list, gpd_list_node) - genpd_queue_power_off_work(genpd); - - mutex_unlock(&gpd_list_lock); -} diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 21097cb086fe..f9ec1736a116 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -72,8 +72,6 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_poweron(struct generic_pm_domain *genpd); -extern void pm_genpd_poweroff_unused(void); -extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); #else static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) @@ -101,8 +99,14 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } -static inline void pm_genpd_poweroff_unused(void) {} +#endif + +#ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME +extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); +extern void pm_genpd_poweroff_unused(void); +#else static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} +static inline void pm_genpd_poweroff_unused(void) {} #endif #endif /* _LINUX_PM_DOMAIN_H */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b1914cb9095c..3744c594b19b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -231,3 +231,7 @@ config PM_CLK config PM_GENERIC_DOMAINS bool depends on PM + +config PM_GENERIC_DOMAINS_RUNTIME + def_bool y + depends on PM_RUNTIME && PM_GENERIC_DOMAINS -- cgit From 4853abaae7e4a2af938115ce9071ef8684fb7af4 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 15 Aug 2011 21:37:25 +0200 Subject: block: fix flush machinery for stacking drivers with differring flush flags Commit ae1b1539622fb46e51b4d13b3f9e5f4c713f86ae, block: reimplement FLUSH/FUA to support merge, introduced a performance regression when running any sort of fsyncing workload using dm-multipath and certain storage (in our case, an HP EVA). The test I ran was fs_mark, and it dropped from ~800 files/sec on ext4 to ~100 files/sec. It turns out that dm-multipath always advertised flush+fua support, and passed commands on down the stack, where those flags used to get stripped off. The above commit changed that behavior: static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; while (1) { - while (!list_empty(&q->queue_head)) { + if (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || - (rq->cmd_flags & REQ_FLUSH_SEQ)) - return rq; - rq = blk_do_flush(q, rq); - if (rq) - return rq; + return rq; } Note that previously, a command would come in here, have REQ_FLUSH|REQ_FUA set, and then get handed off to blk_do_flush: struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned int fflags = q->flush_flags; /* may change, cache it */ bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; ... if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { rq->cmd_flags &= ~REQ_FLUSH; if (!has_fua) rq->cmd_flags &= ~REQ_FUA; return rq; } So, the flush machinery was bypassed in such cases (q->flush_flags == 0 && rq->cmd_flags & (REQ_FLUSH|REQ_FUA)). Now, however, we don't get into the flush machinery at all. Instead, __elv_next_request just hands a request with flush and fua bits set to the scsi_request_fn, even if the underlying request_queue does not support flush or fua. The agreed upon approach is to fix the flush machinery to allow stacking. While this isn't used in practice (since there is only one request-based dm target, and that target will now reflect the flush flags of the underlying device), it does future-proof the solution, and make it function as designed. In order to make this work, I had to add a field to the struct request, inside the flush structure (to store the original req->end_io). Shaohua had suggested overloading the union with rb_node and completion_data, but the completion data is used by device mapper and can also be used by other drivers. So, I didn't see a way around the additional field. I tested this patch on an HP EVA with both ext4 and xfs, and it recovers the lost performance. Comments and other testers, as always, are appreciated. Cheers, Jeff Signed-off-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 8 ++++++-- block/blk-flush.c | 20 ++++++++++++++++---- block/blk.h | 2 ++ include/linux/blkdev.h | 1 + 4 files changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index b850bedad229..7c59b0f5eae8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1700,6 +1700,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits); int blk_insert_cloned_request(struct request_queue *q, struct request *rq) { unsigned long flags; + int where = ELEVATOR_INSERT_BACK; if (blk_rq_check_limits(q, rq)) return -EIO; @@ -1716,7 +1717,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - add_acct_request(q, rq, ELEVATOR_INSERT_BACK); + if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) + where = ELEVATOR_INSERT_FLUSH; + + add_acct_request(q, rq, where); spin_unlock_irqrestore(q->queue_lock, flags); return 0; @@ -2273,7 +2277,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool __blk_end_bidi_request(struct request *rq, int error, +bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes) { if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) diff --git a/block/blk-flush.c b/block/blk-flush.c index 2d162bd840d3..491eb30a242d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -123,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq) /* make @rq a normal request */ rq->cmd_flags &= ~REQ_FLUSH_SEQ; - rq->end_io = NULL; + rq->end_io = rq->flush.saved_end_io; } /** @@ -301,9 +301,6 @@ void blk_insert_flush(struct request *rq) unsigned int fflags = q->flush_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); - BUG_ON(rq->end_io); - BUG_ON(!rq->bio || rq->bio != rq->biotail); - /* * @policy now records what operations need to be done. Adjust * REQ_FLUSH and FUA for the driver. @@ -312,6 +309,19 @@ void blk_insert_flush(struct request *rq) if (!(fflags & REQ_FUA)) rq->cmd_flags &= ~REQ_FUA; + /* + * An empty flush handed down from a stacking driver may + * translate into nothing if the underlying device does not + * advertise a write-back cache. In this case, simply + * complete the request. + */ + if (!policy) { + __blk_end_bidi_request(rq, 0, 0, 0); + return; + } + + BUG_ON(!rq->bio || rq->bio != rq->biotail); + /* * If there's data but flush is not necessary, the request can be * processed directly without going through flush machinery. Queue @@ -320,6 +330,7 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { list_add_tail(&rq->queuelist, &q->queue_head); + blk_run_queue_async(q); return; } @@ -330,6 +341,7 @@ void blk_insert_flush(struct request *rq) memset(&rq->flush, 0, sizeof(rq->flush)); INIT_LIST_HEAD(&rq->flush.list); rq->cmd_flags |= REQ_FLUSH_SEQ; + rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ rq->end_io = flush_data_end_io; blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); diff --git a/block/blk.h b/block/blk.h index d6586287adc9..20b900a377c9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); +bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes); void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 847928546076..84b15d54f8c2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ struct request { struct { unsigned int seq; struct list_head list; + rq_end_io_fn *saved_end_io; } flush; }; -- cgit From f991879473828f320a714e9494fb37a26ccd6b66 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 17 Aug 2011 13:45:09 +0100 Subject: mm: make HASHED_PAGE_VIRTUAL page_address' struct page argument const. Followup to 33dd4e0ec911 "mm: make some struct page's const" which missed the HASHED_PAGE_VIRTUAL case. Signed-off-by: Ian Campbell Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Michel Lespinasse Cc: Mel Gorman Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hash.h | 2 +- include/linux/mm.h | 2 +- mm/highmem.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hash.h b/include/linux/hash.h index 06d25c189cc5..b80506bdd733 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -63,7 +63,7 @@ static inline u32 hash_32(u32 val, unsigned int bits) return hash >> (32 - bits); } -static inline unsigned long hash_ptr(void *ptr, unsigned int bits) +static inline unsigned long hash_ptr(const void *ptr, unsigned int bits) { return hash_long((unsigned long)ptr, bits); } diff --git a/include/linux/mm.h b/include/linux/mm.h index fd599f4bb846..c06454d60a3d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -737,7 +737,7 @@ static __always_inline void *lowmem_page_address(const struct page *page) #endif #if defined(HASHED_PAGE_VIRTUAL) -void *page_address(struct page *page); +void *page_address(const struct page *page); void set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif diff --git a/mm/highmem.c b/mm/highmem.c index 693394daa2ed..5ef672c07f75 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -326,7 +326,7 @@ static struct page_address_slot { spinlock_t lock; /* Protect this bucket's list */ } ____cacheline_aligned_in_smp page_address_htable[1< Date: Wed, 17 Aug 2011 17:40:33 +0100 Subject: mm: fix __page_to_pfn for a const struct page argument This allows the cast in lowmem_page_address (introduced as a warning fixup to 33dd4e0ec911 "mm: make some struct page's const") to be removed. Propagate const'ness to page_to_section() as well since it is required by __page_to_pfn. Signed-off-by: Ian Campbell Acked-by: Rik van Riel Cc: Andrea Arcangeli Cc: Michel Lespinasse Cc: Mel Gorman Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/memory_model.h | 4 ++-- include/linux/mm.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index fb2d63f13f4c..aea9e45efce6 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -39,7 +39,7 @@ }) #define __page_to_pfn(pg) \ -({ struct page *__pg = (pg); \ +({ const struct page *__pg = (pg); \ struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ (unsigned long)(__pg - __pgdat->node_mem_map) + \ __pgdat->node_start_pfn; \ @@ -57,7 +57,7 @@ * section[i].section_mem_map == mem_map's address - start_pfn; */ #define __page_to_pfn(pg) \ -({ struct page *__pg = (pg); \ +({ const struct page *__pg = (pg); \ int __sec = page_to_section(__pg); \ (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \ }) diff --git a/include/linux/mm.h b/include/linux/mm.h index c06454d60a3d..7438071b44aa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -685,7 +685,7 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline unsigned long page_to_section(struct page *page) +static inline unsigned long page_to_section(const struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } @@ -720,7 +720,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone, static __always_inline void *lowmem_page_address(const struct page *page) { - return __va(PFN_PHYS(page_to_pfn((struct page *)page))); + return __va(PFN_PHYS(page_to_pfn(page))); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) -- cgit From bb0822954aab7d23a3f902c2a103ee0242f6046e Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 16 Aug 2011 13:37:14 -0600 Subject: squeeze max-pause area and drop pass-good area Revert the pass-good area introduced in ffd1f609ab10 ("writeback: introduce max-pause and pass-good dirty limits") and make the max-pause area smaller and safe. This fixes ~30% performance regression in the ext3 data=writeback fio_mmap_randwrite_64k/fio_mmap_randrw_64k test cases, where there are 12 JBOD disks, on each disk runs 8 concurrent tasks doing reads+writes. Using deadline scheduler also has a regression, but not that big as CFQ, so this suggests we have some write starvation. The test logs show that - the disks are sometimes under utilized - global dirty pages sometimes rush high to the pass-good area for several hundred seconds, while in the mean time some bdi dirty pages drop to very low value (bdi_dirty << bdi_thresh). Then suddenly the global dirty pages dropped under global dirty threshold and bdi_dirty rush very high (for example, 2 times higher than bdi_thresh). During which time balance_dirty_pages() is not called at all. So the problems are 1) The random writes progress so slow that they break the assumption of the max-pause logic that "8 pages per 200ms is typically more than enough to curb heavy dirtiers". 2) The max-pause logic ignored task_bdi_thresh and thus opens the possibility for some bdi's to over dirty pages, leading to (bdi_dirty >> bdi_thresh) and then (bdi_thresh >> bdi_dirty) for others. 3) The higher max-pause/pass-good thresholds somehow leads to the bad swing of dirty pages. The fix is to allow the task to slightly dirty over task_bdi_thresh, but no way to exceed bdi_dirty and/or global dirty_thresh. Tests show that it fixed the JBOD regression completely (both behavior and performance), while still being able to cut down large pause times in balance_dirty_pages() for single-disk cases. Reported-by: Li Shaohua Tested-by: Li Shaohua Acked-by: Jan Kara Signed-off-by: Wu Fengguang --- include/linux/writeback.h | 11 ----------- mm/page-writeback.c | 15 ++------------- 2 files changed, 2 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f1bfa12ea246..2b8963ff0f35 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -12,15 +12,6 @@ * * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) * - * The 1/16 region above the global dirty limit will be put to maximum pauses: - * - * (limit, limit + limit/DIRTY_MAXPAUSE_AREA) - * - * The 1/16 region above the max-pause region, dirty exceeded bdi's will be put - * to loops: - * - * (limit + limit/DIRTY_MAXPAUSE_AREA, limit + limit/DIRTY_PASSGOOD_AREA) - * * Further beyond, all dirtier tasks will enter a loop waiting (possibly long * time) for the dirty pages to drop, unless written enough pages. * @@ -31,8 +22,6 @@ */ #define DIRTY_SCOPE 8 #define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) -#define DIRTY_MAXPAUSE_AREA 16 -#define DIRTY_PASSGOOD_AREA 8 /* * 4MB minimal write chunk size diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d1960744f881..0e309cd1b5b9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -754,21 +754,10 @@ static void balance_dirty_pages(struct address_space *mapping, * 200ms is typically more than enough to curb heavy dirtiers; * (b) the pause time limit makes the dirtiers more responsive. */ - if (nr_dirty < dirty_thresh + - dirty_thresh / DIRTY_MAXPAUSE_AREA && + if (nr_dirty < dirty_thresh && + bdi_dirty < (task_bdi_thresh + bdi_thresh) / 2 && time_after(jiffies, start_time + MAX_PAUSE)) break; - /* - * pass-good area. When some bdi gets blocked (eg. NFS server - * not responding), or write bandwidth dropped dramatically due - * to concurrent reads, or dirty threshold suddenly dropped and - * the dirty pages cannot be brought down anytime soon (eg. on - * slow USB stick), at least let go of the good bdi's. - */ - if (nr_dirty < dirty_thresh + - dirty_thresh / DIRTY_PASSGOOD_AREA && - bdi_dirty < bdi_thresh) - break; /* * Increase the delay for each loop, up to our previous -- cgit From 0d7c5f2572ccfa7bf83292b1506926663f2d164a Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:31 -0500 Subject: drivers:misc:ti-st: platform hooks for chip states Certain platform specific or Host-WiLink Interface specific actions would be required to be taken when the chip is being enabled and after the chip is disabled such as configuration of the mux modes for the GPIO of host connected to the nshutdown of the chip or relinquishing UART after the chip is disabled. Similar actions can also be taken when the chip is in deep sleep or when the chip is awake. Performance enhancements such as configuring the host to run faster when chip is awake and slower when chip is asleep can also be made here. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 12 ++++++++++++ drivers/misc/ti-st/st_ll.c | 19 +++++++++++++++++++ include/linux/ti_wilink_st.h | 27 ++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 38fd2f04c07e..6884dd1c997b 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -434,11 +434,17 @@ long st_kim_start(void *kim_data) { long err = 0; long retry = POR_RETRY_COUNT; + struct ti_st_plat_data *pdata; struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; pr_info(" %s", __func__); + pdata = kim_gdata->kim_pdev->dev.platform_data; do { + /* platform specific enabling code here */ + if (pdata->chip_enable) + pdata->chip_enable(kim_gdata); + /* Configure BT nShutdown to HIGH state */ gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); mdelay(5); /* FIXME: a proper toggle */ @@ -489,6 +495,8 @@ long st_kim_stop(void *kim_data) { long err = 0; struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + struct ti_st_plat_data *pdata = + kim_gdata->kim_pdev->dev.platform_data; INIT_COMPLETION(kim_gdata->ldisc_installed); @@ -515,6 +523,10 @@ long st_kim_stop(void *kim_data) gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); mdelay(1); gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + + /* platform specific disable */ + if (pdata->chip_disable) + pdata->chip_disable(kim_gdata); return err; } diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c index 3f2495138855..1ff460a8e9c7 100644 --- a/drivers/misc/ti-st/st_ll.c +++ b/drivers/misc/ti-st/st_ll.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) "(stll) :" fmt #include #include +#include #include /**********************************************************************/ @@ -37,6 +38,9 @@ static void send_ll_cmd(struct st_data_s *st_data, static void ll_device_want_to_sleep(struct st_data_s *st_data) { + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + pr_debug("%s", __func__); /* sanity check */ if (st_data->ll_state != ST_LL_AWAKE) @@ -46,10 +50,19 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data) send_ll_cmd(st_data, LL_SLEEP_ACK); /* update state */ st_data->ll_state = ST_LL_ASLEEP; + + /* communicate to platform about chip asleep */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_asleep) + pdata->chip_asleep(NULL); } static void ll_device_want_to_wakeup(struct st_data_s *st_data) { + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + /* diff actions in diff states */ switch (st_data->ll_state) { case ST_LL_ASLEEP: @@ -70,6 +83,12 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data) } /* update state */ st_data->ll_state = ST_LL_AWAKE; + + /* communicate to platform about chip wakeup */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_asleep) + pdata->chip_awake(NULL); } /**********************************************************************/ diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index b004e557caa9..2ef4385da6bf 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -410,7 +410,28 @@ struct gps_event_hdr { u16 plen; } __attribute__ ((packed)); -/* platform data */ +/** + * struct ti_st_plat_data - platform data shared between ST driver and + * platform specific board file which adds the ST device. + * @nshutdown_gpio: Host's GPIO line to which chip's BT_EN is connected. + * @dev_name: The UART/TTY name to which chip is interfaced. (eg: /dev/ttyS1) + * @flow_cntrl: Should always be 1, since UART's CTS/RTS is used for PM + * purposes. + * @baud_rate: The baud rate supported by the Host UART controller, this will + * be shared across with the chip via a HCI VS command from User-Space Init + * Mgr application. + * @suspend: + * @resume: legacy PM routines hooked to platform specific board file, so as + * to take chip-host interface specific action. + * @chip_enable: + * @chip_disable: Platform/Interface specific mux mode setting, GPIO + * configuring, Host side PM disabling etc.. can be done here. + * @chip_asleep: + * @chip_awake: Chip specific deep sleep states is communicated to Host + * specific board-xx.c to take actions such as cut UART clocks when chip + * asleep or run host faster when chip awake etc.. + * + */ struct ti_st_plat_data { long nshutdown_gpio; unsigned char dev_name[UART_DEV_NAME_LEN]; /* uart name */ @@ -418,6 +439,10 @@ struct ti_st_plat_data { unsigned long baud_rate; int (*suspend)(struct platform_device *, pm_message_t); int (*resume)(struct platform_device *); + int (*chip_enable) (struct kim_data_s *); + int (*chip_disable) (struct kim_data_s *); + int (*chip_asleep) (struct kim_data_s *); + int (*chip_awake) (struct kim_data_s *); }; #endif /* TI_WILINK_ST_H */ -- cgit From 24d406a6bf736f7aebdc8fa0f0ec86e0890c6d24 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Aug 2011 14:59:28 +0200 Subject: TTY: pty, fix pty counting tty_operations->remove is normally called like: queue_release_one_tty ->tty_shutdown ->tty_driver_remove_tty ->tty_operations->remove However tty_shutdown() is called from queue_release_one_tty() only if tty_operations->shutdown is NULL. But for pty, it is not. pty_unix98_shutdown() is used there as ->shutdown. So tty_operations->remove of pty (i.e. pty_unix98_remove()) is never called. This results in invalid pty_count. I.e. what can be seen in /proc/sys/kernel/pty/nr. I see this was already reported at: https://lkml.org/lkml/2009/11/5/370 But it was not fixed since then. This patch is kind of a hackish way. The problem lies in ->install. We allocate there another tty (so-called tty->link). So ->install is called once, but ->remove twice, for both tty and tty->link. The fix here is to count both tty and tty->link and divide the count by 2 for user. And to have ->remove called, let's make tty_driver_remove_tty() global and call that from pty_unix98_shutdown() (tty_operations->shutdown). While at it, let's document that when ->shutdown is defined, tty_shutdown() is not called. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: "H. Peter Anvin" Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 17 +++++++++++++++-- drivers/tty/tty_io.c | 3 +-- include/linux/tty.h | 2 ++ include/linux/tty_driver.h | 3 +++ 4 files changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 98b6e3bdb000..e809e9d4683c 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -446,8 +446,19 @@ static inline void legacy_pty_init(void) { } int pty_limit = NR_UNIX98_PTY_DEFAULT; static int pty_limit_min; static int pty_limit_max = NR_UNIX98_PTY_MAX; +static int tty_count; static int pty_count; +static inline void pty_inc_count(void) +{ + pty_count = (++tty_count) / 2; +} + +static inline void pty_dec_count(void) +{ + pty_count = (--tty_count) / 2; +} + static struct cdev ptmx_cdev; static struct ctl_table pty_table[] = { @@ -542,6 +553,7 @@ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver, static void pty_unix98_shutdown(struct tty_struct *tty) { + tty_driver_remove_tty(tty->driver, tty); /* We have our own method as we don't use the tty index */ kfree(tty->termios); } @@ -588,7 +600,8 @@ static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty) */ tty_driver_kref_get(driver); tty->count++; - pty_count++; + pty_inc_count(); /* tty */ + pty_inc_count(); /* tty->link */ return 0; err_free_mem: deinitialize_tty_struct(o_tty); @@ -602,7 +615,7 @@ err_free_tty: static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) { - pty_count--; + pty_dec_count(); } static const struct tty_operations ptm_unix98_ops = { diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 150e4f747c7d..4f1fc81112e6 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1295,8 +1295,7 @@ static int tty_driver_install_tty(struct tty_driver *driver, * * Locking: tty_mutex for now */ -static void tty_driver_remove_tty(struct tty_driver *driver, - struct tty_struct *tty) +void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty) { if (driver->ops->remove) driver->ops->remove(driver, tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index 44bc0c5617e1..5f2ede82b3d6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -421,6 +421,8 @@ extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); +extern void tty_driver_remove_tty(struct tty_driver *driver, + struct tty_struct *tty); extern void tty_shutdown(struct tty_struct *tty); extern void tty_free_termios(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 9deeac855240..ecdaeb98b293 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -47,6 +47,9 @@ * * This routine is called synchronously when a particular tty device * is closed for the last time freeing up the resources. + * Note that tty_shutdown() is not called if ops->shutdown is defined. + * This means one is responsible to take care of calling ops->remove (e.g. + * via tty_driver_remove_tty) and releasing tty->termios. * * * void (*cleanup)(struct tty_struct * tty); -- cgit From be27425dcc516fd08245b047ea57f83b8f6f0903 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 19 Aug 2011 16:15:10 -0700 Subject: Add a personality to report 2.6.x version numbers I ran into a couple of programs which broke with the new Linux 3.0 version. Some of those were binary only. I tried to use LD_PRELOAD to work around it, but it was quite difficult and in one case impossible because of a mix of 32bit and 64bit executables. For example, all kind of management software from HP doesnt work, unless we pretend to run a 2.6 kernel. $ uname -a Linux svivoipvnx001 3.0.0-08107-g97cd98f #1062 SMP Fri Aug 12 18:11:45 CEST 2011 i686 i686 i386 GNU/Linux $ hpacucli ctrl all show Error: No controllers detected. $ rpm -qf /usr/sbin/hpacucli hpacucli-8.75-12.0 Another notable case is that Python now reports "linux3" from sys.platform(); which in turn can break things that were checking sys.platform() == "linux2": https://bugzilla.mozilla.org/show_bug.cgi?id=664564 It seems pretty clear to me though it's a bug in the apps that are using '==' instead of .startswith(), but this allows us to unbreak broken programs. This patch adds a UNAME26 personality that makes the kernel report a 2.6.40+x version number instead. The x is the x in 3.x. I know this is somewhat ugly, but I didn't find a better workaround, and compatibility to existing programs is important. Some programs also read /proc/sys/kernel/osrelease. This can be worked around in user space with mount --bind (and a mount namespace) To use: wget ftp://ftp.kernel.org/pub/linux/kernel/people/ak/uname26/uname26.c gcc -o uname26 uname26.c ./uname26 program Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/personality.h | 1 + kernel/sys.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/personality.h b/include/linux/personality.h index eec3bae164d4..8fc7dd1a57ff 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -22,6 +22,7 @@ extern int __set_personality(unsigned int); * These occupy the top three bytes. */ enum { + UNAME26 = 0x0020000, ADDR_NO_RANDOMIZE = 0x0040000, /* disable randomization of VA space */ FDPIC_FUNCPTRS = 0x0080000, /* userspace function ptrs point to descriptors * (signal handling) diff --git a/kernel/sys.c b/kernel/sys.c index dd948a1fca4c..18ee1d2f6474 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -44,6 +46,8 @@ #include #include +/* Move somewhere else to avoid recompiling? */ +#include #include #include @@ -1161,6 +1165,34 @@ DECLARE_RWSEM(uts_sem); #define override_architecture(name) 0 #endif +/* + * Work around broken programs that cannot handle "Linux 3.0". + * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 + */ +static int override_release(char __user *release, int len) +{ + int ret = 0; + char buf[len]; + + if (current->personality & UNAME26) { + char *rest = UTS_RELEASE; + int ndots = 0; + unsigned v; + + while (*rest) { + if (*rest == '.' && ++ndots >= 3) + break; + if (!isdigit(*rest) && *rest != '.') + break; + rest++; + } + v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40; + snprintf(buf, len, "2.6.%u%s", v, rest); + ret = copy_to_user(release, buf, len); + } + return ret; +} + SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { int errno = 0; @@ -1170,6 +1202,8 @@ SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) errno = -EFAULT; up_read(&uts_sem); + if (!errno && override_release(name->release, sizeof(name->release))) + errno = -EFAULT; if (!errno && override_architecture(name)) errno = -EFAULT; return errno; @@ -1191,6 +1225,8 @@ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) error = -EFAULT; up_read(&uts_sem); + if (!error && override_release(name->release, sizeof(name->release))) + error = -EFAULT; if (!error && override_architecture(name)) error = -EFAULT; return error; @@ -1225,6 +1261,8 @@ SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) if (!error && override_architecture(name)) error = -EFAULT; + if (!error && override_release(name->release, sizeof(name->release))) + error = -EFAULT; return error ? -EFAULT : 0; } #endif -- cgit From e096d0c7e2e4e5893792db865dd065ac73cf1f00 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 25 Aug 2011 07:48:12 -0400 Subject: lockdep: Add helper function for dir vs file i_mutex annotation Purely in-memory filesystems do not use the inode hash as the dcache tells us if an entry already exists. As a result, they do not call unlock_new_inode, and thus directory inodes do not get put into a different lockdep class for i_sem. We need the different lockdep classes, because the locking order for i_mutex is different for directory inodes and regular inodes. Directory inodes can do "readdir()", which takes i_mutex *before* possibly taking mm->mmap_sem (due to a page fault while copying the directory entry to user space). In contrast, regular inodes can be mmap'ed, which takes mm->mmap_sem before accessing i_mutex. The two cases can never happen for the same inode, so no real deadlock can occur, but without the different lockdep classes, lockdep cannot understand that. As a result, if CONFIG_DEBUG_LOCK_ALLOC is set, this can lead to false positives from lockdep like below: find/645 is trying to acquire lock: (&mm->mmap_sem){++++++}, at: [] might_fault+0x5c/0xac but task is already holding lock: (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [] vfs_readdir+0x5b/0xb4 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#15){+.+.+.}: [] lock_acquire+0xbf/0x103 [] __mutex_lock_common+0x4c/0x361 [] mutex_lock_nested+0x40/0x45 [] hugetlbfs_file_mmap+0x82/0x110 [] mmap_region+0x258/0x432 [] do_mmap_pgoff+0x2ac/0x306 [] sys_mmap_pgoff+0x118/0x16a [] sys_mmap+0x22/0x24 [] system_call_fastpath+0x16/0x1b -> #0 (&mm->mmap_sem){++++++}: [] __lock_acquire+0xa1a/0xcf7 [] lock_acquire+0xbf/0x103 [] might_fault+0x89/0xac [] filldir+0x6f/0xc7 [] dcache_readdir+0x67/0x205 [] vfs_readdir+0x7b/0xb4 [] sys_getdents+0x7e/0xd1 [] system_call_fastpath+0x16/0x1b This patch moves the directory vs file lockdep annotation into a helper function that can be called by in-memory filesystems and has hugetlbfs call it. Signed-off-by: Josh Boyer Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 1 + fs/inode.c | 24 +++++++++++++++--------- include/linux/fs.h | 5 +++++ 3 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 87b6e0421c12..ec889538e5a6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -491,6 +491,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_op = &page_symlink_inode_operations; break; } + lockdep_annotate_inode_mutex_key(inode); } return inode; } diff --git a/fs/inode.c b/fs/inode.c index 73920d555c88..ec7924696a13 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -848,16 +848,9 @@ struct inode *new_inode(struct super_block *sb) } EXPORT_SYMBOL(new_inode); -/** - * unlock_new_inode - clear the I_NEW state and wake up any waiters - * @inode: new inode to unlock - * - * Called when the inode is fully initialised to clear the new state of the - * inode and wake up anyone waiting for the inode to finish initialisation. - */ -void unlock_new_inode(struct inode *inode) -{ #ifdef CONFIG_DEBUG_LOCK_ALLOC +void lockdep_annotate_inode_mutex_key(struct inode *inode) +{ if (S_ISDIR(inode->i_mode)) { struct file_system_type *type = inode->i_sb->s_type; @@ -873,7 +866,20 @@ void unlock_new_inode(struct inode *inode) &type->i_mutex_dir_key); } } +} +EXPORT_SYMBOL(lockdep_annotate_inode_mutex_key); #endif + +/** + * unlock_new_inode - clear the I_NEW state and wake up any waiters + * @inode: new inode to unlock + * + * Called when the inode is fully initialised to clear the new state of the + * inode and wake up anyone waiting for the inode to finish initialisation. + */ +void unlock_new_inode(struct inode *inode) +{ + lockdep_annotate_inode_mutex_key(inode); spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW; diff --git a/include/linux/fs.h b/include/linux/fs.h index 178cdb4f1d4a..c2bd68f2277a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2318,6 +2318,11 @@ extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*te extern struct inode * iget_locked(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern void lockdep_annotate_inode_mutex_key(struct inode *inode); +#else +static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; +#endif extern void unlock_new_inode(struct inode *); extern unsigned int get_next_ino(void); -- cgit From a801876638c5ce650223476c4eb8f37cea32dc1c Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Thu, 25 Aug 2011 15:59:06 -0700 Subject: MAINTAINERS: Evgeniy has moved Signed-off-by: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- drivers/w1/masters/ds2490.c | 4 ++-- drivers/w1/masters/matrox_w1.c | 4 ++-- drivers/w1/slaves/w1_smem.c | 4 ++-- drivers/w1/slaves/w1_therm.c | 4 ++-- drivers/w1/w1.c | 4 ++-- drivers/w1/w1.h | 2 +- drivers/w1/w1_family.c | 2 +- drivers/w1/w1_family.h | 2 +- drivers/w1/w1_int.c | 2 +- drivers/w1/w1_int.h | 2 +- drivers/w1/w1_io.c | 2 +- drivers/w1/w1_log.h | 2 +- drivers/w1/w1_netlink.c | 2 +- drivers/w1/w1_netlink.h | 2 +- include/linux/connector.h | 2 +- 16 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index 069ee3b5c651..ee0ac2c98bef 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7087,7 +7087,7 @@ S: Supported F: drivers/mmc/host/vub300.c W1 DALLAS'S 1-WIRE BUS -M: Evgeniy Polyakov +M: Evgeniy Polyakov S: Maintained F: Documentation/w1/ F: drivers/w1/ diff --git a/drivers/w1/masters/ds2490.c b/drivers/w1/masters/ds2490.c index 02bf7bf7160b..b5abaae38e97 100644 --- a/drivers/w1/masters/ds2490.c +++ b/drivers/w1/masters/ds2490.c @@ -1,7 +1,7 @@ /* * dscore.c * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify @@ -1024,5 +1024,5 @@ module_init(ds_init); module_exit(ds_fini); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Evgeniy Polyakov "); +MODULE_AUTHOR("Evgeniy Polyakov "); MODULE_DESCRIPTION("DS2490 USB <-> W1 bus master driver (DS9490*)"); diff --git a/drivers/w1/masters/matrox_w1.c b/drivers/w1/masters/matrox_w1.c index 334d1ccf9c92..f667c26b2195 100644 --- a/drivers/w1/masters/matrox_w1.c +++ b/drivers/w1/masters/matrox_w1.c @@ -1,7 +1,7 @@ /* * matrox_w1.c * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify @@ -39,7 +39,7 @@ #include "../w1_log.h" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Evgeniy Polyakov "); +MODULE_AUTHOR("Evgeniy Polyakov "); MODULE_DESCRIPTION("Driver for transport(Dallas 1-wire prtocol) over VGA DDC(matrox gpio)."); static struct pci_device_id matrox_w1_tbl[] = { diff --git a/drivers/w1/slaves/w1_smem.c b/drivers/w1/slaves/w1_smem.c index cc8c02e92593..84655625c870 100644 --- a/drivers/w1/slaves/w1_smem.c +++ b/drivers/w1/slaves/w1_smem.c @@ -1,7 +1,7 @@ /* * w1_smem.c * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify @@ -32,7 +32,7 @@ #include "../w1_family.h" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Evgeniy Polyakov "); +MODULE_AUTHOR("Evgeniy Polyakov "); MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, 64bit memory family."); static struct w1_family w1_smem_family_01 = { diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 402928b135d1..a1ef9b5b38cf 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -1,7 +1,7 @@ /* * w1_therm.c * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify @@ -34,7 +34,7 @@ #include "../w1_family.h" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Evgeniy Polyakov "); +MODULE_AUTHOR("Evgeniy Polyakov "); MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol, temperature family."); /* Allow the strong pullup to be disabled, but default to enabled. diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index 6c136c19e982..c37497823851 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -1,7 +1,7 @@ /* * w1.c * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify @@ -42,7 +42,7 @@ #include "w1_netlink.h" MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Evgeniy Polyakov "); +MODULE_AUTHOR("Evgeniy Polyakov "); MODULE_DESCRIPTION("Driver for 1-wire Dallas network protocol."); static int w1_timeout = 10; diff --git a/drivers/w1/w1.h b/drivers/w1/w1.h index 1ce23fc6186c..4d012ca3f32c 100644 --- a/drivers/w1/w1.h +++ b/drivers/w1/w1.h @@ -1,7 +1,7 @@ /* * w1.h * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c index 4a099041f28a..63359797c8b1 100644 --- a/drivers/w1/w1_family.c +++ b/drivers/w1/w1_family.c @@ -1,7 +1,7 @@ /* * w1_family.c * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/w1/w1_family.h b/drivers/w1/w1_family.h index 98a1ac0f4693..490cda2281bc 100644 --- a/drivers/w1/w1_family.h +++ b/drivers/w1/w1_family.h @@ -1,7 +1,7 @@ /* * w1_family.h * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index b50be3f1073d..d220bce2cee4 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -1,7 +1,7 @@ /* * w1_int.c * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/w1/w1_int.h b/drivers/w1/w1_int.h index 4274082d2262..2ad7d4414bed 100644 --- a/drivers/w1/w1_int.h +++ b/drivers/w1/w1_int.h @@ -1,7 +1,7 @@ /* * w1_int.h * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/w1/w1_io.c b/drivers/w1/w1_io.c index 8e8b64cfafb6..765b37b62a4f 100644 --- a/drivers/w1/w1_io.c +++ b/drivers/w1/w1_io.c @@ -1,7 +1,7 @@ /* * w1_io.c * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/w1/w1_log.h b/drivers/w1/w1_log.h index e6ab7cf08f88..9c7bd62e6bdc 100644 --- a/drivers/w1/w1_log.h +++ b/drivers/w1/w1_log.h @@ -1,7 +1,7 @@ /* * w1_log.h * - * Copyright (c) 2004 Evgeniy Polyakov + * Copyright (c) 2004 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index 55aabd927c60..40788c925d1c 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -1,7 +1,7 @@ /* * w1_netlink.c * - * Copyright (c) 2003 Evgeniy Polyakov + * Copyright (c) 2003 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/drivers/w1/w1_netlink.h b/drivers/w1/w1_netlink.h index 27e950f935b1..b0922dc29658 100644 --- a/drivers/w1/w1_netlink.h +++ b/drivers/w1/w1_netlink.h @@ -1,7 +1,7 @@ /* * w1_netlink.h * - * Copyright (c) 2003 Evgeniy Polyakov + * Copyright (c) 2003 Evgeniy Polyakov * * * This program is free software; you can redistribute it and/or modify diff --git a/include/linux/connector.h b/include/linux/connector.h index 0c69ad825b39..3c9c54fd5690 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -1,7 +1,7 @@ /* * connector.h * - * 2004-2005 Copyright (c) Evgeniy Polyakov + * 2004-2005 Copyright (c) Evgeniy Polyakov * All rights reserved. * * This program is free software; you can redistribute it and/or modify -- cgit From 284fb68d00c56e971ed01e0b4bac5ddd4d1b74ab Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 25 Aug 2011 15:59:13 -0700 Subject: rapidio: fix use of non-compatible registers Replace/remove use of RIO v.1.2 registers/bits that are not forward-compatible with newer versions of RapidIO specification. RapidIO specification v.1.3 removed Write Port CSR, Doorbell CSR, Mailbox CSR and Mailbox and Doorbell bits of the PEF CAR. Use of removed (since RIO v.1.3) register bits affects users of currently available 1.3 and 2.x compliant devices who may use not so recent kernel versions. Removing checks for unsupported bits makes corresponding routines compatible with all versions of RapidIO specification. Therefore, backporting makes stable kernel versions compliant with RIO v.1.3 and later as well. Signed-off-by: Alexandre Bounine Cc: Kumar Gala Cc: Matt Porter Cc: Li Yang Cc: Thomas Moll Cc: Chul Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/net/rionet.c | 23 ++++++++--------------- drivers/rapidio/rio-scan.c | 3 +-- include/linux/rio_regs.h | 18 +++++++++--------- 3 files changed, 18 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 86ac38c96bcf..3bb131137033 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -80,13 +80,13 @@ static int rionet_capable = 1; */ static struct rio_dev **rionet_active; -#define is_rionet_capable(pef, src_ops, dst_ops) \ - ((pef & RIO_PEF_INB_MBOX) && \ - (pef & RIO_PEF_INB_DOORBELL) && \ +#define is_rionet_capable(src_ops, dst_ops) \ + ((src_ops & RIO_SRC_OPS_DATA_MSG) && \ + (dst_ops & RIO_DST_OPS_DATA_MSG) && \ (src_ops & RIO_SRC_OPS_DOORBELL) && \ (dst_ops & RIO_DST_OPS_DOORBELL)) #define dev_rionet_capable(dev) \ - is_rionet_capable(dev->pef, dev->src_ops, dev->dst_ops) + is_rionet_capable(dev->src_ops, dev->dst_ops) #define RIONET_MAC_MATCH(x) (*(u32 *)x == 0x00010001) #define RIONET_GET_DESTID(x) (*(u16 *)(x + 4)) @@ -282,7 +282,6 @@ static int rionet_open(struct net_device *ndev) { int i, rc = 0; struct rionet_peer *peer, *tmp; - u32 pwdcsr; struct rionet_private *rnet = netdev_priv(ndev); if (netif_msg_ifup(rnet)) @@ -332,13 +331,8 @@ static int rionet_open(struct net_device *ndev) continue; } - /* - * If device has initialized inbound doorbells, - * send a join message - */ - rio_read_config_32(peer->rdev, RIO_WRITE_PORT_CSR, &pwdcsr); - if (pwdcsr & RIO_DOORBELL_AVAIL) - rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN); + /* Send a join message */ + rio_send_doorbell(peer->rdev, RIONET_DOORBELL_JOIN); } out: @@ -492,7 +486,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev) static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id) { int rc = -ENODEV; - u32 lpef, lsrc_ops, ldst_ops; + u32 lsrc_ops, ldst_ops; struct rionet_peer *peer; struct net_device *ndev = NULL; @@ -515,12 +509,11 @@ static int rionet_probe(struct rio_dev *rdev, const struct rio_device_id *id) * on later probes */ if (!rionet_check) { - rio_local_read_config_32(rdev->net->hport, RIO_PEF_CAR, &lpef); rio_local_read_config_32(rdev->net->hport, RIO_SRC_OPS_CAR, &lsrc_ops); rio_local_read_config_32(rdev->net->hport, RIO_DST_OPS_CAR, &ldst_ops); - if (!is_rionet_capable(lpef, lsrc_ops, ldst_ops)) { + if (!is_rionet_capable(lsrc_ops, ldst_ops)) { printk(KERN_ERR "%s: local device is not network capable\n", DRV_NAME); diff --git a/drivers/rapidio/rio-scan.c b/drivers/rapidio/rio-scan.c index ee893581d4b7..ebe77dd87daf 100644 --- a/drivers/rapidio/rio-scan.c +++ b/drivers/rapidio/rio-scan.c @@ -505,8 +505,7 @@ static struct rio_dev __devinit *rio_setup_device(struct rio_net *net, rdev->dev.dma_mask = &rdev->dma_mask; rdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); - if ((rdev->pef & RIO_PEF_INB_DOORBELL) && - (rdev->dst_ops & RIO_DST_OPS_DOORBELL)) + if (rdev->dst_ops & RIO_DST_OPS_DOORBELL) rio_init_dbell_res(&rdev->riores[RIO_DOORBELL_RESOURCE], 0, 0xffff); diff --git a/include/linux/rio_regs.h b/include/linux/rio_regs.h index 9026b30238f3..218168a2b5e9 100644 --- a/include/linux/rio_regs.h +++ b/include/linux/rio_regs.h @@ -36,12 +36,12 @@ #define RIO_PEF_PROCESSOR 0x20000000 /* [I] Processor */ #define RIO_PEF_SWITCH 0x10000000 /* [I] Switch */ #define RIO_PEF_MULTIPORT 0x08000000 /* [VI, 2.1] Multiport */ -#define RIO_PEF_INB_MBOX 0x00f00000 /* [II] Mailboxes */ -#define RIO_PEF_INB_MBOX0 0x00800000 /* [II] Mailbox 0 */ -#define RIO_PEF_INB_MBOX1 0x00400000 /* [II] Mailbox 1 */ -#define RIO_PEF_INB_MBOX2 0x00200000 /* [II] Mailbox 2 */ -#define RIO_PEF_INB_MBOX3 0x00100000 /* [II] Mailbox 3 */ -#define RIO_PEF_INB_DOORBELL 0x00080000 /* [II] Doorbells */ +#define RIO_PEF_INB_MBOX 0x00f00000 /* [II, <= 1.2] Mailboxes */ +#define RIO_PEF_INB_MBOX0 0x00800000 /* [II, <= 1.2] Mailbox 0 */ +#define RIO_PEF_INB_MBOX1 0x00400000 /* [II, <= 1.2] Mailbox 1 */ +#define RIO_PEF_INB_MBOX2 0x00200000 /* [II, <= 1.2] Mailbox 2 */ +#define RIO_PEF_INB_MBOX3 0x00100000 /* [II, <= 1.2] Mailbox 3 */ +#define RIO_PEF_INB_DOORBELL 0x00080000 /* [II, <= 1.2] Doorbells */ #define RIO_PEF_EXT_RT 0x00000200 /* [III, 1.3] Extended route table support */ #define RIO_PEF_STD_RT 0x00000100 /* [III, 1.3] Standard route table support */ #define RIO_PEF_CTLS 0x00000010 /* [III] CTLS */ @@ -102,7 +102,7 @@ #define RIO_SWITCH_RT_LIMIT 0x34 /* [III, 1.3] Switch Route Table Destination ID Limit CAR */ #define RIO_RT_MAX_DESTID 0x0000ffff -#define RIO_MBOX_CSR 0x40 /* [II] Mailbox CSR */ +#define RIO_MBOX_CSR 0x40 /* [II, <= 1.2] Mailbox CSR */ #define RIO_MBOX0_AVAIL 0x80000000 /* [II] Mbox 0 avail */ #define RIO_MBOX0_FULL 0x40000000 /* [II] Mbox 0 full */ #define RIO_MBOX0_EMPTY 0x20000000 /* [II] Mbox 0 empty */ @@ -128,8 +128,8 @@ #define RIO_MBOX3_FAIL 0x00000008 /* [II] Mbox 3 fail */ #define RIO_MBOX3_ERROR 0x00000004 /* [II] Mbox 3 error */ -#define RIO_WRITE_PORT_CSR 0x44 /* [I] Write Port CSR */ -#define RIO_DOORBELL_CSR 0x44 /* [II] Doorbell CSR */ +#define RIO_WRITE_PORT_CSR 0x44 /* [I, <= 1.2] Write Port CSR */ +#define RIO_DOORBELL_CSR 0x44 /* [II, <= 1.2] Doorbell CSR */ #define RIO_DOORBELL_AVAIL 0x80000000 /* [II] Doorbell avail */ #define RIO_DOORBELL_FULL 0x40000000 /* [II] Doorbell full */ #define RIO_DOORBELL_EMPTY 0x20000000 /* [II] Doorbell empty */ -- cgit From cc7993f6439b49909a8792660c4d0741fec9d584 Mon Sep 17 00:00:00 2001 From: Dilan Lee Date: Thu, 25 Aug 2011 15:59:17 -0700 Subject: backlight: add a callback 'notify_after' for backlight control We need a callback to do some things after pwm_enable, pwm_disable and pwm_config. Signed-off-by: Dilan Lee Reviewed-by: Robert Morell Reviewed-by: Arun Murthy Cc: Richard Purdie Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/backlight/pwm_bl.c | 9 +++++++++ include/linux/pwm_backlight.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c index b8f38ec6eb18..8b5b2a4124c7 100644 --- a/drivers/video/backlight/pwm_bl.c +++ b/drivers/video/backlight/pwm_bl.c @@ -28,6 +28,8 @@ struct pwm_bl_data { unsigned int lth_brightness; int (*notify)(struct device *, int brightness); + void (*notify_after)(struct device *, + int brightness); int (*check_fb)(struct device *, struct fb_info *); }; @@ -55,6 +57,10 @@ static int pwm_backlight_update_status(struct backlight_device *bl) pwm_config(pb->pwm, brightness, pb->period); pwm_enable(pb->pwm); } + + if (pb->notify_after) + pb->notify_after(pb->dev, brightness); + return 0; } @@ -105,6 +111,7 @@ static int pwm_backlight_probe(struct platform_device *pdev) pb->period = data->pwm_period_ns; pb->notify = data->notify; + pb->notify_after = data->notify_after; pb->check_fb = data->check_fb; pb->lth_brightness = data->lth_brightness * (data->pwm_period_ns / data->max_brightness); @@ -172,6 +179,8 @@ static int pwm_backlight_suspend(struct platform_device *pdev, pb->notify(pb->dev, 0); pwm_config(pb->pwm, 0, pb->period); pwm_disable(pb->pwm); + if (pb->notify_after) + pb->notify_after(pb->dev, 0); return 0; } diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index 5e3e25a3c9c3..63d2df43e61a 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -14,6 +14,7 @@ struct platform_pwm_backlight_data { unsigned int pwm_period_ns; int (*init)(struct device *dev); int (*notify)(struct device *dev, int brightness); + void (*notify_after)(struct device *dev, int brightness); void (*exit)(struct device *dev); int (*check_fb)(struct device *dev, struct fb_info *info); }; -- cgit From f5b940997397229975ea073679b03967932a541b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 26 Aug 2011 18:03:11 -0400 Subject: All Arch: remove linkage for sys_nfsservctl system call The nfsservctl system call is now gone, so we should remove all linkage for it. Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields Signed-off-by: Linus Torvalds --- arch/alpha/kernel/systbls.S | 2 +- arch/arm/kernel/calls.S | 2 +- arch/avr32/kernel/syscall_table.S | 2 +- arch/blackfin/mach-common/entry.S | 2 +- arch/cris/arch-v10/kernel/entry.S | 2 +- arch/cris/arch-v32/kernel/entry.S | 2 +- arch/frv/kernel/entry.S | 2 +- arch/h8300/kernel/syscalls.S | 2 +- arch/ia64/kernel/entry.S | 2 +- arch/m32r/kernel/syscall_table.S | 2 +- arch/m68k/kernel/syscalltable.S | 2 +- arch/microblaze/kernel/syscall_table.S | 2 +- arch/mips/kernel/scall32-o32.S | 2 +- arch/mips/kernel/scall64-64.S | 2 +- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 2 +- arch/mn10300/kernel/entry.S | 2 +- arch/s390/kernel/compat_wrapper.S | 6 ------ arch/s390/kernel/syscalls.S | 2 +- arch/sh/kernel/syscalls_32.S | 2 +- arch/sh/kernel/syscalls_64.S | 2 +- arch/sparc/kernel/sys32.S | 1 - arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 2 +- arch/x86/ia32/ia32entry.S | 2 +- arch/x86/include/asm/unistd_64.h | 2 +- arch/x86/kernel/syscall_table_32.S | 2 +- arch/xtensa/include/asm/unistd.h | 2 +- fs/compat.c | 5 ----- include/asm-generic/unistd.h | 2 +- include/linux/compat.h | 1 - include/linux/syscalls.h | 3 --- kernel/sys_ni.c | 1 - 33 files changed, 27 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index b9c28f3f1956..6acea1f96de3 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -360,7 +360,7 @@ sys_call_table: .quad sys_newuname .quad sys_nanosleep /* 340 */ .quad sys_mremap - .quad sys_nfsservctl + .quad sys_ni_syscall /* old nfsservctl */ .quad sys_setresuid .quad sys_getresuid .quad sys_pciconfig_read /* 345 */ diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 80f7896cc016..9943e9e74a1b 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -178,7 +178,7 @@ CALL(sys_ni_syscall) /* vm86 */ CALL(sys_ni_syscall) /* was sys_query_module */ CALL(sys_poll) - CALL(sys_nfsservctl) + CALL(sys_ni_syscall) /* was nfsservctl */ /* 170 */ CALL(sys_setresgid16) CALL(sys_getresgid16) CALL(sys_prctl) diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S index c7fd394d28a4..6eba53530d1c 100644 --- a/arch/avr32/kernel/syscall_table.S +++ b/arch/avr32/kernel/syscall_table.S @@ -158,7 +158,7 @@ sys_call_table: .long sys_sched_rr_get_interval .long sys_nanosleep .long sys_poll - .long sys_nfsservctl /* 145 */ + .long sys_ni_syscall /* 145 was nfsservctl */ .long sys_setresgid .long sys_getresgid .long sys_prctl diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 225d311c9701..e4137297b790 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1543,7 +1543,7 @@ ENTRY(_sys_call_table) .long _sys_ni_syscall /* for vm86 */ .long _sys_ni_syscall /* old "query_module" */ .long _sys_ni_syscall /* sys_poll */ - .long _sys_nfsservctl + .long _sys_ni_syscall /* old nfsservctl */ .long _sys_setresgid /* setresgid16 */ /* 170 */ .long _sys_getresgid /* getresgid16 */ .long _sys_prctl diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index 1161883eb582..592fbe9dfb62 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -771,7 +771,7 @@ sys_call_table: .long sys_ni_syscall /* sys_vm86 */ .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* old nfsservctl */ .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S index 84fed7e91ada..c3ea4694fbaf 100644 --- a/arch/cris/arch-v32/kernel/entry.S +++ b/arch/cris/arch-v32/kernel/entry.S @@ -714,7 +714,7 @@ sys_call_table: .long sys_ni_syscall /* sys_vm86 */ .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* Old nfsservctl */ .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index 017d6d7b784f..5ba23f715ea5 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1358,7 +1358,7 @@ sys_call_table: .long sys_ni_syscall /* for vm86 */ .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* Old nfsservctl */ .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S index f4b2e67bcc34..4be2ea2fbe26 100644 --- a/arch/h8300/kernel/syscalls.S +++ b/arch/h8300/kernel/syscalls.S @@ -183,7 +183,7 @@ SYMBOL_NAME_LABEL(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* for vm86 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_query_module */ .long SYMBOL_NAME(sys_poll) - .long SYMBOL_NAME(sys_nfsservctl) + .long SYMBOL_NAME(sys_ni_syscall) /* old nfsservctl */ .long SYMBOL_NAME(sys_setresgid16) /* 170 */ .long SYMBOL_NAME(sys_getresgid16) .long SYMBOL_NAME(sys_prctl) diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 97dd2abdeb1a..198c753d1006 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1614,7 +1614,7 @@ sys_call_table: data8 sys_sched_get_priority_min data8 sys_sched_rr_get_interval data8 sys_nanosleep - data8 sys_nfsservctl + data8 sys_ni_syscall // old nfsservctl data8 sys_prctl // 1170 data8 sys_getpagesize data8 sys_mmap2 diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S index 528f2e6ad064..f365c19795ef 100644 --- a/arch/m32r/kernel/syscall_table.S +++ b/arch/m32r/kernel/syscall_table.S @@ -168,7 +168,7 @@ ENTRY(sys_call_table) .long sys_tas /* vm86 syscall holder */ .long sys_ni_syscall /* query_module syscall holder */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* was nfsservctl */ .long sys_setresgid /* 170 */ .long sys_getresgid .long sys_prctl diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index 00d1452f9571..c468f2edaa85 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -189,7 +189,7 @@ ENTRY(sys_call_table) .long sys_getpagesize .long sys_ni_syscall /* old "query_module" */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* old nfsservctl */ .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index d915a122c865..8789daa2a346 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -173,7 +173,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* sys_vm86 */ .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* old nfsservctl */ .long sys_setresgid /* 170 */ .long sys_getresgid .long sys_prctl diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index e521420a45a5..865bc7a6f5a1 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -424,7 +424,7 @@ einval: li v0, -ENOSYS sys sys_getresuid 3 sys sys_ni_syscall 0 /* was sys_query_module */ sys sys_poll 3 - sys sys_nfsservctl 3 + sys sys_ni_syscall 0 /* was nfsservctl */ sys sys_setresgid 3 /* 4190 */ sys sys_getresgid 3 sys sys_prctl 5 diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 85874d6a8a70..fb7334bea731 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -299,7 +299,7 @@ sys_call_table: PTR sys_ni_syscall /* 5170, was get_kernel_syms */ PTR sys_ni_syscall /* was query_module */ PTR sys_quotactl - PTR sys_nfsservctl + PTR sys_ni_syscall /* was nfsservctl */ PTR sys_ni_syscall /* res. for getpmsg */ PTR sys_ni_syscall /* 5175 for putpmsg */ PTR sys_ni_syscall /* res. for afs_syscall */ diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index b85842fc87ae..f9296e894e46 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -294,7 +294,7 @@ EXPORT(sysn32_call_table) PTR sys_ni_syscall /* 6170, was get_kernel_syms */ PTR sys_ni_syscall /* was query_module */ PTR sys_quotactl - PTR compat_sys_nfsservctl + PTR sys_ni_syscall /* was nfsservctl */ PTR sys_ni_syscall /* res. for getpmsg */ PTR sys_ni_syscall /* 6175 for putpmsg */ PTR sys_ni_syscall /* res. for afs_syscall */ diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 46c4763edf21..4d7c9827706f 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -392,7 +392,7 @@ sys_call_table: PTR sys_getresuid PTR sys_ni_syscall /* was query_module */ PTR sys_poll - PTR compat_sys_nfsservctl + PTR sys_ni_syscall /* was nfsservctl */ PTR sys_setresgid /* 4190 */ PTR sys_getresgid PTR sys_prctl diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index ae435e1d5669..3e3620d9fc45 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -589,7 +589,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* vm86 */ .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* was nfsservctl */ .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 08ab9aa6a0d5..7526db6bf501 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -665,12 +665,6 @@ ENTRY(sys32_poll_wrapper) lgfr %r4,%r4 # long jg sys_poll # branch to system call -ENTRY(compat_sys_nfsservctl_wrapper) - lgfr %r2,%r2 # int - llgtr %r3,%r3 # struct compat_nfsctl_arg* - llgtr %r4,%r4 # union compat_nfsctl_res* - jg compat_sys_nfsservctl # branch to system call - ENTRY(sys32_setresgid16_wrapper) llgfr %r2,%r2 # __kernel_old_gid_emu31_t llgfr %r3,%r3 # __kernel_old_gid_emu31_t diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 6ee39ef8fe4a..73eb08c874fb 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -177,7 +177,7 @@ SYSCALL(sys_getresuid16,sys_ni_syscall,sys32_getresuid16_wrapper) /* 165 old get NI_SYSCALL /* for vm86 */ NI_SYSCALL /* old sys_query_module */ SYSCALL(sys_poll,sys_poll,sys32_poll_wrapper) -SYSCALL(sys_nfsservctl,sys_nfsservctl,compat_sys_nfsservctl_wrapper) +NI_SYSCALL /* old nfsservctl */ SYSCALL(sys_setresgid16,sys_ni_syscall,sys32_setresgid16_wrapper) /* 170 old setresgid16 syscall */ SYSCALL(sys_getresgid16,sys_ni_syscall,sys32_getresgid16_wrapper) /* old getresgid16 syscall */ SYSCALL(sys_prctl,sys_prctl,sys32_prctl_wrapper) diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index 39b051de4c7c..293e39c59c00 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -185,7 +185,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* vm86 */ .long sys_ni_syscall /* old "query_module" */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* was nfsservctl */ .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index 089c4d825d08..ceb34b94afa9 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -189,7 +189,7 @@ sys_call_table: .long sys_ni_syscall /* vm86 */ .long sys_ni_syscall /* old "query_module" */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* was nfsservctl */ .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index 44e5faf1ad5f..d97f3eb72e06 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -81,7 +81,6 @@ SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4) SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5) SIGN2(sys32_bdflush, sys_bdflush, %o0, %o1) SIGN1(sys32_mlockall, sys_mlockall, %o0) -SIGN1(sys32_nfsservctl, compat_sys_nfsservctl, %o0) SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1) SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1) SIGN1(sys32_io_submit, compat_sys_io_submit, %o1) diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 6e492d59f6b1..09d8ec454450 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -67,7 +67,7 @@ sys_call_table: /*235*/ .long sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall /*240*/ .long sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler /*245*/ .long sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep -/*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl +/*250*/ .long sys_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_ni_syscall /*255*/ .long sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .long sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun /*265*/ .long sys_timer_delete, sys_timer_create, sys_nis_syscall, sys_io_setup, sys_io_destroy diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index f566518483b5..c9296ab0b1f4 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -145,7 +145,7 @@ sys_call_table: .word sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall /*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, sys_sched_rr_get_interval, sys_nanosleep -/*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nfsservctl +/*250*/ .word sys_64_mremap, sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall .word sys_sync_file_range, sys_clock_settime, sys_clock_gettime, sys_clock_getres, sys_clock_nanosleep /*260*/ .word sys_sched_getaffinity, sys_sched_setaffinity, sys_timer_settime, sys_timer_gettime, sys_timer_getoverrun .word sys_timer_delete, sys_timer_create, sys_ni_syscall, sys_io_setup, sys_io_destroy diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a0e866d233ee..54edb207ff3a 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -672,7 +672,7 @@ ia32_sys_call_table: .quad sys32_vm86_warning /* vm86 */ .quad quiet_ni_syscall /* query_module */ .quad sys_poll - .quad compat_sys_nfsservctl + .quad quiet_ni_syscall /* old nfsservctl */ .quad sys_setresgid16 /* 170 */ .quad sys_getresgid16 .quad sys_prctl diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index d92641cc7acc..201040573444 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -414,7 +414,7 @@ __SYSCALL(__NR_query_module, sys_ni_syscall) __SYSCALL(__NR_quotactl, sys_quotactl) #define __NR_nfsservctl 180 -__SYSCALL(__NR_nfsservctl, sys_nfsservctl) +__SYSCALL(__NR_nfsservctl, sys_ni_syscall) /* reserved for LiS/STREAMS */ #define __NR_getpmsg 181 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index fbb0a045a1a2..bc19be332bc9 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -168,7 +168,7 @@ ENTRY(sys_call_table) .long ptregs_vm86 .long sys_ni_syscall /* Old sys_query_module */ .long sys_poll - .long sys_nfsservctl + .long sys_ni_syscall /* Old nfsservctl */ .long sys_setresgid16 /* 170 */ .long sys_getresgid16 .long sys_prctl diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index a6f934f37f1a..798ee6d285a1 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -455,7 +455,7 @@ __SYSCALL(203, sys_reboot, 3) #define __NR_quotactl 204 __SYSCALL(204, sys_quotactl, 4) #define __NR_nfsservctl 205 -__SYSCALL(205, sys_nfsservctl, 3) +__SYSCALL(205, sys_ni_syscall, 0) #define __NR__sysctl 206 __SYSCALL(206, sys_sysctl, 1) #define __NR_bdflush 207 diff --git a/fs/compat.c b/fs/compat.c index 0b48d018e38a..58b1da459893 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1675,11 +1675,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } #endif /* HAVE_SET_RESTORE_SIGMASK */ -long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) -{ - return sys_ni_syscall(); -} - #ifdef CONFIG_EPOLL #ifdef HAVE_SET_RESTORE_SIGMASK diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 4f76959397fa..f4c38d8c6674 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -143,7 +143,7 @@ __SYSCALL(__NR_pivot_root, sys_pivot_root) /* fs/nfsctl.c */ #define __NR_nfsservctl 42 -__SC_COMP(__NR_nfsservctl, sys_nfsservctl, compat_sys_nfsservctl) +__SYSCALL(__NR_nfsservctl, sys_ni_syscall) /* fs/open.c */ #define __NR3264_statfs 43 diff --git a/include/linux/compat.h b/include/linux/compat.h index 8779405e15a8..c6e7523bf765 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -438,7 +438,6 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, struct compat_timespec __user *tsp, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize); -asmlinkage long compat_sys_nfsservctl(int cmd, void *notused, void *notused2); asmlinkage long compat_sys_signalfd4(int ufd, const compat_sigset_t __user *sigmask, compat_size_t sigsetsize, int flags); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8c03b98df5f9..1ff0ec2a5e8d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -702,9 +702,6 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args); asmlinkage long sys_sysinfo(struct sysinfo __user *info); asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2); -asmlinkage long sys_nfsservctl(int cmd, - struct nfsctl_arg __user *arg, - void __user *res); asmlinkage long sys_syslog(int type, char __user *buf, int len); asmlinkage long sys_uselib(const char __user *library); asmlinkage long sys_ni_syscall(void); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 62cbc8877fef..a9a5de07c4f1 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -16,7 +16,6 @@ asmlinkage long sys_ni_syscall(void) return -ENOSYS; } -cond_syscall(sys_nfsservctl); cond_syscall(sys_quotactl); cond_syscall(sys32_quotactl); cond_syscall(sys_acct); -- cgit