From 0c6a89ba640d28e1dcd7fd1a217d2cfb92ae4953 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sun, 29 Jul 2007 23:00:46 +0900 Subject: [SCSI] bsg: update sg_io_v4 structure This updates sg_io_v4 structure (based on Doug's RFC, release 1.3). The major changes are: - add dout_resid field - increase tag size to 64 bits to comply with SAM-4 and SRP - add dout_iovec_count and din_iovec_count dout_iovec_count and din_iovec_count aren't supported now. I'm not sure whether they will be supported or not but they were added for the possible future changes. Signed-off-by: FUJITA Tomonori Signed-off-by: James Bottomley --- include/linux/bsg.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bsg.h b/include/linux/bsg.h index 102dc096e1cb..60e377b520f8 100644 --- a/include/linux/bsg.h +++ b/include/linux/bsg.h @@ -15,14 +15,18 @@ struct sg_io_v4 { __u32 request_len; /* [i] in bytes */ __u64 request; /* [i], [*i] {SCSI: cdb} */ + __u64 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ __u32 request_attr; /* [i] {SCSI: task attribute} */ - __u32 request_tag; /* [i] {SCSI: task tag (only if flagged)} */ __u32 request_priority; /* [i] {SCSI: task priority} */ + __u32 request_extra; /* [i] {spare, for padding} */ __u32 max_response_len; /* [i] in bytes */ __u64 response; /* [i], [*o] {SCSI: (auto)sense data} */ - /* "din_" for data in (from device); "dout_" for data out (to device) */ + /* "dout_": data out (to device); "din_": data in (from device) */ + __u32 dout_iovec_count; /* [i] 0 -> "flat" dout transfer else + dout_xfer points to array of iovec */ __u32 dout_xfer_len; /* [i] bytes to be transferred to device */ + __u32 din_iovec_count; /* [i] 0 -> "flat" din transfer */ __u32 din_xfer_len; /* [i] bytes to be transferred from device */ __u64 dout_xferp; /* [i], [*i] */ __u64 din_xferp; /* [i], [*o] */ @@ -39,8 +43,9 @@ struct sg_io_v4 { __u32 info; /* [o] additional information */ __u32 duration; /* [o] time to complete, in milliseconds */ __u32 response_len; /* [o] bytes of response actually written */ - __s32 din_resid; /* [o] actual_din_xfer_len - din_xfer_len */ - __u32 generated_tag; /* [o] {SCSI: task tag that transport chose} */ + __s32 din_resid; /* [o] din_xfer_len - actual_din_xfer_len */ + __s32 dout_resid; /* [o] dout_xfer_len - actual_dout_xfer_len */ + __u64 generated_tag; /* [o] {SCSI: transport generated task tag} */ __u32 spare_out; /* [o] */ __u32 padding; -- cgit From be1b685fe6c9928848b26b568eaa86ba8ce0046c Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Sat, 4 Aug 2007 21:18:16 -0700 Subject: [NETFILTER]: Add xt_statistic.h to the header list for usermode programs Add xt_statistic.h to the list of headers to install. Apparently needed to build newer versions of iptables. Signed-off-by: Chuck Ebbert Signed-off-by: David S. Miller --- include/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 43397a414cd6..ab57cb7d7c61 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -28,6 +28,7 @@ header-y += xt_policy.h header-y += xt_realm.h header-y += xt_sctp.h header-y += xt_state.h +header-y += xt_statistic.h header-y += xt_string.h header-y += xt_tcpmss.h header-y += xt_tcpudp.h -- cgit From 5e11934d13c9a3bcb0cadad6c7a7de5c32660422 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 26 Jul 2007 12:06:17 -0400 Subject: NFS: Fix put_nfs_open_context We need to grab the inode->i_lock atomically with the last reference put in order to remove the open context that is being freed from the nfsi->open_files list. Fix by converting the kref to a standard atomic counter and then using atomic_dec_and_lock()... Thanks to Arnd Bergmann for pointing out the problem. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 24 ++++++++---------------- include/linux/nfs_fs.h | 2 +- 2 files changed, 9 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bca6cdcb9f0d..71a49c3acabd 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -468,7 +468,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str ctx->lockowner = current->files; ctx->error = 0; ctx->dir_cookie = 0; - kref_init(&ctx->kref); + atomic_set(&ctx->count, 1); } return ctx; } @@ -476,21 +476,18 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - kref_get(&ctx->kref); + atomic_inc(&ctx->count); return ctx; } -static void nfs_free_open_context(struct kref *kref) +void put_nfs_open_context(struct nfs_open_context *ctx) { - struct nfs_open_context *ctx = container_of(kref, - struct nfs_open_context, kref); + struct inode *inode = ctx->path.dentry->d_inode; - if (!list_empty(&ctx->list)) { - struct inode *inode = ctx->path.dentry->d_inode; - spin_lock(&inode->i_lock); - list_del(&ctx->list); - spin_unlock(&inode->i_lock); - } + if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) + return; + list_del(&ctx->list); + spin_unlock(&inode->i_lock); if (ctx->state != NULL) nfs4_close_state(&ctx->path, ctx->state, ctx->mode); if (ctx->cred != NULL) @@ -500,11 +497,6 @@ static void nfs_free_open_context(struct kref *kref) kfree(ctx); } -void put_nfs_open_context(struct nfs_open_context *ctx) -{ - kref_put(&ctx->kref, nfs_free_open_context); -} - /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9ba4aec37c50..157dcb055b5c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -71,7 +71,7 @@ struct nfs_access_entry { struct nfs4_state; struct nfs_open_context { - struct kref kref; + atomic_t count; struct path path; struct rpc_cred *cred; struct nfs4_state *state; -- cgit From 4301065920b0cbde3986519582347e883b166f3e Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: simplify move_tasks() The move_tasks() function is currently multiplexed with two distinct capabilities: 1. attempt to move a specified amount of weighted load from one run queue to another; and 2. attempt to move a specified number of tasks from one run queue to another. The first of these capabilities is used in two places, load_balance() and load_balance_idle(), and in both of these cases the return value of move_tasks() is used purely to decide if tasks/load were moved and no notice of the actual number of tasks moved is taken. The second capability is used in exactly one place, active_load_balance(), to attempt to move exactly one task and, as before, the return value is only used as an indicator of success or failure. This multiplexing of sched_task() was introduced, by me, as part of the smpnice patches and was motivated by the fact that the alternative, one function to move specified load and one to move a single task, would have led to two functions of roughly the same complexity as the old move_tasks() (or the new balance_tasks()). However, the new modular design of the new CFS scheduler allows a simpler solution to be adopted and this patch addresses that solution by: 1. adding a new function, move_one_task(), to be used by active_load_balance(); and 2. making move_tasks() a single purpose function that tries to move a specified weighted load and returns 1 for success and 0 for failure. One of the consequences of these changes is that neither move_one_task() or the new move_tasks() care how many tasks sched_class.load_balance() moves and this enables its interface to be simplified by returning the amount of load moved as its result and removing the load_moved pointer from the argument list. This helps simplify the new move_tasks() and slightly reduces the amount of work done in each of sched_class.load_balance()'s implementations. Further simplification, e.g. changes to balance_tasks(), are possible but (slightly) complicated by the special needs of load_balance_fair() so I've left them to a later patch (if this one gets accepted). NB Since move_tasks() gets called with two run queue locks held even small reductions in overhead are worthwhile. [ mingo@elte.hu ] this change also reduces code size nicely: text data bss dec hex filename 39216 3618 24 42858 a76a sched.o.before 39173 3618 24 42815 a73f sched.o.after Signed-off-by: Peter Williams Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +-- kernel/sched.c | 82 +++++++++++++++++++++++++++---------------------- kernel/sched_fair.c | 8 ++--- kernel/sched_idletask.c | 4 +-- kernel/sched_rt.c | 9 +++--- 5 files changed, 58 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 17249fae5014..24bce423f10d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -866,11 +866,11 @@ struct sched_class { struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); - int (*load_balance) (struct rq *this_rq, int this_cpu, + unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, unsigned long *total_load_moved); + int *all_pinned); void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p); diff --git a/kernel/sched.c b/kernel/sched.c index 4680f52974e3..42029634ef5a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2231,32 +2231,49 @@ out: } /* - * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted - * load from busiest to this_rq, as part of a balancing operation within - * "domain". Returns the number of tasks moved. + * move_tasks tries to move up to max_load_move weighted load from busiest to + * this_rq, as part of a balancing operation within domain "sd". + * Returns 1 if successful and 0 otherwise. * * Called with both runqueues locked. */ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_nr_move, unsigned long max_load_move, + unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned) { struct sched_class *class = sched_class_highest; - unsigned long load_moved, total_nr_moved = 0, nr_moved; - long rem_load_move = max_load_move; + unsigned long total_load_moved = 0; do { - nr_moved = class->load_balance(this_rq, this_cpu, busiest, - max_nr_move, (unsigned long)rem_load_move, - sd, idle, all_pinned, &load_moved); - total_nr_moved += nr_moved; - max_nr_move -= nr_moved; - rem_load_move -= load_moved; + total_load_moved += + class->load_balance(this_rq, this_cpu, busiest, + ULONG_MAX, max_load_move - total_load_moved, + sd, idle, all_pinned); class = class->next; - } while (class && max_nr_move && rem_load_move > 0); + } while (class && max_load_move > total_load_moved); - return total_nr_moved; + return total_load_moved > 0; +} + +/* + * move_one_task tries to move exactly one task from busiest to this_rq, as + * part of active balancing operations within "domain". + * Returns 1 if successful and 0 otherwise. + * + * Called with both runqueues locked. + */ +static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, + struct sched_domain *sd, enum cpu_idle_type idle) +{ + struct sched_class *class; + + for (class = sched_class_highest; class; class = class->next) + if (class->load_balance(this_rq, this_cpu, busiest, + 1, ULONG_MAX, sd, idle, NULL)) + return 1; + + return 0; } /* @@ -2588,11 +2605,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ #define MAX_PINNED_INTERVAL 512 -static inline unsigned long minus_1_or_zero(unsigned long n) -{ - return n > 0 ? n - 1 : 0; -} - /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. @@ -2601,7 +2613,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, int *balance) { - int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; + int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; unsigned long imbalance; struct rq *busiest; @@ -2642,18 +2654,17 @@ redo: schedstat_add(sd, lb_imbalance[idle], imbalance); - nr_moved = 0; + ld_moved = 0; if (busiest->nr_running > 1) { /* * Attempt to move tasks. If find_busiest_group has found * an imbalance but busiest->nr_running <= 1, the group is - * still unbalanced. nr_moved simply stays zero, so it is + * still unbalanced. ld_moved simply stays zero, so it is * correctly treated as an imbalance. */ local_irq_save(flags); double_rq_lock(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), + ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, idle, &all_pinned); double_rq_unlock(this_rq, busiest); local_irq_restore(flags); @@ -2661,7 +2672,7 @@ redo: /* * some other cpu did the load balance for us. */ - if (nr_moved && this_cpu != smp_processor_id()) + if (ld_moved && this_cpu != smp_processor_id()) resched_cpu(this_cpu); /* All tasks on this runqueue were pinned by CPU affinity */ @@ -2673,7 +2684,7 @@ redo: } } - if (!nr_moved) { + if (!ld_moved) { schedstat_inc(sd, lb_failed[idle]); sd->nr_balance_failed++; @@ -2722,10 +2733,10 @@ redo: sd->balance_interval *= 2; } - if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && + if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - return nr_moved; + return ld_moved; out_balanced: schedstat_inc(sd, lb_balanced[idle]); @@ -2757,7 +2768,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) struct sched_group *group; struct rq *busiest = NULL; unsigned long imbalance; - int nr_moved = 0; + int ld_moved = 0; int sd_idle = 0; int all_pinned = 0; cpumask_t cpus = CPU_MASK_ALL; @@ -2792,12 +2803,11 @@ redo: schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); - nr_moved = 0; + ld_moved = 0; if (busiest->nr_running > 1) { /* Attempt to move tasks */ double_lock_balance(this_rq, busiest); - nr_moved = move_tasks(this_rq, this_cpu, busiest, - minus_1_or_zero(busiest->nr_running), + ld_moved = move_tasks(this_rq, this_cpu, busiest, imbalance, sd, CPU_NEWLY_IDLE, &all_pinned); spin_unlock(&busiest->lock); @@ -2809,7 +2819,7 @@ redo: } } - if (!nr_moved) { + if (!ld_moved) { schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) @@ -2817,7 +2827,7 @@ redo: } else sd->nr_balance_failed = 0; - return nr_moved; + return ld_moved; out_balanced: schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); @@ -2905,8 +2915,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) if (likely(sd)) { schedstat_inc(sd, alb_cnt); - if (move_tasks(target_rq, target_cpu, busiest_rq, 1, - ULONG_MAX, sd, CPU_IDLE, NULL)) + if (move_one_task(target_rq, target_cpu, busiest_rq, + sd, CPU_IDLE)) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 9f401588d509..7307a37cf26f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -944,11 +944,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) return p->prio; } -static int +static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, unsigned long *total_load_moved) + int *all_pinned) { struct cfs_rq *busy_cfs_rq; unsigned long load_moved, total_nr_moved = 0, nr_moved; @@ -1006,9 +1006,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, break; } - *total_load_moved = max_load_move - rem_load_move; - - return total_nr_moved; + return max_load_move - rem_load_move; } /* diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 41841e741c4a..1d8d9e13d950 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -37,11 +37,11 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now) { } -static int +static unsigned long load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, unsigned long *total_load_moved) + int *all_pinned) { return 0; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 002fcf8d3f64..2b0626a43cb8 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -172,15 +172,16 @@ static struct task_struct *load_balance_next_rt(void *arg) return p; } -static int +static unsigned long load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, unsigned long *load_moved) + int *all_pinned) { int this_best_prio, best_prio, best_prio_seen = 0; int nr_moved; struct rq_iterator rt_rq_iterator; + unsigned long load_moved; best_prio = sched_find_first_bit(busiest->rt.active.bitmap); this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap); @@ -203,11 +204,11 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, rt_rq_iterator.arg = busiest; nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, - max_load_move, sd, idle, all_pinned, load_moved, + max_load_move, sd, idle, all_pinned, &load_moved, this_best_prio, best_prio, best_prio_seen, &rt_rq_iterator); - return nr_moved; + return load_moved; } static void task_tick_rt(struct rq *rq, struct task_struct *p) -- cgit From a4ac01c36e286dd1b9a1d5cd7422c5af51dc55f8 Mon Sep 17 00:00:00 2001 From: Peter Williams Date: Thu, 9 Aug 2007 11:16:46 +0200 Subject: sched: fix bug in balance_tasks() There are two problems with balance_tasks() and how it used: 1. The variables best_prio and best_prio_seen (inherited from the old move_tasks()) were only required to handle problems caused by the active/expired arrays, the order in which they were processed and the possibility that the task with the highest priority could be on either. These issues are no longer present and the extra overhead associated with their use is unnecessary (and possibly wrong). 2. In the absence of CONFIG_FAIR_GROUP_SCHED being set, the same this_best_prio variable needs to be used by all scheduling classes or there is a risk of moving too much load. E.g. if the highest priority task on this at the beginning is a fairly low priority task and the rt class migrates a task (during its turn) then that moved task becomes the new highest priority task on this_rq but when the sched_fair class initializes its copy of this_best_prio it will get the priority of the original highest priority task as, due to the run queue locks being held, the reschedule triggered by pull_task() will not have taken place. This could result in inappropriate overriding of skip_for_load and excessive load being moved. The attached patch addresses these problems by deleting all reference to best_prio and best_prio_seen and making this_best_prio a reference parameter to the various functions involved. load_balance_fair() has also been modified so that this_best_prio is only reset (in the loop) if CONFIG_FAIR_GROUP_SCHED is set. This should preserve the effect of helping spread groups' higher priority tasks around the available CPUs while improving system performance when CONFIG_FAIR_GROUP_SCHED isn't set. Signed-off-by: Peter Williams Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 26 +++++++++++--------------- kernel/sched_fair.c | 32 ++++++++++++-------------------- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 19 ++----------------- 5 files changed, 27 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 24bce423f10d..513b81c60e87 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -870,7 +870,7 @@ struct sched_class { struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned); + int *all_pinned, int *this_best_prio); void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p); diff --git a/kernel/sched.c b/kernel/sched.c index 85b93118d244..1fa07c14624e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -745,8 +745,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator); + int *this_best_prio, struct rq_iterator *iterator); #include "sched_stats.h" #include "sched_rt.c" @@ -2165,8 +2164,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator) + int *this_best_prio, struct rq_iterator *iterator) { int pulled = 0, pinned = 0, skip_for_load; struct task_struct *p; @@ -2191,12 +2189,8 @@ next: */ skip_for_load = (p->se.load.weight >> 1) > rem_load_move + SCHED_LOAD_SCALE_FUZZ; - if (skip_for_load && p->prio < this_best_prio) - skip_for_load = !best_prio_seen && p->prio == best_prio; - if (skip_for_load || + if ((skip_for_load && p->prio >= *this_best_prio) || !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { - - best_prio_seen |= p->prio == best_prio; p = iterator->next(iterator->arg); goto next; } @@ -2210,8 +2204,8 @@ next: * and the prescribed amount of weighted load. */ if (pulled < max_nr_move && rem_load_move > 0) { - if (p->prio < this_best_prio) - this_best_prio = p->prio; + if (p->prio < *this_best_prio) + *this_best_prio = p->prio; p = iterator->next(iterator->arg); goto next; } @@ -2243,12 +2237,13 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, { struct sched_class *class = sched_class_highest; unsigned long total_load_moved = 0; + int this_best_prio = this_rq->curr->prio; do { total_load_moved += class->load_balance(this_rq, this_cpu, busiest, ULONG_MAX, max_load_move - total_load_moved, - sd, idle, all_pinned); + sd, idle, all_pinned, &this_best_prio); class = class->next; } while (class && max_load_move > total_load_moved); @@ -2266,10 +2261,12 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle) { struct sched_class *class; + int this_best_prio = MAX_PRIO; for (class = sched_class_highest; class; class = class->next) if (class->load_balance(this_rq, this_cpu, busiest, - 1, ULONG_MAX, sd, idle, NULL)) + 1, ULONG_MAX, sd, idle, NULL, + &this_best_prio)) return 1; return 0; @@ -3184,8 +3181,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int this_best_prio, int best_prio, int best_prio_seen, - struct rq_iterator *iterator) + int *this_best_prio, struct rq_iterator *iterator) { *load_moved = 0; diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 16511e9e5528..923bed0b0c42 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -929,6 +929,7 @@ static struct task_struct *load_balance_next_fair(void *arg) return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); } +#ifdef CONFIG_FAIR_GROUP_SCHED static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) { struct sched_entity *curr; @@ -942,12 +943,13 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) return p->prio; } +#endif static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_nr_move, unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) + unsigned long max_nr_move, unsigned long max_load_move, + struct sched_domain *sd, enum cpu_idle_type idle, + int *all_pinned, int *this_best_prio) { struct cfs_rq *busy_cfs_rq; unsigned long load_moved, total_nr_moved = 0, nr_moved; @@ -958,10 +960,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, cfs_rq_iterator.next = load_balance_next_fair; for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { +#ifdef CONFIG_FAIR_GROUP_SCHED struct cfs_rq *this_cfs_rq; - long imbalance; + long imbalances; unsigned long maxload; - int this_best_prio, best_prio, best_prio_seen = 0; this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); @@ -975,27 +977,17 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, imbalance /= 2; maxload = min(rem_load_move, imbalance); - this_best_prio = cfs_rq_best_prio(this_cfs_rq); - best_prio = cfs_rq_best_prio(busy_cfs_rq); - - /* - * Enable handling of the case where there is more than one task - * with the best priority. If the current running task is one - * of those with prio==best_prio we know it won't be moved - * and therefore it's safe to override the skip (based on load) - * of any task we find with that prio. - */ - if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se) - best_prio_seen = 1; - + *this_best_prio = cfs_rq_best_prio(this_cfs_rq); +#else +#define maxload rem_load_move +#endif /* pass busy_cfs_rq argument into * load_balance_[start|next]_fair iterators */ cfs_rq_iterator.arg = busy_cfs_rq; nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, maxload, sd, idle, all_pinned, - &load_moved, this_best_prio, best_prio, - best_prio_seen, &cfs_rq_iterator); + &load_moved, this_best_prio, &cfs_rq_iterator); total_nr_moved += nr_moved; max_nr_move -= nr_moved; diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 1d8d9e13d950..dc9e1068911f 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -41,7 +41,7 @@ static unsigned long load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) + int *all_pinned, int *this_best_prio) { return 0; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 2b0626a43cb8..5b559e8c8aa6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -176,26 +176,12 @@ static unsigned long load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned) + int *all_pinned, int *this_best_prio) { - int this_best_prio, best_prio, best_prio_seen = 0; int nr_moved; struct rq_iterator rt_rq_iterator; unsigned long load_moved; - best_prio = sched_find_first_bit(busiest->rt.active.bitmap); - this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap); - - /* - * Enable handling of the case where there is more than one task - * with the best priority. If the current running task is one - * of those with prio==best_prio we know it won't be moved - * and therefore it's safe to override the skip (based on load) - * of any task we find with that prio. - */ - if (busiest->curr->prio == best_prio) - best_prio_seen = 1; - rt_rq_iterator.start = load_balance_start_rt; rt_rq_iterator.next = load_balance_next_rt; /* pass 'busiest' rq argument into @@ -205,8 +191,7 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, max_load_move, sd, idle, all_pinned, &load_moved, - this_best_prio, best_prio, best_prio_seen, - &rt_rq_iterator); + this_best_prio, &rt_rq_iterator); return load_moved; } -- cgit From 5cef9eca3837a8dcf605a360e213c4179a07c41a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:47 +0200 Subject: sched: remove the 'u64 now' parameter from print_cfs_rq() remove the 'u64 now' parameter from print_cfs_rq(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- kernel/sched_debug.c | 4 ++-- kernel/sched_fair.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 513b81c60e87..62ddddb49db3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -139,7 +139,7 @@ struct cfs_rq; extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_set_task(struct task_struct *p); extern void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now); +print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #else static inline void proc_sched_show_task(struct task_struct *p, struct seq_file *m) @@ -149,7 +149,7 @@ static inline void proc_sched_set_task(struct task_struct *p) { } static inline void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) +print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { } #endif diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 8421b9399e10..f977ee53f8ce 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -106,7 +106,7 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) (long long)wait_runtime_rq_sum); } -void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) +void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq); @@ -166,7 +166,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now) P(cpu_load[4]); #undef P - print_cfs_stats(m, cpu, now); + print_cfs_stats(m, cpu); print_rq(m, rq, cpu, now); } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index bcf5fc59e8e9..025ac532b27a 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1108,12 +1108,12 @@ struct sched_class fair_sched_class __read_mostly = { }; #ifdef CONFIG_SCHED_DEBUG -static void print_cfs_stats(struct seq_file *m, int cpu, u64 now) +static void print_cfs_stats(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); struct cfs_rq *cfs_rq; for_each_leaf_cfs_rq(rq, cfs_rq) - print_cfs_rq(m, cpu, cfs_rq, now); + print_cfs_rq(m, cpu, cfs_rq); } #endif -- cgit From fd390f6a04f22fb457d6fd1855964f79536525de Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->enqueue_task() remove the 'u64 now' parameter from ->enqueue_task(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- kernel/sched.c | 2 +- kernel/sched_fair.c | 3 +-- kernel/sched_rt.c | 3 +-- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 62ddddb49db3..b11dedfbab6e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -855,8 +855,7 @@ struct sched_domain; struct sched_class { struct sched_class *next; - void (*enqueue_task) (struct rq *rq, struct task_struct *p, - int wakeup, u64 now); + void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep, u64 now); void (*yield_task) (struct rq *rq, struct task_struct *p); diff --git a/kernel/sched.c b/kernel/sched.c index 49a5fb0cdea0..43ae1566b8fc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -852,7 +852,7 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) { sched_info_queued(p); - p->sched_class->enqueue_task(rq, p, wakeup, now); + p->sched_class->enqueue_task(rq, p, wakeup); p->se.on_rq = 1; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index a11d18861a3c..81db9626b7ed 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -782,8 +782,7 @@ static inline int is_same_group(struct task_struct *curr, struct task_struct *p) * increased. Here we update the fair scheduling stats and * then put the task into the rbtree: */ -static void -enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now) +static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index fa5a46273b79..1edaa99e0d3d 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -25,8 +25,7 @@ static inline void update_curr_rt(struct rq *rq) curr->se.exec_start = rq->clock; } -static void -enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now) +static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) { struct rt_prio_array *array = &rq->rt.active; -- cgit From f02231e51a280f1a0fee4d03ad8f50048e06cced Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->dequeue_task() remove the 'u64 now' parameter from ->dequeue_task(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +-- kernel/sched.c | 2 +- kernel/sched_fair.c | 3 +-- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 3 +-- 5 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b11dedfbab6e..c7815a6b70e0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -856,8 +856,7 @@ struct sched_class { struct sched_class *next; void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup); - void (*dequeue_task) (struct rq *rq, struct task_struct *p, - int sleep, u64 now); + void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); void (*yield_task) (struct rq *rq, struct task_struct *p); void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); diff --git a/kernel/sched.c b/kernel/sched.c index 43ae1566b8fc..e51d75f4b4d7 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -859,7 +859,7 @@ enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) { - p->sched_class->dequeue_task(rq, p, sleep, now); + p->sched_class->dequeue_task(rq, p, sleep); p->se.on_rq = 0; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 81db9626b7ed..fb4d614af2c3 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -800,8 +800,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) * decreased. We remove the task from the rbtree and * update the fair scheduling stats: */ -static void -dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now) +static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index dc9e1068911f..f69e083e0d96 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -25,7 +25,7 @@ static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now) * message if some code attempts to do it: */ static void -dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now) +dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) { spin_unlock_irq(&rq->lock); printk(KERN_ERR "bad: scheduling from the idle thread!\n"); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 1edaa99e0d3d..60591e2512b1 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -36,8 +36,7 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) /* * Adding/removing a task to/from a priority array: */ -static void -dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now) +static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) { struct rt_prio_array *array = &rq->rt.active; -- cgit From fb8d47240246e20f864f0724a23a7220cd1c59ac Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:48 +0200 Subject: sched: remove the 'u64 now' parameter from ->pick_next_task() remove the 'u64 now' parameter from ->pick_next_task(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 4 ++-- kernel/sched_fair.c | 2 +- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c7815a6b70e0..c6ad4071c791 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -861,7 +861,7 @@ struct sched_class { void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); - struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); + struct task_struct * (*pick_next_task) (struct rq *rq); void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, diff --git a/kernel/sched.c b/kernel/sched.c index e51d75f4b4d7..b67a288a0f1f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3410,14 +3410,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) * the fair class we can call that function directly: */ if (likely(rq->nr_running == rq->cfs.nr_running)) { - p = fair_sched_class.pick_next_task(rq, now); + p = fair_sched_class.pick_next_task(rq); if (likely(p)) return p; } class = sched_class_highest; for ( ; ; ) { - p = class->pick_next_task(rq, now); + p = class->pick_next_task(rq); if (p) return p; /* diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index fb4d614af2c3..0b23aaf074fa 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -859,7 +859,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p) __check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran); } -static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now) +static struct task_struct *pick_next_task_fair(struct rq *rq) { struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index f69e083e0d96..9f4c28f858fe 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -13,7 +13,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p) resched_task(rq->idle); } -static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now) +static struct task_struct *pick_next_task_idle(struct rq *rq) { schedstat_inc(rq, sched_goidle); diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 60591e2512b1..c0b0d6237bb6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -73,7 +73,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) resched_task(rq->curr); } -static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now) +static struct task_struct *pick_next_task_rt(struct rq *rq) { struct rt_prio_array *array = &rq->rt.active; struct task_struct *next; -- cgit From 31ee529cc2254e8b62880535ec8f21a4c5e1c091 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from ->put_prev_task() remove the 'u64 now' parameter from ->put_prev_task(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 2 +- kernel/sched_fair.c | 2 +- kernel/sched_idletask.c | 2 +- kernel/sched_rt.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c6ad4071c791..9afb66a49358 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -862,7 +862,7 @@ struct sched_class { void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); struct task_struct * (*pick_next_task) (struct rq *rq); - void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); + void (*put_prev_task) (struct rq *rq, struct task_struct *p); unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, struct rq *busiest, diff --git a/kernel/sched.c b/kernel/sched.c index 4f9f9e9d7265..664440160485 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3470,7 +3470,7 @@ need_resched_nonpreemptible: if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); - prev->sched_class->put_prev_task(rq, prev, now); + prev->sched_class->put_prev_task(rq, prev); next = pick_next_task(rq, prev); sched_info_switch(prev, next); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0b23aaf074fa..103327b4275d 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -878,7 +878,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) /* * Account for a descheduled task: */ -static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now) +static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) { struct sched_entity *se = &prev->se; struct cfs_rq *cfs_rq; diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 9f4c28f858fe..3503fb2d9f96 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c @@ -33,7 +33,7 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep) spin_lock_irq(&rq->lock); } -static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now) +static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c0b0d6237bb6..dcdcad632fd9 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -92,7 +92,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) return next; } -static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now) +static void put_prev_task_rt(struct rq *rq, struct task_struct *p) { update_curr_rt(rq); p->se.exec_start = 0; -- cgit From ee0827d8b5271094380410cf21d8c48c109a773a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 9 Aug 2007 11:16:49 +0200 Subject: sched: remove the 'u64 now' parameter from ->task_new() remove the 'u64 now' parameter from ->task_new(). ( identity transformation that causes no change in functionality. ) Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- kernel/sched.c | 2 +- kernel/sched_fair.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9afb66a49358..682ef87da6eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -872,7 +872,7 @@ struct sched_class { void (*set_curr_task) (struct rq *rq); void (*task_tick) (struct rq *rq, struct task_struct *p); - void (*task_new) (struct rq *rq, struct task_struct *p, u64 now); + void (*task_new) (struct rq *rq, struct task_struct *p); }; struct load_weight { diff --git a/kernel/sched.c b/kernel/sched.c index 664440160485..0619178efa01 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1672,7 +1672,7 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * Let the scheduling class do new task startup * management (if any): */ - p->sched_class->task_new(rq, p, now); + p->sched_class->task_new(rq, p); inc_nr_running(p, rq, now); } check_preempt_curr(rq, p); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 103327b4275d..4a2cbde1057f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1020,7 +1020,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr) * monopolize the CPU. Note: the parent runqueue is locked, * the child is not running yet. */ -static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now) +static void task_new_fair(struct rq *rq, struct task_struct *p) { struct cfs_rq *cfs_rq = task_cfs_rq(p); struct sched_entity *se = &p->se; -- cgit From 6a0ed91e361a93ee1efb4c20c4967024ed2a8dd7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 7 Aug 2007 23:43:14 +0300 Subject: hexdump: use const notation Trivial fix: mark the buffer to hexdump as const so callers could avoid casting their const buffers when calling print_hex_dump(). The patch is really trivial and I suggest to consider it as a fix (it fixes GCC warnings) and push it to current tree. Signed-off-by: Artem Bityutskiy Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- lib/hexdump.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4300bb462d29..b4f5b81b4257 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -224,7 +224,7 @@ extern void hex_dump_to_buffer(const void *buf, size_t len, char *linebuf, size_t linebuflen, bool ascii); extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, - void *buf, size_t len, bool ascii); + const void *buf, size_t len, bool ascii); extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, void *buf, size_t len); #define hex_asc(x) "0123456789abcdef"[x] diff --git a/lib/hexdump.c b/lib/hexdump.c index 473f5aed6cae..16f2e2935e87 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -145,9 +145,9 @@ EXPORT_SYMBOL(hex_dump_to_buffer); */ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, - void *buf, size_t len, bool ascii) + const void *buf, size_t len, bool ascii) { - u8 *ptr = buf; + const u8 *ptr = buf; int i, linelen, remaining = len; unsigned char linebuf[200]; -- cgit From ec05b297f91a443aa26b74059b573bfad49c9ebb Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 30 Jul 2007 08:24:27 +0200 Subject: [PATCH] remove mm/filemap.c:file_send_actor() This patch removes the no longer used file_send_actor(). Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- include/linux/fs.h | 1 - mm/filemap.c | 20 -------------------- 2 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6bf139562947..16421f662a7a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1659,7 +1659,6 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); -extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); diff --git a/mm/filemap.c b/mm/filemap.c index 6cf700d41844..50021a60d01f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1218,26 +1218,6 @@ out: } EXPORT_SYMBOL(generic_file_aio_read); -int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) -{ - ssize_t written; - unsigned long count = desc->count; - struct file *file = desc->arg.data; - - if (size > count) - size = count; - - written = file->f_op->sendpage(file, page, offset, - size, &file->f_pos, sizeerror = written; - written = 0; - } - desc->count = count - written; - desc->written += written; - return written; -} - static ssize_t do_readahead(struct address_space *mapping, struct file *filp, unsigned long index, unsigned long nr) -- cgit From c7149d6bce2561aeaa48caaa1700aa8b3b22008f Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Tue, 7 Aug 2007 15:30:23 +0200 Subject: Fix remap handling by blktrace This patch provides more information concerning REMAP operations on block IOs. The additional information provides clearer details at the user level, and supports post-processing analysis in btt. o Adds in partition remaps on the same device. o Fixed up the remap information in DM to be in the right order o Sent up mapped-from and mapped-to device information Signed-off-by: Alan D. Brunelle Signed-off-by: Jens Axboe --- block/ll_rw_blk.c | 4 ++++ drivers/md/dm.c | 4 ++-- include/linux/blktrace_api.h | 3 ++- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 8c2caff87cc3..a15845c164f2 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3047,6 +3047,10 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; + + blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, + bdev->bd_dev, bio->bi_sector, + bio->bi_sector - p->start_sect); } } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 141ff9fa296e..2120155929a6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -580,8 +580,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, /* the bio has been remapped so dispatch it */ blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, - tio->io->bio->bi_bdev->bd_dev, sector, - clone->bi_sector); + tio->io->bio->bi_bdev->bd_dev, + clone->bi_sector, sector); generic_make_request(clone); } else if (r < 0 || r == DM_MAPIO_REQUEUE) { diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 90874a5d7d78..7b5d56b82b59 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -105,7 +105,7 @@ struct blk_io_trace { */ struct blk_io_trace_remap { __be32 device; - u32 __pad; + __be32 device_from; __be64 sector; }; @@ -272,6 +272,7 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, return; r.device = cpu_to_be32(dev); + r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); r.sector = cpu_to_be64(to); __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); -- cgit From 02a5e0acb3cb85d80d0fe834e366d38a92bbaa22 Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 11 Aug 2007 22:34:32 +0200 Subject: BLOCK: Hide the contents of linux/bio.h if CONFIG_BLOCK=n Hide the contents of linux/bio.h if CONFIG_BLOCK=n as there shouldn't be compiled code that uses it. Signed-off-by: David Howells Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 4d85262b4fa4..1ddef34f43c3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -24,6 +24,8 @@ #include #include +#ifdef CONFIG_BLOCK + /* Platforms may set this to teach the BIO layer about IOMMU hardware. */ #include @@ -361,4 +363,5 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +#endif /* CONFIG_BLOCK */ #endif /* __LINUX_BIO_H */ -- cgit From 76ceb2f90f6efb6d1f3d88f855428bff947a3483 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 10 Aug 2007 13:00:55 -0700 Subject: Remove unused struct proc_dir_entry::set After /proc/sys rewrite it was left unused. Signed-off-by: Alexey Dobriyan Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 28e3664fdf1b..cd13a78c5db8 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -75,7 +75,6 @@ struct proc_dir_entry { write_proc_t *write_proc; atomic_t count; /* use count */ int deleted; /* delete flag */ - void *set; int pde_users; /* number of callers into module in progress */ spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ struct completion *pde_unload_completion; -- cgit From 42fd552e8647316757ded0176466c41d17934dcf Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 10 Aug 2007 13:01:05 -0700 Subject: fix serial buffer memory leak Patch c5c34d4862e18ef07c1276d233507f540fb5a532 (tty: flush flip buffer on ldisc input queue flush) introduces a race condition which can lead to memory leaks. The problem can be triggered when tcflush() is called when data are being pushed to the line discipline driver by flush_to_ldisc(). flush_to_ldisc() releases tty->buf.lock when calling the line discipline receive_buf function. At that poing tty_buffer_flush() kicks in and sets both tty->buf.head and tty->buf.tail to NULL. When flush_to_ldisc() finishes, it restores tty->buf.head but doesn't touch tty->buf.tail. This corrups the buffer queue, and the next call to tty_buffer_request_room() will allocate a new buffer and overwrite tty->buf.head. The previous buffer is then lost forever without being released. (Thanks to Laurent for the above text, for finding, disgnosing and reporting the bug) - Use tty->flags bits for the flush status. - Wait for the flag to clear again before returning - Fix the doc error noted - Fix flush of empty queue leaving stale flushpending [akpm@linux-foundation.org: cleanup] Signed-off-by: Alan Cox Acked-by: Paul Fulghum Cc: Laurent Pinchart Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 56 +++++++++++++++++++++++++++++++++++++++++++++------ include/linux/tty.h | 2 ++ 2 files changed, 52 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index de37ebc3a4cf..51ea93cab6c4 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -369,25 +369,54 @@ static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b) } /** - * tty_buffer_flush - flush full tty buffers + * __tty_buffer_flush - flush full tty buffers * @tty: tty to flush * - * flush all the buffers containing receive data + * flush all the buffers containing receive data. Caller must + * hold the buffer lock and must have ensured no parallel flush to + * ldisc is running. * - * Locking: none + * Locking: Caller must hold tty->buf.lock */ -static void tty_buffer_flush(struct tty_struct *tty) +static void __tty_buffer_flush(struct tty_struct *tty) { struct tty_buffer *thead; - unsigned long flags; - spin_lock_irqsave(&tty->buf.lock, flags); while((thead = tty->buf.head) != NULL) { tty->buf.head = thead->next; tty_buffer_free(tty, thead); } tty->buf.tail = NULL; +} + +/** + * tty_buffer_flush - flush full tty buffers + * @tty: tty to flush + * + * flush all the buffers containing receive data. If the buffer is + * being processed by flush_to_ldisc then we defer the processing + * to that function + * + * Locking: none + */ + +static void tty_buffer_flush(struct tty_struct *tty) +{ + unsigned long flags; + spin_lock_irqsave(&tty->buf.lock, flags); + + /* If the data is being pushed to the tty layer then we can't + process it here. Instead set a flag and the flush_to_ldisc + path will process the flush request before it exits */ + if (test_bit(TTY_FLUSHING, &tty->flags)) { + set_bit(TTY_FLUSHPENDING, &tty->flags); + spin_unlock_irqrestore(&tty->buf.lock, flags); + wait_event(tty->read_wait, + test_bit(TTY_FLUSHPENDING, &tty->flags) == 0); + return; + } else + __tty_buffer_flush(tty); spin_unlock_irqrestore(&tty->buf.lock, flags); } @@ -3594,6 +3623,7 @@ static void flush_to_ldisc(struct work_struct *work) return; spin_lock_irqsave(&tty->buf.lock, flags); + set_bit(TTY_FLUSHING, &tty->flags); /* So we know a flush is running */ head = tty->buf.head; if (head != NULL) { tty->buf.head = NULL; @@ -3607,6 +3637,11 @@ static void flush_to_ldisc(struct work_struct *work) tty_buffer_free(tty, tbuf); continue; } + /* Ldisc or user is trying to flush the buffers + we are feeding to the ldisc, stop feeding the + line discipline as we want to empty the queue */ + if (test_bit(TTY_FLUSHPENDING, &tty->flags)) + break; if (!tty->receive_room) { schedule_delayed_work(&tty->buf.work, 1); break; @@ -3620,8 +3655,17 @@ static void flush_to_ldisc(struct work_struct *work) disc->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); } + /* Restore the queue head */ tty->buf.head = head; } + /* We may have a deferred request to flush the input buffer, + if so pull the chain under the lock and empty the queue */ + if (test_bit(TTY_FLUSHPENDING, &tty->flags)) { + __tty_buffer_flush(tty); + clear_bit(TTY_FLUSHPENDING, &tty->flags); + wake_up(&tty->read_wait); + } + clear_bit(TTY_FLUSHING, &tty->flags); spin_unlock_irqrestore(&tty->buf.lock, flags); tty_ldisc_deref(disc); diff --git a/include/linux/tty.h b/include/linux/tty.h index 691a1748d9d2..6570719eafdf 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -274,6 +274,8 @@ struct tty_struct { #define TTY_PTY_LOCK 16 /* pty private */ #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ +#define TTY_FLUSHING 19 /* Flushing to ldisc in progress */ +#define TTY_FLUSHPENDING 20 /* Queued buffer flush pending */ #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) -- cgit From eb9a9a56316f4fea98ee32873ccbf7098b7bd69b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 10 Aug 2007 13:01:07 -0700 Subject: hex_dump: add missing "const" qualifiers Add missing "const" qualifiers to the print_hex_dump_bytes() library routines. (akpm: rumoured to fix some compile warning somewhere) Signed-off-by: Alan Stern Cc: Artem Bityutskiy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- lib/hexdump.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b4f5b81b4257..f592df74b3cf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -226,7 +226,7 @@ extern void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, - void *buf, size_t len); + const void *buf, size_t len); #define hex_asc(x) "0123456789abcdef"[x] #ifdef DEBUG diff --git a/lib/hexdump.c b/lib/hexdump.c index 16f2e2935e87..bd5edaeaa80b 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -189,7 +189,7 @@ EXPORT_SYMBOL(print_hex_dump); * rowsize of 16, groupsize of 1, and ASCII output included. */ void print_hex_dump_bytes(const char *prefix_str, int prefix_type, - void *buf, size_t len) + const void *buf, size_t len) { print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, buf, len, 1); -- cgit From 844add7abca0d10e9733fc16119e53cb4c1987b4 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Fri, 10 Aug 2007 13:01:07 -0700 Subject: RCU: Remove prototype for nonexistent function synchronize_idle() synchronize_idle() sounds like an interesting function, but we don't actually have it, so don't prototype it. Introduced in commit 9b06e818985d139fd9e82c28297f7744e1b484e1, in 2005. Signed-off-by: Josh Triplett Acked-by: Paul E. McKenney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c6b7485eac7c..fe17d7d750c2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -281,7 +281,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head, extern void FASTCALL(call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head))); extern void synchronize_rcu(void); -void synchronize_idle(void); extern void rcu_barrier(void); #endif /* __KERNEL__ */ -- cgit From 6ddfca9548d8ecc26096a30667423ba919109533 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 10 Aug 2007 13:01:09 -0700 Subject: timer: remove clockevents_unregister_notifier I find a function(clockevents_unregister_notifier) which is not called by anything in tree. Signed-off-by: Miao Xie Acked-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clockchips.h | 1 - kernel/time/clockevents.c | 10 ---------- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index e0bd46eb2414..def5a659b8a5 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -123,7 +123,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old, extern void clockevents_set_mode(struct clock_event_device *dev, enum clock_event_mode mode); extern int clockevents_register_notifier(struct notifier_block *nb); -extern void clockevents_unregister_notifier(struct notifier_block *nb); extern int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, ktime_t now); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 2ad1c37b8dfe..41dd3105ce7f 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -113,16 +113,6 @@ int clockevents_register_notifier(struct notifier_block *nb) return ret; } -/** - * clockevents_unregister_notifier - unregister a clock events change listener - */ -void clockevents_unregister_notifier(struct notifier_block *nb) -{ - spin_lock(&clockevents_lock); - raw_notifier_chain_unregister(&clockevents_chain, nb); - spin_unlock(&clockevents_lock); -} - /* * Notify about a clock event change. Called with clockevents_lock * held. -- cgit From 3f3f7b74a7749c3a669ca146270c07568b548665 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 10 Aug 2007 22:31:01 +0200 Subject: x86_64: Don't mark __exitcall as __cold gcc currently doesn't support attributes on types, so we can't use it function pointers. This avoids some warnings on a gcc 4.3 build. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/init.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 1a4a283d19a9..74b1f43bf982 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -43,7 +43,7 @@ #define __init __attribute__ ((__section__ (".init.text"))) __cold #define __initdata __attribute__ ((__section__ (".init.data"))) #define __exitdata __attribute__ ((__section__(".exit.data"))) -#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) __cold +#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) /* modpost check for section mismatches during the kernel build. * A section mismatch happens when there are references from a -- cgit From 6707de00fdec3e3225192fe3dcd21323a8936b1f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 12 Aug 2007 18:08:19 +0200 Subject: sched: make global code static This patch makes the following needlessly global code static: - arch_reinit_sched_domains() - struct attr_sched_mc_power_savings - struct attr_sched_smt_power_savings Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 2 -- kernel/sched.c | 46 +++++++++++++++++++++++----------------------- 2 files changed, 23 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c2236bbff412..1d5ded0836ee 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -41,8 +41,6 @@ extern void cpu_remove_sysdev_attr(struct sysdev_attribute *attr); extern int cpu_add_sysdev_attr_group(struct attribute_group *attrs); extern void cpu_remove_sysdev_attr_group(struct attribute_group *attrs); -extern struct sysdev_attribute attr_sched_mc_power_savings; -extern struct sysdev_attribute attr_sched_smt_power_savings; extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls); #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/sched.c b/kernel/sched.c index 6247e4a8350f..c02659f1bd09 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6328,7 +6328,7 @@ int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) } #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) -int arch_reinit_sched_domains(void) +static int arch_reinit_sched_domains(void) { int err; @@ -6357,24 +6357,6 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) return ret ? ret : count; } -int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) -{ - int err = 0; - -#ifdef CONFIG_SCHED_SMT - if (smt_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_smt_power_savings.attr); -#endif -#ifdef CONFIG_SCHED_MC - if (!err && mc_capable()) - err = sysfs_create_file(&cls->kset.kobj, - &attr_sched_mc_power_savings.attr); -#endif - return err; -} -#endif - #ifdef CONFIG_SCHED_MC static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page) { @@ -6385,8 +6367,8 @@ static ssize_t sched_mc_power_savings_store(struct sys_device *dev, { return sched_power_savings_store(buf, count, 0); } -SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, - sched_mc_power_savings_store); +static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show, + sched_mc_power_savings_store); #endif #ifdef CONFIG_SCHED_SMT @@ -6399,8 +6381,26 @@ static ssize_t sched_smt_power_savings_store(struct sys_device *dev, { return sched_power_savings_store(buf, count, 1); } -SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, - sched_smt_power_savings_store); +static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show, + sched_smt_power_savings_store); +#endif + +int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls) +{ + int err = 0; + +#ifdef CONFIG_SCHED_SMT + if (smt_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_smt_power_savings.attr); +#endif +#ifdef CONFIG_SCHED_MC + if (!err && mc_capable()) + err = sysfs_create_file(&cls->kset.kobj, + &attr_sched_mc_power_savings.attr); +#endif + return err; +} #endif /* -- cgit From 7f353bf29e162459f2f1e2ca25e41011fae65241 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Aug 2007 15:47:58 -0700 Subject: [NET]: Share correct feature code between bridging and bonding http://bugzilla.kernel.org/show_bug.cgi?id=8797 shows that the bonding driver may produce bogus combinations of the checksum flags and SG/TSO. For example, if you bond devices with NETIF_F_HW_CSUM and NETIF_F_IP_CSUM you'll end up with a bonding device that has neither flag set. If both have TSO then this produces an illegal combination. The bridge device on the other hand has the correct code to deal with this. In fact, the same code can be used for both. So this patch moves that logic into net/core/dev.c and uses it for both bonding and bridging. In the process I've made small adjustments such as only setting GSO_ROBUST if at least one constituent device supports it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 34 +++++++++++++--------------------- include/linux/netdevice.h | 2 ++ net/bridge/br_device.c | 2 +- net/bridge/br_if.c | 36 ++++-------------------------------- net/core/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 070b78d959cc..1afda3230def 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1202,43 +1202,35 @@ static int bond_sethwaddr(struct net_device *bond_dev, return 0; } -#define BOND_INTERSECT_FEATURES \ - (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO) +#define BOND_VLAN_FEATURES \ + (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \ + NETIF_F_HW_VLAN_FILTER) /* * Compute the common dev->feature set available to all slaves. Some - * feature bits are managed elsewhere, so preserve feature bits set on - * master device that are not part of the examined set. + * feature bits are managed elsewhere, so preserve those feature bits + * on the master device. */ static int bond_compute_features(struct bonding *bond) { - unsigned long features = BOND_INTERSECT_FEATURES; struct slave *slave; struct net_device *bond_dev = bond->dev; + unsigned long features = bond_dev->features; unsigned short max_hard_header_len = ETH_HLEN; int i; + features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); + features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | + NETIF_F_GSO_MASK | NETIF_F_NO_CSUM; + bond_for_each_slave(bond, slave, i) { - features &= (slave->dev->features & BOND_INTERSECT_FEATURES); + features = netdev_compute_features(features, + slave->dev->features); if (slave->dev->hard_header_len > max_hard_header_len) max_hard_header_len = slave->dev->hard_header_len; } - if ((features & NETIF_F_SG) && - !(features & NETIF_F_ALL_CSUM)) - features &= ~NETIF_F_SG; - - /* - * features will include NETIF_F_TSO (NETIF_F_UFO) iff all - * slave devices support NETIF_F_TSO (NETIF_F_UFO), which - * implies that all slaves also support scatter-gather - * (NETIF_F_SG), which implies that features also includes - * NETIF_F_SG. So no need to check whether we have an - * illegal combination of NETIF_F_{TSO,UFO} and - * !NETIF_F_SG - */ - - features |= (bond_dev->features & ~BOND_INTERSECT_FEATURES); + features |= (bond_dev->features & BOND_VLAN_FEATURES); bond_dev->features = features; bond_dev->hard_header_len = max_hard_header_len; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4a616d73cc25..e679b2751665 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1131,6 +1131,8 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern void linkwatch_run_queue(void); +extern int netdev_compute_features(unsigned long all, unsigned long one); + static inline int net_gso_ok(int features, int gso_type) { int feature = gso_type << NETIF_F_GSO_SHIFT; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 5e1892d8d874..0eded176ce99 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -179,5 +179,5 @@ void br_dev_setup(struct net_device *dev) dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST; + NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b40dada002bf..749f0e8f541d 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -349,43 +349,15 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features, checksum; + unsigned long features; - checksum = br->feature_mask & NETIF_F_ALL_CSUM ? NETIF_F_NO_CSUM : 0; - features = br->feature_mask & ~NETIF_F_ALL_CSUM; + features = br->feature_mask; list_for_each_entry(p, &br->port_list, list) { - unsigned long feature = p->dev->features; - - /* if device needs checksumming, downgrade to hw checksumming */ - if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM)) - checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - - /* if device can't do all checksum, downgrade to ipv4/ipv6 */ - if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM)) - checksum ^= NETIF_F_HW_CSUM - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - - if (checksum & NETIF_F_IPV6_CSUM && !(feature & NETIF_F_IPV6_CSUM)) - checksum &= ~NETIF_F_IPV6_CSUM; - - if (!(feature & NETIF_F_IP_CSUM)) - checksum = 0; - - if (feature & NETIF_F_GSO) - feature |= NETIF_F_GSO_SOFTWARE; - feature |= NETIF_F_GSO; - - features &= feature; + features = netdev_compute_features(features, p->dev->features); } - if (!(checksum & NETIF_F_ALL_CSUM)) - features &= ~NETIF_F_SG; - if (!(features & NETIF_F_SG)) - features &= ~NETIF_F_GSO_MASK; - - br->dev->features = features | checksum | NETIF_F_LLTX | - NETIF_F_GSO_ROBUST; + br->dev->features = features; } /* called with RTNL */ diff --git a/net/core/dev.c b/net/core/dev.c index 6cc8a70350ac..a76021c71207 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3993,6 +3993,45 @@ static int __init netdev_dma_register(void) static int __init netdev_dma_register(void) { return -ENODEV; } #endif /* CONFIG_NET_DMA */ +/** + * netdev_compute_feature - compute conjunction of two feature sets + * @all: first feature set + * @one: second feature set + * + * Computes a new feature set after adding a device with feature set + * @one to the master device with current feature set @all. Returns + * the new feature set. + */ +int netdev_compute_features(unsigned long all, unsigned long one) +{ + /* if device needs checksumming, downgrade to hw checksumming */ + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) + all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; + + /* if device can't do all checksum, downgrade to ipv4/ipv6 */ + if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) + all ^= NETIF_F_HW_CSUM + | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + + if (one & NETIF_F_GSO) + one |= NETIF_F_GSO_SOFTWARE; + one |= NETIF_F_GSO; + + /* If even one device supports robust GSO, enable it for all. */ + if (one & NETIF_F_GSO_ROBUST) + all |= NETIF_F_GSO_ROBUST; + + all &= one | NETIF_F_LLTX; + + if (!(all & NETIF_F_ALL_CSUM)) + all &= ~NETIF_F_SG; + if (!(all & NETIF_F_SG)) + all &= ~NETIF_F_GSO_MASK; + + return all; +} +EXPORT_SYMBOL(netdev_compute_features); + /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not -- cgit From 118142080a75fc1ce599c73b7894a71b4813828e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 16 Aug 2007 16:27:15 +0200 Subject: Cross-compilation between e.g. i386 -> 64bit could break -> work around it Adrian Bunk: scripts/mod/file2alias.c is compiled with HOSTCC and ensures that kernel_ulong_t is correct, but it can't cope with different padding on different architectures. Signed-off-by: Thomas Renninger Signed-off-by: Tony Luck --- include/linux/mod_devicetable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 2ada8ee316b3..4dc5fa8be781 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -159,7 +159,8 @@ struct ap_device_id { #define AP_DEVICE_ID_MATCH_DEVICE_TYPE 0x01 -#define ACPI_ID_LEN 9 +#define ACPI_ID_LEN 16 /* only 9 bytes needed here, 16 bytes are used */ + /* to workaround crosscompile issues */ struct acpi_device_id { __u8 id[ACPI_ID_LEN]; -- cgit From c9b0ee2c2af33c2ca722aa05bbcb604487134e4c Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 31 Jul 2007 12:42:22 -0300 Subject: V4L/DVB (5968): videodev2.h: remove superfluous FBUF GLOBAL_INV_ALPHA support There is no need for a global inverted alpha capability since all the application has to do is to pass '255-alpha' as the global alpha value. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index c66c8a3410b9..ae9b24c12f6a 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -617,7 +617,6 @@ struct v4l2_framebuffer #define V4L2_FBUF_CAP_LOCAL_ALPHA 0x0010 #define V4L2_FBUF_CAP_GLOBAL_ALPHA 0x0020 #define V4L2_FBUF_CAP_LOCAL_INV_ALPHA 0x0040 -#define V4L2_FBUF_CAP_GLOBAL_INV_ALPHA 0x0080 /* Flags for the 'flags' field. */ #define V4L2_FBUF_FLAG_PRIMARY 0x0001 #define V4L2_FBUF_FLAG_OVERLAY 0x0002 @@ -625,7 +624,6 @@ struct v4l2_framebuffer #define V4L2_FBUF_FLAG_LOCAL_ALPHA 0x0008 #define V4L2_FBUF_FLAG_GLOBAL_ALPHA 0x0010 #define V4L2_FBUF_FLAG_LOCAL_INV_ALPHA 0x0020 -#define V4L2_FBUF_FLAG_GLOBAL_INV_ALPHA 0x0040 struct v4l2_clip { -- cgit From 1116fae5fdfa80c6744a9b5d75fb3ef687a69b19 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 20 Aug 2007 22:42:55 +0200 Subject: ide: config_drive_for_dma() fixes * Add DMA blacklist checking (->ide_dma_on check probably can go now). * Add ->atapi_dma flag checking and remove no longer needed ns87415_ide_dma_check() from ns87415 host driver. * Remove now needless __ide_dma_check() wrapper and symbol export. * Check drive->autodma instead of hwif->autodma (there should be no changes in behavior as all users of config_drive_for_dma() set both ->autodma flags). Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-dma.c | 26 ++++++++++---------------- drivers/ide/pci/ns87415.c | 9 --------- include/linux/ide.h | 1 - 3 files changed, 10 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 5fe1d72ab451..865a2740a6e3 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -349,9 +349,17 @@ EXPORT_SYMBOL_GPL(ide_destroy_dmatable); static int config_drive_for_dma (ide_drive_t *drive) { + ide_hwif_t *hwif = drive->hwif; struct hd_driveid *id = drive->id; - if ((id->capability & 1) && drive->hwif->autodma) { + /* consult the list of known "bad" drives */ + if (__ide_dma_bad_drive(drive)) + return -1; + + if (drive->media != ide_disk && hwif->atapi_dma == 0) + return -1; + + if ((id->capability & 1) && drive->autodma) { /* * Enable DMA on any drive that has * UltraDMA (mode 0/1/2/3/4/5/6) enabled @@ -513,20 +521,6 @@ int __ide_dma_on (ide_drive_t *drive) EXPORT_SYMBOL(__ide_dma_on); -/** - * __ide_dma_check - check DMA setup - * @drive: drive to check - * - * Don't use - due for extermination - */ - -int __ide_dma_check (ide_drive_t *drive) -{ - return config_drive_for_dma(drive); -} - -EXPORT_SYMBOL(__ide_dma_check); - /** * ide_dma_setup - begin a DMA phase * @drive: target device @@ -1021,7 +1015,7 @@ void ide_setup_dma (ide_hwif_t *hwif, unsigned long dma_base, unsigned int num_p if (!hwif->dma_host_on) hwif->dma_host_on = &ide_dma_host_on; if (!hwif->ide_dma_check) - hwif->ide_dma_check = &__ide_dma_check; + hwif->ide_dma_check = &config_drive_for_dma; if (!hwif->dma_setup) hwif->dma_setup = &ide_dma_setup; if (!hwif->dma_exec_cmd) diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c index 09941f37d635..465c935fdf25 100644 --- a/drivers/ide/pci/ns87415.c +++ b/drivers/ide/pci/ns87415.c @@ -187,14 +187,6 @@ static int ns87415_ide_dma_setup(ide_drive_t *drive) return 1; } -static int ns87415_ide_dma_check (ide_drive_t *drive) -{ - if (drive->media != ide_disk) - return -1; - - return __ide_dma_check(drive); -} - static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) { struct pci_dev *dev = hwif->pci_dev; @@ -266,7 +258,6 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) outb(0x60, hwif->dma_status); hwif->dma_setup = &ns87415_ide_dma_setup; - hwif->ide_dma_check = &ns87415_ide_dma_check; hwif->ide_dma_end = &ns87415_ide_dma_end; if (!noautodma) diff --git a/include/linux/ide.h b/include/linux/ide.h index d71d0121b7f9..7e15e0870290 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1312,7 +1312,6 @@ void ide_dma_host_off(ide_drive_t *); void ide_dma_off_quietly(ide_drive_t *); void ide_dma_host_on(ide_drive_t *); extern int __ide_dma_on(ide_drive_t *); -extern int __ide_dma_check(ide_drive_t *); extern int ide_dma_setup(ide_drive_t *); extern void ide_dma_start(ide_drive_t *); extern int __ide_dma_end(ide_drive_t *); -- cgit From a5b7e70d787f528386eda025d3e38f545017f241 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 20 Aug 2007 22:42:56 +0200 Subject: ide: add cable detection for early UDMA66 devices (take 3) * Move ide_in_drive_list() from ide-dma.c to ide-iops.c. * Add ivb_list[] table for listening early UDMA66 devices which don't conform to ATA4 standard wrt cable detection (bit14 is zero, only bit13 is valid) and use only device side cable detection for them since host side cable detection may be unreliable. * Add model "QUANTUM FIREBALLlct10 05" with firwmare "A03.0900" to the list (from Craig's bugreport). v2: * Improve kernel message basing on suggestion from Sergei. v3: * Don't print kernel message when no device side cable detection is done, plus some minor fixes. (Noticed by Sergei) Thanks to Craig for testing this patch. Cc: Craig Block Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-dma.c | 19 ------------------- drivers/ide/ide-iops.c | 39 ++++++++++++++++++++++++++++++++++++--- include/linux/ide.h | 3 ++- 3 files changed, 38 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 865a2740a6e3..ff644a5e12cd 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -134,25 +134,6 @@ static const struct drive_list_entry drive_blacklist [] = { }; -/** - * ide_in_drive_list - look for drive in black/white list - * @id: drive identifier - * @drive_table: list to inspect - * - * Look for a drive in the blacklist and the whitelist tables - * Returns 1 if the drive is found in the table. - */ - -int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table) -{ - for ( ; drive_table->id_model ; drive_table++) - if ((!strcmp(drive_table->id_model, id->model)) && - (!drive_table->id_firmware || - strstr(id->fw_rev, drive_table->id_firmware))) - return 1; - return 0; -} - /** * ide_dma_intr - IDE DMA interrupt handler * @drive: the drive the interrupt is for diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 92578b6832e9..fe2a69fed72b 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -565,6 +565,34 @@ int ide_wait_stat (ide_startstop_t *startstop, ide_drive_t *drive, u8 good, u8 b EXPORT_SYMBOL(ide_wait_stat); +/** + * ide_in_drive_list - look for drive in black/white list + * @id: drive identifier + * @drive_table: list to inspect + * + * Look for a drive in the blacklist and the whitelist tables + * Returns 1 if the drive is found in the table. + */ + +int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table) +{ + for ( ; drive_table->id_model; drive_table++) + if ((!strcmp(drive_table->id_model, id->model)) && + (!drive_table->id_firmware || + strstr(id->fw_rev, drive_table->id_firmware))) + return 1; + return 0; +} + +/* + * Early UDMA66 devices don't set bit14 to 1, only bit13 is valid. + * We list them here and depend on the device side cable detection for them. + */ +static const struct drive_list_entry ivb_list[] = { + { "QUANTUM FIREBALLlct10 05" , "A03.0900" }, + { NULL , NULL } +}; + /* * All hosts that use the 80c ribbon must use! * The name is derived from upper byte of word 93 and the 80c ribbon. @@ -573,11 +601,16 @@ u8 eighty_ninty_three (ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; struct hd_driveid *id = drive->id; + int ivb = ide_in_drive_list(id, ivb_list); if (hwif->cbl == ATA_CBL_PATA40_SHORT) return 1; - if (hwif->cbl != ATA_CBL_PATA80) + if (ivb) + printk(KERN_DEBUG "%s: skipping word 93 validity check\n", + drive->name); + + if (hwif->cbl != ATA_CBL_PATA80 && !ivb) goto no_80w; /* Check for SATA but only if we are ATA5 or higher */ @@ -587,11 +620,11 @@ u8 eighty_ninty_three (ide_drive_t *drive) /* * FIXME: * - change master/slave IDENTIFY order - * - force bit13 (80c cable present) check + * - force bit13 (80c cable present) check also for !ivb devices * (unless the slave device is pre-ATA3) */ #ifndef CONFIG_IDEDMA_IVB - if (id->hw_config & 0x4000) + if ((id->hw_config & 0x4000) || (ivb && (id->hw_config & 0x2000))) #else if (id->hw_config & 0x6000) #endif diff --git a/include/linux/ide.h b/include/linux/ide.h index 7e15e0870290..c792b4fd1588 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1285,13 +1285,14 @@ void ide_init_sg_cmd(ide_drive_t *, struct request *); #define BAD_DMA_DRIVE 0 #define GOOD_DMA_DRIVE 1 -#ifdef CONFIG_BLK_DEV_IDEDMA struct drive_list_entry { const char *id_model; const char *id_firmware; }; int ide_in_drive_list(struct hd_driveid *, const struct drive_list_entry *); + +#ifdef CONFIG_BLK_DEV_IDEDMA int __ide_dma_bad_drive(ide_drive_t *); int __ide_dma_good_drive(ide_drive_t *); u8 ide_max_dma_mode(ide_drive_t *); -- cgit From 15f6ddc7d9cf96f2ee88897c7164198ed6e45a77 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 21 Aug 2007 19:15:31 -0500 Subject: [POWERPC] Fix PCI Device ID for MPC8544/8533 processors The initial user manuals for MPC8544/8533 had some issues with properly documenting the device IDs for MPC8544/8533. These processors are almost identical and both show up on the reference boards. Fix up the quirks for PCIe support to handle MPC8533/E. Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 2 ++ include/linux/pci_ids.h | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 9fb0ce5c7176..114c90f8f560 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -251,6 +251,8 @@ DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8568E, quirk_fsl_pcie_transpare DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8568, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8567E, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8567, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8533E, quirk_fsl_pcie_transparent); +DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8533, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8544E, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8544, quirk_fsl_pcie_transparent); DECLARE_PCI_FIXUP_EARLY(0x1957, PCI_DEVICE_ID_MPC8641, quirk_fsl_pcie_transparent); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 07fc57429b58..8938d59013c6 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2092,8 +2092,10 @@ #define PCI_DEVICE_ID_MPC8568 0x0021 #define PCI_DEVICE_ID_MPC8567E 0x0022 #define PCI_DEVICE_ID_MPC8567 0x0023 -#define PCI_DEVICE_ID_MPC8544E 0x0030 -#define PCI_DEVICE_ID_MPC8544 0x0031 +#define PCI_DEVICE_ID_MPC8533E 0x0030 +#define PCI_DEVICE_ID_MPC8533 0x0031 +#define PCI_DEVICE_ID_MPC8544E 0x0032 +#define PCI_DEVICE_ID_MPC8544 0x0033 #define PCI_DEVICE_ID_MPC8641 0x7010 #define PCI_DEVICE_ID_MPC8641D 0x7011 -- cgit From ad4c2aa6354fad5316565b1cff57f80db0e04db8 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 22 Aug 2007 14:01:18 -0700 Subject: Serial 8250: handle saving the clear-on-read bits from the LSR and MSR Reading the LSR clears the break, parity, frame error, and overrun bits in the 8250 chip, but these are not being saved in all places that read the LSR. Same goes for the MSR delta bits. Save the LSR bits off whenever the lsr is read so they can be handled later in the receive routine. Save the MSR bits to be handled in the modem status routine. Also, clear the stored bits and clear the interrupt registers before enabling interrupts, to avoid handling old values of the stored bits in the interrupt routines. [akpm@linux-foundation.org: clean up pre-existing code] Signed-off-by: Corey Minyard Cc: Russell King Cc: Yinghai Lu Cc: Bjorn Helgaas Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/8250.c | 85 ++++++++++++++++++++++++++++++---------------- include/linux/serial_reg.h | 1 + 2 files changed, 57 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 301313002f6b..f94109cbb46e 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -129,7 +129,16 @@ struct uart_8250_port { unsigned char mcr; unsigned char mcr_mask; /* mask of user bits */ unsigned char mcr_force; /* mask of forced bits */ - unsigned char lsr_break_flag; + + /* + * Some bits in registers are cleared on a read, so they must + * be saved whenever the register is read but the bits will not + * be immediately processed. + */ +#define LSR_SAVE_FLAGS UART_LSR_BRK_ERROR_BITS + unsigned char lsr_saved_flags; +#define MSR_SAVE_FLAGS UART_MSR_ANY_DELTA + unsigned char msr_saved_flags; /* * We provide a per-port pm hook. @@ -1238,6 +1247,7 @@ static void serial8250_start_tx(struct uart_port *port) if (up->bugs & UART_BUG_TXEN) { unsigned char lsr, iir; lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; iir = serial_in(up, UART_IIR) & 0x0f; if ((up->port.type == PORT_RM9000) ? (lsr & UART_LSR_THRE && @@ -1290,18 +1300,10 @@ receive_chars(struct uart_8250_port *up, unsigned int *status) flag = TTY_NORMAL; up->port.icount.rx++; -#ifdef CONFIG_SERIAL_8250_CONSOLE - /* - * Recover the break flag from console xmit - */ - if (up->port.line == up->port.cons->index) { - lsr |= up->lsr_break_flag; - up->lsr_break_flag = 0; - } -#endif + lsr |= up->lsr_saved_flags; + up->lsr_saved_flags = 0; - if (unlikely(lsr & (UART_LSR_BI | UART_LSR_PE | - UART_LSR_FE | UART_LSR_OE))) { + if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) { /* * For statistics only */ @@ -1392,6 +1394,8 @@ static unsigned int check_modem_status(struct uart_8250_port *up) { unsigned int status = serial_in(up, UART_MSR); + status |= up->msr_saved_flags; + up->msr_saved_flags = 0; if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && up->port.info != NULL) { if (status & UART_MSR_TERI) @@ -1591,7 +1595,8 @@ static void serial8250_timeout(unsigned long data) static void serial8250_backup_timeout(unsigned long data) { struct uart_8250_port *up = (struct uart_8250_port *)data; - unsigned int iir, ier = 0; + unsigned int iir, ier = 0, lsr; + unsigned long flags; /* * Must disable interrupts or else we risk racing with the interrupt @@ -1610,9 +1615,13 @@ static void serial8250_backup_timeout(unsigned long data) * the "Diva" UART used on the management processor on many HP * ia64 and parisc boxes. */ + spin_lock_irqsave(&up->port.lock, flags); + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + spin_unlock_irqrestore(&up->port.lock, flags); if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && (!uart_circ_empty(&up->port.info->xmit) || up->port.x_char) && - (serial_in(up, UART_LSR) & UART_LSR_THRE)) { + (lsr & UART_LSR_THRE)) { iir &= ~(UART_IIR_ID | UART_IIR_NO_INT); iir |= UART_IIR_THRI; } @@ -1631,13 +1640,14 @@ static unsigned int serial8250_tx_empty(struct uart_port *port) { struct uart_8250_port *up = (struct uart_8250_port *)port; unsigned long flags; - unsigned int ret; + unsigned int lsr; spin_lock_irqsave(&up->port.lock, flags); - ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? TIOCSER_TEMT : 0; + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; spin_unlock_irqrestore(&up->port.lock, flags); - return ret; + return lsr & UART_LSR_TEMT ? TIOCSER_TEMT : 0; } static unsigned int serial8250_get_mctrl(struct uart_port *port) @@ -1708,8 +1718,7 @@ static inline void wait_for_xmitr(struct uart_8250_port *up, int bits) do { status = serial_in(up, UART_LSR); - if (status & UART_LSR_BI) - up->lsr_break_flag = UART_LSR_BI; + up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; if (--tmout == 0) break; @@ -1718,8 +1727,12 @@ static inline void wait_for_xmitr(struct uart_8250_port *up, int bits) /* Wait up to 1s for flow control if necessary */ if (up->port.flags & UPF_CONS_FLOW) { - tmout = 1000000; - while (!(serial_in(up, UART_MSR) & UART_MSR_CTS) && --tmout) { + unsigned int tmout; + for (tmout = 1000000; tmout; tmout--) { + unsigned int msr = serial_in(up, UART_MSR); + up->msr_saved_flags |= msr & MSR_SAVE_FLAGS; + if (msr & UART_MSR_CTS) + break; udelay(1); touch_nmi_watchdog(); } @@ -1888,6 +1901,18 @@ static int serial8250_startup(struct uart_port *port) spin_unlock_irqrestore(&up->port.lock, flags); + /* + * Clear the interrupt registers again for luck, and clear the + * saved flags to avoid getting false values from polling + * routines or the previous session. + */ + serial_inp(up, UART_LSR); + serial_inp(up, UART_RX); + serial_inp(up, UART_IIR); + serial_inp(up, UART_MSR); + up->lsr_saved_flags = 0; + up->msr_saved_flags = 0; + /* * Finally, enable interrupts. Note: Modem status interrupts * are set via set_termios(), which will be occurring imminently @@ -1906,14 +1931,6 @@ static int serial8250_startup(struct uart_port *port) (void) inb_p(icp); } - /* - * And clear the interrupt registers again for luck. - */ - (void) serial_inp(up, UART_LSR); - (void) serial_inp(up, UART_RX); - (void) serial_inp(up, UART_IIR); - (void) serial_inp(up, UART_MSR); - return 0; } @@ -2484,6 +2501,16 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count) wait_for_xmitr(up, BOTH_EMPTY); serial_out(up, UART_IER, ier); + /* + * The receive handling will happen properly because the + * receive ready bit will still be set; it is not cleared + * on read. However, modem control will not, we must + * call it if we have saved something in the saved flags + * while processing with interrupts off. + */ + if (up->msr_saved_flags) + check_modem_status(up); + if (locked) spin_unlock(&up->port.lock); local_irq_restore(flags); diff --git a/include/linux/serial_reg.h b/include/linux/serial_reg.h index 1c5ed7d92b0f..96c0d93fc2ca 100644 --- a/include/linux/serial_reg.h +++ b/include/linux/serial_reg.h @@ -118,6 +118,7 @@ #define UART_LSR_PE 0x04 /* Parity error indicator */ #define UART_LSR_OE 0x02 /* Overrun error indicator */ #define UART_LSR_DR 0x01 /* Receiver data ready */ +#define UART_LSR_BRK_ERROR_BITS 0x1E /* BI, FE, PE, OE bits */ #define UART_MSR 6 /* In: Modem Status Register */ #define UART_MSR_DCD 0x80 /* Data Carrier Detect */ -- cgit From 34b4e4aa3c470ce8fa2bd78abb1741b4b58baad7 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 22 Aug 2007 14:01:28 -0700 Subject: fix NULL pointer dereference in __vm_enough_memory() The new exec code inserts an accounted vma into an mm struct which is not current->mm. The existing memory check code has a hard coded assumption that this does not happen as does the security code. As the correct mm is known we pass the mm to the security method and the helper function. A new security test is added for the case where we need to pass the mm and the existing one is modified to pass current->mm to avoid the need to change large amounts of code. (Thanks to Tobias for fixing rejects and testing) Signed-off-by: Alan Cox Cc: WU Fengguang Cc: James Morris Cc: Tobias Diedrich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- include/linux/security.h | 20 +++++++++++++++----- mm/mmap.c | 6 +++--- mm/nommu.c | 2 +- security/commoncap.c | 4 ++-- security/dummy.c | 4 ++-- security/selinux/hooks.c | 4 ++-- 7 files changed, 26 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 655094dc9440..1692dd6cb915 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1042,7 +1042,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma, } /* mmap.c */ -extern int __vm_enough_memory(long pages, int cap_sys_admin); +extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); extern void vma_adjust(struct vm_area_struct *vma, unsigned long start, unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); extern struct vm_area_struct *vma_merge(struct mm_struct *, diff --git a/include/linux/security.h b/include/linux/security.h index c11dc8aa0351..1a15526e9f67 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -54,7 +54,7 @@ extern int cap_inode_removexattr(struct dentry *dentry, char *name); extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags); extern void cap_task_reparent_to_init (struct task_struct *p); extern int cap_syslog (int type); -extern int cap_vm_enough_memory (long pages); +extern int cap_vm_enough_memory (struct mm_struct *mm, long pages); struct msghdr; struct sk_buff; @@ -1125,6 +1125,7 @@ struct request_sock; * Return 0 if permission is granted. * @vm_enough_memory: * Check permissions for allocating a new virtual mapping. + * @mm contains the mm struct it is being added to. * @pages contains the number of pages. * Return 0 if permission is granted. * @@ -1169,7 +1170,7 @@ struct security_operations { int (*quota_on) (struct dentry * dentry); int (*syslog) (int type); int (*settime) (struct timespec *ts, struct timezone *tz); - int (*vm_enough_memory) (long pages); + int (*vm_enough_memory) (struct mm_struct *mm, long pages); int (*bprm_alloc_security) (struct linux_binprm * bprm); void (*bprm_free_security) (struct linux_binprm * bprm); @@ -1469,10 +1470,14 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) return security_ops->settime(ts, tz); } - static inline int security_vm_enough_memory(long pages) { - return security_ops->vm_enough_memory(pages); + return security_ops->vm_enough_memory(current->mm, pages); +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +{ + return security_ops->vm_enough_memory(mm, pages); } static inline int security_bprm_alloc (struct linux_binprm *bprm) @@ -2219,7 +2224,12 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) static inline int security_vm_enough_memory(long pages) { - return cap_vm_enough_memory(pages); + return cap_vm_enough_memory(current->mm, pages); +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +{ + return cap_vm_enough_memory(mm, pages); } static inline int security_bprm_alloc (struct linux_binprm *bprm) diff --git a/mm/mmap.c b/mm/mmap.c index b6537211b9cc..0d40e66c841b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -93,7 +93,7 @@ atomic_t vm_committed_space = ATOMIC_INIT(0); * Note this is a helper function intended to be used by LSMs which * wish to use this logic. */ -int __vm_enough_memory(long pages, int cap_sys_admin) +int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { unsigned long free, allowed; @@ -166,7 +166,7 @@ int __vm_enough_memory(long pages, int cap_sys_admin) /* Don't let a single process grow too big: leave 3% of the size of this process for other processes */ - allowed -= current->mm->total_vm / 32; + allowed -= mm->total_vm / 32; /* * cast `allowed' as a signed long because vm_committed_space @@ -2077,7 +2077,7 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma) if (__vma && __vma->vm_start < vma->vm_end) return -ENOMEM; if ((vma->vm_flags & VM_ACCOUNT) && - security_vm_enough_memory(vma_pages(vma))) + security_vm_enough_memory_mm(mm, vma_pages(vma))) return -ENOMEM; vma_link(mm, vma, prev, rb_link, rb_parent); return 0; diff --git a/mm/nommu.c b/mm/nommu.c index 9eef6a398555..8ed0cb43118a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1270,7 +1270,7 @@ EXPORT_SYMBOL(get_unmapped_area); * Note this is a helper function intended to be used by LSMs which * wish to use this logic. */ -int __vm_enough_memory(long pages, int cap_sys_admin) +int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { unsigned long free, allowed; diff --git a/security/commoncap.c b/security/commoncap.c index 338606eb7238..7520361663e8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -315,13 +315,13 @@ int cap_syslog (int type) return 0; } -int cap_vm_enough_memory(long pages) +int cap_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; if (cap_capable(current, CAP_SYS_ADMIN) == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } EXPORT_SYMBOL(cap_capable); diff --git a/security/dummy.c b/security/dummy.c index 19d813d5e083..853ec2292798 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -108,13 +108,13 @@ static int dummy_settime(struct timespec *ts, struct timezone *tz) return 0; } -static int dummy_vm_enough_memory(long pages) +static int dummy_vm_enough_memory(struct mm_struct *mm, long pages) { int cap_sys_admin = 0; if (dummy_capable(current, CAP_SYS_ADMIN) == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } static int dummy_bprm_alloc_security (struct linux_binprm *bprm) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6237933f7d82..d8bc4172819c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1584,7 +1584,7 @@ static int selinux_syslog(int type) * Do not audit the selinux permission check, as this is applied to all * processes that allocate mappings. */ -static int selinux_vm_enough_memory(long pages) +static int selinux_vm_enough_memory(struct mm_struct *mm, long pages) { int rc, cap_sys_admin = 0; struct task_security_struct *tsec = current->security; @@ -1600,7 +1600,7 @@ static int selinux_vm_enough_memory(long pages) if (rc == 0) cap_sys_admin = 1; - return __vm_enough_memory(pages, cap_sys_admin); + return __vm_enough_memory(mm, pages, cap_sys_admin); } /* binprm security operations */ -- cgit From 2301060e2b19aa4830060524ef66abdf32b26a26 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 22 Aug 2007 14:01:35 -0700 Subject: m68k/mac: Make mac_hid_mouse_emulate_buttons() declaration visible m68k/mac: Make mac_hid_mouse_emulate_buttons() declaration visible drivers/char/keyboard.c: In function 'kbd_keycode': drivers/char/keyboard.c:1142: error: implicit declaration of function 'mac_hid_mouse_emulate_buttons' The forward declaration of mac_hid_mouse_emulate_buttons() is not visible on m68k because it's hidden in the middle of a big #ifdef block. Move it to , correct the type of the second parameter, and include where needed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/keyboard.c | 4 ---- drivers/macintosh/mac_hid.c | 1 + include/linux/kbd_kern.h | 3 +++ 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c index 2ce0af1bd588..d95f316afb5a 100644 --- a/drivers/char/keyboard.c +++ b/drivers/char/keyboard.c @@ -1022,10 +1022,6 @@ static const unsigned short x86_keycodes[256] = 308,310,313,314,315,317,318,319,320,357,322,323,324,325,276,330, 332,340,365,342,343,344,345,346,356,270,341,368,369,370,371,372 }; -#ifdef CONFIG_MAC_EMUMOUSEBTN -extern int mac_hid_mouse_emulate_buttons(int, int, int); -#endif /* CONFIG_MAC_EMUMOUSEBTN */ - #ifdef CONFIG_SPARC static int sparc_l1_a_state = 0; extern void sun_do_break(void); diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c index 76c1e8e4a487..33dee3a773ed 100644 --- a/drivers/macintosh/mac_hid.c +++ b/drivers/macintosh/mac_hid.c @@ -13,6 +13,7 @@ #include #include #include +#include static struct input_dev *emumousebtn; diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index 506ad20c18f8..8bdb16bfe5fb 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -161,4 +161,7 @@ static inline void con_schedule_flip(struct tty_struct *t) schedule_delayed_work(&t->buf.work, 0); } +/* mac_hid.c */ +extern int mac_hid_mouse_emulate_buttons(int, unsigned int, int); + #endif -- cgit From 0aa42632d3a0024700b25f57fd0fca56f6abad24 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 22 Aug 2007 14:02:01 -0700 Subject: selection.h: add tty_struct forward declaration In file included from drivers/video/console/newport_con.c:16: include/linux/selection.h:16: warning: "struct tty_struct" declared inside parameter list include/linux/selection.h:16: warning: its scope is only this definition or declaration, which is probably not what you want Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/selection.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/selection.h b/include/linux/selection.h index ed3408b400f1..f9457861937c 100644 --- a/include/linux/selection.h +++ b/include/linux/selection.h @@ -10,6 +10,8 @@ #include #include +struct tty_struct; + extern struct vc_data *sel_cons; extern void clear_selection(void); -- cgit From b377fd3982ad957c796758a90e2988401a884241 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 22 Aug 2007 14:02:05 -0700 Subject: Apply memory policies to top two highest zones when highest zone is ZONE_MOVABLE The NUMA layer only supports NUMA policies for the highest zone. When ZONE_MOVABLE is configured with kernelcore=, the the highest zone becomes ZONE_MOVABLE. The result is that policies are only applied to allocations like anonymous pages and page cache allocated from ZONE_MOVABLE when the zone is used. This patch applies policies to the two highest zones when the highest zone is ZONE_MOVABLE. As ZONE_MOVABLE consists of pages from the highest "real" zone, it's always functionally equivalent. The patch has been tested on a variety of machines both NUMA and non-NUMA covering x86, x86_64 and ppc64. No abnormal results were seen in kernbench, tbench, dbench or hackbench. It passes regression tests from the numactl package with and without kernelcore= once numactl tests are patched to wait for vmstat counters to update. akpm: this is the nasty hack to fix NUMA mempolicies in the presence of ZONE_MOVABLE and kernelcore= in 2.6.23. Christoph says "For .24 either merge the mobility or get the other solution that Mel is working on. That solution would only use a single zonelist per node and filter on the fly. That may help performance and also help to make memory policies work better." Signed-off-by: Mel Gorman Acked-by: Lee Schermerhorn Tested-by: Lee Schermerhorn Acked-by: Christoph Lameter Cc: Andi Kleen Cc: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- include/linux/mmzone.h | 18 ++++++++++++++++++ mm/mempolicy.c | 2 +- mm/page_alloc.c | 13 +++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index e147cf50529f..5bdd656e88cf 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -166,7 +166,7 @@ extern enum zone_type policy_zone; static inline void check_highest_zone(enum zone_type k) { - if (k > policy_zone) + if (k > policy_zone && k != ZONE_MOVABLE) policy_zone = k; } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 3ea68cd3b61f..4e5627379b09 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -410,6 +410,24 @@ struct zonelist { #endif }; +#ifdef CONFIG_NUMA +/* + * Only custom zonelists like MPOL_BIND need to be filtered as part of + * policies. As described in the comment for struct zonelist_cache, these + * zonelists will not have a zlcache so zlcache_ptr will not be set. Use + * that to determine if the zonelists needs to be filtered or not. + */ +static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) +{ + return !zonelist->zlcache_ptr; +} +#else +static inline int alloc_should_filter_zonelist(struct zonelist *zonelist) +{ + return 0; +} +#endif /* CONFIG_NUMA */ + #ifdef CONFIG_ARCH_POPULATES_NODE_MAP struct node_active_region { unsigned long start_pfn; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 71b84b45154a..172abffeb2e3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -149,7 +149,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes) lower zones etc. Avoid empty zones because the memory allocator doesn't like them. If you implement node hot removal you have to fix that. */ - k = policy_zone; + k = MAX_NR_ZONES - 1; while (1) { for_each_node_mask(nd, *nodes) { struct zone *z = &NODE_DATA(nd)->node_zones[k]; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3da85b81dabb..6427653023aa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1157,6 +1157,7 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order, nodemask_t *allowednodes = NULL;/* zonelist_cache approximation */ int zlc_active = 0; /* set if using zonelist_cache */ int did_zlc_setup = 0; /* just call zlc_setup() one time */ + enum zone_type highest_zoneidx = -1; /* Gets set for policy zonelists */ zonelist_scan: /* @@ -1166,6 +1167,18 @@ zonelist_scan: z = zonelist->zones; do { + /* + * In NUMA, this could be a policy zonelist which contains + * zones that may not be allowed by the current gfp_mask. + * Check the zone is allowed by the current flags + */ + if (unlikely(alloc_should_filter_zonelist(zonelist))) { + if (highest_zoneidx == -1) + highest_zoneidx = gfp_zone(gfp_mask); + if (zone_idx(*z) > highest_zoneidx) + continue; + } + if (NUMA_BUILD && zlc_active && !zlc_zone_worth_trying(zonelist, z, allowednodes)) continue; -- cgit