From 889dfafe836ac9bb711f73d07a4c044cae177c0b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 4 Nov 2005 18:54:30 +0300 Subject: [PATCH] improve scheduler fairness a bit Do not transfer remaining time slice to another cpu on process exit. Signed-off-by: Oleg Nesterov Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 340dd238c16d..b4f4eb613537 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1468,7 +1468,7 @@ void fastcall sched_exit(task_t *p) * the sleep_avg of the parent as well. */ rq = task_rq_lock(p->parent, &flags); - if (p->first_time_slice) { + if (p->first_time_slice && task_cpu(p) == task_cpu(p->parent)) { p->parent->time_slice += p->time_slice; if (unlikely(p->parent->time_slice > task_timeslice(p))) p->parent->time_slice = task_timeslice(p); -- cgit From 7fd93cf30c531fd8b014e827e7a85fcfc010b2c6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 7 Nov 2005 00:57:59 -0800 Subject: [PATCH] posix-timers `unlikely' rejig !unlikely(expr) hurts my brain. likely(!expr) is more straightforward. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/posix-cpu-timers.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 91a894264941..84af54c39e1b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -497,7 +497,7 @@ static void process_timer_rebalance(struct task_struct *p, left = cputime_div(cputime_sub(expires.cpu, val.cpu), nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(prof_ticks(t), left); if (cputime_eq(t->it_prof_expires, cputime_zero) || @@ -512,7 +512,7 @@ static void process_timer_rebalance(struct task_struct *p, left = cputime_div(cputime_sub(expires.cpu, val.cpu), nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ticks = cputime_add(virt_ticks(t), left); if (cputime_eq(t->it_virt_expires, cputime_zero) || @@ -527,7 +527,7 @@ static void process_timer_rebalance(struct task_struct *p, nsleft = expires.sched - val.sched; do_div(nsleft, nthreads); do { - if (!unlikely(t->flags & PF_EXITING)) { + if (likely(!(t->flags & PF_EXITING))) { ns = t->sched_time + nsleft; if (t->it_sched_expires == 0 || t->it_sched_expires > ns) { -- cgit From a4c4af7c8dc1eccdfb8c57e1684f08179b4407e6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 7 Nov 2005 00:58:38 -0800 Subject: [PATCH] cpu hoptlug: avoid usage of smp_processor_id() in preemptible code Replace smp_processor_id() with any_online_cpu(cpu_online_map) in order to avoid lots of "BUG: using smp_processor_id() in preemptible [00000001] code:..." messages in case taking a cpu online fails. All the traces start at the last notifier_call_chain(...) in kernel/cpu.c. Since we hold the cpu_control semaphore it shouldn't be any problem to access cpu_online_map. The reason why cpu_up failed is simply that the cpu that was supposed to be taken online wasn't even there. That is because on s390 we never know when a new cpu comes and therefore cpu_possible_map consists of only ones and doesn't reflect reality. Signed-off-by: Heiko Carstens Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 3 ++- kernel/softirq.c | 3 ++- kernel/softlockup.c | 3 ++- kernel/workqueue.c | 2 +- 4 files changed, 7 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index b4f4eb613537..013f1448006b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4680,7 +4680,8 @@ static int migration_call(struct notifier_block *nfb, unsigned long action, #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind it from offline cpu so it can run. Fall thru. */ - kthread_bind(cpu_rq(cpu)->migration_thread,smp_processor_id()); + kthread_bind(cpu_rq(cpu)->migration_thread, + any_online_cpu(cpu_online_map)); kthread_stop(cpu_rq(cpu)->migration_thread); cpu_rq(cpu)->migration_thread = NULL; break; diff --git a/kernel/softirq.c b/kernel/softirq.c index f766b2fc48be..ad3295cdded5 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -470,7 +470,8 @@ static int __devinit cpu_callback(struct notifier_block *nfb, #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(ksoftirqd, hotcpu), smp_processor_id()); + kthread_bind(per_cpu(ksoftirqd, hotcpu), + any_online_cpu(cpu_online_map)); case CPU_DEAD: p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 75976209cea7..a2dcceb9437d 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -123,7 +123,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: /* Unbind so it can run. Fall thru. */ - kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); + kthread_bind(per_cpu(watchdog_task, hotcpu), + any_online_cpu(cpu_online_map)); case CPU_DEAD: p = per_cpu(watchdog_task, hotcpu); per_cpu(watchdog_task, hotcpu) = NULL; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7cee222231bc..42df83d7fad2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -524,7 +524,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, list_for_each_entry(wq, &workqueues, list) { /* Unbind so it can run. */ kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread, - smp_processor_id()); + any_online_cpu(cpu_online_map)); cleanup_workqueue_thread(wq, hotcpu); } break; -- cgit From dc19d507b17135069d9c5d6093d4458dc60e1861 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 7 Nov 2005 00:58:40 -0800 Subject: [PATCH] swsusp cleanups This cleans spaces between * and pointer up, and adds "int" in "unsigned int". Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/snapshot.c | 19 +++++++++---------- kernel/power/swsusp.c | 36 ++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 42a628704398..723f5179883e 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -168,9 +168,8 @@ static unsigned count_data_pages(void) { struct zone *zone; unsigned long zone_pfn; - unsigned n; + unsigned int n = 0; - n = 0; for_each_zone (zone) { if (is_highmem(zone)) continue; @@ -250,10 +249,10 @@ static inline void fill_pb_page(struct pbe *pbpage) * of memory pages allocated with alloc_pagedir() */ -void create_pbe_list(struct pbe *pblist, unsigned nr_pages) +void create_pbe_list(struct pbe *pblist, unsigned int nr_pages) { struct pbe *pbpage, *p; - unsigned num = PBES_PER_PAGE; + unsigned int num = PBES_PER_PAGE; for_each_pb_page (pbpage, pblist) { if (num >= nr_pages) @@ -293,9 +292,9 @@ static void *alloc_image_page(void) * On each page we set up a list of struct_pbe elements. */ -struct pbe *alloc_pagedir(unsigned nr_pages) +struct pbe *alloc_pagedir(unsigned int nr_pages) { - unsigned num; + unsigned int num; struct pbe *pblist, *pbe; if (!nr_pages) @@ -329,7 +328,7 @@ void swsusp_free(void) for_each_zone(zone) { for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) if (pfn_valid(zone_pfn + zone->zone_start_pfn)) { - struct page * page; + struct page *page; page = pfn_to_page(zone_pfn + zone->zone_start_pfn); if (PageNosave(page) && PageNosaveFree(page)) { ClearPageNosave(page); @@ -348,7 +347,7 @@ void swsusp_free(void) * free pages. */ -static int enough_free_mem(unsigned nr_pages) +static int enough_free_mem(unsigned int nr_pages) { pr_debug("swsusp: available memory: %u pages\n", nr_free_pages()); return nr_free_pages() > (nr_pages + PAGES_FOR_IO + @@ -356,7 +355,7 @@ static int enough_free_mem(unsigned nr_pages) } -static struct pbe *swsusp_alloc(unsigned nr_pages) +static struct pbe *swsusp_alloc(unsigned int nr_pages) { struct pbe *pblist, *p; @@ -380,7 +379,7 @@ static struct pbe *swsusp_alloc(unsigned nr_pages) asmlinkage int swsusp_save(void) { - unsigned nr_pages; + unsigned int nr_pages; pr_debug("swsusp: critical section: \n"); if (save_highmem()) { diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index 12db1d2ad61f..a1300717dc3f 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -122,8 +122,8 @@ static struct swsusp_info swsusp_info; static unsigned short swapfile_used[MAX_SWAPFILES]; static unsigned short root_swap; -static int write_page(unsigned long addr, swp_entry_t * loc); -static int bio_read_page(pgoff_t page_off, void * page); +static int write_page(unsigned long addr, swp_entry_t *loc); +static int bio_read_page(pgoff_t page_off, void *page); static u8 key_iv[MAXKEY+MAXIV]; @@ -355,7 +355,7 @@ static void lock_swapdevices(void) * This is a partial improvement, since we will at least return other * errors, though we need to eventually fix the damn code. */ -static int write_page(unsigned long addr, swp_entry_t * loc) +static int write_page(unsigned long addr, swp_entry_t *loc) { swp_entry_t entry; int error = 0; @@ -383,7 +383,7 @@ static int write_page(unsigned long addr, swp_entry_t * loc) static void data_free(void) { swp_entry_t entry; - struct pbe * p; + struct pbe *p; for_each_pbe(p, pagedir_nosave) { entry = p->swap_address; @@ -492,8 +492,8 @@ static void free_pagedir_entries(void) static int write_pagedir(void) { int error = 0; - unsigned n = 0; - struct pbe * pbe; + unsigned int n = 0; + struct pbe *pbe; printk( "Writing pagedir..."); for_each_pb_page (pbe, pagedir_nosave) { @@ -543,7 +543,7 @@ static int write_suspend_image(void) * We should only consider resume_device. */ -int enough_swap(unsigned nr_pages) +int enough_swap(unsigned int nr_pages) { struct sysinfo i; @@ -694,7 +694,7 @@ static int check_pagedir(struct pbe *pblist) * restore from the loaded pages later. We relocate them here. */ -static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) +static struct pbe *swsusp_pagedir_relocate(struct pbe *pblist) { struct zone *zone; unsigned long zone_pfn; @@ -770,7 +770,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) static atomic_t io_done = ATOMIC_INIT(0); -static int end_io(struct bio * bio, unsigned int num, int err) +static int end_io(struct bio *bio, unsigned int num, int err) { if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) panic("I/O error reading memory image"); @@ -778,7 +778,7 @@ static int end_io(struct bio * bio, unsigned int num, int err) return 0; } -static struct block_device * resume_bdev; +static struct block_device *resume_bdev; /** * submit - submit BIO request. @@ -791,10 +791,10 @@ static struct block_device * resume_bdev; * Then submit it and wait. */ -static int submit(int rw, pgoff_t page_off, void * page) +static int submit(int rw, pgoff_t page_off, void *page) { int error = 0; - struct bio * bio; + struct bio *bio; bio = bio_alloc(GFP_ATOMIC, 1); if (!bio) @@ -823,12 +823,12 @@ static int submit(int rw, pgoff_t page_off, void * page) return error; } -static int bio_read_page(pgoff_t page_off, void * page) +static int bio_read_page(pgoff_t page_off, void *page) { return submit(READ, page_off, page); } -static int bio_write_page(pgoff_t page_off, void * page) +static int bio_write_page(pgoff_t page_off, void *page) { return submit(WRITE, page_off, page); } @@ -838,7 +838,7 @@ static int bio_write_page(pgoff_t page_off, void * page) * I really don't think that it's foolproof but more than nothing.. */ -static const char * sanity_check(void) +static const char *sanity_check(void) { dump_info(); if (swsusp_info.version_code != LINUX_VERSION_CODE) @@ -864,7 +864,7 @@ static const char * sanity_check(void) static int check_header(void) { - const char * reason = NULL; + const char *reason = NULL; int error; if ((error = bio_read_page(swp_offset(swsusp_header.swsusp_info), &swsusp_info))) @@ -912,7 +912,7 @@ static int check_sig(void) static int data_read(struct pbe *pblist) { - struct pbe * p; + struct pbe *p; int error = 0; int i = 0; int mod = swsusp_info.image_pages / 100; @@ -950,7 +950,7 @@ static int data_read(struct pbe *pblist) static int read_pagedir(struct pbe *pblist) { struct pbe *pbpage, *p; - unsigned i = 0; + unsigned int i = 0; int error; if (!pblist) -- cgit From 47b90ffe5c10ab9b5cfd14087b28b13109673ee5 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 7 Nov 2005 00:58:41 -0800 Subject: [PATCH] swsusp: remove unused variable Remove unused variable, and make code less evil that way. Fix whitespace around for-loop-like macro. Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/swsusp.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index a1300717dc3f..e1ab28b9b217 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -85,18 +85,11 @@ unsigned int nr_copy_pages __nosavedata = 0; /* Suspend pagedir is allocated before final copy, therefore it must be freed after resume - Warning: this is evil. There are actually two pagedirs at time of - resume. One is "pagedir_save", which is empty frame allocated at - time of suspend, that must be freed. Second is "pagedir_nosave", - allocated at time of resume, that travels through memory not to - collide with anything. - Warning: this is even more evil than it seems. Pagedirs this file talks about are completely different from page directories used by MMU hardware. */ suspend_pagedir_t *pagedir_nosave __nosavedata = NULL; -suspend_pagedir_t *pagedir_save; #define SWSUSP_SIG "S1SUSPEND" @@ -385,7 +378,7 @@ static void data_free(void) swp_entry_t entry; struct pbe *p; - for_each_pbe(p, pagedir_nosave) { + for_each_pbe (p, pagedir_nosave) { entry = p->swap_address; if (entry.val) swap_free(entry); -- cgit From 9f46080c41d5f3f7c00b4e169ba4b0b2865258bf Mon Sep 17 00:00:00 2001 From: Matt Helsley Date: Mon, 7 Nov 2005 00:59:16 -0800 Subject: [PATCH] Process Events Connector This patch adds a connector that reports fork, exec, id change, and exit events for all processes to userspace. It replaces the fork_advisor patch that ELSA is currently using. Applications that may find these events useful include accounting/auditing (e.g. ELSA), system activity monitoring (e.g. top), security, and resource management (e.g. CKRM). Signed-off-by: Matt Helsley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/connector/Kconfig | 8 ++ drivers/connector/Makefile | 1 + drivers/connector/cn_proc.c | 222 ++++++++++++++++++++++++++++++++++++++++++++ fs/exec.c | 2 + include/linux/cn_proc.h | 127 +++++++++++++++++++++++++ include/linux/connector.h | 6 ++ kernel/exit.c | 2 + kernel/fork.c | 2 + kernel/sys.c | 9 ++ 9 files changed, 379 insertions(+) create mode 100644 drivers/connector/cn_proc.c create mode 100644 include/linux/cn_proc.h (limited to 'kernel') diff --git a/drivers/connector/Kconfig b/drivers/connector/Kconfig index 0bc2059c1e08..e0bdc0db9640 100644 --- a/drivers/connector/Kconfig +++ b/drivers/connector/Kconfig @@ -10,4 +10,12 @@ config CONNECTOR Connector support can also be built as a module. If so, the module will be called cn.ko. +config PROC_EVENTS + boolean "Report process events to userspace" + depends on CONNECTOR=y + default y + ---help--- + Provide a connector that reports process events to userspace. Send + events such as fork, exec, id change (uid, gid, suid, etc), and exit. + endmenu diff --git a/drivers/connector/Makefile b/drivers/connector/Makefile index 12ca79e8234d..1f255e46e916 100644 --- a/drivers/connector/Makefile +++ b/drivers/connector/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_CONNECTOR) += cn.o +obj-$(CONFIG_PROC_EVENTS) += cn_proc.o cn-y += cn_queue.o connector.o diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c new file mode 100644 index 000000000000..fcdf0fff13a6 --- /dev/null +++ b/drivers/connector/cn_proc.c @@ -0,0 +1,222 @@ +/* + * cn_proc.c - process events connector + * + * Copyright (C) Matt Helsley, IBM Corp. 2005 + * Based on cn_fork.c by Guillaume Thouvenin + * Original copyright notice follows: + * Copyright (C) 2005 BULL SA. + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include + +#include + +#define CN_PROC_MSG_SIZE (sizeof(struct cn_msg) + sizeof(struct proc_event)) + +static atomic_t proc_event_num_listeners = ATOMIC_INIT(0); +static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; + +/* proc_counts is used as the sequence number of the netlink message */ +static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; + +static inline void get_seq(__u32 *ts, int *cpu) +{ + *ts = get_cpu_var(proc_event_counts)++; + *cpu = smp_processor_id(); + put_cpu_var(proc_counts); +} + +void proc_fork_connector(struct task_struct *task) +{ + struct cn_msg *msg; + struct proc_event *ev; + __u8 buffer[CN_PROC_MSG_SIZE]; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + + msg = (struct cn_msg*)buffer; + ev = (struct proc_event*)msg->data; + get_seq(&msg->seq, &ev->cpu); + ev->what = PROC_EVENT_FORK; + ev->event_data.fork.parent_pid = task->real_parent->pid; + ev->event_data.fork.parent_tgid = task->real_parent->tgid; + ev->event_data.fork.child_pid = task->pid; + ev->event_data.fork.child_tgid = task->tgid; + + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); + /* If cn_netlink_send() failed, the data is not sent */ + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); +} + +void proc_exec_connector(struct task_struct *task) +{ + struct cn_msg *msg; + struct proc_event *ev; + __u8 buffer[CN_PROC_MSG_SIZE]; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + + msg = (struct cn_msg*)buffer; + ev = (struct proc_event*)msg->data; + get_seq(&msg->seq, &ev->cpu); + ev->what = PROC_EVENT_EXEC; + ev->event_data.exec.process_pid = task->pid; + ev->event_data.exec.process_tgid = task->tgid; + + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); +} + +void proc_id_connector(struct task_struct *task, int which_id) +{ + struct cn_msg *msg; + struct proc_event *ev; + __u8 buffer[CN_PROC_MSG_SIZE]; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + + msg = (struct cn_msg*)buffer; + ev = (struct proc_event*)msg->data; + ev->what = which_id; + ev->event_data.id.process_pid = task->pid; + ev->event_data.id.process_tgid = task->tgid; + if (which_id == PROC_EVENT_UID) { + ev->event_data.id.r.ruid = task->uid; + ev->event_data.id.e.euid = task->euid; + } else if (which_id == PROC_EVENT_GID) { + ev->event_data.id.r.rgid = task->gid; + ev->event_data.id.e.egid = task->egid; + } else + return; + get_seq(&msg->seq, &ev->cpu); + + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); +} + +void proc_exit_connector(struct task_struct *task) +{ + struct cn_msg *msg; + struct proc_event *ev; + __u8 buffer[CN_PROC_MSG_SIZE]; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + + msg = (struct cn_msg*)buffer; + ev = (struct proc_event*)msg->data; + get_seq(&msg->seq, &ev->cpu); + ev->what = PROC_EVENT_EXIT; + ev->event_data.exit.process_pid = task->pid; + ev->event_data.exit.process_tgid = task->tgid; + ev->event_data.exit.exit_code = task->exit_code; + ev->event_data.exit.exit_signal = task->exit_signal; + + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = 0; /* not used */ + msg->len = sizeof(*ev); + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); +} + +/* + * Send an acknowledgement message to userspace + * + * Use 0 for success, EFOO otherwise. + * Note: this is the negative of conventional kernel error + * values because it's not being returned via syscall return + * mechanisms. + */ +static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) +{ + struct cn_msg *msg; + struct proc_event *ev; + __u8 buffer[CN_PROC_MSG_SIZE]; + + if (atomic_read(&proc_event_num_listeners) < 1) + return; + + msg = (struct cn_msg*)buffer; + ev = (struct proc_event*)msg->data; + msg->seq = rcvd_seq; + ev->cpu = -1; + ev->what = PROC_EVENT_NONE; + ev->event_data.ack.err = err; + memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); + msg->ack = rcvd_ack + 1; + msg->len = sizeof(*ev); + cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL); +} + +/** + * cn_proc_mcast_ctl + * @data: message sent from userspace via the connector + */ +static void cn_proc_mcast_ctl(void *data) +{ + struct cn_msg *msg = data; + enum proc_cn_mcast_op *mc_op = NULL; + int err = 0; + + if (msg->len != sizeof(*mc_op)) + return; + + mc_op = (enum proc_cn_mcast_op*)msg->data; + switch (*mc_op) { + case PROC_CN_MCAST_LISTEN: + atomic_inc(&proc_event_num_listeners); + break; + case PROC_CN_MCAST_IGNORE: + atomic_dec(&proc_event_num_listeners); + break; + default: + err = EINVAL; + break; + } + cn_proc_ack(err, msg->seq, msg->ack); +} + +/* + * cn_proc_init - initialization entry point + * + * Adds the connector callback to the connector driver. + */ +static int __init cn_proc_init(void) +{ + int err; + + if ((err = cn_add_callback(&cn_proc_event_id, "cn_proc", + &cn_proc_mcast_ctl))) { + printk(KERN_WARNING "cn_proc failed to register\n"); + return err; + } + return 0; +} + +module_init(cn_proc_init); diff --git a/fs/exec.c b/fs/exec.c index 10d493fea7ce..ce76b33f25ac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -1096,6 +1097,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) fput(bprm->file); bprm->file = NULL; current->did_exec = 1; + proc_exec_connector(current); return retval; } read_lock(&binfmt_lock); diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h new file mode 100644 index 000000000000..70ab56317380 --- /dev/null +++ b/include/linux/cn_proc.h @@ -0,0 +1,127 @@ +/* + * cn_proc.h - process events connector + * + * Copyright (C) Matt Helsley, IBM Corp. 2005 + * Based on cn_fork.h by Nguyen Anh Quynh and Guillaume Thouvenin + * Original copyright notice follows: + * Copyright (C) 2005 Nguyen Anh Quynh + * Copyright (C) 2005 Guillaume Thouvenin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef CN_PROC_H +#define CN_PROC_H + +#include +#include + +/* + * Userspace sends this enum to register with the kernel that it is listening + * for events on the connector. + */ +enum proc_cn_mcast_op { + PROC_CN_MCAST_LISTEN = 1, + PROC_CN_MCAST_IGNORE = 2 +}; + +/* + * From the user's point of view, the process + * ID is the thread group ID and thread ID is the internal + * kernel "pid". So, fields are assigned as follow: + * + * In user space - In kernel space + * + * parent process ID = parent->tgid + * parent thread ID = parent->pid + * child process ID = child->tgid + * child thread ID = child->pid + */ + +struct proc_event { + enum what { + /* Use successive bits so the enums can be used to record + * sets of events as well + */ + PROC_EVENT_NONE = 0x00000000, + PROC_EVENT_FORK = 0x00000001, + PROC_EVENT_EXEC = 0x00000002, + PROC_EVENT_UID = 0x00000004, + PROC_EVENT_GID = 0x00000040, + /* "next" should be 0x00000400 */ + /* "last" is the last process event: exit */ + PROC_EVENT_EXIT = 0x80000000 + } what; + __u32 cpu; + union { /* must be last field of proc_event struct */ + struct { + __u32 err; + } ack; + + struct fork_proc_event { + pid_t parent_pid; + pid_t parent_tgid; + pid_t child_pid; + pid_t child_tgid; + } fork; + + struct exec_proc_event { + pid_t process_pid; + pid_t process_tgid; + } exec; + + struct id_proc_event { + pid_t process_pid; + pid_t process_tgid; + union { + uid_t ruid; /* current->uid */ + gid_t rgid; /* current->gid */ + } r; + union { + uid_t euid; + gid_t egid; + } e; + } id; + + struct exit_proc_event { + pid_t process_pid; + pid_t process_tgid; + __u32 exit_code, exit_signal; + } exit; + } event_data; +}; + +#ifdef __KERNEL__ +#ifdef CONFIG_PROC_EVENTS +void proc_fork_connector(struct task_struct *task); +void proc_exec_connector(struct task_struct *task); +void proc_id_connector(struct task_struct *task, int which_id); +void proc_exit_connector(struct task_struct *task); +#else +static inline void proc_fork_connector(struct task_struct *task) +{} + +static inline void proc_exec_connector(struct task_struct *task) +{} + +static inline void proc_id_connector(struct task_struct *task, + int which_id) +{} + +static inline void proc_exit_connector(struct task_struct *task) +{} +#endif /* CONFIG_PROC_EVENTS */ +#endif /* __KERNEL__ */ +#endif /* CN_PROC_H */ diff --git a/include/linux/connector.h b/include/linux/connector.h index 95952cc1f525..c5769c6585f4 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -27,6 +27,12 @@ #define CN_IDX_CONNECTOR 0xffffffff #define CN_VAL_CONNECTOR 0xffffffff +/* + * Process Events connector unique ids -- used for message routing + */ +#define CN_IDX_PROC 0x1 +#define CN_VAL_PROC 0x1 + #define CN_NETLINK_USERS 1 /* diff --git a/kernel/exit.c b/kernel/exit.c index 537394b25e8d..452a1d116178 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -863,6 +864,7 @@ fastcall NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); tsk->exit_code = code; + proc_exit_connector(tsk); exit_notify(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); diff --git a/kernel/fork.c b/kernel/fork.c index 8a069612eac3..efac2c58ec7d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1143,6 +1144,7 @@ static task_t *copy_process(unsigned long clone_flags, __get_cpu_var(process_counts)++; } + proc_fork_connector(p); if (!current->signal->tty && p->signal->tty) p->signal->tty = NULL; diff --git a/kernel/sys.c b/kernel/sys.c index 2fa1ed18123c..1e1f41b3fdf6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -623,6 +624,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) current->egid = new_egid; current->gid = new_rgid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -662,6 +664,7 @@ asmlinkage long sys_setgid(gid_t gid) return -EPERM; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -751,6 +754,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) current->fsuid = current->euid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RE); } @@ -798,6 +802,7 @@ asmlinkage long sys_setuid(uid_t uid) current->suid = new_suid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_ID); } @@ -846,6 +851,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) current->suid = suid; key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); return security_task_post_setuid(old_ruid, old_euid, old_suid, LSM_SETID_RES); } @@ -898,6 +904,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) current->sgid = sgid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); return 0; } @@ -940,6 +947,7 @@ asmlinkage long sys_setfsuid(uid_t uid) } key_fsuid_changed(current); + proc_id_connector(current, PROC_EVENT_UID); security_task_post_setuid(old_fsuid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS); @@ -968,6 +976,7 @@ asmlinkage long sys_setfsgid(gid_t gid) } current->fsgid = gid; key_fsgid_changed(current); + proc_id_connector(current, PROC_EVENT_GID); } return old_fsgid; } -- cgit From d55b5fdaf40846221d543937b786956e27837fda Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 7 Nov 2005 00:59:31 -0800 Subject: [PATCH] aio: remove aio_max_nr accounting race AIO was adding a new context's max requests to the global total before testing if that resulting total was over the global limit. This let innocent tasks get their new limit tested along with a racing guilty task that was crossing the limit. This serializes the _nr accounting with a spinlock It also switches to using unsigned long for the global totals. Individual contexts are still limited to an unsigned int's worth of requests by the syscall interface. The problem and fix were verified with a simple program that spun creating and destroying a context while holding on to another long lived context. Before the patch a task creating a tiny context could get a spurious EAGAIN if it raced with a task creating a very large context that overran the limit. Signed-off-by: Zach Brown Cc: Benjamin LaHaise Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 31 +++++++++++++++++++++---------- include/linux/aio.h | 5 +++-- kernel/sysctl.c | 4 ++-- 3 files changed, 26 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/fs/aio.c b/fs/aio.c index edfca5b75535..20bb919eb195 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -42,8 +42,9 @@ #endif /*------ sysctl variables----*/ -atomic_t aio_nr = ATOMIC_INIT(0); /* current system wide number of aio requests */ -unsigned aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ +static DEFINE_SPINLOCK(aio_nr_lock); +unsigned long aio_nr; /* current system wide number of aio requests */ +unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ /*----end sysctl variables---*/ static kmem_cache_t *kiocb_cachep; @@ -208,7 +209,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) return ERR_PTR(-EINVAL); } - if (nr_events > aio_max_nr) + if ((unsigned long)nr_events > aio_max_nr) return ERR_PTR(-EAGAIN); ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL); @@ -233,8 +234,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) goto out_freectx; /* limit the number of system wide aios */ - atomic_add(ctx->max_reqs, &aio_nr); /* undone by __put_ioctx */ - if (unlikely(atomic_read(&aio_nr) > aio_max_nr)) + spin_lock(&aio_nr_lock); + if (aio_nr + ctx->max_reqs > aio_max_nr || + aio_nr + ctx->max_reqs < aio_nr) + ctx->max_reqs = 0; + else + aio_nr += ctx->max_reqs; + spin_unlock(&aio_nr_lock); + if (ctx->max_reqs == 0) goto out_cleanup; /* now link into global list. kludge. FIXME */ @@ -248,8 +255,6 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) return ctx; out_cleanup: - atomic_sub(ctx->max_reqs, &aio_nr); - ctx->max_reqs = 0; /* prevent __put_ioctx from sub'ing aio_nr */ __put_ioctx(ctx); return ERR_PTR(-EAGAIN); @@ -374,7 +379,12 @@ void fastcall __put_ioctx(struct kioctx *ctx) pr_debug("__put_ioctx: freeing %p\n", ctx); kmem_cache_free(kioctx_cachep, ctx); - atomic_sub(nr_events, &aio_nr); + if (nr_events) { + spin_lock(&aio_nr_lock); + BUG_ON(aio_nr - nr_events > aio_nr); + aio_nr -= nr_events; + spin_unlock(&aio_nr_lock); + } } /* aio_get_req @@ -1258,8 +1268,9 @@ asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) goto out; ret = -EINVAL; - if (unlikely(ctx || (int)nr_events <= 0)) { - pr_debug("EINVAL: io_setup: ctx or nr_events > max\n"); + if (unlikely(ctx || nr_events == 0)) { + pr_debug("EINVAL: io_setup: ctx %lu nr_events %u\n", + ctx, nr_events); goto out; } diff --git a/include/linux/aio.h b/include/linux/aio.h index 0decf66117c1..403d71dcb7c8 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -183,6 +183,7 @@ struct kioctx { struct list_head active_reqs; /* used for cancellation */ struct list_head run_list; /* used for kicked reqs */ + /* sys_io_setup currently limits this to an unsigned int */ unsigned max_reqs; struct aio_ring_info ring_info; @@ -234,7 +235,7 @@ static inline struct kiocb *list_kiocb(struct list_head *h) } /* for sysctl: */ -extern atomic_t aio_nr; -extern unsigned aio_max_nr; +extern unsigned long aio_nr; +extern unsigned long aio_max_nr; #endif /* __LINUX__AIO_H */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8e56e2495542..e1351200ce85 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -952,7 +952,7 @@ static ctl_table fs_table[] = { .data = &aio_nr, .maxlen = sizeof(aio_nr), .mode = 0444, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, { .ctl_name = FS_AIO_MAX_NR, @@ -960,7 +960,7 @@ static ctl_table fs_table[] = { .data = &aio_max_nr, .maxlen = sizeof(aio_max_nr), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_doulongvec_minmax, }, #ifdef CONFIG_INOTIFY { -- cgit From 796f8d9b98fc92a5e9aaea8cf932957850332f51 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Mon, 7 Nov 2005 00:59:33 -0800 Subject: [PATCH] FUTEX_WAKE_OP: enhanced error handling The code for FUTEX_WAKE_OP calls an arch callback, futex_atomic_op_inuser(). That callback can return an error code, but currently the caller assumes any error is EFAULT, and will try various things to resolve the fault before eventually giving up with EFAULT (regardless of the original error code). This is not a theoretical case - arch callbacks currently return -ENOSYS if the opcode they are given is bogus. This patch alters the code to detect non-EFAULT errors and return them directly to the user. Of course, whether -ENOSYS is the correct return value for the bogus opcode case, or whether EINVAL would be more appropriate is another question. Signed-off-by: David Gibson Cc: Rusty Russell Cc: Ingo Molnar Cc: Jamie Lokier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/futex.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/futex.c b/kernel/futex.c index 3b4d5ad44cc6..aca8d10704f6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -365,6 +365,11 @@ retry: if (bh1 != bh2) spin_unlock(&bh2->lock); + if (unlikely(op_ret != -EFAULT)) { + ret = op_ret; + goto out; + } + /* futex_atomic_op_inuser needs to both read and write * *(int __user *)uaddr2, but we can't modify it * non-atomically. Therefore, if get_user below is not -- cgit From 8c65b4a60450590e79a28e9717ceffa9e4debb3f Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Mon, 7 Nov 2005 00:59:43 -0800 Subject: [PATCH] fix remaining missing includes Fix more include file problems that surfaced since I submitted the previous fix-missing-includes.patch. This should now allow not to include sched.h from module.h, which is done by a followup patch. Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-aaec2000/clock.c | 1 + arch/arm/mach-epxa10db/mm.c | 1 + arch/arm/mach-pxa/corgi_lcd.c | 1 + arch/ppc/syslib/ppc_sys.c | 1 + drivers/base/power/sysfs.c | 1 + drivers/char/agp/amd64-agp.c | 1 + drivers/hwmon/hwmon.c | 1 + drivers/infiniband/core/agent.c | 3 +++ drivers/infiniband/core/packer.c | 2 ++ drivers/infiniband/core/sysfs.c | 3 +++ drivers/infiniband/core/ud_header.c | 1 + drivers/infiniband/core/verbs.c | 1 + drivers/infiniband/hw/mthca/mthca_catas.c | 3 +++ drivers/infiniband/hw/mthca/mthca_srq.c | 3 +++ drivers/media/dvb/frontends/cx24110.c | 1 + drivers/message/i2o/exec-osm.c | 1 + drivers/mfd/mcp-core.c | 2 ++ drivers/pci/hotplug/pciehprm_nonacpi.c | 1 + drivers/pci/pci-driver.c | 1 + drivers/scsi/atari_dma_emul.c | 2 ++ drivers/scsi/raid_class.c | 2 ++ drivers/scsi/scsi_transport_sas.c | 2 ++ drivers/scsi/sym53c8xx_2/sym_hipd.c | 1 + fs/9p/error.c | 1 + include/asm-alpha/pgtable.h | 3 +++ include/asm-cris/processor.h | 2 ++ include/asm-frv/pgtable.h | 2 ++ include/asm-generic/pgtable.h | 1 + include/asm-i386/elf.h | 2 ++ include/asm-i386/pgtable.h | 3 +++ include/asm-ia64/pgtable.h | 1 + include/asm-m32r/pgtable.h | 3 +++ include/asm-mips/elf.h | 2 ++ include/asm-mips/pgtable.h | 3 +++ include/asm-parisc/pgtable.h | 3 ++- include/asm-powerpc/elf.h | 2 ++ include/asm-ppc/pgtable.h | 1 + include/asm-ppc64/pgtable.h | 1 + include/asm-s390/elf.h | 1 + include/asm-s390/pgtable.h | 1 + include/asm-sh/elf.h | 1 + include/asm-sh/pgtable.h | 2 ++ include/asm-sh64/pgtable.h | 2 ++ include/asm-x86_64/elf.h | 2 ++ include/asm-x86_64/pgtable.h | 2 ++ include/asm-xtensa/elf.h | 2 ++ include/asm-xtensa/pgtable.h | 3 +++ include/linux/irq.h | 1 + include/linux/memory.h | 3 +++ include/linux/sem.h | 2 ++ include/linux/wait.h | 1 + kernel/module.c | 1 + 52 files changed, 89 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/arch/arm/mach-aaec2000/clock.c b/arch/arm/mach-aaec2000/clock.c index 99e019169dda..0340ddc4824e 100644 --- a/arch/arm/mach-aaec2000/clock.c +++ b/arch/arm/mach-aaec2000/clock.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-epxa10db/mm.c b/arch/arm/mach-epxa10db/mm.c index e8832d0910ee..cfd0d2182d44 100644 --- a/arch/arm/mach-epxa10db/mm.c +++ b/arch/arm/mach-epxa10db/mm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include diff --git a/arch/arm/mach-pxa/corgi_lcd.c b/arch/arm/mach-pxa/corgi_lcd.c index 54162ba95414..698eb06545c4 100644 --- a/arch/arm/mach-pxa/corgi_lcd.c +++ b/arch/arm/mach-pxa/corgi_lcd.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ppc/syslib/ppc_sys.c b/arch/ppc/syslib/ppc_sys.c index 62ee86e80711..603f01190816 100644 --- a/arch/ppc/syslib/ppc_sys.c +++ b/arch/ppc/syslib/ppc_sys.c @@ -14,6 +14,7 @@ * option) any later version. */ +#include #include int (*ppc_sys_device_fixup) (struct platform_device * pdev); diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 89c57875f3e5..f3a0c562bcb5 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -3,6 +3,7 @@ */ #include +#include #include "power.h" diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 0e6c3a31d344..78ce98a69f37 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include /* PAGE_SIZE */ #include "agp.h" diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 6f48579799b5..dddd3eb9b387 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -16,6 +16,7 @@ #include #include #include +#include #define HWMON_ID_PREFIX "hwmon" #define HWMON_ID_FORMAT HWMON_ID_PREFIX "%d" diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c index 7545775d38ef..34b724afd28d 100644 --- a/drivers/infiniband/core/agent.c +++ b/drivers/infiniband/core/agent.c @@ -37,6 +37,9 @@ * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $ */ +#include +#include + #include "agent.h" #include "smi.h" diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c index 35df5010e723..c972d7235764 100644 --- a/drivers/infiniband/core/packer.c +++ b/drivers/infiniband/core/packer.c @@ -33,6 +33,8 @@ * $Id: packer.c 1349 2004-12-16 21:09:43Z roland $ */ +#include + #include static u64 value_read(int offset, int size, void *structure) diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index b8120650e711..08648b1a387e 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -36,6 +36,9 @@ #include "core_priv.h" +#include +#include + #include struct ib_port { diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c index 527b23450ab3..997c07db6d8f 100644 --- a/drivers/infiniband/core/ud_header.c +++ b/drivers/infiniband/core/ud_header.c @@ -34,6 +34,7 @@ */ #include +#include #include diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 72d3ef786db5..4186cc888ea5 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -40,6 +40,7 @@ #include #include +#include #include #include diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 7ac52af43b99..25ebab64bc42 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -32,6 +32,9 @@ * $Id$ */ +#include +#include + #include "mthca_dev.h" enum { diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 292f55be8cbd..26d5161fde07 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -32,6 +32,9 @@ * $Id: mthca_srq.c 3047 2005-08-10 03:59:35Z roland $ */ +#include +#include + #include "mthca_dev.h" #include "mthca_cmd.h" #include "mthca_memfree.h" diff --git a/drivers/media/dvb/frontends/cx24110.c b/drivers/media/dvb/frontends/cx24110.c index d4b97989e3ed..654d7dc879d9 100644 --- a/drivers/media/dvb/frontends/cx24110.c +++ b/drivers/media/dvb/frontends/cx24110.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "dvb_frontend.h" #include "cx24110.h" diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c index b675b4ebbebd..9c339a2505b0 100644 --- a/drivers/message/i2o/exec-osm.c +++ b/drivers/message/i2o/exec-osm.c @@ -33,6 +33,7 @@ #include #include #include +#include /* wait_event_interruptible_timeout() needs this */ #include /* HZ */ #include "core.h" diff --git a/drivers/mfd/mcp-core.c b/drivers/mfd/mcp-core.c index c75d713c01e4..55ba23075c90 100644 --- a/drivers/mfd/mcp-core.c +++ b/drivers/mfd/mcp-core.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include diff --git a/drivers/pci/hotplug/pciehprm_nonacpi.c b/drivers/pci/hotplug/pciehprm_nonacpi.c index 33b2c69a0829..76c727c74cc0 100644 --- a/drivers/pci/hotplug/pciehprm_nonacpi.c +++ b/drivers/pci/hotplug/pciehprm_nonacpi.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index ae986e590b48..94e68c54d273 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "pci.h" /* diff --git a/drivers/scsi/atari_dma_emul.c b/drivers/scsi/atari_dma_emul.c index 7026045527fd..8d5d2a5da961 100644 --- a/drivers/scsi/atari_dma_emul.c +++ b/drivers/scsi/atari_dma_emul.c @@ -19,6 +19,8 @@ * this code. */ +#include +#include #include #define hades_dma_ctrl (*(unsigned char *) 0xffff8717) diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c index f1ea5027865f..caa0c3629626 100644 --- a/drivers/scsi/raid_class.c +++ b/drivers/scsi/raid_class.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 0cc766a9aa65..edabbd05d258 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -26,6 +26,8 @@ #include #include #include +#include +#include #include #include diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index a1a58e1d5ad3..a7420cad4547 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -39,6 +39,7 @@ */ #include +#include /* for timeouts in units of HZ */ #include "sym_glue.h" #include "sym_nvram.h" diff --git a/fs/9p/error.c b/fs/9p/error.c index fee5d19179c5..834cb179e388 100644 --- a/fs/9p/error.c +++ b/fs/9p/error.c @@ -33,6 +33,7 @@ #include #include +#include #include "debug.h" #include "error.h" diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h index 8393bf374b2b..a985cd29b6db 100644 --- a/include/asm-alpha/pgtable.h +++ b/include/asm-alpha/pgtable.h @@ -17,6 +17,9 @@ #include /* For TASK_SIZE */ #include +struct mm_struct; +struct vm_area_struct; + /* Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available. diff --git a/include/asm-cris/processor.h b/include/asm-cris/processor.h index e8b2abb2ae59..dce41009eeb0 100644 --- a/include/asm-cris/processor.h +++ b/include/asm-cris/processor.h @@ -16,6 +16,8 @@ #include #include +struct task_struct; + /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ diff --git a/include/asm-frv/pgtable.h b/include/asm-frv/pgtable.h index b247e99dff49..844666377dcb 100644 --- a/include/asm-frv/pgtable.h +++ b/include/asm-frv/pgtable.h @@ -26,6 +26,8 @@ #include #include #include +struct mm_struct; +struct vm_area_struct; #endif #ifndef __ASSEMBLY__ diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 7dca30a26c53..358e4d309ceb 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -128,6 +128,7 @@ do { \ #endif #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT +struct mm_struct; static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { pte_t old_pte = *ptep; diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index fa11117d3cfa..4153d80e4d2b 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -119,6 +119,8 @@ typedef struct user_fxsr_struct elf_fpxregset_t; */ #define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) +struct task_struct; + extern int dump_task_regs (struct task_struct *, elf_gregset_t *); extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); extern int dump_task_extended_fpu (struct task_struct *, struct user_fxsr_struct *); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 03f3c8ac6383..088a945bf26b 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -25,6 +25,9 @@ #include #include +struct mm_struct; +struct vm_area_struct; + /* * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h index 21e32a06bc82..c34ba80c1c31 100644 --- a/include/asm-ia64/pgtable.h +++ b/include/asm-ia64/pgtable.h @@ -127,6 +127,7 @@ # ifndef __ASSEMBLY__ +#include /* for mm_struct */ #include #include #include diff --git a/include/asm-m32r/pgtable.h b/include/asm-m32r/pgtable.h index 1cd5fd4a5b2c..75740debcd01 100644 --- a/include/asm-m32r/pgtable.h +++ b/include/asm-m32r/pgtable.h @@ -27,6 +27,9 @@ #include #include +struct mm_struct; +struct vm_area_struct; + extern pgd_t swapper_pg_dir[1024]; extern void paging_init(void); diff --git a/include/asm-mips/elf.h b/include/asm-mips/elf.h index 7420f12742bb..d2c9a25f8459 100644 --- a/include/asm-mips/elf.h +++ b/include/asm-mips/elf.h @@ -275,6 +275,8 @@ do { \ #endif /* CONFIG_64BIT */ +struct task_struct; + extern void dump_regs(elf_greg_t *, struct pt_regs *regs); extern int dump_task_regs (struct task_struct *, elf_gregset_t *); extern int dump_task_fpu(struct task_struct *, elf_fpregset_t *); diff --git a/include/asm-mips/pgtable.h b/include/asm-mips/pgtable.h index 34facd996503..702a28fa7a34 100644 --- a/include/asm-mips/pgtable.h +++ b/include/asm-mips/pgtable.h @@ -19,6 +19,9 @@ #include #include +struct mm_struct; +struct vm_area_struct; + #define PAGE_NONE __pgprot(_PAGE_PRESENT | _CACHE_CACHABLE_NONCOHERENT) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ PAGE_CACHABLE_DEFAULT) diff --git a/include/asm-parisc/pgtable.h b/include/asm-parisc/pgtable.h index c28fb6f48c6c..b4554711c3e7 100644 --- a/include/asm-parisc/pgtable.h +++ b/include/asm-parisc/pgtable.h @@ -12,6 +12,7 @@ */ #include +#include /* for vm_area_struct */ #include #include #include @@ -418,7 +419,6 @@ extern void paging_init (void); #define PG_dcache_dirty PG_arch_1 -struct vm_area_struct; /* forward declaration (include/linux/mm.h) */ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); /* Encode and de-code a swap entry */ @@ -464,6 +464,7 @@ static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned extern spinlock_t pa_dbit_lock; +struct mm_struct; static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t old_pte; diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h index d140577d0a05..feac3458d71f 100644 --- a/include/asm-powerpc/elf.h +++ b/include/asm-powerpc/elf.h @@ -1,11 +1,13 @@ #ifndef _ASM_POWERPC_ELF_H #define _ASM_POWERPC_ELF_H +#include /* for task_struct */ #include #include #include #include #include +#include /* PowerPC relocations defined by the ABIs */ #define R_PPC_NONE 0 diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h index b28a713ba862..6d1c39e8a6af 100644 --- a/include/asm-ppc/pgtable.h +++ b/include/asm-ppc/pgtable.h @@ -12,6 +12,7 @@ #include /* For TASK_SIZE */ #include #include +struct mm_struct; extern unsigned long va_to_phys(unsigned long address); extern pte_t *va_to_pte(unsigned long address); diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index fde93ec36abc..a9783ba7fe98 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -13,6 +13,7 @@ #include #include #include +struct mm_struct; #endif /* __ASSEMBLY__ */ #ifdef CONFIG_PPC_64K_PAGES diff --git a/include/asm-s390/elf.h b/include/asm-s390/elf.h index 3b8bd46832a1..372d51cccd53 100644 --- a/include/asm-s390/elf.h +++ b/include/asm-s390/elf.h @@ -96,6 +96,7 @@ * ELF register definitions.. */ +#include /* for task_struct */ #include #include #include /* for save_access_regs */ diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index df94f89038cc..9be741bb1496 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -36,6 +36,7 @@ #include struct vm_area_struct; /* forward declaration (include/linux/mm.h) */ +struct mm_struct; extern pgd_t swapper_pg_dir[] __attribute__ ((aligned (4096))); extern void paging_init(void); diff --git a/include/asm-sh/elf.h b/include/asm-sh/elf.h index 8fe00a1981ce..1b63dfeea4f2 100644 --- a/include/asm-sh/elf.h +++ b/include/asm-sh/elf.h @@ -111,6 +111,7 @@ typedef struct user_fpu_struct elf_fpregset_t; #ifdef __KERNEL__ #define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT) +struct task_struct; extern int dump_task_regs (struct task_struct *, elf_gregset_t *); extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h index dee36bcbcf98..bb0efb31a8cb 100644 --- a/include/asm-sh/pgtable.h +++ b/include/asm-sh/pgtable.h @@ -284,6 +284,8 @@ typedef pte_t *pte_addr_t; #define GET_IOSPACE(pfn) 0 #define GET_PFN(pfn) (pfn) +struct mm_struct; + /* * No page table caches to initialise */ diff --git a/include/asm-sh64/pgtable.h b/include/asm-sh64/pgtable.h index 51b05818e4eb..a1906a772df9 100644 --- a/include/asm-sh64/pgtable.h +++ b/include/asm-sh64/pgtable.h @@ -24,6 +24,8 @@ #include #include +struct vm_area_struct; + extern void paging_init(void); /* We provide our own get_unmapped_area to avoid cache synonym issue */ diff --git a/include/asm-x86_64/elf.h b/include/asm-x86_64/elf.h index a60a35e79222..43862cd6a569 100644 --- a/include/asm-x86_64/elf.h +++ b/include/asm-x86_64/elf.h @@ -149,6 +149,8 @@ extern void set_personality_64bit(void); */ #define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) +struct task_struct; + extern int dump_task_regs (struct task_struct *, elf_gregset_t *); extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 7a07196a7202..7309fffeec9a 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -105,6 +105,8 @@ static inline void pgd_clear (pgd_t * pgd) #define ptep_get_and_clear(mm,addr,xp) __pte(xchg(&(xp)->pte, 0)) +struct mm_struct; + static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full) { pte_t pte; diff --git a/include/asm-xtensa/elf.h b/include/asm-xtensa/elf.h index 64f1f53874fe..de0667453b2e 100644 --- a/include/asm-xtensa/elf.h +++ b/include/asm-xtensa/elf.h @@ -209,6 +209,8 @@ extern void xtensa_elf_core_copy_regs (xtensa_gregset_t *, struct pt_regs *); #define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX_32BIT) +struct task_struct; + extern void do_copy_regs (xtensa_gregset_t*, struct pt_regs*, struct task_struct*); extern void do_restore_regs (xtensa_gregset_t*, struct pt_regs*, diff --git a/include/asm-xtensa/pgtable.h b/include/asm-xtensa/pgtable.h index 987e3b802313..7b15afb70c56 100644 --- a/include/asm-xtensa/pgtable.h +++ b/include/asm-xtensa/pgtable.h @@ -278,6 +278,8 @@ static inline void update_pte(pte_t *ptep, pte_t pteval) #endif } +struct mm_struct; + static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { @@ -294,6 +296,7 @@ set_pmd(pmd_t *pmdp, pmd_t pmdval) #endif } +struct vm_area_struct; static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, diff --git a/include/linux/irq.h b/include/linux/irq.h index 69681c3b1f05..c516382fbec2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -10,6 +10,7 @@ */ #include +#include /* cpu_online_map */ #if !defined(CONFIG_ARCH_S390) diff --git a/include/linux/memory.h b/include/linux/memory.h index 0def328ab5cf..9a424383e6c6 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -54,6 +54,9 @@ struct memory_block { */ #define MEM_MAPPING_INVALID (1<<3) +struct notifier_block; +struct mem_section; + #ifndef CONFIG_MEMORY_HOTPLUG static inline int memory_dev_init(void) { diff --git a/include/linux/sem.h b/include/linux/sem.h index 106f9757339a..3c1f1120fe88 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -79,6 +79,8 @@ struct seminfo { #ifdef __KERNEL__ +struct task_struct; + /* One semaphore structure for each semaphore in the system. */ struct sem { int semval; /* current value */ diff --git a/include/linux/wait.h b/include/linux/wait.h index d38c9fecdc36..d28518236b62 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -54,6 +54,7 @@ struct __wait_queue_head { }; typedef struct __wait_queue_head wait_queue_head_t; +struct task_struct; /* * Macros for declaration and initialisaton of the datatypes diff --git a/kernel/module.c b/kernel/module.c index ff5c500ab625..2ea929d51ad0 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include -- cgit From 481bed454247538e9f57d4ea37b153ccba24ba7b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Nov 2005 00:59:47 -0800 Subject: [PATCH] consolidate sys_ptrace() The sys_ptrace boilerplate code (everything outside the big switch statement for the arch-specific requests) is shared by most architectures. This patch moves it to kernel/ptrace.c and leaves the arch-specific code as arch_ptrace. Some architectures have a too different ptrace so we have to exclude them. They continue to keep their implementations. For sh64 I had to add a sh64_ptrace wrapper because it does some initialization on the first call. For um I removed an ifdefed SUBARCH_PTRACE_SPECIAL block, but SUBARCH_PTRACE_SPECIAL isn't defined anywhere in the tree. Signed-off-by: Christoph Hellwig Acked-by: Paul Mackerras Acked-by: Ralf Baechle Acked-By: David Howells Acked-by: Russell King Acked-by: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/kernel/ptrace.c | 49 +--------------------- arch/arm26/kernel/ptrace.c | 49 +--------------------- arch/cris/arch-v10/kernel/ptrace.c | 51 +---------------------- arch/cris/arch-v32/kernel/ptrace.c | 51 +---------------------- arch/frv/kernel/ptrace.c | 43 +------------------- arch/h8300/kernel/ptrace.c | 39 +----------------- arch/i386/kernel/ptrace.c | 44 +------------------- arch/m68k/kernel/ptrace.c | 47 ++------------------- arch/m68knommu/kernel/ptrace.c | 39 +----------------- arch/mips/kernel/ptrace.c | 55 +++---------------------- arch/parisc/kernel/ptrace.c | 50 ++--------------------- arch/powerpc/kernel/ptrace.c | 43 +------------------- arch/sh/kernel/ptrace.c | 44 +------------------- arch/sh64/kernel/ptrace.c | 83 +++++++++++++------------------------- arch/sh64/kernel/syscalls.S | 2 +- arch/um/kernel/ptrace.c | 50 +---------------------- arch/v850/kernel/ptrace.c | 43 +------------------- arch/x86_64/kernel/ptrace.c | 43 +------------------- arch/xtensa/kernel/ptrace.c | 55 +------------------------ include/asm-alpha/ptrace.h | 3 ++ include/asm-ia64/ptrace.h | 3 ++ include/asm-m32r/ptrace.h | 3 ++ include/asm-s390/ptrace.h | 2 + include/asm-sparc/ptrace.h | 3 ++ include/asm-sparc64/ptrace.h | 3 ++ include/linux/ptrace.h | 2 + kernel/ptrace.c | 82 +++++++++++++++++++++++++++++++++++++ 27 files changed, 163 insertions(+), 818 deletions(-) (limited to 'kernel') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 9bd8609a2926..9a340e790da5 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -648,7 +648,7 @@ static int ptrace_setwmmxregs(struct task_struct *tsk, void __user *ufp) #endif -static int do_ptrace(int request, struct task_struct *child, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { unsigned long tmp; int ret; @@ -782,53 +782,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat return ret; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) -{ - struct task_struct *child; - int ret; - - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret == 0) - ret = do_ptrace(request, child, addr, data); - -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); - return ret; -} - asmlinkage void syscall_trace(int why, struct pt_regs *regs) { unsigned long ip; diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c index cf7e977d18c8..4e6b7356a722 100644 --- a/arch/arm26/kernel/ptrace.c +++ b/arch/arm26/kernel/ptrace.c @@ -546,7 +546,7 @@ static int ptrace_setfpregs(struct task_struct *tsk, void *ufp) sizeof(struct user_fp)) ? -EFAULT : 0; } -static int do_ptrace(int request, struct task_struct *child, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { unsigned long tmp; int ret; @@ -665,53 +665,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat return ret; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) -{ - struct task_struct *child; - int ret; - - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret == 0) - ret = do_ptrace(request, child, addr, data); - -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); - return ret; -} - asmlinkage void syscall_trace(int why, struct pt_regs *regs) { unsigned long ip; diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index 130dd214e41d..6cbd34a27b90 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -76,55 +76,11 @@ ptrace_disable(struct task_struct *child) * (in user space) where the result of the ptrace call is written (instead of * being returned). */ -asmlinkage int -sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int ret; unsigned long __user *datap = (unsigned long __user *)data; - lock_kernel(); - ret = -EPERM; - - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - - if (child) - get_task_struct(child); - - read_unlock(&tasklist_lock); - - if (!child) - goto out; - - ret = -EPERM; - - if (pid == 1) /* Leave the init process alone! */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* Read word at location address. */ case PTRACE_PEEKTEXT: @@ -289,10 +245,7 @@ sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + return ret; } diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index 208489da2a87..5528b83a622b 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -99,55 +99,11 @@ ptrace_disable(struct task_struct *child) } -asmlinkage int -sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int ret; unsigned long __user *datap = (unsigned long __user *)data; - lock_kernel(); - ret = -EPERM; - - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - - if (child) - get_task_struct(child); - - read_unlock(&tasklist_lock); - - if (!child) - goto out; - - ret = -EPERM; - - if (pid == 1) /* Leave the init process alone! */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* Read word at location address. */ case PTRACE_PEEKTEXT: @@ -347,10 +303,7 @@ sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + return ret; } diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index cb335a14a315..f953484e7d59 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -106,48 +106,11 @@ void ptrace_enable(struct task_struct *child) child->thread.frame0->__status |= REG__STATUS_STEP; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; unsigned long tmp; int ret; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -351,10 +314,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = -EIO; break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); return ret; } diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index a569fe4aa284..0ff6f79b0fed 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -57,43 +57,10 @@ void ptrace_disable(struct task_struct *child) h8300_disable_trace(child); } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int ret; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: { @@ -251,10 +218,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = -EIO; break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); return ret; } diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index efd11f09c996..5ffbb4b7ad05 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -354,49 +354,12 @@ ptrace_set_thread_area(struct task_struct *child, return 0; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; struct user * dummy = NULL; int i, ret; unsigned long __user *datap = (unsigned long __user *)data; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -663,10 +626,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + out_tsk: return ret; } diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index f7f1d2e5b90b..7e54422685cf 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -121,48 +121,11 @@ void ptrace_disable(struct task_struct *child) child->thread.work.syscall_trace = 0; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; unsigned long tmp; int i, ret = 0; - lock_kernel(); - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) { - ret = -EPERM; - goto out; - } - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - goto out; - } - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (unlikely(!child)) { - ret = -ESRCH; - goto out; - } - - /* you may not mess with init */ - if (unlikely(pid == 1)) { - ret = -EPERM; - goto out_tsk; - } - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -317,14 +280,10 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + return ret; out_eio: - ret = -EIO; - goto out_tsk; + return -EIO; } asmlinkage void syscall_trace(void) diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c index 621d7b91ccfe..262ab8c72e5f 100644 --- a/arch/m68knommu/kernel/ptrace.c +++ b/arch/m68knommu/kernel/ptrace.c @@ -101,43 +101,10 @@ void ptrace_disable(struct task_struct *child) put_reg(child, PT_SR, tmp); } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(truct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int ret; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -357,10 +324,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = -EIO; break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); return ret; } diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index f1b0f3e1f95b..510da5fda567 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -174,51 +174,10 @@ int ptrace_setfpregs (struct task_struct *child, __u32 __user *data) return 0; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int ret; -#if 0 - printk("ptrace(r=%d,pid=%d,addr=%08lx,data=%08lx)\n", - (int) request, (int) pid, (unsigned long) addr, - (unsigned long) data); -#endif - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - if ((ret = security_ptrace(current->parent, current))) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -319,7 +278,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) if (!cpu_has_dsp) { tmp = 0; ret = -EIO; - goto out_tsk; + goto out; } if (child->thread.dsp.used_dsp) { dregs = __get_dsp_regs(child); @@ -333,14 +292,14 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) if (!cpu_has_dsp) { tmp = 0; ret = -EIO; - goto out_tsk; + goto out; } tmp = child->thread.dsp.dspcontrol; break; default: tmp = 0; ret = -EIO; - goto out_tsk; + goto out; } ret = put_user(tmp, (unsigned long __user *) data); break; @@ -495,11 +454,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } - -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + out: return ret; } diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 18130c3748f3..b6fe202a620d 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -78,52 +78,13 @@ void ptrace_disable(struct task_struct *child) pa_psw(child)->l = 0; } -long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; long ret; #ifdef DEBUG_PTRACE long oaddr=addr, odata=data; #endif - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* no messing around with init! */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: { @@ -383,11 +344,11 @@ long sys_ptrace(long request, long pid, long addr, long data) case PTRACE_GETEVENTMSG: ret = put_user(child->ptrace_message, (unsigned int __user *) data); - goto out_tsk; + goto out; default: ret = ptrace_request(child, request, addr, data); - goto out_tsk; + goto out; } out_wake_notrap: @@ -396,10 +357,7 @@ out_wake: wake_up_process(child); ret = 0; out_tsk: - put_task_struct(child); -out: - unlock_kernel(); - DBG("sys_ptrace(%ld, %d, %lx, %lx) returning %ld\n", + DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n", request, pid, oaddr, odata, ret); return ret; } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 568ea335d616..3d2abd95c7ae 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -248,46 +248,10 @@ void ptrace_disable(struct task_struct *child) clear_single_step(child); } -long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int ret = -EPERM; - lock_kernel(); - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -540,10 +504,7 @@ long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + return ret; } diff --git a/arch/sh/kernel/ptrace.c b/arch/sh/kernel/ptrace.c index 1fbe5a428e31..1a8be06519ec 100644 --- a/arch/sh/kernel/ptrace.c +++ b/arch/sh/kernel/ptrace.c @@ -80,48 +80,11 @@ void ptrace_disable(struct task_struct *child) /* nothing to do.. */ } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; struct user * dummy = NULL; int ret; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -289,10 +252,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + return ret; } diff --git a/arch/sh64/kernel/ptrace.c b/arch/sh64/kernel/ptrace.c index 71f2eec00b99..cd22e9471316 100644 --- a/arch/sh64/kernel/ptrace.c +++ b/arch/sh64/kernel/ptrace.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -121,61 +122,11 @@ put_fpu_long(struct task_struct *task, unsigned long addr, unsigned long data) return 0; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) + +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; - extern void poke_real_address_q(unsigned long long addr, unsigned long long data); -#define WPC_DBRMODE 0x0d104008 - static int first_call = 1; int ret; - lock_kernel(); - - if (first_call) { - /* Set WPC.DBRMODE to 0. This makes all debug events get - * delivered through RESVEC, i.e. into the handlers in entry.S. - * (If the kernel was downloaded using a remote gdb, WPC.DBRMODE - * would normally be left set to 1, which makes debug events get - * delivered through DBRVEC, i.e. into the remote gdb's - * handlers. This prevents ptrace getting them, and confuses - * the remote gdb.) */ - printk("DBRMODE set to 0 to permit native debugging\n"); - poke_real_address_q(WPC_DBRMODE, 0); - first_call = 0; - } - - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -313,13 +264,33 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); return ret; } +asmlinkage int sh64_ptrace(long request, long pid, long addr, long data) +{ + extern void poke_real_address_q(unsigned long long addr, unsigned long long data); +#define WPC_DBRMODE 0x0d104008 + static int first_call = 1; + + lock_kernel(); + if (first_call) { + /* Set WPC.DBRMODE to 0. This makes all debug events get + * delivered through RESVEC, i.e. into the handlers in entry.S. + * (If the kernel was downloaded using a remote gdb, WPC.DBRMODE + * would normally be left set to 1, which makes debug events get + * delivered through DBRVEC, i.e. into the remote gdb's + * handlers. This prevents ptrace getting them, and confuses + * the remote gdb.) */ + printk("DBRMODE set to 0 to permit native debugging\n"); + poke_real_address_q(WPC_DBRMODE, 0); + first_call = 0; + } + unlock_kernel(); + + return sys_ptrace(request, pid, addr, data); +} + asmlinkage void syscall_trace(void) { struct task_struct *tsk = current; diff --git a/arch/sh64/kernel/syscalls.S b/arch/sh64/kernel/syscalls.S index a3d037805f1c..c0079d54c850 100644 --- a/arch/sh64/kernel/syscalls.S +++ b/arch/sh64/kernel/syscalls.S @@ -46,7 +46,7 @@ sys_call_table: .long sys_setuid16 .long sys_getuid16 .long sys_stime /* 25 */ - .long sys_ptrace + .long sh64_ptrace .long sys_alarm .long sys_fstat .long sys_pause diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index 71af4d503899..98e09395c093 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -43,53 +43,10 @@ void ptrace_disable(struct task_struct *child) extern int peek_user(struct task_struct * child, long addr, long data); extern int poke_user(struct task_struct * child, long addr, long data); -long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int i, ret; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - -#ifdef SUBACH_PTRACE_SPECIAL - SUBARCH_PTRACE_SPECIAL(child,request,addr,data); -#endif - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -282,10 +239,7 @@ long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } - out_tsk: - put_task_struct(child); - out: - unlock_kernel(); + return ret; } diff --git a/arch/v850/kernel/ptrace.c b/arch/v850/kernel/ptrace.c index d6077ff47d22..18492d02aaf6 100644 --- a/arch/v850/kernel/ptrace.c +++ b/arch/v850/kernel/ptrace.c @@ -113,45 +113,10 @@ static int set_single_step (struct task_struct *t, int val) return 1; } -long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int rval; - lock_kernel(); - - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) { - rval = -EPERM; - goto out; - } - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - rval = 0; - goto out; - } - rval = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - rval = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - rval = ptrace_attach(child); - goto out_tsk; - } - rval = ptrace_check_attach(child, request == PTRACE_KILL); - if (rval < 0) - goto out_tsk; - switch (request) { unsigned long val, copied; @@ -248,11 +213,7 @@ long sys_ptrace(long request, long pid, long addr, long data) rval = -EIO; goto out; } - -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + out: return rval; } diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index bbf64b59a21e..a87b6cebe80f 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -313,48 +313,11 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno) } -asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; long i, ret; unsigned ui; - /* This lock_kernel fixes a subtle race with suid exec */ - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -608,10 +571,6 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); return ret; } diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 14460743de07..ab5c4c65b5c4 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -45,58 +45,10 @@ void ptrace_disable(struct task_struct *child) /* Nothing to do.. */ } -long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; int ret = -EPERM; - lock_kernel(); - -#if 0 - if ((int)request != 1) - printk("ptrace(r=%d,pid=%d,addr=%08lx,data=%08lx)\n", - (int) request, (int) pid, (unsigned long) addr, - (unsigned long) data); -#endif - - if (request == PTRACE_TRACEME) { - - /* Are we already being traced? */ - - if (current->ptrace & PT_PTRACED) - goto out; - - if ((ret = security_ptrace(current->parent, current))) - goto out; - - /* Set the ptrace bit in the process flags. */ - - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - if ((ret = ptrace_check_attach(child, request == PTRACE_KILL)) < 0) - goto out_tsk; - switch (request) { case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: @@ -375,10 +327,7 @@ long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); goto out; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + out: return ret; } diff --git a/include/asm-alpha/ptrace.h b/include/asm-alpha/ptrace.h index d462c5e14c13..072375c135b4 100644 --- a/include/asm-alpha/ptrace.h +++ b/include/asm-alpha/ptrace.h @@ -67,6 +67,9 @@ struct switch_stack { }; #ifdef __KERNEL__ + +#define __ARCH_SYS_PTRACE 1 + #define user_mode(regs) (((regs)->ps & 8) != 0) #define instruction_pointer(regs) ((regs)->pc) #define profile_pc(regs) instruction_pointer(regs) diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h index a79d1a7ecc77..2c703d6e0c86 100644 --- a/include/asm-ia64/ptrace.h +++ b/include/asm-ia64/ptrace.h @@ -229,6 +229,9 @@ struct switch_stack { }; #ifdef __KERNEL__ + +#define __ARCH_SYS_PTRACE 1 + /* * We use the ia64_psr(regs)->ri to determine which of the three * instructions in bundle (16 bytes) took the sample. Generate diff --git a/include/asm-m32r/ptrace.h b/include/asm-m32r/ptrace.h index 976417126b2d..55cd7ecfde43 100644 --- a/include/asm-m32r/ptrace.h +++ b/include/asm-m32r/ptrace.h @@ -145,6 +145,9 @@ struct pt_regs { #define PTRACE_O_TRACESYSGOOD 0x00000001 #ifdef __KERNEL__ + +#define __ARCH_SYS_PTRACE 1 + #if defined(CONFIG_ISA_M32R2) || defined(CONFIG_CHIP_VDEC2) #define user_mode(regs) ((M32R_PSW_BPM & (regs)->psw) != 0) #elif defined(CONFIG_ISA_M32R) diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h index fc7c96edc697..a949cc077cc7 100644 --- a/include/asm-s390/ptrace.h +++ b/include/asm-s390/ptrace.h @@ -468,6 +468,8 @@ struct user_regs_struct }; #ifdef __KERNEL__ +#define __ARCH_SYS_PTRACE 1 + #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0) #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN) #define profile_pc(regs) instruction_pointer(regs) diff --git a/include/asm-sparc/ptrace.h b/include/asm-sparc/ptrace.h index a8ecb2d6977a..714497099a42 100644 --- a/include/asm-sparc/ptrace.h +++ b/include/asm-sparc/ptrace.h @@ -60,6 +60,9 @@ struct sparc_stackf { #define STACKFRAME_SZ sizeof(struct sparc_stackf) #ifdef __KERNEL__ + +#define __ARCH_SYS_PTRACE 1 + #define user_mode(regs) (!((regs)->psr & PSR_PS)) #define instruction_pointer(regs) ((regs)->pc) unsigned long profile_pc(struct pt_regs *); diff --git a/include/asm-sparc64/ptrace.h b/include/asm-sparc64/ptrace.h index 6194f771e9fc..7eba90c6c753 100644 --- a/include/asm-sparc64/ptrace.h +++ b/include/asm-sparc64/ptrace.h @@ -94,6 +94,9 @@ struct sparc_trapf { #define STACKFRAME32_SZ sizeof(struct sparc_stackf32) #ifdef __KERNEL__ + +#define __ARCH_SYS_PTRACE 1 + #define force_successful_syscall_return() \ do { current_thread_info()->syscall_noerror = 1; \ } while (0) diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index dc6f3647bfbc..b2b3dba1298d 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -78,6 +78,8 @@ #include /* For unlikely. */ #include /* For struct task_struct. */ + +extern long arch_ptrace(struct task_struct *child, long request, long addr, long data); extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len); extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len); extern int ptrace_attach(struct task_struct *tsk); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 863eee8bff47..5b8dd98a230e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -406,3 +406,85 @@ int ptrace_request(struct task_struct *child, long request, return ret; } + +#ifndef __ARCH_SYS_PTRACE +static int ptrace_get_task_struct(long request, long pid, + struct task_struct **childp) +{ + struct task_struct *child; + int ret; + + /* + * Callers use child == NULL as an indication to exit early even + * when the return value is 0, so make sure it is non-NULL here. + */ + *childp = NULL; + + if (request == PTRACE_TRACEME) { + /* + * Are we already being traced? + */ + if (current->ptrace & PT_PTRACED) + return -EPERM; + ret = security_ptrace(current->parent, current); + if (ret) + return -EPERM; + /* + * Set the ptrace bit in the process ptrace flags. + */ + current->ptrace |= PT_PTRACED; + return 0; + } + + /* + * You may not mess with init + */ + if (pid == 1) + return -EPERM; + + ret = -ESRCH; + read_lock(&tasklist_lock); + child = find_task_by_pid(pid); + if (child) + get_task_struct(child); + read_unlock(&tasklist_lock); + if (!child) + return -ESRCH; + + *childp = child; + return 0; +} + +asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +{ + struct task_struct *child; + long ret; + + /* + * This lock_kernel fixes a subtle race with suid exec + */ + lock_kernel(); + ret = ptrace_get_task_struct(request, pid, &child); + if (!child) + goto out; + + if (request == PTRACE_ATTACH) { + ret = ptrace_attach(child); + goto out; + } + + ret = ptrace_check_attach(child, request == PTRACE_KILL); + if (ret < 0) + goto out_put_task_struct; + + ret = arch_ptrace(child, request, addr, data); + if (ret < 0) + goto out_put_task_struct; + + out_put_task_struct: + put_task_struct(child); + out: + unlock_kernel(); + return ret; +} +#endif /* __ARCH_SYS_PTRACE */ -- cgit From e65845235c8120be63001fc1a4ac00c819194bbe Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 7 Nov 2005 01:00:07 -0800 Subject: [PATCH] Kprobes: Track kprobe on a per_cpu basis - base changes Changes to the base kprobe infrastructure to track kprobe execution on a per-cpu basis. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 31 ++++++++++++++++++++++--------- kernel/kprobes.c | 43 ++++++++++++++++++++++++++++--------------- 2 files changed, 50 insertions(+), 24 deletions(-) (limited to 'kernel') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e30afdca7917..6720305a31e8 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -33,6 +33,7 @@ #include #include #include +#include #include @@ -106,6 +107,9 @@ struct jprobe { kprobe_opcode_t *entry; /* probe handling code to jump to */ }; +DECLARE_PER_CPU(struct kprobe *, current_kprobe); +DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + #ifdef ARCH_SUPPORTS_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs); #else /* ARCH_SUPPORTS_KRETPROBES */ @@ -146,13 +150,6 @@ struct kretprobe_instance { void lock_kprobes(void); void unlock_kprobes(void); -/* kprobe running now on this CPU? */ -static inline int kprobe_running(void) -{ - extern unsigned int kprobe_cpu; - return kprobe_cpu == smp_processor_id(); -} - extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_copy_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); @@ -167,6 +164,22 @@ extern void free_insn_slot(kprobe_opcode_t *slot); struct kprobe *get_kprobe(void *addr); struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); +/* kprobe_running() will just return the current_kprobe on this CPU */ +static inline struct kprobe *kprobe_running(void) +{ + return (__get_cpu_var(current_kprobe)); +} + +static inline void reset_current_kprobe(void) +{ + __get_cpu_var(current_kprobe) = NULL; +} + +static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) +{ + return (&__get_cpu_var(kprobe_ctlblk)); +} + int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int setjmp_pre_handler(struct kprobe *, struct pt_regs *); @@ -183,9 +196,9 @@ void add_rp_inst(struct kretprobe_instance *ri); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri); #else /* CONFIG_KPROBES */ -static inline int kprobe_running(void) +static inline struct kprobe *kprobe_running(void) { - return 0; + return NULL; } static inline int register_kprobe(struct kprobe *p) { diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ce4915dd683a..6da8f9b33d1e 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -51,7 +51,7 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; unsigned int kprobe_cpu = NR_CPUS; static DEFINE_SPINLOCK(kprobe_lock); -static struct kprobe *curr_kprobe; +static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; /* * kprobe->ainsn.insn points to the copy of the instruction to be @@ -188,6 +188,17 @@ void __kprobes unlock_kprobes(void) local_irq_restore(flags); } +/* We have preemption disabled.. so it is safe to use __ versions */ +static inline void set_kprobe_instance(struct kprobe *kp) +{ + __get_cpu_var(kprobe_instance) = kp; +} + +static inline void reset_kprobe_instance(void) +{ + __get_cpu_var(kprobe_instance) = NULL; +} + /* You have to be holding the kprobe_lock */ struct kprobe __kprobes *get_kprobe(void *addr) { @@ -213,11 +224,11 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) list_for_each_entry(kp, &p->list, list) { if (kp->pre_handler) { - curr_kprobe = kp; + set_kprobe_instance(kp); if (kp->pre_handler(kp, regs)) return 1; } - curr_kprobe = NULL; + reset_kprobe_instance(); } return 0; } @@ -229,9 +240,9 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, list_for_each_entry(kp, &p->list, list) { if (kp->post_handler) { - curr_kprobe = kp; + set_kprobe_instance(kp); kp->post_handler(kp, regs, flags); - curr_kprobe = NULL; + reset_kprobe_instance(); } } return; @@ -240,12 +251,14 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr) { + struct kprobe *cur = __get_cpu_var(kprobe_instance); + /* * if we faulted "during" the execution of a user specified * probe handler, invoke just that probe's fault handler */ - if (curr_kprobe && curr_kprobe->fault_handler) { - if (curr_kprobe->fault_handler(curr_kprobe, regs, trapnr)) + if (cur && cur->fault_handler) { + if (cur->fault_handler(cur, regs, trapnr)) return 1; } return 0; @@ -253,15 +266,15 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) { - struct kprobe *kp = curr_kprobe; - if (curr_kprobe && kp->break_handler) { - if (kp->break_handler(kp, regs)) { - curr_kprobe = NULL; - return 1; - } + struct kprobe *cur = __get_cpu_var(kprobe_instance); + int ret = 0; + + if (cur && cur->break_handler) { + if (cur->break_handler(cur, regs)) + ret = 1; } - curr_kprobe = NULL; - return 0; + reset_kprobe_instance(); + return ret; } struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) -- cgit From 3516a46042508a495fac13c2e73530d936ebe015 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 7 Nov 2005 01:00:13 -0800 Subject: [PATCH] Kprobes: Use RCU for (un)register synchronization - base changes Changes to the base kprobes infrastructure to use RCU for synchronization during kprobe registration and unregistration. These changes coupled with the arch kprobe changes (next in series): a. serialize registration and unregistration of kprobes. b. enable lockless execution of handlers. Handlers can now run in parallel. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kprobes.h | 9 ++--- kernel/kprobes.c | 103 ++++++++++++++++++++---------------------------- 2 files changed, 46 insertions(+), 66 deletions(-) (limited to 'kernel') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 6720305a31e8..cff281cf70cf 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -34,6 +34,8 @@ #include #include #include +#include +#include #include @@ -146,10 +148,7 @@ struct kretprobe_instance { }; #ifdef CONFIG_KPROBES -/* Locks kprobe: irq must be disabled */ -void lock_kprobes(void); -void unlock_kprobes(void); - +extern spinlock_t kretprobe_lock; extern int arch_prepare_kprobe(struct kprobe *p); extern void arch_copy_kprobe(struct kprobe *p); extern void arch_arm_kprobe(struct kprobe *p); @@ -160,7 +159,7 @@ extern void show_registers(struct pt_regs *regs); extern kprobe_opcode_t *get_insn_slot(void); extern void free_insn_slot(kprobe_opcode_t *slot); -/* Get the kprobe at this addr (if any). Must have called lock_kprobes */ +/* Get the kprobe at this addr (if any) - called under a rcu_read_lock() */ struct kprobe *get_kprobe(void *addr); struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6da8f9b33d1e..cfef426e4cdc 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -32,7 +32,6 @@ * added function-return probes. */ #include -#include #include #include #include @@ -49,8 +48,8 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; -unsigned int kprobe_cpu = NR_CPUS; -static DEFINE_SPINLOCK(kprobe_lock); +static DEFINE_SPINLOCK(kprobe_lock); /* Protects kprobe_table */ +DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */ static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; /* @@ -153,41 +152,6 @@ void __kprobes free_insn_slot(kprobe_opcode_t *slot) } } -/* Locks kprobe: irqs must be disabled */ -void __kprobes lock_kprobes(void) -{ - unsigned long flags = 0; - - /* Avoiding local interrupts to happen right after we take the kprobe_lock - * and before we get a chance to update kprobe_cpu, this to prevent - * deadlock when we have a kprobe on ISR routine and a kprobe on task - * routine - */ - local_irq_save(flags); - - spin_lock(&kprobe_lock); - kprobe_cpu = smp_processor_id(); - - local_irq_restore(flags); -} - -void __kprobes unlock_kprobes(void) -{ - unsigned long flags = 0; - - /* Avoiding local interrupts to happen right after we update - * kprobe_cpu and before we get a a chance to release kprobe_lock, - * this to prevent deadlock when we have a kprobe on ISR routine and - * a kprobe on task routine - */ - local_irq_save(flags); - - kprobe_cpu = NR_CPUS; - spin_unlock(&kprobe_lock); - - local_irq_restore(flags); -} - /* We have preemption disabled.. so it is safe to use __ versions */ static inline void set_kprobe_instance(struct kprobe *kp) { @@ -199,15 +163,20 @@ static inline void reset_kprobe_instance(void) __get_cpu_var(kprobe_instance) = NULL; } -/* You have to be holding the kprobe_lock */ +/* + * This routine is called either: + * - under the kprobe_lock spinlock - during kprobe_[un]register() + * OR + * - under an rcu_read_lock() - from arch/xxx/kernel/kprobes.c + */ struct kprobe __kprobes *get_kprobe(void *addr) { struct hlist_head *head; struct hlist_node *node; + struct kprobe *p; head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; - hlist_for_each(node, head) { - struct kprobe *p = hlist_entry(node, struct kprobe, hlist); + hlist_for_each_entry_rcu(p, node, head, hlist) { if (p->addr == addr) return p; } @@ -222,7 +191,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp; - list_for_each_entry(kp, &p->list, list) { + list_for_each_entry_rcu(kp, &p->list, list) { if (kp->pre_handler) { set_kprobe_instance(kp); if (kp->pre_handler(kp, regs)) @@ -238,7 +207,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, { struct kprobe *kp; - list_for_each_entry(kp, &p->list, list) { + list_for_each_entry_rcu(kp, &p->list, list) { if (kp->post_handler) { set_kprobe_instance(kp); kp->post_handler(kp, regs, flags); @@ -277,6 +246,7 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) return ret; } +/* Called with kretprobe_lock held */ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) { struct hlist_node *node; @@ -286,6 +256,7 @@ struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) return NULL; } +/* Called with kretprobe_lock held */ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe *rp) { @@ -296,6 +267,7 @@ static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe return NULL; } +/* Called with kretprobe_lock held */ void __kprobes add_rp_inst(struct kretprobe_instance *ri) { /* @@ -314,6 +286,7 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri) hlist_add_head(&ri->uflist, &ri->rp->used_instances); } +/* Called with kretprobe_lock held */ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) { /* remove rp inst off the rprobe_inst_table */ @@ -347,13 +320,13 @@ void __kprobes kprobe_flush_task(struct task_struct *tk) struct hlist_node *node, *tmp; unsigned long flags = 0; - spin_lock_irqsave(&kprobe_lock, flags); + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { if (ri->task == tk) recycle_rp_inst(ri); } - spin_unlock_irqrestore(&kprobe_lock, flags); + spin_unlock_irqrestore(&kretprobe_lock, flags); } /* @@ -364,9 +337,12 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); + unsigned long flags = 0; /*TODO: consider to only swap the RA after the last pre_handler fired */ + spin_lock_irqsave(&kretprobe_lock, flags); arch_prepare_kretprobe(rp, regs); + spin_unlock_irqrestore(&kretprobe_lock, flags); return 0; } @@ -397,13 +373,13 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) struct kprobe *kp; if (p->break_handler) { - list_for_each_entry(kp, &old_p->list, list) { + list_for_each_entry_rcu(kp, &old_p->list, list) { if (kp->break_handler) return -EEXIST; } - list_add_tail(&p->list, &old_p->list); + list_add_tail_rcu(&p->list, &old_p->list); } else - list_add(&p->list, &old_p->list); + list_add_rcu(&p->list, &old_p->list); return 0; } @@ -421,18 +397,18 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) ap->break_handler = aggr_break_handler; INIT_LIST_HEAD(&ap->list); - list_add(&p->list, &ap->list); + list_add_rcu(&p->list, &ap->list); INIT_HLIST_NODE(&ap->hlist); - hlist_del(&p->hlist); - hlist_add_head(&ap->hlist, + hlist_del_rcu(&p->hlist); + hlist_add_head_rcu(&ap->hlist, &kprobe_table[hash_ptr(ap->addr, KPROBE_HASH_BITS)]); } /* * This is the second or subsequent kprobe at the address - handle * the intricacies - * TODO: Move kcalloc outside the spinlock + * TODO: Move kcalloc outside the spin_lock */ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) @@ -458,7 +434,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) { arch_disarm_kprobe(p); - hlist_del(&p->hlist); + hlist_del_rcu(&p->hlist); spin_unlock_irqrestore(&kprobe_lock, flags); arch_remove_kprobe(p); } @@ -466,11 +442,10 @@ static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags) static inline void cleanup_aggr_kprobe(struct kprobe *old_p, struct kprobe *p, unsigned long flags) { - list_del(&p->list); - if (list_empty(&old_p->list)) { + list_del_rcu(&p->list); + if (list_empty(&old_p->list)) cleanup_kprobe(old_p, flags); - kfree(old_p); - } else + else spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -493,9 +468,9 @@ int __kprobes register_kprobe(struct kprobe *p) if ((ret = arch_prepare_kprobe(p)) != 0) goto rm_kprobe; + p->nmissed = 0; spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); - p->nmissed = 0; if (old_p) { ret = register_aggr_kprobe(old_p, p); goto out; @@ -503,7 +478,7 @@ int __kprobes register_kprobe(struct kprobe *p) arch_copy_kprobe(p); INIT_HLIST_NODE(&p->hlist); - hlist_add_head(&p->hlist, + hlist_add_head_rcu(&p->hlist, &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); arch_arm_kprobe(p); @@ -524,10 +499,16 @@ void __kprobes unregister_kprobe(struct kprobe *p) spin_lock_irqsave(&kprobe_lock, flags); old_p = get_kprobe(p->addr); if (old_p) { + /* cleanup_*_kprobe() does the spin_unlock_irqrestore */ if (old_p->pre_handler == aggr_pre_handler) cleanup_aggr_kprobe(old_p, p, flags); else cleanup_kprobe(p, flags); + + synchronize_sched(); + if (old_p->pre_handler == aggr_pre_handler && + list_empty(&old_p->list)) + kfree(old_p); } else spin_unlock_irqrestore(&kprobe_lock, flags); } @@ -604,13 +585,13 @@ void __kprobes unregister_kretprobe(struct kretprobe *rp) unregister_kprobe(&rp->kp); /* No race here */ - spin_lock_irqsave(&kprobe_lock, flags); + spin_lock_irqsave(&kretprobe_lock, flags); free_rp_inst(rp); while ((ri = get_used_rp_inst(rp)) != NULL) { ri->rp = NULL; hlist_del(&ri->uflist); } - spin_unlock_irqrestore(&kprobe_lock, flags); + spin_unlock_irqrestore(&kretprobe_lock, flags); } static int __init init_kprobes(void) -- cgit From d217d5450f11d8c907c0458d175b0dc999b4d06d Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 7 Nov 2005 01:00:14 -0800 Subject: [PATCH] Kprobes: preempt_disable/enable() simplification Reorganize the preempt_disable/enable calls to eliminate the extra preempt depth. Changes based on Paul McKenney's review suggestions for the kprobes RCU changeset. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 25 +++++++++++++++---------- arch/ia64/kernel/kprobes.c | 37 ++++++++++++++++++++----------------- arch/ppc64/kernel/kprobes.c | 25 +++++++++++++++---------- arch/sparc64/kernel/kprobes.c | 21 +++++++++++++-------- arch/x86_64/kernel/kprobes.c | 29 +++++++++++++++-------------- include/linux/kprobes.h | 2 +- kernel/kprobes.c | 2 +- 7 files changed, 80 insertions(+), 61 deletions(-) (limited to 'kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index ad469299267a..32b0c24ab9a6 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -153,7 +153,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) int ret = 0; kprobe_opcode_t *addr = NULL; unsigned long *lp; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Check if the application is using LDT entry for its code segment and * calculate the address by reading the base address from the LDT entry. @@ -221,11 +228,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - /* - * This preempt_disable() matches the preempt_enable_no_resched() - * in post_kprobe_handler() - */ - preempt_disable(); set_current_kprobe(p, regs, kcb); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -239,6 +241,7 @@ ss_probe: return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } @@ -310,8 +313,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) */ return 1; } @@ -455,7 +458,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - rcu_read_lock(); switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) @@ -467,14 +469,16 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, break; case DIE_GPF: case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - rcu_read_unlock(); return ret; } @@ -537,6 +541,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) *regs = kcb->jprobe_saved_regs; memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); + preempt_enable_no_resched(); return 1; } return 0; diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index fddbac32d44a..96736a119c91 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -389,11 +389,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) spin_unlock_irqrestore(&kretprobe_lock, flags); preempt_enable_no_resched(); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption - */ + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ return 1; } @@ -604,7 +604,14 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) int ret = 0; struct pt_regs *regs = args->regs; kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Handle recursion cases */ if (kprobe_running()) { @@ -659,11 +666,6 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) goto no_kprobe; } - /* - * This preempt_disable() matches the preempt_enable_no_resched() - * in post_kprobes_handler() - */ - preempt_disable(); set_current_kprobe(p, kcb); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -681,6 +683,7 @@ ss_probe: return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } @@ -716,9 +719,6 @@ static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (!cur) - return 0; - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; @@ -737,7 +737,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - rcu_read_lock(); switch(val) { case DIE_BREAK: if (pre_kprobes_handler(args)) @@ -748,12 +747,15 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_PAGE_FAULT: - if (kprobes_fault_handler(args->regs, args->trapnr)) + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + if (kprobe_running() && + kprobes_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; + preempt_enable(); default: break; } - rcu_read_unlock(); return ret; } @@ -785,6 +787,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); *regs = kcb->jprobe_saved_regs; + preempt_enable_no_resched(); return 1; } diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index e0a25b35437f..511af54e6230 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -148,7 +148,14 @@ static inline int kprobe_handler(struct pt_regs *regs) struct kprobe *p; int ret = 0; unsigned int *addr = (unsigned int *)regs->nip; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Check we're not actually recursing */ if (kprobe_running()) { @@ -207,11 +214,6 @@ static inline int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - /* - * This preempt_disable() matches the preempt_enable_no_resched() - * in post_kprobe_handler(). - */ - preempt_disable(); kcb->kprobe_status = KPROBE_HIT_ACTIVE; set_current_kprobe(p, regs, kcb); if (p->pre_handler && p->pre_handler(p, regs)) @@ -224,6 +226,7 @@ ss_probe: return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } @@ -296,8 +299,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption. + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) */ return 1; } @@ -385,7 +388,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - rcu_read_lock(); switch (val) { case DIE_BPT: if (kprobe_handler(args->regs)) @@ -396,14 +398,16 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - rcu_read_unlock(); return ret; } @@ -440,6 +444,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) * saved regs... */ memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs)); + preempt_enable_no_resched(); return 1; } diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index 58a815e90373..96bd09b098f4 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -113,7 +113,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) struct kprobe *p; void *addr = (void *) regs->tpc; int ret = 0; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); if (kprobe_running()) { p = get_kprobe(addr); @@ -159,11 +166,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - /* - * This preempt_disable() matches the preempt_enable_no_resched() - * in post_kprobes_handler() - */ - preempt_disable(); set_current_kprobe(p, regs, kcb); kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (p->pre_handler && p->pre_handler(p, regs)) @@ -175,6 +177,7 @@ ss_probe: return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } @@ -321,7 +324,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - rcu_read_lock(); switch (val) { case DIE_DEBUG: if (kprobe_handler(args->regs)) @@ -333,14 +335,16 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, break; case DIE_GPF: case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - rcu_read_unlock(); return ret; } @@ -426,6 +430,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) &(kcb->jprobe_saved_stack), sizeof(kcb->jprobe_saved_stack)); + preempt_enable_no_resched(); return 1; } return 0; diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 9bef2c8dc12c..dddeb678b440 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -286,16 +286,19 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, } } -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled thorough out this function. - */ int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Check we're not actually recursing */ if (kprobe_running()) { @@ -359,11 +362,6 @@ int __kprobes kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - /* - * This preempt_disable() matches the preempt_enable_no_resched() - * in post_kprobe_handler() - */ - preempt_disable(); set_current_kprobe(p, regs, kcb); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -377,6 +375,7 @@ ss_probe: return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } @@ -448,8 +447,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) */ return 1; } @@ -594,7 +593,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - rcu_read_lock(); switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) @@ -606,14 +604,16 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, break; case DIE_GPF: case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - rcu_read_unlock(); return ret; } @@ -675,6 +675,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) *regs = kcb->jprobe_saved_regs; memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); + preempt_enable_no_resched(); return 1; } return 0; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index cff281cf70cf..e373c4a9de53 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -159,7 +159,7 @@ extern void show_registers(struct pt_regs *regs); extern kprobe_opcode_t *get_insn_slot(void); extern void free_insn_slot(kprobe_opcode_t *slot); -/* Get the kprobe at this addr (if any) - called under a rcu_read_lock() */ +/* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index cfef426e4cdc..5beda378cc75 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -167,7 +167,7 @@ static inline void reset_kprobe_instance(void) * This routine is called either: * - under the kprobe_lock spinlock - during kprobe_[un]register() * OR - * - under an rcu_read_lock() - from arch/xxx/kernel/kprobes.c + * - with preemption disabled - from arch/xxx/kernel/kprobes.c */ struct kprobe __kprobes *get_kprobe(void *addr) { -- cgit From 1e5d533142c1c178a31d4cc81837eb078f9269bc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 7 Nov 2005 01:01:06 -0800 Subject: [PATCH] more kernel-doc cleanups, additions Various core kernel-doc cleanups: - add missing function parameters in ipc, irq/manage, kernel/sys, kernel/sysctl, and mm/slab; - move description to just above function for kernel_restart() Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/util.c | 9 +++++---- kernel/irq/manage.c | 1 + kernel/sys.c | 15 +++++++++------ kernel/sysctl.c | 1 + mm/slab.c | 1 + 5 files changed, 17 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/ipc/util.c b/ipc/util.c index 10e836d0d89e..23f1cec150c1 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -410,7 +410,8 @@ void ipc_rcu_getref(void *ptr) } /** - * ipc_schedule_free - free ipc + rcu space + * ipc_schedule_free - free ipc + rcu space + * @head: RCU callback structure for queued work * * Since RCU callback function is called in bh, * we need to defer the vfree to schedule_work @@ -427,10 +428,10 @@ static void ipc_schedule_free(struct rcu_head *head) } /** - * ipc_immediate_free - free ipc + rcu space - * - * Free from the RCU callback context + * ipc_immediate_free - free ipc + rcu space + * @head: RCU callback structure that contains pointer to be freed * + * Free from the RCU callback context */ static void ipc_immediate_free(struct rcu_head *head) { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1cfdb08ddf20..3bd7226d15fa 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -24,6 +24,7 @@ cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) + * @irq: interrupt number to wait for * * This function waits for any pending IRQ handlers for this interrupt * to complete before returning. If you use this function while diff --git a/kernel/sys.c b/kernel/sys.c index 1e1f41b3fdf6..3e332131000e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -376,18 +376,21 @@ void emergency_restart(void) } EXPORT_SYMBOL_GPL(emergency_restart); -/** - * kernel_restart - reboot the system - * - * Shutdown everything and perform a clean reboot. - * This is not safe to call in interrupt context. - */ void kernel_restart_prepare(char *cmd) { notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); } + +/** + * kernel_restart - reboot the system + * @cmd: pointer to buffer containing command to execute for restart + * or NULL + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. + */ void kernel_restart(char *cmd) { kernel_restart_prepare(cmd); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e1351200ce85..c4f35f96884d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1997,6 +1997,7 @@ int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, * @filp: the file structure * @buffer: the user buffer * @lenp: the size of the user buffer + * @ppos: pointer to the file position * * Reads/writes up to table->maxlen/sizeof(unsigned int) integer * values from/to the user buffer, treated as an ASCII string. diff --git a/mm/slab.c b/mm/slab.c index 1db4d7313853..e291f5e1afbb 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3263,6 +3263,7 @@ static void drain_array_locked(kmem_cache_t *cachep, /** * cache_reap - Reclaim memory from caches. + * @unused: unused parameter * * Called from workqueue/eventd every few seconds. * Purpose: -- cgit From b8887e6e8c04bcefb512cdb08fc7e9c310ac847e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 7 Nov 2005 01:01:07 -0800 Subject: [PATCH] kernel-docs: fix kernel-doc format problems Convert to proper kernel-doc format. Some have extra blank lines (not allowed immed. after the function name) or need blank lines (after all parameters). Function summary must be only one line. Colon (":") in a function description does weird things (causes kernel-doc to think that it's a new section head sadly). Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/ll_rw_blk.c | 1 - fs/fs-writeback.c | 5 +++-- include/linux/kernel.h | 1 - kernel/sys.c | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 2747741677fb..5f52e30b43f8 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -706,7 +706,6 @@ EXPORT_SYMBOL(blk_queue_dma_alignment); /** * blk_queue_find_tag - find a request by its tag and queue - * * @q: The request queue for the device * @tag: The tag of the request * diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 1361a4a64157..785c7213a54f 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -606,7 +606,7 @@ EXPORT_SYMBOL(sync_inode); * O_SYNC flag set, to flush dirty writes to disk. * * @what is a bitmask, specifying which part of the inode's data should be - * written and waited upon: + * written and waited upon. * * OSYNC_DATA: i_mapping's dirty data * OSYNC_METADATA: the buffers at i_mapping->private_list @@ -672,8 +672,9 @@ int writeback_acquire(struct backing_dev_info *bdi) /** * writeback_in_progress: determine whether there is writeback in progress - * against a backing device. * @bdi: the device's backing_dev_info structure. + * + * Determine whether there is writeback in progress against a backing device. */ int writeback_in_progress(struct backing_dev_info *bdi) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f1925ccc9fe1..b6419489b27b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -266,7 +266,6 @@ extern void dump_stack(void); /** * container_of - cast a member of a structure out to the containing structure - * * @ptr: the pointer to the member. * @type: the type of the container struct this is embedded in. * @member: the name of the member within the struct. diff --git a/kernel/sys.c b/kernel/sys.c index 3e332131000e..bce933ebb29f 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -386,7 +386,7 @@ void kernel_restart_prepare(char *cmd) /** * kernel_restart - reboot the system * @cmd: pointer to buffer containing command to execute for restart - * or NULL + * or %NULL * * Shutdown everything and perform a clean reboot. * This is not safe to call in interrupt context. -- cgit From 47bdfb96de47d25bea423b5adbfe1c2e1ceaa296 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Nov 2005 01:01:39 -0800 Subject: [PATCH] unexport console_unblank I didn't find any possible modular usage of console_unblank in the kernel. This patch was already ACK'ed by Alan Cox. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel') diff --git a/kernel/printk.c b/kernel/printk.c index 3cb9708209bc..e9be027bc930 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -806,7 +806,6 @@ void console_unblank(void) c->unblank(); release_console_sem(); } -EXPORT_SYMBOL(console_unblank); /* * Return the console tty driver structure and its associated index -- cgit From 4664957b8ec78533f542900cecf7c38fbdc0d8da Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Nov 2005 01:01:42 -0800 Subject: [PATCH] unexport idle_cpu I didn't find any possible modular usage in the kernel. Signed-off-by: Adrian Bunk Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 013f1448006b..3ce26954be12 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3563,8 +3563,6 @@ int idle_cpu(int cpu) return cpu_curr(cpu) == cpu_rq(cpu)->idle; } -EXPORT_SYMBOL_GPL(idle_cpu); - /** * idle_task - return the idle task for a given cpu. * @cpu: the processor in question. -- cgit From b26b9bc58263acda274f82a9dde8b6d96559878a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Nov 2005 01:01:43 -0800 Subject: [PATCH] unexport uts_sem I didn't find any possible modular usage in the kernel. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sys.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sys.c b/kernel/sys.c index bce933ebb29f..c43b3e22bbda 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1497,8 +1497,6 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); -EXPORT_SYMBOL(uts_sem); - asmlinkage long sys_newuname(struct new_utsname __user * name) { int errno = 0; -- cgit