aboutsummaryrefslogtreecommitdiff
path: root/kernel/module.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/module.c')
-rw-r--r--kernel/module.c222
1 files changed, 146 insertions, 76 deletions
diff --git a/kernel/module.c b/kernel/module.c
index c968d3606dca..a8014bfb5a4e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -59,8 +59,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/module.h>
-EXPORT_TRACEPOINT_SYMBOL(module_get);
-
#if 0
#define DEBUGP printk
#else
@@ -79,6 +77,10 @@ EXPORT_TRACEPOINT_SYMBOL(module_get);
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
+#ifdef CONFIG_KGDB_KDB
+struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
+#endif /* CONFIG_KGDB_KDB */
+
/* Block module loading/unloading? */
int modules_disabled = 0;
@@ -370,27 +372,33 @@ EXPORT_SYMBOL_GPL(find_module);
#ifdef CONFIG_SMP
-static void *percpu_modalloc(unsigned long size, unsigned long align,
- const char *name)
+static inline void __percpu *mod_percpu(struct module *mod)
{
- void *ptr;
+ return mod->percpu;
+}
+static int percpu_modalloc(struct module *mod,
+ unsigned long size, unsigned long align)
+{
if (align > PAGE_SIZE) {
printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
- name, align, PAGE_SIZE);
+ mod->name, align, PAGE_SIZE);
align = PAGE_SIZE;
}
- ptr = __alloc_reserved_percpu(size, align);
- if (!ptr)
+ mod->percpu = __alloc_reserved_percpu(size, align);
+ if (!mod->percpu) {
printk(KERN_WARNING
"Could not allocate %lu bytes percpu data\n", size);
- return ptr;
+ return -ENOMEM;
+ }
+ mod->percpu_size = size;
+ return 0;
}
-static void percpu_modfree(void *freeme)
+static void percpu_modfree(struct module *mod)
{
- free_percpu(freeme);
+ free_percpu(mod->percpu);
}
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
@@ -400,24 +408,62 @@ static unsigned int find_pcpusec(Elf_Ehdr *hdr,
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
}
-static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
+static void percpu_modcopy(struct module *mod,
+ const void *from, unsigned long size)
{
int cpu;
for_each_possible_cpu(cpu)
- memcpy(pcpudest + per_cpu_offset(cpu), from, size);
+ memcpy(per_cpu_ptr(mod->percpu, cpu), from, size);
+}
+
+/**
+ * is_module_percpu_address - test whether address is from module static percpu
+ * @addr: address to test
+ *
+ * Test whether @addr belongs to module static percpu area.
+ *
+ * RETURNS:
+ * %true if @addr is from module static percpu area
+ */
+bool is_module_percpu_address(unsigned long addr)
+{
+ struct module *mod;
+ unsigned int cpu;
+
+ preempt_disable();
+
+ list_for_each_entry_rcu(mod, &modules, list) {
+ if (!mod->percpu_size)
+ continue;
+ for_each_possible_cpu(cpu) {
+ void *start = per_cpu_ptr(mod->percpu, cpu);
+
+ if ((void *)addr >= start &&
+ (void *)addr < start + mod->percpu_size) {
+ preempt_enable();
+ return true;
+ }
+ }
+ }
+
+ preempt_enable();
+ return false;
}
#else /* ... !CONFIG_SMP */
-static inline void *percpu_modalloc(unsigned long size, unsigned long align,
- const char *name)
+static inline void __percpu *mod_percpu(struct module *mod)
{
return NULL;
}
-static inline void percpu_modfree(void *pcpuptr)
+static inline int percpu_modalloc(struct module *mod,
+ unsigned long size, unsigned long align)
+{
+ return -ENOMEM;
+}
+static inline void percpu_modfree(struct module *mod)
{
- BUG();
}
static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
@@ -425,12 +471,16 @@ static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
{
return 0;
}
-static inline void percpu_modcopy(void *pcpudst, const void *src,
- unsigned long size)
+static inline void percpu_modcopy(struct module *mod,
+ const void *from, unsigned long size)
{
/* pcpusec should be 0, and size of that section should be 0. */
BUG_ON(size != 0);
}
+bool is_module_percpu_address(unsigned long addr)
+{
+ return false;
+}
#endif /* CONFIG_SMP */
@@ -467,17 +517,22 @@ MODINFO_ATTR(srcversion);
static char last_unloaded_module[MODULE_NAME_LEN+1];
#ifdef CONFIG_MODULE_UNLOAD
+
+EXPORT_TRACEPOINT_SYMBOL(module_get);
+
/* Init the unload section of the module. */
static void module_unload_init(struct module *mod)
{
int cpu;
INIT_LIST_HEAD(&mod->modules_which_use_me);
- for_each_possible_cpu(cpu)
- per_cpu_ptr(mod->refptr, cpu)->count = 0;
+ for_each_possible_cpu(cpu) {
+ per_cpu_ptr(mod->refptr, cpu)->incs = 0;
+ per_cpu_ptr(mod->refptr, cpu)->decs = 0;
+ }
/* Hold reference count during initialization. */
- __this_cpu_write(mod->refptr->count, 1);
+ __this_cpu_write(mod->refptr->incs, 1);
/* Backwards compatibility macros put refcount during init. */
mod->waiter = current;
}
@@ -510,33 +565,26 @@ int use_module(struct module *a, struct module *b)
struct module_use *use;
int no_warn, err;
- if (b == NULL || already_uses(a, b)) return 1;
-
- /* If we're interrupted or time out, we fail. */
- if (wait_event_interruptible_timeout(
- module_wq, (err = strong_try_module_get(b)) != -EBUSY,
- 30 * HZ) <= 0) {
- printk("%s: gave up waiting for init of module %s.\n",
- a->name, b->name);
+ if (b == NULL || already_uses(a, b))
return 0;
- }
- /* If strong_try_module_get() returned a different error, we fail. */
+ /* If we're interrupted or time out, we fail. */
+ err = strong_try_module_get(b);
if (err)
- return 0;
+ return err;
DEBUGP("Allocating new usage for %s.\n", a->name);
use = kmalloc(sizeof(*use), GFP_ATOMIC);
if (!use) {
printk("%s: out of memory loading\n", a->name);
module_put(b);
- return 0;
+ return -ENOMEM;
}
use->module_which_uses = a;
list_add(&use->list, &b->modules_which_use_me);
no_warn = sysfs_create_link(b->holders_dir, &a->mkobj.kobj, a->name);
- return 1;
+ return 0;
}
EXPORT_SYMBOL_GPL(use_module);
@@ -616,12 +664,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
unsigned int module_refcount(struct module *mod)
{
- unsigned int total = 0;
+ unsigned int incs = 0, decs = 0;
int cpu;
for_each_possible_cpu(cpu)
- total += per_cpu_ptr(mod->refptr, cpu)->count;
- return total;
+ decs += per_cpu_ptr(mod->refptr, cpu)->decs;
+ /*
+ * ensure the incs are added up after the decs.
+ * module_put ensures incs are visible before decs with smp_wmb.
+ *
+ * This 2-count scheme avoids the situation where the refcount
+ * for CPU0 is read, then CPU0 increments the module refcount,
+ * then CPU1 drops that refcount, then the refcount for CPU1 is
+ * read. We would record a decrement but not its corresponding
+ * increment so we would see a low count (disaster).
+ *
+ * Rare situation? But module_refcount can be preempted, and we
+ * might be tallying up 4096+ CPUs. So it is not impossible.
+ */
+ smp_rmb();
+ for_each_possible_cpu(cpu)
+ incs += per_cpu_ptr(mod->refptr, cpu)->incs;
+ return incs - decs;
}
EXPORT_SYMBOL(module_refcount);
@@ -657,16 +721,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
return -EFAULT;
name[MODULE_NAME_LEN-1] = '\0';
- /* Create stop_machine threads since free_module relies on
- * a non-failing stop_machine call. */
- ret = stop_machine_create();
- if (ret)
- return ret;
-
- if (mutex_lock_interruptible(&module_mutex) != 0) {
- ret = -EINTR;
- goto out_stop;
- }
+ if (mutex_lock_interruptible(&module_mutex) != 0)
+ return -EINTR;
mod = find_module(name);
if (!mod) {
@@ -726,8 +782,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
out:
mutex_unlock(&module_mutex);
-out_stop:
- stop_machine_destroy();
return ret;
}
@@ -798,10 +852,10 @@ void module_put(struct module *module)
{
if (module) {
preempt_disable();
- __this_cpu_dec(module->refptr->count);
+ smp_wmb(); /* see comment in module_refcount */
+ __this_cpu_inc(module->refptr->decs);
- trace_module_put(module, _RET_IP_,
- __this_cpu_read(module->refptr->count));
+ trace_module_put(module, _RET_IP_);
/* Maybe they're waiting for us to drop reference? */
if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter);
@@ -823,7 +877,7 @@ static inline void module_unload_free(struct module *mod)
int use_module(struct module *a, struct module *b)
{
- return strong_try_module_get(b) == 0;
+ return strong_try_module_get(b);
}
EXPORT_SYMBOL_GPL(use_module);
@@ -994,17 +1048,39 @@ static const struct kernel_symbol *resolve_symbol(Elf_Shdr *sechdrs,
struct module *owner;
const struct kernel_symbol *sym;
const unsigned long *crc;
+ DEFINE_WAIT(wait);
+ int err;
+ long timeleft = 30 * HZ;
+again:
sym = find_symbol(name, &owner, &crc,
!(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
- /* use_module can fail due to OOM,
- or module initialization or unloading */
- if (sym) {
- if (!check_version(sechdrs, versindex, name, mod, crc, owner)
- || !use_module(mod, owner))
- sym = NULL;
+ if (!sym)
+ return NULL;
+
+ if (!check_version(sechdrs, versindex, name, mod, crc, owner))
+ return NULL;
+
+ prepare_to_wait(&module_wq, &wait, TASK_INTERRUPTIBLE);
+ err = use_module(mod, owner);
+ if (likely(!err) || err != -EBUSY || signal_pending(current)) {
+ finish_wait(&module_wq, &wait);
+ return err ? NULL : sym;
}
- return sym;
+
+ /* Module is still loading. Drop lock and wait. */
+ mutex_unlock(&module_mutex);
+ timeleft = schedule_timeout(timeleft);
+ mutex_lock(&module_mutex);
+ finish_wait(&module_wq, &wait);
+
+ /* Module might be gone entirely, or replaced. Re-lookup. */
+ if (timeleft)
+ goto again;
+
+ printk(KERN_WARNING "%s: gave up waiting for init of module %s.\n",
+ mod->name, owner->name);
+ return NULL;
}
/*
@@ -1125,7 +1201,7 @@ struct module_notes_attrs {
struct bin_attribute attrs[0];
};
-static ssize_t module_notes_read(struct kobject *kobj,
+static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
@@ -1400,8 +1476,7 @@ static void free_module(struct module *mod)
/* This may be NULL, but that's OK */
module_free(mod, mod->module_init);
kfree(mod->args);
- if (mod->percpu)
- percpu_modfree(mod->percpu);
+ percpu_modfree(mod);
#if defined(CONFIG_MODULE_UNLOAD)
if (mod->refptr)
free_percpu(mod->refptr);
@@ -1520,7 +1595,7 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
default:
/* Divert to percpu allocation if a percpu var. */
if (sym[i].st_shndx == pcpuindex)
- secbase = (unsigned long)mod->percpu;
+ secbase = (unsigned long)mod_percpu(mod);
else
secbase = sechdrs[sym[i].st_shndx].sh_addr;
sym[i].st_value += secbase;
@@ -1954,7 +2029,7 @@ static noinline struct module *load_module(void __user *umod,
unsigned int modindex, versindex, infoindex, pcpuindex;
struct module *mod;
long err = 0;
- void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
+ void *ptr = NULL; /* Stops spurious gcc warning */
unsigned long symoffs, stroffs, *strmap;
mm_segment_t old_fs;
@@ -2094,15 +2169,11 @@ static noinline struct module *load_module(void __user *umod,
if (pcpuindex) {
/* We have a special allocation for this section. */
- percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
- sechdrs[pcpuindex].sh_addralign,
- mod->name);
- if (!percpu) {
- err = -ENOMEM;
+ err = percpu_modalloc(mod, sechdrs[pcpuindex].sh_size,
+ sechdrs[pcpuindex].sh_addralign);
+ if (err)
goto free_mod;
- }
sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
- mod->percpu = percpu;
}
/* Determine total sizes, and put offsets in sh_entsize. For now
@@ -2317,7 +2388,7 @@ static noinline struct module *load_module(void __user *umod,
sort_extable(mod->extable, mod->extable + mod->num_exentries);
/* Finally, copy percpu area over. */
- percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
+ percpu_modcopy(mod, (void *)sechdrs[pcpuindex].sh_addr,
sechdrs[pcpuindex].sh_size);
add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
@@ -2409,8 +2480,7 @@ static noinline struct module *load_module(void __user *umod,
module_free(mod, mod->module_core);
/* mod will be freed with core. Don't access it beyond this line! */
free_percpu:
- if (percpu)
- percpu_modfree(percpu);
+ percpu_modfree(mod);
free_mod:
kfree(args);
kfree(strmap);