Diffstat (limited to 'arch/arm/kernel/module-plts.c')
-rw-r--r--  arch/arm/kernel/module-plts.c | 243
1 file changed, 144 insertions, 99 deletions
diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
index 0c7efc3446c0..3a5cba90c971 100644
--- a/arch/arm/kernel/module-plts.c
+++ b/arch/arm/kernel/module-plts.c
@@ -9,6 +9,7 @@
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/sort.h>
 
 #include <asm/cache.h>
 #include <asm/opcodes.h>
@@ -30,154 +31,198 @@ struct plt_entries {
 	u32	lit[PLT_ENT_COUNT];
 };
 
-static bool in_init(const struct module *mod, u32 addr)
+u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
 {
-	return addr - (u32)mod->init_layout.base < mod->init_layout.size;
+	struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
+	int idx = 0;
+
+	/*
+	 * Look for an existing entry pointing to 'val'. Given that the
+	 * relocations are sorted, this will be the last entry we allocated.
+	 * (if one exists).
+	 */
+	if (mod->arch.plt_count > 0) {
+		plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
+		idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
+
+		if (plt->lit[idx] == val)
+			return (u32)&plt->ldr[idx];
+
+		idx = (idx + 1) % PLT_ENT_COUNT;
+		if (!idx)
+			plt++;
+	}
+
+	mod->arch.plt_count++;
+	BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
+
+	if (!idx)
+		/* Populate a new set of entries */
+		*plt = (struct plt_entries){
+			{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
+			{ val, }
+		};
+	else
+		plt->lit[idx] = val;
+
+	return (u32)&plt->ldr[idx];
 }
 
-u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
+#define cmp_3way(a,b)	((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rel(const void *a, const void *b)
 {
-	struct plt_entries *plt, *plt_end;
-	int c, *count;
-
-	if (in_init(mod, loc)) {
-		plt = (void *)mod->arch.init_plt->sh_addr;
-		plt_end = (void *)plt + mod->arch.init_plt->sh_size;
-		count = &mod->arch.init_plt_count;
-	} else {
-		plt = (void *)mod->arch.core_plt->sh_addr;
-		plt_end = (void *)plt + mod->arch.core_plt->sh_size;
-		count = &mod->arch.core_plt_count;
-	}
+	const Elf32_Rel *x = a, *y = b;
+	int i;
 
-	/* Look for an existing entry pointing to 'val' */
-	for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
-		int i;
-
-		if (!c) {
-			/* Populate a new set of entries */
-			*plt = (struct plt_entries){
-				{ [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
-				{ val, }
-			};
-			++*count;
-			return (u32)plt->ldr;
-		}
-		for (i = 0; i < PLT_ENT_COUNT; i++) {
-			if (!plt->lit[i]) {
-				plt->lit[i] = val;
-				++*count;
-			}
-			if (plt->lit[i] == val)
-				return (u32)&plt->ldr[i];
-		}
+	/* sort by type and symbol index */
+	i = cmp_3way(ELF32_R_TYPE(x->r_info), ELF32_R_TYPE(y->r_info));
+	if (i == 0)
+		i = cmp_3way(ELF32_R_SYM(x->r_info), ELF32_R_SYM(y->r_info));
+	return i;
+}
+
+static bool is_zero_addend_relocation(Elf32_Addr base, const Elf32_Rel *rel)
+{
+	u32 *tval = (u32 *)(base + rel->r_offset);
+
+	/*
+	 * Do a bitwise compare on the raw addend rather than fully decoding
+	 * the offset and doing an arithmetic comparison.
+	 * Note that a zero-addend jump/call relocation is encoded taking the
+	 * PC bias into account, i.e., -8 for ARM and -4 for Thumb2.
+	 */
+	switch (ELF32_R_TYPE(rel->r_info)) {
+		u16 upper, lower;
+
+	case R_ARM_THM_CALL:
+	case R_ARM_THM_JUMP24:
+		upper = __mem_to_opcode_thumb16(((u16 *)tval)[0]);
+		lower = __mem_to_opcode_thumb16(((u16 *)tval)[1]);
+
+		return (upper & 0x7ff) == 0x7ff && (lower & 0x2fff) == 0x2ffe;
+
+	case R_ARM_CALL:
+	case R_ARM_PC24:
+	case R_ARM_JUMP24:
+		return (__mem_to_opcode_arm(*tval) & 0xffffff) == 0xfffffe;
 	}
 	BUG();
 }
 
-static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
-			   u32 mask)
+static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
 {
-	u32 *loc1, *loc2;
-	int i;
+	const Elf32_Rel *prev;
 
-	for (i = 0; i < num; i++) {
-		if (rel[i].r_info != rel[num].r_info)
-			continue;
+	/*
+	 * Entries are sorted by type and symbol index. That means that,
+	 * if a duplicate entry exists, it must be in the preceding
+	 * slot.
+	 */
+	if (!num)
+		return false;
 
-		/*
-		 * Identical relocation types against identical symbols can
-		 * still result in different PLT entries if the addend in the
-		 * place is different. So resolve the target of the relocation
-		 * to compare the values.
-		 */
-		loc1 = (u32 *)(base + rel[i].r_offset);
-		loc2 = (u32 *)(base + rel[num].r_offset);
-		if (((*loc1 ^ *loc2) & mask) == 0)
-			return 1;
-	}
-	return 0;
+	prev = rel + num - 1;
+	return cmp_rel(rel + num, prev) == 0 &&
+	       is_zero_addend_relocation(base, prev);
 }
 
 /* Count how many PLT entries we may need */
-static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
+static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
+			       const Elf32_Rel *rel, int num)
 {
 	unsigned int ret = 0;
+	const Elf32_Sym *s;
 	int i;
 
-	/*
-	 * Sure, this is order(n^2), but it's usually short, and not
-	 * time critical
-	 */
-	for (i = 0; i < num; i++)
+	for (i = 0; i < num; i++) {
 		switch (ELF32_R_TYPE(rel[i].r_info)) {
 		case R_ARM_CALL:
 		case R_ARM_PC24:
 		case R_ARM_JUMP24:
-			if (!duplicate_rel(base, rel, i,
-					   __opcode_to_mem_arm(0x00ffffff)))
-				ret++;
-			break;
-#ifdef CONFIG_THUMB2_KERNEL
 		case R_ARM_THM_CALL:
 		case R_ARM_THM_JUMP24:
-			if (!duplicate_rel(base, rel, i,
-					   __opcode_to_mem_thumb32(0x07ff2fff)))
+			/*
+			 * We only have to consider branch targets that resolve
			 * to undefined symbols. This is not simply a heuristic,
+			 * it is a fundamental limitation, since the PLT itself
+			 * is part of the module, and needs to be within range
+			 * as well, so modules can never grow beyond that limit.
+			 */
+			s = syms + ELF32_R_SYM(rel[i].r_info);
+			if (s->st_shndx != SHN_UNDEF)
+				break;
+
+			/*
+			 * Jump relocations with non-zero addends against
+			 * undefined symbols are supported by the ELF spec, but
+			 * do not occur in practice (e.g., 'jump n bytes past
+			 * the entry point of undefined function symbol f').
+			 * So we need to support them, but there is no need to
+			 * take them into consideration when trying to optimize
+			 * this code. So let's only check for duplicates when
+			 * the addend is zero.
+			 */
+			if (!is_zero_addend_relocation(base, rel + i) ||
+			    !duplicate_rel(base, rel, i))
 				ret++;
-#endif
 		}
+	}
 	return ret;
 }
 
 int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
-	unsigned long core_plts = 0, init_plts = 0;
+	unsigned long plts = 0;
 	Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+	Elf32_Sym *syms = NULL;
 
 	/*
 	 * To store the PLTs, we expand the .text section for core module code
-	 * and the .init.text section for initialization code.
+	 * and for initialization code.
 	 */
-	for (s = sechdrs; s < sechdrs_end; ++s)
-		if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
-			mod->arch.core_plt = s;
-		else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
-			mod->arch.init_plt = s;
-
-	if (!mod->arch.core_plt || !mod->arch.init_plt) {
-		pr_err("%s: sections missing\n", mod->name);
+	for (s = sechdrs; s < sechdrs_end; ++s) {
+		if (strcmp(".plt", secstrings + s->sh_name) == 0)
+			mod->arch.plt = s;
+		else if (s->sh_type == SHT_SYMTAB)
+			syms = (Elf32_Sym *)s->sh_addr;
+	}
+
+	if (!mod->arch.plt) {
+		pr_err("%s: module PLT section missing\n", mod->name);
+		return -ENOEXEC;
+	}
+	if (!syms) {
+		pr_err("%s: module symtab section missing\n", mod->name);
 		return -ENOEXEC;
 	}
 
 	for (s = sechdrs + 1; s < sechdrs_end; ++s) {
-		const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
+		Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
 		int numrels = s->sh_size / sizeof(Elf32_Rel);
 		Elf32_Shdr *dstsec = sechdrs + s->sh_info;
 
 		if (s->sh_type != SHT_REL)
 			continue;
 
-		if (strstr(secstrings + s->sh_name, ".init"))
-			init_plts += count_plts(dstsec->sh_addr, rels, numrels);
-		else
-			core_plts += count_plts(dstsec->sh_addr, rels, numrels);
+		/* ignore relocations that operate on non-exec sections */
+		if (!(dstsec->sh_flags & SHF_EXECINSTR))
+			continue;
+
+		/* sort by type and symbol index */
+		sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
+
+		plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
 	}
 
-	mod->arch.core_plt->sh_type = SHT_NOBITS;
-	mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
-					       sizeof(struct plt_entries));
-	mod->arch.core_plt_count = 0;
-
-	mod->arch.init_plt->sh_type = SHT_NOBITS;
-	mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
-					       sizeof(struct plt_entries));
-	mod->arch.init_plt_count = 0;
-	pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
-		 mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
+	mod->arch.plt->sh_type = SHT_NOBITS;
+	mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
+	mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
+					  sizeof(struct plt_entries));
+	mod->arch.plt_count = 0;
+
+	pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
 	return 0;
 }
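A note on the approach in the new count_plts()/duplicate_rel() pair: once each SHT_REL section has been sorted by relocation type and symbol index, any relocation that could reuse an existing PLT entry ends up directly after its duplicate, so the old O(n^2) scan collapses into a single comparison with the preceding slot. The standalone sketch below only illustrates that sort-then-compare-previous pattern and is not kernel code: 'struct rel', the sample data and main() are hypothetical, and userspace qsort() stands in for the kernel's sort().

/*
 * Illustration only: sort relocation records by (type, symbol index), then
 * treat a record as a duplicate when it compares equal to the immediately
 * preceding one. 'struct rel' is a hypothetical stand-in for Elf32_Rel.
 */
#include <stdio.h>
#include <stdlib.h>

struct rel {
	unsigned int type;	/* stand-in for ELF32_R_TYPE(r_info) */
	unsigned int sym;	/* stand-in for ELF32_R_SYM(r_info) */
};

#define cmp_3way(a, b)	((a) < (b) ? -1 : (a) > (b))

static int cmp_rel(const void *a, const void *b)
{
	const struct rel *x = a, *y = b;
	int i;

	/* sort by type first, then by symbol index */
	i = cmp_3way(x->type, y->type);
	if (i == 0)
		i = cmp_3way(x->sym, y->sym);
	return i;
}

/* After sorting, a duplicate can only sit in the immediately preceding slot. */
static int duplicate_rel(const struct rel *rel, int num)
{
	return num > 0 && cmp_rel(&rel[num], &rel[num - 1]) == 0;
}

int main(void)
{
	/* arbitrary (type, symbol index) pairs; two of them are duplicates */
	struct rel rels[] = {
		{ 28, 3 }, { 28, 1 }, { 28, 3 }, { 10, 1 }, { 28, 1 },
	};
	int i, n = sizeof(rels) / sizeof(rels[0]);
	unsigned int plts = 0;

	qsort(rels, n, sizeof(rels[0]), cmp_rel);

	for (i = 0; i < n; i++)
		if (!duplicate_rel(rels, i))
			plts++;

	printf("PLT entries needed: %u\n", plts);	/* prints 3 */
	return 0;
}

The real count_plts() additionally skips relocations against defined symbols (st_shndx != SHN_UNDEF) and only attempts de-duplication for zero-addend branch encodings via is_zero_addend_relocation(); the sketch leaves both checks out.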