Diffstat (limited to 'kernel/trace/trace_stack.c')
 kernel/trace/trace_stack.c | 112
 1 file changed, 112 insertions(+), 0 deletions(-)
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 5d16f73898db..ec9a34a97129 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -53,6 +53,104 @@ static void print_max_stack(void)
 	}
 }
 
+/*
+ * The stack tracer looks for the maximum stack usage at each function call. It
+ * registers a callback with ftrace, and in that callback it examines the stack
+ * size. It determines the stack size from the variable passed in, which is the
+ * address of a local variable in the stack_trace_call() callback function.
+ * The stack size is calculated from the address of the local variable to the
+ * top of the current stack. If that size is smaller than the currently saved
+ * max stack size, nothing more is done.
+ *
+ * If the size of the stack is greater than the maximum recorded size, then the
+ * following algorithm takes place.
+ *
+ * For architectures (like x86) that store the function's return address before
+ * saving the function's local variables, the stack will look something like
+ * this:
+ *
+ *   [ top of stack ]
+ *    0: sys call entry frame
+ *   10: return addr to entry code
+ *   11: start of sys_foo frame
+ *   20: return addr to sys_foo
+ *   21: start of kernel_func_bar frame
+ *   30: return addr to kernel_func_bar
+ *   31: [ do trace stack here ]
+ *
+ * save_stack_trace() is called, returning all the functions it finds in the
+ * current stack, which would be (from the bottom of the stack to the top):
+ *
+ *   return addr to kernel_func_bar
+ *   return addr to sys_foo
+ *   return addr to entry code
+ *
+ * Now, to figure out how much stack each of these functions uses for its local
+ * variables, the stack is searched for these return addresses. When a match is
+ * found, it is added to the stack_dump_trace[] array and its offset into the
+ * stack is saved in the stack_trace_index[] array. The above example would show:
+ *
+ *        stack_dump_trace[]        |   stack_trace_index[]
+ *        ------------------        +   -------------------
+ *  return addr to kernel_func_bar  |          30
+ *  return addr to sys_foo          |          20
+ *  return addr to entry            |          10
+ *
+ * The print_max_stack() function above uses these values to print the size of
+ * each function's portion of the stack.
+ *
+ *  for (i = 0; i < nr_entries; i++) {
+ *     size = i == nr_entries - 1 ? stack_trace_index[i] :
+ *                    stack_trace_index[i] - stack_trace_index[i+1];
+ *     print("%d %d %d %s\n", i, stack_trace_index[i], size, stack_dump_trace[i]);
+ *  }
+ *
+ * The above shows:
+ *
+ *     depth size  location
+ *     ----- ----  --------
+ *  0    30   10   kernel_func_bar
+ *  1    20   10   sys_foo
+ *  2    10   10   entry code
+ *
+ * Now, for architectures that save the return address after the function's
+ * local variables (saving the link register before calling nested functions),
+ * this will cause the stack to look a little different:
+ *
+ * [ top of stack ]
+ *  0: sys call entry frame
+ * 10: start of sys_foo frame
+ * 19: return addr to entry code << lr saved before calling kernel_func_bar
+ * 20: start of kernel_func_bar frame
+ * 29: return addr to sys_foo << lr saved before calling next function
+ * 30: [ do trace stack here ]
+ *
+ * Although the functions returned by save_stack_trace() may be the same, the
+ * placement in the stack will be different. Using the same algorithm as above
+ * would yield:
+ *
+ *        stack_dump_trace[]        |   stack_trace_index[]
+ *        ------------------        +   -------------------
+ *  return addr to kernel_func_bar  |          30
+ *  return addr to sys_foo          |          29
+ *  return addr to entry            |          19
+ *
+ * Where the mapping is off by one:
+ *
+ *   kernel_func_bar's stack frame size is 29 - 19, not 30 - 29!
+ *
+ * To fix this, if the architecture defines ARCH_FTRACE_SHIFT_STACK_TRACER, the
+ * values in stack_trace_index[] are shifted by one entry and the number of
+ * stack trace entries is decremented by one.
+ *
+ *        stack_dump_trace[]        |   stack_trace_index[]
+ *        ------------------        +   -------------------
+ *  return addr to kernel_func_bar  |          29
+ *  return addr to sys_foo          |          19
+ *
+ * Although the entry function is not displayed, the first function (sys_foo)
+ * will still include the entry code's stack usage in its reported size.
+ */
 static void check_stack(unsigned long ip, unsigned long *stack)
 {
 	unsigned long this_size, flags; unsigned long *p, *top, *start;
@@ -158,6 +256,20 @@ static void check_stack(unsigned long ip, unsigned long *stack)
 			i++;
 	}
 
+#ifdef ARCH_FTRACE_SHIFT_STACK_TRACER
+	/*
+	 * Some archs will store the link register before calling
+	 * nested functions. This means the saved return address
+	 * comes after the local storage, and we need to shift
+	 * for that.
+	 */
+	if (x > 1) {
+		memmove(&stack_trace_index[0], &stack_trace_index[1],
+			sizeof(stack_trace_index[0]) * (x - 1));
+		x--;
+	}
+#endif
+
 	stack_trace_nr_entries = x;
 
 	if (task_stack_end_corrupted(current)) {
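
To make the size calculation concrete, here is a minimal user-space C sketch (not part of the patch) that replays the x86-style example from the comment above: the stack_trace_index[] offsets and stack_dump_trace[] names are the hypothetical values from that example, and the loop performs the same per-frame size calculation that print_max_stack() does.

#include <stdio.h>

/* Offsets and names taken from the x86-style example in the comment. */
static const unsigned long stack_trace_index[] = { 30, 20, 10 };
static const char *const stack_dump_trace[] = {
	"kernel_func_bar", "sys_foo", "entry code",
};

int main(void)
{
	unsigned int nr_entries = 3;
	unsigned int i;
	unsigned long size;

	/* Same per-frame size calculation that print_max_stack() performs. */
	for (i = 0; i < nr_entries; i++) {
		size = (i == nr_entries - 1) ? stack_trace_index[i] :
			stack_trace_index[i] - stack_trace_index[i + 1];
		printf("%2u %8lu %6lu   %s\n", i, stack_trace_index[i],
		       size, stack_dump_trace[i]);
	}
	return 0;
}

Built with an ordinary C compiler, this prints the same depth/size/location table shown in the comment, with each frame reporting a size of 10.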
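
A second small sketch (again illustrative only, reusing the offsets from the link-register example) shows why the shift under #ifdef ARCH_FTRACE_SHIFT_STACK_TRACER is needed: without it, kernel_func_bar would be charged 30 - 29 = 1, and after the same memmove() the patch performs it is correctly charged 29 - 19 = 10.

#include <stdio.h>
#include <string.h>

/* Offsets taken from the link-register example in the comment. */
static unsigned long stack_trace_index[] = { 30, 29, 19 };

int main(void)
{
	unsigned int x = 3;	/* number of entries found on the stack */

	/* Without the shift, kernel_func_bar appears to use 30 - 29 = 1. */
	printf("before shift: kernel_func_bar size = %lu\n",
	       stack_trace_index[0] - stack_trace_index[1]);

	/*
	 * The same shift the ARCH_FTRACE_SHIFT_STACK_TRACER block performs:
	 * move the offsets up by one entry and drop the deepest one, so each
	 * function is paired with the offset of its own frame.
	 */
	if (x > 1) {
		memmove(&stack_trace_index[0], &stack_trace_index[1],
			sizeof(stack_trace_index[0]) * (x - 1));
		x--;
	}

	/* With the shift, kernel_func_bar correctly shows 29 - 19 = 10. */
	printf("after shift:  kernel_func_bar size = %lu\n",
	       stack_trace_index[0] - stack_trace_index[1]);
	return 0;
}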