From d6cffbbe9a7e51eb705182965a189457c17ba8a3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 10 Feb 2017 10:35:02 +0300 Subject: proc/sysctl: prune stale dentries during unregistering Currently, unregistering a sysctl table does not prune its dentries. Stale dentries can slow down sysctl operations significantly. For example, the command: # for i in {1..100000} ; do unshare -n -- sysctl -a &> /dev/null ; done creates millions of stale dentries around the sysctls of the loopback interface: # sysctl fs.dentry-state fs.dentry-state = 25812579 24724135 45 0 0 0 All of them have matching names, thus lookup has to scan through the whole hash chain and call d_compare (proc_sys_compare), which checks them under the system-wide spinlock (sysctl_lock). # time sysctl -a > /dev/null real 1m12.806s user 0m0.016s sys 1m12.400s Currently only the memory reclaimer can remove this garbage. But without significant memory pressure this never happens. This patch collects sysctl inodes into a list on the sysctl table header and prunes all their dentries once that table is unregistered. Konstantin Khlebnikov writes: > On 10.02.2017 10:47, Al Viro wrote: >> how about >> the matching stats *after* that patch? > > dcache size doesn't grow endlessly, so stats are fine > > # sysctl fs.dentry-state > fs.dentry-state = 92712 58376 45 0 0 0 > > # time sysctl -a &>/dev/null > > real 0m0.013s > user 0m0.004s > sys 0m0.008s Signed-off-by: Konstantin Khlebnikov Suggested-by: Al Viro Signed-off-by: Eric W. 
Biederman --- fs/proc/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/proc/inode.c') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 842a5ff5b85c..7ad9ed7958af 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -43,10 +43,11 @@ static void proc_evict_inode(struct inode *inode) de = PDE(inode); if (de) pde_put(de); + head = PROC_I(inode)->sysctl; if (head) { RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); - sysctl_head_put(head); + proc_sys_evict_inode(inode, head); } } -- cgit From 796f571b0c5cf3efd2f652779770fa7bbbc2bb03 Mon Sep 17 00:00:00 2001 From: Lafcadio Wluiki Date: Fri, 24 Feb 2017 15:00:23 -0800 Subject: procfs: use an enum for possible hidepid values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the hidepid parameter was checked by comparing literal integers 0, 1, 2. Let's add a proper enum for this, to make the checking more expressive: 0 → HIDEPID_OFF 1 → HIDEPID_NO_ACCESS 2 → HIDEPID_INVISIBLE This changes the internal labelling only, the userspace-facing interface remains unmodified, and still works with literal integers 0, 1, 2. No functional changes. 
Link: http://lkml.kernel.org/r/1484572984-13388-2-git-send-email-djalal@gmail.com Signed-off-by: Lafcadio Wluiki Signed-off-by: Djalal Harouni Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 8 ++++---- fs/proc/inode.c | 2 +- fs/proc/root.c | 3 ++- include/linux/pid_namespace.h | 6 ++++++ 4 files changed, 13 insertions(+), 6 deletions(-) (limited to 'fs/proc/inode.c') diff --git a/fs/proc/base.c b/fs/proc/base.c index 4ecb5edc3c61..b8f06273353e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -697,11 +697,11 @@ static int proc_pid_permission(struct inode *inode, int mask) task = get_proc_task(inode); if (!task) return -ESRCH; - has_perms = has_pid_permissions(pid, task, 1); + has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS); put_task_struct(task); if (!has_perms) { - if (pid->hide_pid == 2) { + if (pid->hide_pid == HIDEPID_INVISIBLE) { /* * Let's make getdents(), stat(), and open() * consistent with each other. If a process @@ -1737,7 +1737,7 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) stat->gid = GLOBAL_ROOT_GID; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { - if (!has_pid_permissions(pid, task, 2)) { + if (!has_pid_permissions(pid, task, HIDEPID_INVISIBLE)) { rcu_read_unlock(); /* * This doesn't prevent learning whether PID exists, @@ -3168,7 +3168,7 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) int len; cond_resched(); - if (!has_pid_permissions(ns, iter.task, 2)) + if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) continue; len = snprintf(name, sizeof(name), "%d", iter.tgid); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 7ad9ed7958af..2cc7a8030275 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -107,7 +107,7 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) if (!gid_eq(pid->pid_gid, GLOBAL_ROOT_GID)) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, pid->pid_gid)); 
- if (pid->hide_pid != 0) + if (pid->hide_pid != HIDEPID_OFF) seq_printf(seq, ",hidepid=%u", pid->hide_pid); return 0; diff --git a/fs/proc/root.c b/fs/proc/root.c index 1988440b2049..b90da888b81a 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -58,7 +58,8 @@ int proc_parse_options(char *options, struct pid_namespace *pid) case Opt_hidepid: if (match_int(&args[0], &option)) return 0; - if (option < 0 || option > 2) { + if (option < HIDEPID_OFF || + option > HIDEPID_INVISIBLE) { pr_err("proc: hidepid value must be between 0 and 2.\n"); return 0; } diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 34cce96741bc..c2a989dee876 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -21,6 +21,12 @@ struct pidmap { struct fs_pin; +enum { /* definitions for pid_namespace's hide_pid field */ + HIDEPID_OFF = 0, + HIDEPID_NO_ACCESS = 1, + HIDEPID_INVISIBLE = 2, +}; + struct pid_namespace { struct kref kref; struct pidmap pidmap[PIDMAP_ENTRIES]; -- cgit