| /* |
| * Chromium OS alt-syscall tables |
| * |
| * Copyright (C) 2015 Google, Inc. |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License version 2 as |
| * published by the Free Software Foundation. |
| */ |
| |
| #include <linux/alt-syscall.h> |
| #include <linux/compat.h> |
| #include <linux/fs.h> |
| #include <linux/init.h> |
| #include <linux/kernel.h> |
| #include <linux/kcmp.h> |
| #include <linux/module.h> |
| #include <linux/prctl.h> |
| #include <linux/sched/types.h> |
| #include <linux/slab.h> |
| #include <linux/socket.h> |
| #include <linux/syscalls.h> |
| #include <linux/timex.h> |
| #include <uapi/linux/sched/types.h> |
| |
| #include <asm/unistd.h> |
| |
| #include "alt-syscall.h" |
| #include "android_whitelists.h" |
| #include "complete_whitelists.h" |
| #include "read_write_test_whitelists.h" |
| #include "third_party_whitelists.h" |
| |
| /* Intercept and log blocked syscalls. */ |
| static asmlinkage long block_syscall(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| |
| pr_warn_ratelimited("[%d] %s: blocked syscall %d\n", task_pid_nr(task), |
| task->comm, syscall_get_nr(task, regs)); |
| |
| return -ENOSYS; |
| } |
| |
| /* |
| * In permissive mode, warn that the syscall was blocked, but still allow |
| * it to go through. Note that since we don't have an easy way to map from |
| * syscall to number of arguments, we pass the maximum (6). |
| */ |
| static asmlinkage long warn_syscall(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| int nr = syscall_get_nr(task, regs); |
| sys_call_ptr_t fn = (sys_call_ptr_t)default_table.table[nr]; |
| |
| pr_warn_ratelimited("[%d] %s: syscall %d not whitelisted\n", |
| task_pid_nr(task), task->comm, nr); |
| |
| return fn(regs); |
| } |
| |
| #ifdef CONFIG_COMPAT |
| static asmlinkage long warn_compat_syscall(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| int nr = syscall_get_nr(task, regs); |
| sys_call_ptr_t fn = (sys_call_ptr_t)default_table.compat_table[nr]; |
| |
| pr_warn_ratelimited("[%d] %s: compat syscall %d not whitelisted\n", |
| task_pid_nr(task), task->comm, nr); |
| |
| return fn(regs); |
| } |
| #endif /* CONFIG_COMPAT */ |
| |
| static asmlinkage long alt_sys_prctl(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| unsigned long args[6]; |
| |
| syscall_get_arguments(task, regs, args); |
| |
| if (args[0] == PR_ALT_SYSCALL && |
| args[1] == PR_ALT_SYSCALL_SET_SYSCALL_TABLE) |
| return -EPERM; |
| |
| return ksys_prctl(args[0], args[1], args[2], args[3], args[4]); |
| } |
| |
/*
 * Thread priority (nice values) used by Android.
 *
 * The expansions are parenthesized so the negative constants behave as a
 * single operand wherever the macros are used in an expression.
 */
#define ANDROID_PRIORITY_FOREGROUND		(-2)
#define ANDROID_PRIORITY_DISPLAY		(-4)
#define ANDROID_PRIORITY_URGENT_DISPLAY		(-8)
#define ANDROID_PRIORITY_AUDIO			(-16)
#define ANDROID_PRIORITY_URGENT_AUDIO		(-19)
#define ANDROID_PRIORITY_HIGHEST		(-20)

/*
 * Reduced priority when running inside container. Each value is the
 * corresponding Android value divided by two (truncating toward zero).
 */
#define CONTAINER_PRIORITY_FOREGROUND		(-1)
#define CONTAINER_PRIORITY_DISPLAY		(-2)
#define CONTAINER_PRIORITY_URGENT_DISPLAY	(-4)
#define CONTAINER_PRIORITY_AUDIO		(-8)
#define CONTAINER_PRIORITY_URGENT_AUDIO		(-9)
#define CONTAINER_PRIORITY_HIGHEST		(-10)
| |
| /* |
| * TODO(mortonm): Move the implementation of these Android-specific |
| * alt-syscalls (starting with android_*) to their own .c file. |
| */ |
| static asmlinkage long android_getpriority(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| long prio, nice; |
| unsigned long args[6]; |
| int which, who; |
| |
| syscall_get_arguments(task, regs, args); |
| which = args[0]; |
| who = args[1]; |
| |
| prio = ksys_getpriority(which, who); |
| if (prio <= 20) |
| return prio; |
| |
| nice = -(prio - 20); |
| switch (nice) { |
| case CONTAINER_PRIORITY_FOREGROUND: |
| nice = ANDROID_PRIORITY_FOREGROUND; |
| break; |
| case CONTAINER_PRIORITY_DISPLAY: |
| nice = ANDROID_PRIORITY_DISPLAY; |
| break; |
| case CONTAINER_PRIORITY_URGENT_DISPLAY: |
| nice = ANDROID_PRIORITY_URGENT_DISPLAY; |
| break; |
| case CONTAINER_PRIORITY_AUDIO: |
| nice = ANDROID_PRIORITY_AUDIO; |
| break; |
| case CONTAINER_PRIORITY_URGENT_AUDIO: |
| nice = ANDROID_PRIORITY_URGENT_AUDIO; |
| break; |
| case CONTAINER_PRIORITY_HIGHEST: |
| nice = ANDROID_PRIORITY_HIGHEST; |
| break; |
| } |
| |
| return -nice + 20; |
| } |
| |
| static asmlinkage long android_keyctl(struct pt_regs *regs) |
| { |
| return -EACCES; |
| } |
| |
| #ifdef CONFIG_KCMP |
| static asmlinkage long android_kcmp(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| unsigned long args[6]; |
| pid_t pid1, pid2; |
| int type; |
| unsigned long idx1, idx2; |
| |
| syscall_get_arguments(task, regs, args); |
| type = args[2]; |
| if (type == KCMP_SYSVSEM) |
| return -ENOSYS; |
| |
| pid1 = args[0]; |
| pid2 = args[1]; |
| idx1 = args[3]; |
| idx2 = args[4]; |
| return ksys_kcmp(pid1, pid2, type, idx1, idx2); |
| } |
| #endif |
| |
| static asmlinkage long android_setpriority(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| unsigned long args[6]; |
| int which, who, niceval; |
| |
| syscall_get_arguments(task, regs, args); |
| which = args[0]; |
| who = args[1]; |
| niceval = args[2]; |
| |
| if (niceval < 0) { |
| if (niceval < -20) |
| niceval = -20; |
| niceval = niceval / 2; |
| } |
| return ksys_setpriority(which, who, niceval); |
| } |
| |
| static asmlinkage long |
| do_android_sched_setscheduler(pid_t pid, int policy, |
| struct sched_param __user *param) |
| { |
| struct sched_param lparam; |
| struct task_struct *p; |
| long retval; |
| |
| if (!param || pid < 0) |
| return -EINVAL; |
| if (copy_from_user(&lparam, param, sizeof(struct sched_param))) |
| return -EFAULT; |
| |
| rcu_read_lock(); |
| retval = -ESRCH; |
| p = pid ? find_task_by_vpid(pid) : current; |
| if (likely(p)) |
| get_task_struct(p); |
| rcu_read_unlock(); |
| |
| if (likely(p)) { |
| const struct cred *cred = current_cred(); |
| kuid_t android_root_uid, android_system_uid; |
| |
| /* |
| * Allow root(0) and system(1000) processes to set RT scheduler. |
| * |
| * The system_server process run under system provides |
| * SchedulingPolicyService which is used by audioflinger and |
| * other services to boost their threads, so allow it to set RT |
| * scheduler for other threads. |
| */ |
| android_root_uid = make_kuid(cred->user_ns, 0); |
| android_system_uid = make_kuid(cred->user_ns, 1000); |
| if ((uid_eq(cred->euid, android_root_uid) || |
| uid_eq(cred->euid, android_system_uid)) && |
| ns_capable(cred->user_ns, CAP_SYS_NICE)) |
| retval = sched_setscheduler_nocheck(p, policy, &lparam); |
| else |
| retval = sched_setscheduler(p, policy, &lparam); |
| put_task_struct(p); |
| } |
| |
| return retval; |
| } |
| |
| static asmlinkage long |
| android_sched_setscheduler(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| unsigned long args[6]; |
| pid_t pid; |
| int policy; |
| struct sched_param __user *param; |
| |
| syscall_get_arguments(task, regs, args); |
| pid = args[0]; |
| policy = args[1]; |
| param = (struct sched_param __user *)args[2]; |
| |
| /* negative values for policy are not valid */ |
| if (policy < 0) |
| return -EINVAL; |
| return do_android_sched_setscheduler(pid, policy, param); |
| } |
| |
/*
 * sched_setparam() passes in -1 for its policy, to let the functions
 * it calls know not to change it. The expansion is parenthesized so the
 * negative constant is always treated as a single operand.
 */
#define SETPARAM_POLICY	(-1)
| |
| static asmlinkage long android_sched_setparam(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| unsigned long args[6]; |
| pid_t pid; |
| struct sched_param __user *param; |
| |
| syscall_get_arguments(task, regs, args); |
| pid = args[0]; |
| param = (struct sched_param __user *)args[1]; |
| |
| return do_android_sched_setscheduler(pid, SETPARAM_POLICY, param); |
| } |
| |
| static asmlinkage long __maybe_unused android_socket(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| unsigned long args[6]; |
| int domain, type, socket; |
| |
| syscall_get_arguments(task, regs, args); |
| domain = args[0]; |
| type = args[1]; |
| socket = args[2]; |
| |
| if (domain == AF_VSOCK) |
| return -EACCES; |
| return __sys_socket(domain, type, socket); |
| } |
| |
| static asmlinkage long android_perf_event_open(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| unsigned long args[6]; |
| struct perf_event_attr __user *attr_uptr; |
| pid_t pid; |
| int cpu, group_fd; |
| unsigned long flags; |
| |
| if (!allow_devmode_syscalls) |
| return -EACCES; |
| |
| syscall_get_arguments(task, regs, args); |
| attr_uptr = (struct perf_event_attr __user *)args[0]; |
| pid = args[1]; |
| cpu = args[2]; |
| group_fd = args[3]; |
| flags = args[4]; |
| |
| return ksys_perf_event_open(attr_uptr, pid, cpu, group_fd, flags); |
| } |
| |
| static asmlinkage long android_adjtimex(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| struct __kernel_timex kbuf; |
| struct __kernel_timex __user *buf; |
| unsigned long args[6]; |
| |
| syscall_get_arguments(task, regs, args); |
| buf = (struct __kernel_timex __user *)args[0]; |
| |
| /* adjtimex() is allowed only for read. */ |
| if (copy_from_user(&kbuf, buf, sizeof(struct __kernel_timex))) |
| return -EFAULT; |
| if (kbuf.modes != 0) |
| return -EPERM; |
| return ksys_adjtimex(buf); |
| } |
| |
| static asmlinkage long android_clock_adjtime(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| struct __kernel_timex kbuf; |
| unsigned long args[6]; |
| clockid_t which_clock; |
| struct __kernel_timex __user *buf; |
| |
| syscall_get_arguments(task, regs, args); |
| which_clock = args[0]; |
| buf = (struct __kernel_timex __user *)args[1]; |
| |
| /* clock_adjtime() is allowed only for read. */ |
| if (copy_from_user(&kbuf, buf, sizeof(struct __kernel_timex))) |
| return -EFAULT; |
| if (kbuf.modes != 0) |
| return -EPERM; |
| return ksys_clock_adjtime(which_clock, buf); |
| } |
| |
| static asmlinkage long android_getcpu(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| unsigned long args[6]; |
| unsigned __user *cpu; |
| unsigned __user *node; |
| struct getcpu_cache __user *tcache; |
| |
| syscall_get_arguments(task, regs, args); |
| cpu = (unsigned __user *)args[0]; |
| node = (unsigned __user *)args[1]; |
| tcache = (struct getcpu_cache __user *)args[2]; |
| |
| if (node || tcache) |
| return -EPERM; |
| return ksys_getcpu(cpu, node, tcache); |
| } |
| |
| #ifdef CONFIG_COMPAT |
| static asmlinkage long android_compat_adjtimex(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| struct old_timex32 kbuf; |
| struct old_timex32 __user *buf; |
| unsigned long args[6]; |
| |
| syscall_get_arguments(task, regs, args); |
| buf = (struct old_timex32 __user *)args[0]; |
| |
| /* adjtimex() is allowed only for read. */ |
| if (copy_from_user(&kbuf, buf, sizeof(struct old_timex32))) |
| return -EFAULT; |
| if (kbuf.modes != 0) |
| return -EPERM; |
| return ksys_adjtimex_time32(buf); |
| } |
| |
| static asmlinkage long |
| android_compat_clock_adjtime(struct pt_regs *regs) |
| { |
| struct task_struct *task = current; |
| struct old_timex32 kbuf; |
| unsigned long args[6]; |
| clockid_t which_clock; |
| struct old_timex32 __user *buf; |
| |
| syscall_get_arguments(task, regs, args); |
| which_clock = args[0]; |
| buf = (struct old_timex32 __user *)args[1]; |
| |
| /* clock_adjtime() is allowed only for read. */ |
| if (copy_from_user(&kbuf, buf, sizeof(struct old_timex32))) |
| return -EFAULT; |
| if (kbuf.modes != 0) |
| return -EPERM; |
| return ksys_clock_adjtime32(which_clock, buf); |
| } |
| #endif /* CONFIG_COMPAT */ |
| |
| static struct syscall_whitelist whitelists[] = { |
| SYSCALL_WHITELIST(read_write_test), |
| SYSCALL_WHITELIST(android), |
| PERMISSIVE_SYSCALL_WHITELIST(android), |
| SYSCALL_WHITELIST(third_party), |
| PERMISSIVE_SYSCALL_WHITELIST(third_party), |
| SYSCALL_WHITELIST(complete), |
| PERMISSIVE_SYSCALL_WHITELIST(complete) |
| }; |
| |
| static int alt_syscall_apply_whitelist(const struct syscall_whitelist *wl, |
| struct alt_sys_call_table *t) |
| { |
| unsigned int i; |
| unsigned long *whitelist = kcalloc(BITS_TO_LONGS(t->size), |
| sizeof(unsigned long), GFP_KERNEL); |
| |
| if (!whitelist) |
| return -ENOMEM; |
| |
| for (i = 0; i < wl->nr_whitelist; i++) { |
| unsigned int nr = wl->whitelist[i].nr; |
| |
| if (nr >= t->size) { |
| kfree(whitelist); |
| return -EINVAL; |
| } |
| bitmap_set(whitelist, nr, 1); |
| if (wl->whitelist[i].alt) |
| t->table[nr] = wl->whitelist[i].alt; |
| } |
| |
| for (i = 0; i < t->size; i++) { |
| if (!test_bit(i, whitelist)) { |
| t->table[i] = wl->permissive ? |
| (sys_call_ptr_t)warn_syscall : |
| (sys_call_ptr_t)block_syscall; |
| } |
| } |
| |
| kfree(whitelist); |
| return 0; |
| } |
| |
| #ifdef CONFIG_COMPAT |
| static int |
| alt_syscall_apply_compat_whitelist(const struct syscall_whitelist *wl, |
| struct alt_sys_call_table *t) |
| { |
| unsigned int i; |
| unsigned long *whitelist = kcalloc(BITS_TO_LONGS(t->compat_size), |
| sizeof(unsigned long), GFP_KERNEL); |
| |
| if (!whitelist) |
| return -ENOMEM; |
| |
| for (i = 0; i < wl->nr_compat_whitelist; i++) { |
| unsigned int nr = wl->compat_whitelist[i].nr; |
| |
| if (nr >= t->compat_size) { |
| kfree(whitelist); |
| return -EINVAL; |
| } |
| bitmap_set(whitelist, nr, 1); |
| if (wl->compat_whitelist[i].alt) |
| t->compat_table[nr] = wl->compat_whitelist[i].alt; |
| } |
| |
| for (i = 0; i < t->compat_size; i++) { |
| if (!test_bit(i, whitelist)) { |
| t->compat_table[i] = wl->permissive ? |
| (sys_call_ptr_t)warn_compat_syscall : |
| (sys_call_ptr_t)block_syscall; |
| } |
| } |
| |
| kfree(whitelist); |
| return 0; |
| } |
| #else |
/* Without CONFIG_COMPAT there is no compat table: nothing to do, succeed. */
static inline int
alt_syscall_apply_compat_whitelist(const struct syscall_whitelist *wl,
				   struct alt_sys_call_table *t)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
| #endif /* CONFIG_COMPAT */ |
| |
| static int alt_syscall_init_one(const struct syscall_whitelist *wl) |
| { |
| struct alt_sys_call_table *t; |
| int err; |
| |
| t = kzalloc(sizeof(*t), GFP_KERNEL); |
| if (!t) |
| return -ENOMEM; |
| strncpy(t->name, wl->name, sizeof(t->name)); |
| |
| err = arch_dup_sys_call_table(t); |
| if (err) |
| return err; |
| |
| err = alt_syscall_apply_whitelist(wl, t); |
| if (err) |
| return err; |
| err = alt_syscall_apply_compat_whitelist(wl, t); |
| if (err) |
| return err; |
| |
| return register_alt_sys_call_table(t); |
| } |
| |
| /* |
| * Register an alternate syscall table for each whitelist. Note that the |
| * lack of a module_exit() is intentional - once a syscall table is registered |
| * it cannot be unregistered. |
| * |
| * TODO(abrestic) Support unregistering syscall tables? |
| */ |
| static int chromiumos_alt_syscall_init(void) |
| { |
| unsigned int i; |
| int err; |
| |
| #ifdef CONFIG_SYSCTL |
| if (!register_sysctl_paths(chromiumos_sysctl_path, |
| chromiumos_sysctl_table)) |
| pr_warn("Failed to register sysctl\n"); |
| #endif |
| |
| err = arch_dup_sys_call_table(&default_table); |
| if (err) |
| return err; |
| |
| for (i = 0; i < ARRAY_SIZE(whitelists); i++) { |
| err = alt_syscall_init_one(&whitelists[i]); |
| if (err) |
| pr_warn("Failed to register syscall table %s: %d\n", |
| whitelists[i].name, err); |
| } |
| |
| return 0; |
| } |
| module_init(chromiumos_alt_syscall_init); |