| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Author: Andrei Vagin <avagin@openvz.org> |
| * Author: Dmitry Safonov <dima@arista.com> |
| */ |
| |
| #include <linux/cleanup.h> |
| #include <linux/mm.h> |
| #include <linux/time_namespace.h> |
| #include <linux/time.h> |
| #include <linux/vdso_datastore.h> |
| |
| #include <vdso/clocksource.h> |
| #include <vdso/datapage.h> |
| |
| #include "namespace_internal.h" |
| |
| static struct timens_offset offset_from_ts(struct timespec64 off) |
| { |
| struct timens_offset ret; |
| |
| ret.sec = off.tv_sec; |
| ret.nsec = off.tv_nsec; |
| |
| return ret; |
| } |
| |
| /* |
| * A time namespace VVAR page has the same layout as the VVAR page which |
| * contains the system wide VDSO data. |
| * |
| * For a normal task the VVAR pages are installed in the normal ordering: |
| * VVAR |
| * PVCLOCK |
| * HVCLOCK |
| * TIMENS <- Not really required |
| * |
| * Now for a timens task the pages are installed in the following order: |
| * TIMENS |
| * PVCLOCK |
| * HVCLOCK |
| * VVAR |
| * |
| * The check for vdso_clock->clock_mode is in the unlikely path of |
| * the seq begin magic. So for the non-timens case most of the time |
| * 'seq' is even, so the branch is not taken. |
| * |
| * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check |
| * for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the |
| * update to finish and for 'seq' to become even anyway. |
| * |
| * Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which |
| * enforces the time namespace handling path. |
| */ |
| static void timens_setup_vdso_clock_data(struct vdso_clock *vc, |
| struct time_namespace *ns) |
| { |
| struct timens_offset *offset = vc->offset; |
| struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); |
| struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); |
| |
| vc->seq = 1; |
| vc->clock_mode = VDSO_CLOCKMODE_TIMENS; |
| offset[CLOCK_MONOTONIC] = monotonic; |
| offset[CLOCK_MONOTONIC_RAW] = monotonic; |
| offset[CLOCK_MONOTONIC_COARSE] = monotonic; |
| offset[CLOCK_BOOTTIME] = boottime; |
| offset[CLOCK_BOOTTIME_ALARM] = boottime; |
| } |
| |
| struct page *find_timens_vvar_page(struct vm_area_struct *vma) |
| { |
| if (likely(vma->vm_mm == current->mm)) |
| return current->nsproxy->time_ns->vvar_page; |
| |
| /* |
| * VM_PFNMAP | VM_IO protect .fault() handler from being called |
| * through interfaces like /proc/$pid/mem or |
| * process_vm_{readv,writev}() as long as there's no .access() |
| * in special_mapping_vmops(). |
| * For more details check_vma_flags() and __access_remote_vm() |
| */ |
| |
| WARN(1, "vvar_page accessed remotely"); |
| |
| return NULL; |
| } |
| |
| static void timens_set_vvar_page(struct task_struct *task, |
| struct time_namespace *ns) |
| { |
| struct vdso_time_data *vdata; |
| struct vdso_clock *vc; |
| unsigned int i; |
| |
| if (ns == &init_time_ns) |
| return; |
| |
| /* Fast-path, taken by every task in namespace except the first. */ |
| if (likely(ns->frozen_offsets)) |
| return; |
| |
| guard(mutex)(&timens_offset_lock); |
| /* Nothing to-do: vvar_page has been already initialized. */ |
| if (ns->frozen_offsets) |
| return; |
| |
| ns->frozen_offsets = true; |
| vdata = page_address(ns->vvar_page); |
| vc = vdata->clock_data; |
| |
| for (i = 0; i < CS_BASES; i++) |
| timens_setup_vdso_clock_data(&vc[i], ns); |
| |
| if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) { |
| for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++) |
| timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns); |
| } |
| } |
| |
| /* |
| * The vvar page layout depends on whether a task belongs to the root or |
| * non-root time namespace. Whenever a task changes its namespace, the VVAR |
| * page tables are cleared and then they will be re-faulted with a |
| * corresponding layout. |
| * See also the comment near timens_setup_vdso_clock_data() for details. |
| */ |
| static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) |
| { |
| struct mm_struct *mm = task->mm; |
| struct vm_area_struct *vma; |
| VMA_ITERATOR(vmi, mm, 0); |
| |
| guard(mmap_read_lock)(mm); |
| for_each_vma(vmi, vma) { |
| if (vma_is_special_mapping(vma, &vdso_vvar_mapping)) |
| zap_vma(vma); |
| } |
| return 0; |
| } |
| |
| void timens_commit(struct task_struct *tsk, struct time_namespace *ns) |
| { |
| timens_set_vvar_page(tsk, ns); |
| vdso_join_timens(tsk, ns); |
| } |
| |
| int timens_vdso_alloc_vvar_page(struct time_namespace *ns) |
| { |
| ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
| if (!ns->vvar_page) |
| return -ENOMEM; |
| |
| return 0; |
| } |
| |
| void timens_vdso_free_vvar_page(struct time_namespace *ns) |
| { |
| __free_page(ns->vvar_page); |
| } |