[linux kernel] fork() syscall analysis

목적

리눅스 커널 공부, 운영체제 공부
큰 코드베이스 읽는 능력 향상
재미

왜 `fork` syscall 인가

원래는 bottom-up으로 하려고 했는데 동기부여가 잘 되지 않아서 top-down으로 하려고 한다.
프로세스 관련해서는 fork가 좋을 것 같아서 선택했다.

분석

glibc 분석

`include/unistd.h`

/* Internal name for fork function.  */
extern __pid_t __libc_fork (void);

`posix/fork.c`

/* Create a child process with _Fork, wrapping the call with the
   bookkeeping libc needs on both sides of it.

   Before _Fork: the prefork handlers are run and, when more than one
   thread is running, the NSS database state is prepared and the libio
   and malloc locks are taken (malloc last).

   In the child (_Fork returned 0): lock state is reset (only in the
   multi-threaded case for libio/malloc/NSS, always for the dynamic
   loader locks), stacks are reclaimed and the child handlers are run.

   In the parent, or when _Fork failed: the locks taken above are
   released, the parent handlers are run, and on failure the errno
   value left by _Fork is restored.

   Returns whatever _Fork returned.  */
pid_t
__libc_fork (void)
{
  /* Determine if we are running multiple threads.  We skip some fork
     handlers in the single-thread case, to make fork safer to use in
     signal handlers.  Although POSIX has dropped async-signal-safe
     requirement for fork (Austin Group tracker issue #62) this is
     best effort to make it async-signal-safe at least for single-thread
     case.  */
  bool multiple_threads = !SINGLE_THREAD_P;
  uint64_t lastrun;

  /* The value returned here is handed back to __run_postfork_handlers
     on both the child and the parent path below.  */
  lastrun = __run_prefork_handlers (multiple_threads);

  /* Filled in by the parent-side prepare call and consumed by the
     child-side call after _Fork; only touched when multiple_threads
     is true.  */
  struct nss_database_data nss_database_data;

  /* If we are not running multiple threads, we do not have to
     preserve lock state.  If fork runs from a signal handler, only
     async-signal-safe functions can be used in the child.  These data
     structures are only used by unsafe functions, so their state does
     not matter if fork was called from a signal handler.  */
  if (multiple_threads)
    {
      call_function_static_weak (__nss_database_fork_prepare_parent,
				 &nss_database_data);

      _IO_proc_file_chain_lock ();
      _IO_list_lock ();

      /* Acquire malloc locks.  This needs to come last because fork
	 handlers may use malloc, and the libio list lock has an
	 indirect malloc dependency as well (via the getdelim
	 function).  */
      call_function_static_weak (__malloc_fork_lock_parent);
    }

  pid_t pid = _Fork ();

  if (pid == 0)
    {
      /* Child process.  */
      fork_system_setup ();

      /* Reset the lock state in the multi-threaded case.  */
      if (multiple_threads)
	{
	  __libc_unwind_link_after_fork ();

	  fork_system_setup_after_fork ();

	  call_function_static_weak (__abort_fork_reset_child);

	  /* Release malloc locks.  */
	  call_function_static_weak (__malloc_fork_unlock_child);

	  /* Reset the file list.  These are recursive mutexes.  */
	  fresetlockfiles ();

	  /* Reset locks in the I/O code.  */
	  _IO_list_resetlock ();
	  _IO_proc_file_chain_resetlock ();

	  call_function_static_weak (__nss_database_fork_subprocess,
				     &nss_database_data);
	}

      /* Reset the lock the dynamic loader uses to protect its data.  */
      __rtld_lock_initialize (GL(dl_load_lock));

      /* Reset the lock protecting dynamic TLS related data.  */
      __rtld_lock_initialize (GL(dl_load_tls_lock));

      reclaim_stacks ();

      /* Run the handlers registered for the child.  */
      __run_postfork_handlers (atfork_run_child, multiple_threads, lastrun);
    }
  else
    {
      /* Parent process, or _Fork failed (pid < 0).  */

      /* If _Fork failed, preserve its errno value.  */
      int save_errno = errno;

      /* Release acquired locks in the multi-threaded case.  The locks
	 are dropped in the reverse of the order they were taken
	 above.  */
      if (multiple_threads)
	{
	  /* Release malloc locks, parent process variant.  */
	  call_function_static_weak (__malloc_fork_unlock_parent);

	  /* We execute this even if the 'fork' call failed.  */
	  _IO_list_unlock ();
	  _IO_proc_file_chain_unlock ();
	}

      /* Run the handlers registered for the parent.  */
      __run_postfork_handlers (atfork_run_parent, multiple_threads, lastrun);

      /* Restore the errno saved above, in case the unlocking or the
	 handlers changed it.  */
      if (pid < 0)
	__set_errno (save_errno);
    }

  return pid;
}
weak_alias (__libc_fork, __fork)
libc_hidden_def (__fork)
weak_alias (__libc_fork, fork)

__run_prefork_handlers 함수
- 프로세스가 fork 호출하기 전에 등록된 모든 prepare 핸들러를 연속으로 실행한다.
  - 마지막에 등록된 핸들러부터 첫번째로 등록된 핸들러까지 역순으로 실행한다. <- LIFO로 해야된다. 리소스 정리는 이렇게 한다.
    - prepare 핸들러는 무엇인가?
      - pthread_atfork의 핵심 구성 요소이다.
        
        pthread_atfork 구조
        int pthread_atfork(void (*prepare)(void), void (*parent)(void), void (*child)(void));
        
        prepare는 fork호출 직전에 실행, parent는 fork후 부모 프로세스에서 실행, child는 fork후 자식 프로세스에서 실행한다.
        
        prepare에서 모든 뮤텍스를 잠그고, parent에서 잠겼던 모든 뮤텍스를 풀어주고, child에서 prepare에서 잠겼던 모든 뮤텍스를 푼다.
        
        이게 필요한 이유는 fork에서의 문제점을 해결하기 위해서이다.
        /* Example: a mutex hazard in a multithreaded program. */
        pthread_mutex_t global_mutex = PTHREAD_MUTEX_INITIALIZER;

        /* Worker thread: holds global_mutex for the duration of its
           critical section. */
        void thread1() {
            pthread_mutex_lock(&global_mutex);
            // working...
            // what if another thread calls fork() at this point?
            pthread_mutex_unlock(&global_mutex);
        }

        /* Another thread: forks while thread1 may still be holding
           global_mutex. */
        void fork_calling_thread() {
            pid_t pid = fork(); // problem!
            // the child inherits the mutex in its locked state,
            // but the thread that locked it is not copied -> deadlock
        }
        
        이게 뭔 소리냐?
        
        thread1이 pthread_mutex_lock으로 임계 구역에 들어가서 mutex를 가지고 있을 때, 다른 스레드가 fork를 한다고 가정하자. 그러면 fork 때 메모리 상태가 복사되기 때문에 잠긴 상태의 mutex도 그대로 복사된다. 하지만 자식 프로세스에는 fork를 호출한 스레드만 존재하고 thread1은 복사되지 않으므로, 복사된 mutex를 풀어줄 스레드가 없다. 그래서 자식 프로세스가 이 mutex를 잠그려고 하면 데드락에 빠진다.
        
        결론적으로 pthread_atfork를 쓰면된다.
- lll_lock/lll_unlock 함수
  - low level lock의 줄임말
  - 매우 가볍고 빠른 저수준 락
  - atfork_lock 을 획득하는 함수
    - atfork란 무엇인가?
      - pthread_atfork로 등록한 핸들러 목록도 lock으로 보호해야 한다. 그 락이 atfork_lock이다.
- fork_handler_list_size 함수
  - fork 핸들러 리스트의 현재 크기를 반환하는 함수
- fork_handler_list_at 함수
  - 지정된 인덱스의 fork 핸들러 구조체를 반환 -> 실제 핸들러 정보에 접근
- 한 마디로 fork 직전에 자식 프로세스에서 발생할 수 있는 데드락을 방지하기 위해서 pthread_atfork로 등록된 모든 락 핸들러를 실행하는 것이다.
nss_database_data 구조체
- 간단히 말하자면 Name Service Switch 시스템의 각 데이터 베이스 상태를 관리하는 내부 데이터 구조체이다.
  - NSS: 시스템의 다양한 이름을 어떻게 가져올지 결정하는 프레임워크이다.
  - 각각의 데이터 베이스 (passwd, groups, …)에 대한 내부 상태 정보를 담고 있는 그릇이다.
  - 요게 fork에서 쓰이는 이유는 부모 프로세스의 NSS 상태를 자식 프로세스에게 안전하게 복사하려고 쓰인다.
만약 멀티 스레드라면
- fork로 인해 발생할 수 있는 데드락과 데이터 오염을 방지하는 코드블록을 실행한다.
- call_function_static_weak: NSS 데이터 베이스 잠금
- _IO_proc_file_chain_lock + _IO_list_lock: 표준 라이브러리 잠금
- call_function_static_weak: 동적 메모리 할당 잠금

_Fork는 실제로 clone 시스템 콜을 부르는 곳이다.

근데 함수를 따라가보면 딱히 뭐가 없다. <- 이 이유는 다양한 CPU와 아키텍처에 맞게 syscall을 해야하기 때문에 sysdeps/ 폴더 아래에 구현이 있다.

직접 보자

/* clone() is even more special than fork() as it mucks with stacks
   and invokes a function in the right context after its all over.  */

#include <sysdep.h>
#define _ERRNO_H	1
#include <bits/errno.h>
#include <asm-syntax.h>

/* The userland implementation is:
   int clone (int (*fn)(void *arg), void *child_stack, int flags,
      void *arg, pid_t *parent_tid, void *tls, pid_t *child_tid);
   the kernel entry is:
   int clone (long flags, void *child_stack, pid_t *parent_tid,
      pid_t *child_tid, void *tls);

   The parameters are passed in register and on the stack from userland:
   rdi: fn
   rsi: child_stack
   rdx:	flags
   rcx: arg
    r8:	TID field in parent
    r9: thread pointer
%rsp+8:	TID field in child

   The kernel expects:
   rax: system call number
   rdi: flags
   rsi: child_stack
   rdx: TID field in parent
   r10: TID field in child
   r8:	thread pointer  */


        .text
/* __clone: validate fn and child_stack, stash fn and arg on the new
   stack, move the arguments into the registers the kernel expects
   (see the register tables above), and issue the clone system call.
   The caller's side returns with the system call's result in %rax;
   the child's side (result 0) continues at L(thread_start), calls
   fn (arg), and passes its return value to the exit system call.  */
ENTRY (__clone)
/* Sanity check arguments.  */
/* Preload -EINVAL so both error jumps below report it.  */
movq	$-EINVAL,%rax
testq	%rdi,%rdi		/* no NULL function pointers */
jz	SYSCALL_ERROR_LABEL

/* Align stack to 16 bytes per the x86-64 psABI.  */
/* The andq also sets ZF when the aligned pointer is zero, which is
   what the jz below tests.  */
andq	$-16, %rsi
jz	SYSCALL_ERROR_LABEL	/* no NULL stack pointers */

/* Insert the argument onto the new stack.  */
movq	%rcx,-8(%rsi)

/* Reserve two 8-byte slots: arg (stored above) and fn (stored
   below).  */
subq	$16,%rsi

/* Save the function pointer.  It will be popped off in the
   child.  */
movq	%rdi,0(%rsi)

/* Do the system call.  */
/* Shuffle the userland argument registers into the kernel layout:
   flags rdx -> rdi, parent TID r8 -> rdx, thread pointer r9 -> r8,
   child TID from the caller's stack -> r10.  child_stack is already
   in rsi.  */
movq	%rdx, %rdi
movq	%r8, %rdx
movq	%r9, %r8
mov	8(%rsp), %R10_LP
/* Load the system call number; the syscall instruction below
   performs the actual call.  */
movl	$SYS_ify(clone),%eax

/* End FDE now, because in the child the unwind info will be
   wrong.  */
cfi_endproc;
syscall

/* Negative result: error.  Zero: this is the child.  Otherwise
   return to the caller with the result still in %rax.  */
testq	%rax,%rax
jl	SYSCALL_ERROR_LABEL
jz	L(thread_start)

ret

  L(thread_start):
cfi_startproc;
/* Clearing frame pointer is insufficient, use CFI.  */
cfi_undefined (rip);
/* Clear the frame pointer.  The ABI suggests this be done, to mark
   the outermost frame obviously.  */
xorl	%ebp, %ebp

/* Set up arguments for the function call.  */
/* These pops read back the two slots written before the system
   call: fn at the lower address first, then arg.  */
popq	%rax		/* Function to call.  */
popq	%rdi		/* Argument.  */
call	*%rax
/* Call exit with return value from function call. */
movq	%rax, %rdi
movl	$SYS_ify(exit), %eax
syscall
cfi_endproc;

cfi_startproc;
  PSEUDO_END (__clone)

  libc_hidden_def (__clone)
  weak_alias (__clone, clone)

movl $SYS_ify(clone),%eax 로 clone의 시스템 콜 번호를 rax에 넣고, 바로 뒤의 syscall 명령에서 실제 clone을 부른다. 그전에는 인자 처리를 하고, 그 후에는 반환값 처리와 자식 프로세스 처리를 한다.

fork함수의 syscall 이후 함수들의 역할
- 자식 프로세스 (if (pid == 0))
  - 부모의 메모리 상태를 그대로 물려 받고, 잠금 상태를 푼다. 내부 상태를 초기화한다.
- 부모 프로세스
  - 걸었던 lock을 풀고 원래 하던 일을 계속 할 수 있도록 한다.

지금까지 glibc에서 어떻게 fork 시스템콜을 부르는지 살펴보았다. 이제 linux kernel에서 어떻게 clone 시스템 콜을 처리하는지 보겠다.

linux kernel 분석

kernel/fork.c

// 중요하지 않은 부분은 뺐다.
/*
 * clone3 system call entry point (abridged listing).
 *
 * Copies the user-supplied struct clone_args into a kernel_clone_args,
 * validates it, and hands it to kernel_clone().
 *
 * Returns the error from copy_clone_args_from_user() if it is non-zero,
 * -EINVAL if clone3_args_valid() rejects the arguments, and otherwise
 * whatever kernel_clone() returns.
 */
SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
{
	int err;

	struct kernel_clone_args kargs;
	/* Stack buffer that kargs.set_tid is pointed at before copying. */
	pid_t set_tid[MAX_PID_NS_LEVEL];

	kargs.set_tid = set_tid;

	err = copy_clone_args_from_user(&kargs, uargs, size);
	if (err)
		return err;

	if (!clone3_args_valid(&kargs))
		return -EINVAL;

	return kernel_clone(&kargs);
}

이 부분이 clone3함수이다.
- 이걸 보면 인자가 제대로 왔는지 검사하고 kernel_clone을 호출한다는 것을 알 수 있다.

/*
 * kernel_clone - create a new task from @args and start it running.
 *
 * Steps, in order:
 *   1. reject CLONE_PIDFD + CLONE_PARENT_SETTID when both point at the
 *      same location;
 *   2. decide which ptrace event (if any) to report;
 *   3. build the new task with copy_process();
 *   4. publish the new id to args->parent_tid if requested, set up the
 *      vfork completion if requested, and wake the new task;
 *   5. report the ptrace event and, for CLONE_VFORK, wait for the child.
 *
 * Returns -EINVAL for the rejected flag combination, the error encoded
 * in copy_process()'s result if that failed, and otherwise the value of
 * pid_vnr() for the new task's pid.
 */
pid_t kernel_clone(struct kernel_clone_args *args)
{
	u64 clone_flags = args->flags;
	struct completion vfork;
	struct pid *pid;
	struct task_struct *p;
	int trace = 0;
	pid_t nr;

	/*
	 * For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument
	 * to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are
	 * mutually exclusive. With clone3() CLONE_PIDFD has grown a separate
	 * field in struct clone_args and it still doesn't make sense to have
	 * them both point at the same memory location. Performing this check
	 * here has the advantage that we don't need to have a separate helper
	 * to check for legacy clone().
	 */
	if ((clone_flags & CLONE_PIDFD) &&
	    (clone_flags & CLONE_PARENT_SETTID) &&
	    (args->pidfd == args->parent_tid))
		return -EINVAL;

	/*
	 * Determine whether and which event to report to ptracer.  When
	 * called from kernel_thread or CLONE_UNTRACED is explicitly
	 * requested, no event is reported; otherwise, report if the event
	 * for the type of forking is enabled.
	 */
	if (!(clone_flags & CLONE_UNTRACED)) {
		if (clone_flags & CLONE_VFORK)
			trace = PTRACE_EVENT_VFORK;
		else if (args->exit_signal != SIGCHLD)
			trace = PTRACE_EVENT_CLONE;
		else
			trace = PTRACE_EVENT_FORK;

		/* Drop the event again if it is not enabled for current. */
		if (likely(!ptrace_event_enabled(current, trace)))
			trace = 0;
	}

	/* The actual task creation; the result is checked after the next line. */
	p = copy_process(NULL, trace, NUMA_NO_NODE, args);
	add_latent_entropy();

	if (IS_ERR(p))
		return PTR_ERR(p);

	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	trace_sched_process_fork(current, p);

	/* nr is the value returned to the caller at the end. */
	pid = get_task_pid(p, PIDTYPE_PID);
	nr = pid_vnr(pid);

	if (clone_flags & CLONE_PARENT_SETTID)
		put_user(nr, args->parent_tid);

	if (clone_flags & CLONE_VFORK) {
		/* Completion that wait_for_vfork_done() below waits on. */
		p->vfork_done = &vfork;
		init_completion(&vfork);
		/* NOTE(review): presumably keeps p valid for the wait below - confirm. */
		get_task_struct(p);
	}

	if (IS_ENABLED(CONFIG_LRU_GEN_WALKS_MMU) && !(clone_flags & CLONE_VM)) {
		/* lock the task to synchronize with memcg migration */
		task_lock(p);
		lru_gen_add_mm(p->mm);
		task_unlock(p);
	}

	wake_up_new_task(p);

	/* forking complete and child started to run, tell ptracer */
	if (unlikely(trace))
		ptrace_event_pid(trace, pid);

	if (clone_flags & CLONE_VFORK) {
		if (!wait_for_vfork_done(p, &vfork))
			ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
	}

	/* Pairs with get_task_pid() above. */
	put_pid(pid);
	return nr;
}

이 함수가 사실상 clone 시스템 콜의 로직이다.
다른 함수 처럼 준비, 실행, 후처리 라는 3가지 로직으로 구성된다.
- 준비: 인자 유효성 검사를 하고 ptrace 이벤트를 결정한다.
- 실행: copy_process라는 함수를 부른다.
- 후처리
  - 자식 태스크를 깨우기 전 처리
  - 동기화 준비
  - 새로운 태스크를 스케줄러에 등록
  - 자식 태스크가 깨어난 후 처리

// kernel config가 일반적이라고 가정하고 나머지 코드는 뺐다.

/*
 * copy_process - clone() 시스템 콜의 핵심 구현 함수 (단순화 버전)
 *
 * 이 함수는 부모 태스크(current)를 기반으로 새로운 자식 태스크(p)를 생성합니다.
 * clone_flags 값에 따라 어떤 자원을 공유하고, 어떤 자원을 복제할지 결정합니다.
 * 성공 시 새로운 task_struct 포인터를, 실패 시 ERR_PTR을 반환합니다.
 */
struct task_struct *copy_process(
			struct pid *pid,
			int trace,
			int node, /* NUMA 노드, 여기서는 무시 */
			struct kernel_clone_args *args)
{
	int pidfd = -1, retval;
	struct task_struct *p;
	const u64 clone_flags = args->flags;

	/*
	 * ===================================================================
	 * 1. clone 플래그 유효성 검사
	 * ===================================================================
	 * clone()으로 전달된 플래그들의 조합이 유효한지 검사합니다.
	 * 예를 들어, 스레드(CLONE_THREAD)는 반드시 시그널 핸들러(CLONE_SIGHAND)를 공유해야 하고,
	 * 시그널 핸들러를 공유하면 반드시 가상 메모리(CLONE_VM)도 공유해야 합니다.
	 */
	if ((clone_flags & (CLONE_NEWNS | CLONE_FS)) == (CLONE_NEWNS | CLONE_FS))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & (CLONE_NEWUSER | CLONE_FS)) == (CLONE_NEWUSER | CLONE_FS))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
		return ERR_PTR(-EINVAL);

	if ((clone_flags & CLONE_PARENT) &&
	    current->signal->flags & SIGNAL_UNKILLABLE)
		return ERR_PTR(-EINVAL);

	if (clone_flags & CLONE_THREAD) {
		if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID))
			return ERR_PTR(-EINVAL);
	}

	/*
	 * ===================================================================
	 * 2. 태스크 자료구조 복제 및 기본 초기화
	 * ===================================================================
	 */
	retval = -ENOMEM;
	p = dup_task_struct(current, node); // task_struct와 커널 스택을 복제
	if (!p)
		goto fork_out;

	retval = -EAGAIN;
	/* 프로세스 개수 제한(RLIMIT_NPROC)과 전체 스레드 개수 제한을 확인 */
	if (is_rlimit_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
		if (p->real_cred->user != INIT_USER &&
		    !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
			goto bad_fork_free;
	}

	if (data_race(nr_threads >= max_threads))
		goto bad_fork_free;

	p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE);
	p->flags |= PF_FORKNOEXEC; // exec()가 실행되기 전까지는 fork 상태임을 표시

	/* 자식/형제 리스트 초기화 */
	INIT_LIST_HEAD(&p->children);
	INIT_LIST_HEAD(&p->sibling);

	/*
	 * ===================================================================
	 * 3. 각종 서브시스템별로 자원 복제(copy) 또는 공유(share)
	 * ===================================================================
	 * 여기가 clone()의 동작을 결정하는 가장 중요한 부분입니다.
	 * 각 copy_* 함수는 clone_flags를 보고 자원을 복제할지,
	 * 아니면 부모와 공유(참조 카운트만 증가)할지 결정합니다.
	 */

	/* 사용자 자격증명(UID, GID 등) 복사 */
	retval = copy_creds(p, clone_flags);
	if (retval < 0)
		goto bad_fork_free;

	/* 스케줄러 관련 정보 설정. 이 태스크를 어떤 CPU에 할당할지 결정 */
	retval = sched_fork(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_creds;

	/*
	 * copy_xxx 함수들의 연속 호출. 하나라도 실패하면 goto로 에러 처리.
	 * 순서는 매우 중요합니다. (의존성 역순으로 정리)
	 */
	retval = copy_files(clone_flags, p, args->no_files); // 파일 디스크립터 테이블 복사/공유
	if (retval)
		goto bad_fork_sched_cancel;

	retval = copy_fs(clone_flags, p); // 파일 시스템 정보(root, pwd) 복사/공유
	if (retval)
		goto bad_fork_cleanup_files;

	retval = copy_sighand(clone_flags, p); // 시그널 핸들러 테이블 복사/공유
	if (retval)
		goto bad_fork_cleanup_fs;

	retval = copy_signal(clone_flags, p); // 시그널 처리 구조체 복사/공유
	if (retval)
		goto bad_fork_cleanup_sighand;

	retval = copy_mm(clone_flags, p); // 메모리 공간(mm_struct) 복사/공유
	if (retval)
		goto bad_fork_cleanup_signal;

	retval = copy_namespaces(clone_flags, p); // 네임스페이스 복사/공유
	if (retval)
		goto bad_fork_cleanup_mm;

	retval = copy_thread(p, args); // 아키텍처 의존적 정보(레지스터, 스택) 설정
	if (retval)
		goto bad_fork_cleanup_namespaces;

	/*
	 * ===================================================================
	 * 4. PID 할당 및 마무리 작업
	 * ===================================================================
	 */
	if (pid != &init_struct_pid) {
		pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
				  args->set_tid_size);
		if (IS_ERR(pid)) {
			retval = PTR_ERR(pid);
			goto bad_fork_cleanup_thread;
		}
	}

	// CLONE_PIDFD 플래그가 있으면 PID 파일 디스크립터를 생성해서 부모에게 전달
	if (clone_flags & CLONE_PIDFD) {
		retval = pidfd_prepare(pid, 0, &pidfile);
		if (retval < 0)
			goto bad_fork_free_pid;
		pidfd = retval;
	}

	/* ok, now we should be set up.. */
	p->pid = pid_nr(pid);
	if (clone_flags & CLONE_THREAD) {
		/* 스레드인 경우: 부모와 같은 스레드 그룹에 속함 */
		p->group_leader = current->group_leader;
		p->tgid = current->tgid;
	} else {
		/* 프로세스인 경우: 자기 자신이 스레드 그룹의 리더가 됨 */
		p->group_leader = p;
		p->tgid = p->pid;
	}

	p->start_time = ktime_get_ns();
	p->start_boottime = ktime_get_boottime_ns();

	/*
	 * ===================================================================
	 * 5. 새로운 태스크를 시스템에 등록 (tasklist_lock 잠금 하에 원자적으로 수행)
	 * ===================================================================
	 * 이 시점부터 새로운 태스크가 시스템에 보이게 됩니다.
	 */
	write_lock_irq(&tasklist_lock);

	/* 부모-자식 관계 설정 */
	if (clone_flags & (CLONE_PARENT | CLONE_THREAD)) {
		p->real_parent = current->real_parent;
		if (clone_flags & CLONE_THREAD)
			p->exit_signal = -1; // 스레드는 부모에게 종료 시그널을 보내지 않음
		else
			p->exit_signal = current->group_leader->exit_signal;
	} else {
		p->real_parent = current;
		p->exit_signal = args->exit_signal;
	}

	/* PID/TGID/PGID/SID를 태스크에 연결 */
	init_task_pid_links(p);
	if (likely(p->pid)) {
		init_task_pid(p, PIDTYPE_PID, pid);

		if (thread_group_leader(p)) {
			/* 프로세스인 경우 (스레드 그룹 리더) */
			init_task_pid(p, PIDTYPE_TGID, pid);
			init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
			init_task_pid(p, PIDTYPE_SID, task_session(current));

			list_add_tail(&p->sibling, &p->real_parent->children);
			list_add_tail_rcu(&p->tasks, &init_task.tasks);
			attach_pid(p, PIDTYPE_TGID);
			attach_pid(p, PIDTYPE_PGID);
			attach_pid(p, PIDTYPE_SID);
			__this_cpu_inc(process_counts);
		} else {
			/* 스레드인 경우 (기존 스레드 그룹 멤버) */
			current->signal->nr_threads++;
			atomic_inc(&current->signal->live);
			refcount_inc(&current->signal->sigcnt);
			list_add_tail_rcu(&p->thread_node, &p->signal->thread_head);
		}
		attach_pid(p, PIDTYPE_PID);
		nr_threads++;
	}
	total_forks++;

	write_unlock_irq(&tasklist_lock);

	if (pidfd != -1)
		fd_install(pidfd, pidfile);

	/* fork 이후 후처리 작업들 */
	proc_fork_connector(p);
	sched_post_fork(p);
	trace_task_newtask(p, clone_flags);

	return p; /* 성공: 새로운 task_struct 포인터 반환 */

	/*
	 * ===================================================================
	 * 6. 에러 처리 경로 (goto 레이블)
	 * ===================================================================
	 * fork 과정 중 실패하면, 그 시점까지 할당했던 모든 자원을
	 * 역순으로 정리하고 에러를 반환합니다.
	 */
bad_fork_free_pid:
	if (pid != &init_struct_pid)
		free_pid(pid);
bad_fork_cleanup_thread:
	exit_thread(p);
bad_fork_cleanup_namespaces:
	exit_task_namespaces(p);
bad_fork_cleanup_mm:
	if (p->mm) {
		mm_clear_owner(p->mm, p);
		mmput(p->mm);
	}
bad_fork_cleanup_signal:
	if (!(clone_flags & CLONE_THREAD))
		free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
	__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
	exit_fs(p);
bad_fork_cleanup_files:
	exit_files(p);
bad_fork_sched_cancel:
	sched_cancel_fork(p);
bad_fork_cleanup_creds:
	exit_creds(p);
bad_fork_free:
	put_task_struct(p);
fork_out:
	return ERR_PTR(retval);
}

요약하면 아래와 같다.
1. 플래그 검증
2. dup_task_struct: task_struct를 위한 메모리를 할당하고 복사한다.
3. 리소스 제한을 확인한다.
4. 플래그에 따라 copy_*함수를 호출한다.
5. pid를 할당한다.
6. tasklist_lock을 잡고 새로운 태스크를 부모 자식 관계, 스레드 그룹 등에 연결한다.
7. 성공하면 task_struct를 반환하고 아니면 goto를 써서 할당했던 자원을 할당의 역순으로 해제한다.

결국 뭘 얻었냐?

glibc 코드를 어떻게 분석하는지 알 것 같다.
- 처음에는 전부 분석하려고 했지만 사실 중요한 부분만 보는 것이 현실적이라는 것을 느꼈다.
linux kernel 코드가 내가 생각했던 것 만큼 무시무시하고 비현실적인 코드가 아닌 그냥 C언어 코드라고 느꼈다.
그 외의 자잘한 지식들을 얻었다.

목적#

왜 fork syscall 인가#

분석#

glibc 분석#

include/unistd.h#

posix/fork.c#

linux kernel 분석#

kernel/fork.c#

결국 뭘 얻었냐?#

목적

왜 `fork` syscall 인가

분석

glibc 분석

`include/unistd.h`

`posix/fork.c`

linux kernel 분석

kernel/fork.c

결국 뭘 얻었냐?