funcstartContainer(context*cli.Context,spec*specs.Spec,actionCtAct,criuOpts*libcontainer.CriuOpts)(int,error){id:=context.Args().First()ifid==""{return-1,errEmptyID}notifySocket:=newNotifySocket(context,os.Getenv("NOTIFY_SOCKET"),id)ifnotifySocket!=nil{iferr:=notifySocket.setupSpec(context,spec);err!=nil{return-1,err}}container,err:=createContainer(context,id,spec)iferr!=nil{return-1,err}ifnotifySocket!=nil{iferr:=notifySocket.setupSocketDirectory();err!=nil{return-1,err}ifaction==CT_ACT_RUN{iferr:=notifySocket.bindSocket();err!=nil{return-1,err}}}// Support on-demand socket activation by passing file descriptors into the container init process.
listenFDs:=[]*os.File{}ifos.Getenv("LISTEN_FDS")!=""{listenFDs=activation.Files(false)}logLevel:="info"ifcontext.GlobalBool("debug"){logLevel="debug"}r:=&runner{enableSubreaper:!context.Bool("no-subreaper"),shouldDestroy:true,container:container,listenFDs:listenFDs,notifySocket:notifySocket,consoleSocket:context.String("console-socket"),detach:context.Bool("detach"),pidFile:context.String("pid-file"),preserveFDs:context.Int("preserve-fds"),action:action,criuOpts:criuOpts,init:true,logLevel:logLevel,}returnr.run(spec.Process)}
// start launches p.cmd and then handles the sync messages read from the
// parent end of p.messageSockPair via parseSync. The parent end of the socket
// pair is closed when start returns. The callback switches on sync.Type
// (procReady, procHooks); any other type is rejected with an
// "invalid JSON payload from child" system error.
// NOTE(review): this listing is abridged ("..." marks omitted code), so the
// handling inside each case and the use of ierr are not visible here.
func(p*initProcess)start()(retErrerror){deferp.messageSockPair.parent.Close()err:=p.cmd.Start()p.process.ops=p...ierr:=parseSync(p.messageSockPair.parent,func(sync*syncT)error{switchsync.Type{caseprocReady:// set rlimits, this has to be done here because we lose permissions
...sentRun=truecaseprocHooks:// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
...sentResume=truedefault:returnnewSystemError(errors.New("invalid JSON payload from child"))}returnnil})returnnil}
varinitCommand=cli.Command{Name:"init",Usage:`initialize the namespaces and launch the process (do not call it outside of runc)`,Action:func(context*cli.Context)error{factory,_:=libcontainer.New("")iferr:=factory.StartInitialization();err!=nil{// as the error is sent back to the parent there is no need to log
// or write it to stderr because the parent process will handle this
os.Exit(1)}panic("libcontainer: container init failed to exec")},}
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state.
// This is a low-level implementation detail of the reexec and should not be consumed externally.
//
// In the visible code the init type is taken from the _LIBCONTAINER_INITTYPE
// environment variable, a matching init is built with newContainerInit, and
// the result of its Init method is returned.
// NOTE(review): this listing is abridged ("..." marks omitted code), so the
// setup of pipe, consoleSocket, fifofd and logPipeFd is not shown here.
func(l*LinuxFactory)StartInitialization()(errerror){...envInitType:=os.Getenv("_LIBCONTAINER_INITTYPE")it:=initType(envInitType)...i,err:=newContainerInit(it,pipe,consoleSocket,fifofd,logPipeFd)iferr!=nil{returnerr}// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
returni.Init()
// Init performs the in-container setup and finally execs the user process.
// With the goroutine locked to its OS thread, the visible steps are: set up
// the network and routes, prepare the rootfs, optionally set up the console
// and controlling terminal (l.config.CreateConsole), finalize the rootfs when
// the configured namespaces contain configs.NEWNS, check that the parent PID
// still equals l.parentPid (sending SIGKILL to itself otherwise), resolve
// l.config.Args[0] with exec.LookPath, and call unix.Exec with the current
// environment. A failing exec is returned as an "exec user process" error.
// NOTE(review): this listing is abridged ("..." marks omitted code).
func(l*linuxStandardInit)Init()error{runtime.LockOSThread()deferruntime.UnlockOSThread()...iferr:=setupNetwork(l.config);err!=nil{returnerr}iferr:=setupRoute(l.config.Config);err!=nil{returnerr}// initialises the labeling system
selinux.GetEnabled()iferr:=prepareRootfs(l.pipe,l.config);err!=nil{returnerr}// Set up the console. This has to be done *before* we finalize the rootfs,
// but *after* we've given the user the chance to set up all of the mounts
// they wanted.
ifl.config.CreateConsole{iferr:=setupConsole(l.consoleSocket,l.config,true);err!=nil{returnerr}iferr:=system.Setctty();err!=nil{returnerrors.Wrap(err,"setctty")}}// Finish the rootfs setup.
ifl.config.Config.Namespaces.Contains(configs.NEWNS){iferr:=finalizeRootfs(l.config.Config);err!=nil{returnerr}}...// Compare the parent from the initial start of the init process and make
// sure that it did not change. If the parent changes, that means it died
// and we were reparented to something else, so we should just kill ourselves
// and not cause problems for someone else.
ifunix.Getppid()!=l.parentPid{returnunix.Kill(unix.Getpid(),unix.SIGKILL)}// Check for the arg before waiting to make sure it exists and it is
// returned as a create time error.
name,err:=exec.LookPath(l.config.Args[0])...iferr:=unix.Exec(name,l.config.Args[0:],os.Environ());err!=nil{returnnewSystemErrorWithCause(err,"exec user process")}returnnil}
/*
 * nsexec: entry point of the C-side bootstrap shown in this excerpt.
 * Visible steps, in order: set up the log pipe (done first because bail
 * uses it), return immediately when initpipe() yields -1, and otherwise
 * call ensure_cloned_binary(), bailing if it returns a negative value.
 * NOTE(review): this listing is abridged ("..." marks omitted code), so the
 * use of env, the sync pipes and config is not visible here.
 */
voidnsexec(void){intpipenum;jmp_bufenv;intsync_child_pipe[2],sync_grandchild_pipe[2];structnlconfig_tconfig={0};/*
* Setup a pipe to send logs to the parent. This should happen
* first, because bail will use that pipe.
*/setup_logpipe();/*
* If we don't have an init pipe, just return to the go routine.
* We'll only get an init pipe for start or exec.
*/pipenum=initpipe();if(pipenum==-1)return;/*
* We need to re-exec if we are not in a cloned binary. This is necessary
* to ensure that containers won't be able to access the host binary
* through /proc/self/exe. See CVE-2019-5736.
*/if(ensure_cloned_binary()<0)bail("could not ensure we are a cloned binary");...}
intensure_cloned_binary(void){intexecfd;char**argv=NULL;/* Check that we're not self-cloned, and if we are then bail. */intcloned=is_self_cloned();if(cloned>0||cloned==-ENOTRECOVERABLE)returncloned;if(fetchve(&argv)<0)return-EINVAL;execfd=clone_binary();if(execfd<0)return-EIO;if(putenv(CLONED_BINARY_ENV"=1"))gotoerror;fexecve(execfd,argv,environ);error:close(execfd);return-ENOEXEC;}