Repositories / jai.git

jai.git

Clone (read-only): git clone http://git.guha-anderson.com/git/jai.git

Branch

maybe working job control?

Author
David Mazieres <dm@uun.org>
Date
2026-03-22 17:49:14 -0700
Commit
1b33d2b5964e97fde67a325a879e364dc75ce53c
jai.cc
index c4b72f0..362fd32 100644
--- a/jai.cc
+++ b/jai.cc
@@ -9,9 +9,11 @@
 #include <print>
 
 #include <acl/libacl.h>
+#include <poll.h>
 #include <pwd.h>
 #include <ranges>
 #include <sys/prctl.h>
+#include <sys/signalfd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <termios.h>
@@ -660,43 +662,58 @@ Config::make_env()
   return ret;
 }
 
-// Exits if child pid exited, returns stop signal if pid stopped
-static int
-propagate_exit(int pid, bool immediate)
-try {
-  assert(pid > 0);
-  int status;
-again:
-  if (auto r = waitpid(-1, &status, WUNTRACED); r == -1) {
-    if (errno == EINTR)
-      goto again;
-    syserr("waitpid");
-  }
-  else if (r != pid)
-    // PID 1 in the jail may need to reap reparented processes
-    goto again;
+static pid_t main_pid = getpid();
 
+// Return stop signal if status indicates a child stopped, exit or
+// kill ourselves if the child terminated on a signal, and return 0
+// otherwise.
+static int
+propagate_termination_status(int status)
+{
   if (WIFSTOPPED(status)) {
     if (int sig = WSTOPSIG(status);
         sig == SIGSTOP || sig == SIGTSTP || sig == SIGTTIN || sig == SIGTTOU)
       return sig;
     // Unlikely to reach here, but maybe another process attached to
     // our child with the debugger and it got SIGTRAP or something?
-    goto again;
+    return 0;
   }
 
-  // unmount();
+  void (*do_exit)(int) = getpid() == main_pid ? exit : _exit;
+
   if (WIFEXITED(status))
-    (immediate ? _exit : exit)(WEXITSTATUS(status));
+    do_exit(WEXITSTATUS(status));
   if (WIFSIGNALED(status)) {
-    signal(WTERMSIG(status), SIG_DFL);
-    raise(WTERMSIG(status));
-    (immediate ? _exit : exit)(-1);
+    int sig = WTERMSIG(status);
+    signal(sig, SIG_DFL);
+    auto ss = sigsingleton(WTERMSIG(status));
+    sigprocmask(SIG_UNBLOCK, &ss, nullptr);
+    raise(sig);
+    do_exit(-1);
+  }
+
+  return 0; // Continued?
+}
+
+static int
+wait_propagate(int pid)
+{
+  assert(pid > 0);
+  int status;
+
+  for (;;) {
+    if (auto r = waitpid(-1, &status, WUNTRACED); r == -1) {
+      if (errno != EINTR) {
+        if (getpid() == main_pid)
+          syserr("waitpid");
+        warn("waitpid: {}", strerror(errno));
+        _exit(1);
+      }
+    }
+    else if (r == pid)
+      if (auto sig = propagate_termination_status(status); sig > 0)
+        return sig;
   }
-  err("unknown child wait status 0x{:x}", status);
-} catch (const std::exception &e) {
-  warn("{}", e.what());
-  immediate ? _exit(1) : exit(1);
 }
 
 void
@@ -713,6 +730,108 @@ try {
   _exit(1);
 }
 
+void
+Config::exec(int nsfd, char **argv)
+{
+  // This function is a bit annoying because the existing jai process
+  // cannot move to a new PID namespace, so we have to fork once.  But
+  // the forked process will have PID 1 and behave strangely (such as
+  // not receiving signals from within the PID namespace), so needs to
+  // fork again to run the actual jailed program with a normal PID.
+  // That means PID 1 has to propagate termination events of process 2
+  // to its own parent, which must then propagate them to the process
+  // than ran jai.
+  //
+  // Further complicating matters, PID 1 cannot stop itself (since it
+  // cannot receive a SIGSTOP from within the PID namespace).  Hence,
+  // if the jailed program stops, it uses a pipe to request that the
+  // original jai process stop it (from outside the PID namespace).
+  auto stop_me = xpipe();
+
+  if (auto pid = xfork(CLONE_NEWPID | CLONE_NEWIPC | CLONE_NEWNS)) {
+    // This is the last process in the old PID namespace
+    close(nsfd);
+    stop_me[1].reset();
+    // discard first write, whose only purpose is to test the parent
+    // didn't die before the child set PR_SET_PDEATSIG.
+    char c;
+    read(*stop_me[0], &c, 1);
+    parent_loop(pid, *stop_me[0]);
+  }
+  stop_me[0].reset();
+
+  xsetns(nsfd, CLONE_NEWNS);
+  fix_proc();
+  pid1(std::move(stop_me[1]));
+  pid2(argv);
+}
+
+void
+Config::parent_loop(pid_t pid, int stop_requests)
+{
+  auto ss = sigsingleton(SIGCHLD);
+  if (sigprocmask(SIG_BLOCK, &ss, nullptr))
+    syserr("sigprocmask SIG_BLOCK SIGCHLD");
+  Fd sigfd = signalfd(-1, &ss, SFD_CLOEXEC);
+  if (!sigfd)
+    syserr("signalfd");
+
+  if (int n = fcntl(stop_requests, F_GETFL); n == -1)
+    syserr("F_GETFL");
+  else if (fcntl(stop_requests, F_SETFL, n | O_NONBLOCK) == -1)
+    syserr("F_SETFL O_NONBLOCK");
+
+  // Put stop_requests in static to call drain_pipe from signal handler
+  static int rqfd;
+  rqfd = stop_requests;
+  // Flush pipe returning latest signal request or 0 if none
+  constexpr auto drain_pipe = +[]() {
+    int ret = 0, n;
+    unsigned char buf[8];
+    while ((n = read(rqfd, buf, sizeof(buf))) > 0)
+      ret = buf[n - 1];
+    if (n == -1 && errno != EAGAIN && errno != EINTR)
+      syserr("read from stop_requests pipe");
+    return ret;
+  };
+
+  // If we've been resumed, discard any previous stop requests
+  struct sigaction sa{};
+  sa.sa_handler = +[](int) { drain_pipe(); };
+  sigemptyset(&sa.sa_mask);
+  if (sigaction(SIGCONT, &sa, nullptr))
+    syserr("sigcation(SIGCONT)");
+
+  std::array<pollfd, 2> pollfds{pollfd{.fd = stop_requests, .events = POLLIN},
+                                pollfd{.fd = *sigfd, .events = POLLIN}};
+
+  for (int my_next_stop_sig = 0;;) {
+    if (poll(pollfds.data(), pollfds.size(), -1) < 0) {
+      if (errno == EINTR)
+        continue;
+      syserr("poll");
+    }
+
+    for (;;) {
+      int status;
+      if (auto r = waitpid(pid, &status, WNOHANG | WUNTRACED); r == 0)
+        break;
+      else if (r == -1) {
+        if (errno != EINTR)
+          syserr("waitpid");
+      }
+      else if (auto sig = propagate_termination_status(status); sig > 0) {
+        if (my_next_stop_sig > 0)
+          sig = std::exchange(my_next_stop_sig, 0);
+        raise(sig);
+      }
+    }
+
+    if ((my_next_stop_sig = drain_pipe()) > 0)
+      kill(pid, SIGSTOP);
+  }
+}
+
 // Implement PID 1 in the new namespace.  Only returns for PID 2.
 void
 Config::pid1(Fd stop_me)
@@ -733,20 +852,6 @@ try {
 
   prctl(PR_SET_NAME, "jai-init");
 
-  // We have to two distinct cases.  1) If our child process starts a
-  // new process group (e.g., an interactive shell), then when the
-  // child stops stops (e.g., the user runs "suspend"), we also need
-  // to stop.  When we are resumed, we need to resume the child and
-  // make it's process group the foreground process group (assuming we
-  // have a controlling terminal).
-  //
-  // 2) if the child process does not start a new process group, then
-  // we just let the outer shell handle any job control duties.  It's
-  // a bit strange that in the this case we are a member of process
-  // group that doesn't exist in our own PID namespace, but getpgid(0)
-  // just returns 0, which is good enough to differentiate it from an
-  // in-namespace pgid.
-
   // Note: getpgid is technicalaly not signal safe, but disassembling
   // glibc shows it doesn't do anything problematic other than maybe
   // change errno.  Conceivably there could be weirdness performing
@@ -760,6 +865,7 @@ try {
   tty = open("/dev/tty", O_RDWR | O_CLOEXEC);
 
   struct sigaction sa{};
+  sigemptyset(&sa.sa_mask);
   sigaddset(&sa.sa_mask, SIGTTOU);
   sa.sa_handler = +[](int sig) {
     int saved_errno = errno;
@@ -774,9 +880,8 @@ try {
     syserr("sigaction(SIGCONT)");
 
   for (;;) {
-    propagate_exit(pid, true);
-    if (auto pg = getpgid(main_child_pid); pg != my_pgid)
-      write(*stop_me, "", 1);
+    unsigned char sig = wait_propagate(pid);
+    write(*stop_me, &sig, 1);
   }
 } catch (const std::exception &e) {
   warn("{}", e.what());
@@ -827,46 +932,6 @@ try {
   _exit(1);
 }
 
-void
-Config::exec(int nsfd, char **argv)
-{
-  // This function is a bit annoying because the existing jai process
-  // cannot move to a new PID namespace, so we have to fork once.  But
-  // the forked process will have PID 1 and behave strangely (such as
-  // not receiving signals from within the PID namespace), so needs to
-  // fork again to run the actual jailed program with a normal PID.
-  // That means PID 1 has to propagate termination events of process 2
-  // to its own parent, which must then propagate them to the process
-  // than ran jai.
-  //
-  // Further complicating matters, PID 1 cannot stop itself (since it
-  // cannot receive a SIGSTOP from within the PID namespace).  Hence,
-  // if the jailed program stops, it uses a pipe to request that the
-  // original jai process stop it (from outside the PID namespace).
-  auto stop_me = xpipe();
-
-  if (auto pid = xfork(CLONE_NEWPID | CLONE_NEWIPC | CLONE_NEWNS)) {
-    // This is the last process in the old PID namespace
-    close(nsfd);
-    stop_me[1].reset();
-    char garbage[64];
-    // discard first write, whose only purpose is to test the parent
-    // didn't die before the child set PR_SET_PDEATSIG.
-    read(*stop_me[0], garbage, 1);
-    for (;;) {
-      if (read(*stop_me[0], garbage, sizeof(garbage)) > 0)
-        kill(pid, SIGSTOP);
-      raise(propagate_exit(pid, false));
-    }
-  }
-  stop_me[0].reset();
-
-  xsetns(nsfd, CLONE_NEWNS);
-  fix_proc();
-  pid1(std::move(stop_me[1]));
-  pid2(argv);
-}
-
 std::unique_ptr<Options>
 Config::opt_parser()
 {
jai.h
index 1b074f1..b3a2af6 100644
--- a/jai.h
+++ b/jai.h
@@ -50,6 +50,27 @@ xfork(std::uint64_t flags = 0)
   syserr("clone3");
 }
 
+/*
+inline bool
+is_pid_stopped(pid_t pid)
+{
+  auto fullstat = read_file(-1, std::format("/proc/{}/stat", pid));
+  auto pos = fullstat.rfind(')');
+  if (pos == fullstat.npos)
+    err("/proc/PID/stat: wrong format");
+  return (std::string_view(fullstat).substr(pos, 3) == ") T");
+}
+*/
+
+inline sigset_t
+sigsingleton(int sig)
+{
+  sigset_t ret;
+  sigemptyset(&ret);
+  sigaddset(&ret, sig);
+  return ret;
+}
+
 #define xsetns(fd, type)                                                       \
   do {                                                                         \
     if (setns(fd, type)) {                                                     \
@@ -112,7 +133,8 @@ struct Config {
   std::vector<const char *> make_env();
 
   static void fix_proc();
-  void pid1(Fd stop_me);
+  [[noreturn]] static void parent_loop(pid_t jai_init_pid, int stop_requests);
+  void static pid1(Fd stop_me);
   [[noreturn]] void pid2(char **argv);
 
   [[nodiscard]] static Defer asuser(const Credentials *crp);