Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch

Move syscall decode helpers into ptrace.ml

Centralize x86_64 syscall numbers, open/fcntl flags, pending_syscall,
decode_syscall_entry, and helpers (int_arg, syscall_ok, is_*_open) in
Ptrace; agent_snapshot keeps resolve_path and snapshot-specific enter/exit
record_observation logic.

Co-authored-by: Cursor <cursoragent@cursor.com>
Author
Arjun Guha <a.guha@northeastern.edu>
Date
2026-05-03 04:15:02 -0400
Commit
fd8e1fd55d9e1f3e2eac68bf741ecebd89268e84
src/ocaml/agent_snapshot.ml
index dbed0e6..f7adf07 100644
--- a/src/ocaml/agent_snapshot.ml
+++ b/src/ocaml/agent_snapshot.ml
@@ -8,52 +8,6 @@ let utf8_string (s : string) : string =
 
 let jstr (s : string) : Json.t = `String (utf8_string s)
 
-let at_fdcwd = -100
-let o_accmode = 0o3
-let o_rdonly = 0
-let o_wronly = 1
-let o_rdwr = 2
-let o_creat = 0o100
-let o_trunc = 0o1000
-let o_append = 0o2000
-let o_directory = 0o200000
-let f_dupfd = 0
-let f_dupfd_cloexec = 1030
-
-module Syscall = struct
-  let access = 21
-  let close = 3
-  let creat = 85
-  let dup = 32
-  let dup2 = 33
-  let dup3 = 292
-  let faccessat = 269
-  let faccessat2 = 439
-  let fchdir = 81
-  let fcntl = 72
-  let ftruncate = 77
-  let getdents = 78
-  let getdents64 = 217
-  let lstat = 6
-  let mkdir = 83
-  let mkdirat = 258
-  let newfstatat = 262
-  let open_ = 2
-  let openat = 257
-  let openat2 = 437
-  let readlink = 89
-  let readlinkat = 267
-  let rename = 82
-  let renameat = 264
-  let renameat2 = 316
-  let rmdir = 84
-  let stat = 4
-  let truncate = 76
-  let unlink = 87
-  let unlinkat = 263
-  let chdir = 80
-end
-
 type metadata = {
   mutable exists : bool;
   mutable tombstone : bool;
@@ -85,20 +39,10 @@ type file_record = {
   mutable before_recorded : bool;
 }
 
-type pending_syscall = {
-  nr : int;
-  args : int64 array;
-  mutable path_a : string;
-  mutable path_b : string;
-  mutable dirfd : int;
-  mutable fd : int;
-  mutable flags : int;
-}
-
 type proc_state = {
   mutable cwd : string;
   fds : (int, string) Hashtbl.t;
-  mutable pending : pending_syscall option;
+  mutable pending : Ptrace.pending_syscall option;
 }
 
 type repo_record = {
@@ -471,18 +415,10 @@ let resolve_path (proc : proc_state) (dirfd : int) (path : string) : string =
   if is_absolute path then normalize_path path
   else
     let base =
-      if dirfd <> at_fdcwd then Option.value (Hashtbl.find_opt proc.fds dirfd) ~default:proc.cwd else proc.cwd
+      if dirfd <> Ptrace.at_fdcwd then Option.value (Hashtbl.find_opt proc.fds dirfd) ~default:proc.cwd else proc.cwd
     in
     concat_path base path
 
-let is_write_open (flags : int) : bool =
-  let access = flags land o_accmode in
-  access = o_wronly || access = o_rdwr || flags land (o_creat lor o_trunc lor o_append) <> 0
-
-let is_read_open (flags : int) : bool =
-  let access = flags land o_accmode in
-  access = o_rdonly || access = o_rdwr
-
 let readlink_opt (path : string) : string option = try Some (Unix.readlink path) with Unix.Unix_error _ -> None
 
 (** Ask /proc where a successful fd points; this avoids reimplementing kernel path resolution. *)
@@ -496,102 +432,72 @@ let refresh_proc_cwd (pid : int) (proc : proc_state) : unit =
   | Some target -> proc.cwd <- best_effort_canonical target
   | None -> ()
 
-let int_arg (regs : Ptrace.regs) (i : int) : int = Int64.to_int regs.Ptrace.args.(i)
-
-(** Decode syscall entry arguments while tracee pointers and pre-mutation filesystem state are still available. *)
+(** Snapshot-specific syscall entry side effects; argument decoding lives in {!Ptrace.decode_syscall_entry}. *)
 let handle_syscall_entry (pid : int) (proc : proc_state) (regs : Ptrace.regs) : unit =
-  let p = { nr = regs.Ptrace.syscall_nr; args = regs.args; path_a = ""; path_b = ""; dirfd = at_fdcwd; fd = -1; flags = 0 } in
-  let tracee_string i = Ptrace.read_string pid regs.args.(i) in
+  let read_arg i = Ptrace.read_string pid regs.Ptrace.args.(i) in
+  let resolve ~dirfd path = resolve_path proc dirfd path in
+  let p = Ptrace.decode_syscall_entry ~resolve ~read_arg regs in
   begin
     match p.nr with
-    | nr when nr = Syscall.open_ ->
-        p.path_a <- resolve_path proc at_fdcwd (tracee_string 0);
-        p.flags <- int_arg regs 1;
-        if is_write_open p.flags then record_observation p.path_a "write"
-    | nr when nr = Syscall.openat || nr = Syscall.openat2 ->
-        p.dirfd <- int_arg regs 0;
-        p.path_a <- resolve_path proc p.dirfd (tracee_string 1);
-        p.flags <- int_arg regs 2;
-        if is_write_open p.flags then record_observation p.path_a "write"
-    | nr when nr = Syscall.creat ->
-        p.path_a <- resolve_path proc at_fdcwd (tracee_string 0);
-        p.flags <- o_creat lor o_wronly lor o_trunc;
-        record_observation p.path_a "write"
-    | nr when nr = Syscall.stat || nr = Syscall.lstat || nr = Syscall.access || nr = Syscall.readlink ->
-        p.path_a <- resolve_path proc at_fdcwd (tracee_string 0)
-    | nr when nr = Syscall.newfstatat || nr = Syscall.faccessat || nr = Syscall.faccessat2 || nr = Syscall.readlinkat ->
-        p.dirfd <- int_arg regs 0;
-        p.path_a <- resolve_path proc p.dirfd (tracee_string 1)
-    | nr when nr = Syscall.unlink || nr = Syscall.rmdir ->
-        p.path_a <- resolve_path proc at_fdcwd (tracee_string 0);
-        record_observation p.path_a "delete"
-    | nr when nr = Syscall.unlinkat || nr = Syscall.mkdirat ->
-        p.dirfd <- int_arg regs 0;
-        p.path_a <- resolve_path proc p.dirfd (tracee_string 1);
-        if p.nr = Syscall.unlinkat then record_observation p.path_a "delete"
-    | nr when nr = Syscall.mkdir || nr = Syscall.chdir || nr = Syscall.truncate ->
-        p.path_a <- resolve_path proc at_fdcwd (tracee_string 0)
-    | nr when nr = Syscall.rename ->
-        p.path_a <- resolve_path proc at_fdcwd (tracee_string 0);
-        p.path_b <- resolve_path proc at_fdcwd (tracee_string 1);
+    | nr when nr = Ptrace.Syscall.open_ -> if Ptrace.is_write_open p.flags then record_observation p.path_a "write"
+    | nr when nr = Ptrace.Syscall.openat || nr = Ptrace.Syscall.openat2 ->
+        if Ptrace.is_write_open p.flags then record_observation p.path_a "write"
+    | nr when nr = Ptrace.Syscall.creat -> record_observation p.path_a "write"
+    | nr when nr = Ptrace.Syscall.unlink || nr = Ptrace.Syscall.rmdir -> record_observation p.path_a "delete"
+    | nr when nr = Ptrace.Syscall.unlinkat -> record_observation p.path_a "delete"
+    | nr when nr = Ptrace.Syscall.rename ->
         record_observation p.path_a "delete";
         record_observation p.path_b "write"
-    | nr when nr = Syscall.renameat || nr = Syscall.renameat2 ->
-        p.path_a <- resolve_path proc (int_arg regs 0) (tracee_string 1);
-        p.path_b <- resolve_path proc (int_arg regs 2) (tracee_string 3);
+    | nr when nr = Ptrace.Syscall.renameat || nr = Ptrace.Syscall.renameat2 ->
         record_observation p.path_a "delete";
         record_observation p.path_b "write"
-    | nr when nr = Syscall.getdents || nr = Syscall.getdents64 || nr = Syscall.fchdir || nr = Syscall.ftruncate ->
-        p.fd <- int_arg regs 0
-    | nr when nr = Syscall.close || nr = Syscall.dup || nr = Syscall.dup2 || nr = Syscall.dup3 || nr = Syscall.fcntl ->
-        p.fd <- int_arg regs 0
     | _ -> ()
   end;
   proc.pending <- Some p
 
-let syscall_ok (result : int64) : bool = Int64.compare result 0L >= 0
-
 (** Interpret syscall results, update fd/cwd state, and record observations that depend on success or return fd. *)
 let handle_syscall_exit (pid : int) (proc : proc_state) (regs : Ptrace.regs) : unit =
   match proc.pending with
   | None -> ()
   | Some p ->
-      let ok = syscall_ok regs.Ptrace.result in
+      let ok = Ptrace.syscall_ok regs.Ptrace.result in
       begin
         match p.nr with
-        | nr when nr = Syscall.open_ || nr = Syscall.openat || nr = Syscall.openat2 || nr = Syscall.creat ->
+        | nr when nr = Ptrace.Syscall.open_ || nr = Ptrace.Syscall.openat || nr = Ptrace.Syscall.openat2 || nr = Ptrace.Syscall.creat ->
             if ok then (
-              if is_read_open p.flags then record_observation p.path_a "read";
-              if is_write_open p.flags then record_observation p.path_a "write";
-              if p.flags land o_directory <> 0 then record_observation p.path_a "directory";
-              refresh_proc_fd pid proc (Int64.to_int regs.result))
+              if Ptrace.is_read_open p.flags then record_observation p.path_a "read";
+              if Ptrace.is_write_open p.flags then record_observation p.path_a "write";
+              if p.flags land Ptrace.o_directory <> 0 then record_observation p.path_a "directory";
+              refresh_proc_fd pid proc (Int64.to_int regs.Ptrace.result))
             else record_observation p.path_a "existence"
         | nr
-          when nr = Syscall.stat || nr = Syscall.lstat || nr = Syscall.newfstatat || nr = Syscall.access || nr = Syscall.faccessat
-               || nr = Syscall.faccessat2 || nr = Syscall.readlink || nr = Syscall.readlinkat ->
+          when nr = Ptrace.Syscall.stat || nr = Ptrace.Syscall.lstat || nr = Ptrace.Syscall.newfstatat || nr = Ptrace.Syscall.access
+               || nr = Ptrace.Syscall.faccessat || nr = Ptrace.Syscall.faccessat2 || nr = Ptrace.Syscall.readlink
+               || nr = Ptrace.Syscall.readlinkat ->
             record_observation p.path_a "existence"
-        | nr when nr = Syscall.getdents || nr = Syscall.getdents64 ->
+        | nr when nr = Ptrace.Syscall.getdents || nr = Ptrace.Syscall.getdents64 ->
             if ok && p.fd >= 0 then Option.iter (fun path -> record_observation path "directory") (Hashtbl.find_opt proc.fds p.fd)
-        | nr when nr = Syscall.unlink || nr = Syscall.unlinkat || nr = Syscall.rmdir ->
+        | nr when nr = Ptrace.Syscall.unlink || nr = Ptrace.Syscall.unlinkat || nr = Ptrace.Syscall.rmdir ->
             record_observation p.path_a "delete"
-        | nr when nr = Syscall.rename || nr = Syscall.renameat || nr = Syscall.renameat2 ->
+        | nr when nr = Ptrace.Syscall.rename || nr = Ptrace.Syscall.renameat || nr = Ptrace.Syscall.renameat2 ->
             record_observation p.path_a "delete";
             record_observation p.path_b "write"
-        | nr when nr = Syscall.mkdir || nr = Syscall.mkdirat || nr = Syscall.truncate ->
+        | nr when nr = Ptrace.Syscall.mkdir || nr = Ptrace.Syscall.mkdirat || nr = Ptrace.Syscall.truncate ->
             record_observation p.path_a "write"
-        | nr when nr = Syscall.ftruncate ->
+        | nr when nr = Ptrace.Syscall.ftruncate ->
             if p.fd >= 0 then Option.iter (fun path -> record_observation path "write") (Hashtbl.find_opt proc.fds p.fd)
-        | nr when nr = Syscall.chdir || nr = Syscall.fchdir ->
+        | nr when nr = Ptrace.Syscall.chdir || nr = Ptrace.Syscall.fchdir ->
             if ok then refresh_proc_cwd pid proc
-        | nr when nr = Syscall.close ->
+        | nr when nr = Ptrace.Syscall.close ->
             if ok then Hashtbl.remove proc.fds p.fd
-        | nr when nr = Syscall.dup ->
-            if ok then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.result) path) (Hashtbl.find_opt proc.fds p.fd)
-        | nr when nr = Syscall.dup2 || nr = Syscall.dup3 ->
+        | nr when nr = Ptrace.Syscall.dup ->
+            if ok then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.Ptrace.result) path) (Hashtbl.find_opt proc.fds p.fd)
+        | nr when nr = Ptrace.Syscall.dup2 || nr = Ptrace.Syscall.dup3 ->
             if ok then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int p.args.(1)) path) (Hashtbl.find_opt proc.fds p.fd)
-        | nr when nr = Syscall.fcntl ->
-            if ok && (Int64.to_int p.args.(1) = f_dupfd || Int64.to_int p.args.(1) = f_dupfd_cloexec) then
-              Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.result) path) (Hashtbl.find_opt proc.fds p.fd)
+        | nr when nr = Ptrace.Syscall.fcntl ->
+            if ok
+               && (Int64.to_int p.args.(1) = Ptrace.f_dupfd || Int64.to_int p.args.(1) = Ptrace.f_dupfd_cloexec)
+            then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.Ptrace.result) path) (Hashtbl.find_opt proc.fds p.fd)
         | _ -> ()
       end
 
src/ocaml/ptrace.ml
index 86c8be4..01ae194 100644
--- a/src/ocaml/ptrace.ml
+++ b/src/ocaml/ptrace.ml
@@ -6,6 +6,120 @@ type regs = {
   result : int64;
 }
 
+(** [AT_FDCWD] for *at() syscalls on x86_64 Linux. It is the default [dirfd] of a freshly decoded
+    [pending_syscall], and [Agent_snapshot.resolve_path] resolves relative paths against the
+    process cwd when it sees this value. *)
+let at_fdcwd = -100
+
+(** Syscall numbers for x86_64 Linux, one constant per syscall the decoder recognises.
+    [open_] carries a trailing underscore because [open] is an OCaml keyword. *)
+module Syscall = struct
+  let access = 21
+  let close = 3
+  let creat = 85
+  let dup = 32
+  let dup2 = 33
+  let dup3 = 292
+  let faccessat = 269
+  let faccessat2 = 439
+  let fchdir = 81
+  let fcntl = 72
+  let ftruncate = 77
+  let getdents = 78
+  let getdents64 = 217
+  let lstat = 6
+  let mkdir = 83
+  let mkdirat = 258
+  let newfstatat = 262
+  let open_ = 2
+  let openat = 257
+  let openat2 = 437
+  let readlink = 89
+  let readlinkat = 267
+  let rename = 82
+  let renameat = 264
+  let renameat2 = 316
+  let rmdir = 84
+  let stat = 4
+  let truncate = 76
+  let unlink = 87
+  let unlinkat = 263
+  let chdir = 80
+end
+
+(** open(2) access modes and flag bits, followed by the two fcntl(2) commands that duplicate a
+    file descriptor. [o_accmode] is the mask that isolates the access mode from the other flags. *)
+let o_accmode = 0o3
+let o_rdonly = 0
+let o_wronly = 1
+let o_rdwr = 2
+let o_creat = 0o100
+let o_trunc = 0o1000
+let o_append = 0o2000
+let o_directory = 0o200000
+let f_dupfd = 0
+let f_dupfd_cloexec = 1030
+
+(** [int_arg regs i] is syscall argument [i] read as a C [int] (an fd, a dirfd or a flags word).
+    Only the low 32 bits of the register are kept and then sign-extended: the kernel takes these
+    parameters as [int], and callers commonly pass [AT_FDCWD] (-100) zero-extended as
+    [0xFFFFFF9C], which a plain [Int64.to_int] would decode as 4294967196 instead of -100. *)
+let int_arg (regs : regs) (i : int) : int = Int32.to_int (Int64.to_int32 regs.args.(i))
+
+(** [syscall_ok result] is [true] when the raw syscall return value is zero or positive;
+    any negative value is treated as failure. *)
+let syscall_ok (result : int64) : bool = Int64.compare result 0L >= 0
+
+(** [is_write_open flags] is [true] when the access mode is [o_wronly] or [o_rdwr], or when any
+    of [o_creat], [o_trunc] or [o_append] is set, even if the access mode itself is read-only. *)
+let is_write_open (flags : int) : bool =
+  let access = flags land o_accmode in
+  access = o_wronly || access = o_rdwr || flags land (o_creat lor o_trunc lor o_append) <> 0
+
+(** [is_read_open flags] is [true] when the access mode ([flags land o_accmode]) is [o_rdonly]
+    or [o_rdwr]; the remaining flag bits are ignored. *)
+let is_read_open (flags : int) : bool =
+  let access = flags land o_accmode in
+  access = o_rdonly || access = o_rdwr
+
+(** Register snapshot between syscall enter and exit for path-oriented syscalls. [nr] and the raw
+    [args] are copied from the entry registers; the mutable fields hold what
+    {!decode_syscall_entry} decoded: [path_a] and [path_b] are resolved path arguments, [dirfd]
+    and [fd] are descriptor arguments, and [flags] holds the open flags. Fields a syscall does
+    not use keep their defaults: empty paths, [dirfd = at_fdcwd], [fd = -1] and [flags = 0]. *)
+type pending_syscall = {
+  nr : int;
+  args : int64 array;
+  mutable path_a : string;
+  mutable path_b : string;
+  mutable dirfd : int;
+  mutable fd : int;
+  mutable flags : int;
+}
+
+(** Fill [pending_syscall] from registers at syscall entry; [read_arg] reads the tracee's C string at argument index [i].
+    [resolve ~dirfd path] produces the string stored in [path_a] or [path_b]; syscalls that take no
+    directory fd are resolved with [at_fdcwd]. Syscall numbers not matched below keep the defaults
+    (empty paths, [dirfd = at_fdcwd], [fd = -1], [flags = 0]). For [creat] the flags are synthesised
+    as [o_creat lor o_wronly lor o_trunc] rather than read from a register.
+    NOTE(review): [openat2] shares the [openat] branch, so [flags] is read from argument 2. Per
+    openat2(2) that argument is a pointer to [struct open_how], not a flags word; confirm, and
+    decode the struct from tracee memory if so. *)
+let decode_syscall_entry ~(resolve : dirfd:int -> string -> string) ~(read_arg : int -> string) (regs : regs) : pending_syscall =
+  let p =
+    { nr = regs.syscall_nr; args = regs.args; path_a = ""; path_b = ""; dirfd = at_fdcwd; fd = -1; flags = 0 }
+  in
+  begin
+    match p.nr with
+    | nr when nr = Syscall.open_ ->
+        p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0);
+        p.flags <- int_arg regs 1
+    | nr when nr = Syscall.openat || nr = Syscall.openat2 ->
+        p.dirfd <- int_arg regs 0;
+        p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1);
+        p.flags <- int_arg regs 2
+    | nr when nr = Syscall.creat ->
+        p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0);
+        p.flags <- o_creat lor o_wronly lor o_trunc
+    | nr when nr = Syscall.stat || nr = Syscall.lstat || nr = Syscall.access || nr = Syscall.readlink ->
+        p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0)
+    | nr when nr = Syscall.newfstatat || nr = Syscall.faccessat || nr = Syscall.faccessat2 || nr = Syscall.readlinkat ->
+        p.dirfd <- int_arg regs 0;
+        p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1)
+    | nr when nr = Syscall.unlink || nr = Syscall.rmdir ->
+        p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0)
+    | nr when nr = Syscall.unlinkat || nr = Syscall.mkdirat ->
+        p.dirfd <- int_arg regs 0;
+        p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1)
+    | nr when nr = Syscall.mkdir || nr = Syscall.chdir || nr = Syscall.truncate ->
+        p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0)
+    | nr when nr = Syscall.rename ->
+        p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0);
+        p.path_b <- resolve ~dirfd:at_fdcwd (read_arg 1)
+    | nr when nr = Syscall.renameat || nr = Syscall.renameat2 ->
+        p.path_a <- resolve ~dirfd:(int_arg regs 0) (read_arg 1);
+        p.path_b <- resolve ~dirfd:(int_arg regs 2) (read_arg 3)
+    | nr when nr = Syscall.getdents || nr = Syscall.getdents64 || nr = Syscall.fchdir || nr = Syscall.ftruncate ->
+        p.fd <- int_arg regs 0
+    | nr when nr = Syscall.close || nr = Syscall.dup || nr = Syscall.dup2 || nr = Syscall.dup3 || nr = Syscall.fcntl ->
+        p.fd <- int_arg regs 0
+    | _ -> ()
+  end;
+  p
+
 type wait_stop =
   | Exited of pid * int
   | Signaled of pid * int