Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch

Documentation by GPT-5.5

Author
Arjun Guha <a.guha@northeastern.edu>
Date
2026-05-04 06:26:46 -0400
Commit
ca8fa95550e5335c1ca91180bbda54803ff1ab39
src/ocaml/agent_snapshot.ml
index f7a774a..b04b543 100644
--- a/src/ocaml/agent_snapshot.ml
+++ b/src/ocaml/agent_snapshot.ml
@@ -694,9 +694,8 @@ let refresh_proc_cwd (pid : int) (proc : proc_state) : unit =
 
 (** Snapshot-specific syscall entry side effects; argument decoding lives in {!Ptrace.decode_syscall_entry}. *)
 let handle_syscall_entry (pid : int) (proc : proc_state) (regs : Ptrace.regs) : unit =
-  let read_arg i = Ptrace.read_string pid regs.Ptrace.args.(i) in
   let resolve ~dirfd path = resolve_path proc dirfd path in
-  let p = Ptrace.decode_syscall_entry ~resolve ~read_arg regs in
+  let p = Ptrace.decode_syscall_entry pid ~resolve regs in
   begin
     match p.nr with
     | nr when nr = Ptrace.Syscall.open_ -> if Ptrace.is_write_open p.flags then record_observation p.path_a "write"
src/ocaml/ptrace.ml
index 4d40029..2360cdf 100644
--- a/src/ocaml/ptrace.ml
+++ b/src/ocaml/ptrace.ml
@@ -77,8 +77,37 @@ type pending_syscall = {
   mutable flags : int;
 }
 
-(** Fill [pending_syscall] from registers at syscall entry; [read_arg] reads the tracee's C string at argument index [i]. *)
-let decode_syscall_entry ~(resolve : dirfd:int -> string -> string) ~(read_arg : int -> string) (regs : regs) : pending_syscall =
+external peek_word : pid -> int64 -> string = "as_peek_word"
+
+(** [read_string pid address] reads a NUL-terminated string from the memory of tracee [pid] starting at [address], one [peek_word] result at a time. Returns the empty string when [address] is zero. Reading stops at the first NUL byte, when a peek raises, or once 65536 bytes have been scanned; in the last two cases the bytes gathered so far are returned, so bad pointers cannot loop forever. *)
+let read_string (pid : pid) (address : int64) : string =
+  if Int64.equal address 0L then ""
+  else
+    let max_len = 65536 in (* upper bound on the number of bytes scanned *)
+    let word_size = Sys.word_size / 8 in
+    let buffer = Buffer.create 64 in
+    let rec loop offset =
+      if offset >= max_len then Buffer.contents buffer
+      else
+        match peek_word pid (Int64.add address (Int64.of_int offset)) with
+        | exception _ -> Buffer.contents buffer (* any peek failure ends the read with the partial string *)
+        | word ->
+            let rec scan i =
+              if i >= String.length word then loop (offset + word_size) (* no NUL in this word: fetch the next one *)
+              else
+                let c = word.[i] in
+                if Char.equal c '\000' then Buffer.contents buffer
+                else (
+                  Buffer.add_char buffer c;
+                  scan (i + 1))
+            in
+            scan 0
+    in
+    loop 0
+
+(** Fill [pending_syscall] from registers at syscall entry. *)
+let decode_syscall_entry (pid : pid) ~(resolve : dirfd:int -> string -> string) (regs : regs) : pending_syscall =
+  let read_arg i = read_string pid regs.args.(i) in
   let p =
     { nr = regs.syscall_nr; args = regs.args; path_a = ""; path_b = ""; dirfd = at_fdcwd; fd = -1; flags = 0 }
   in
@@ -145,7 +174,6 @@ external setoptions : pid -> unit = "as_setoptions"
 external syscall : pid -> int -> unit = "as_syscall"
 external geteventmsg : pid -> int = "as_geteventmsg"
 external getregs_raw : pid -> int * int64 * int64 * int64 * int64 * int64 * int64 * int64 = "as_getregs"
-external peek_word : pid -> int64 -> string = "as_peek_word"
 external wait_raw : pid -> bool -> wait_stop = "as_wait"
 external const_sigtrap_sysgood : unit -> int = "as_const_sigtrap_sysgood"
 external const_sigtrap : unit -> int = "as_const_sigtrap"
@@ -161,32 +189,6 @@ let regs (pid : pid) : regs =
   let nr, a0, a1, a2, a3, a4, a5, result = getregs_raw pid in
   { syscall_nr = nr; args = [| a0; a1; a2; a3; a4; a5 |]; result }
 
-(** Read a NUL-terminated string from tracee memory, bounded so bad pointers cannot loop forever. *)
-let read_string (pid : pid) (address : int64) : string =
-  if Int64.equal address 0L then ""
-  else
-    let max_len = 65536 in
-    let word_size = Sys.word_size / 8 in
-    let buffer = Buffer.create 64 in
-    let rec loop offset =
-      if offset >= max_len then Buffer.contents buffer
-      else
-        match peek_word pid (Int64.add address (Int64.of_int offset)) with
-        | exception _ -> Buffer.contents buffer
-        | word ->
-            let rec scan i =
-              if i >= String.length word then loop (offset + word_size)
-              else
-                let c = word.[i] in
-                if Char.equal c '\000' then Buffer.contents buffer
-                else (
-                  Buffer.add_char buffer c;
-                  scan (i + 1))
-            in
-            scan 0
-    in
-    loop 0
-
 let wait_initial (pid : pid) : wait_stop = wait_raw pid false
 
 let wait_next () : wait_stop = wait_raw (-1) true
src/ocaml/ptrace.mli
index 56b3f8d..dd475aa 100644
--- a/src/ocaml/ptrace.mli
+++ b/src/ocaml/ptrace.mli
@@ -123,7 +123,7 @@ type pending_syscall = {
   mutable flags : int;
 }
 
-(** [decode_syscall_entry ~resolve ~read_arg regs] decodes the syscall-entry
+(** [decode_syscall_entry pid ~resolve regs] decodes the syscall-entry
     registers for filesystem operations known to the tracer.
 
     Call this when {!trace} emits [Syscall_enter (pid, regs)]. The returned
@@ -138,9 +138,8 @@ type pending_syscall = {
     {[
       Ptrace.trace command (function
         | Ptrace.Syscall_enter (pid, regs) ->
-            let read_arg i = Ptrace.read_string pid regs.Ptrace.args.(i) in
             let resolve ~dirfd path = resolve_path_for_process pid ~dirfd path in
-            let pending = Ptrace.decode_syscall_entry ~resolve ~read_arg regs in
+            let pending = Ptrace.decode_syscall_entry pid ~resolve regs in
             remember_pending_syscall pid pending
         | Ptrace.Syscall_exit (pid, regs) ->
             let pending = take_pending_syscall pid in
@@ -149,10 +148,9 @@ type pending_syscall = {
         | _ -> ())
     ]}
 
-    [read_arg i] reads syscall argument [i] as a NUL-terminated string from the
-    tracee. For path syscalls this is normally implemented with {!read_string}:
-    [fun i -> read_string pid regs.args.(i)]. It is a callback because only the
-    caller knows which [pid] and register snapshot are currently being decoded.
+    The [pid] argument identifies the tracee whose memory contains any string
+    pointer arguments. The decoder uses {!read_string} internally to read those
+    arguments before the tracee resumes.
 
     [resolve ~dirfd path] converts a path argument into the caller's canonical
     path representation. For absolute paths it can usually return [path]
@@ -166,7 +164,7 @@ type pending_syscall = {
     {!pending_syscall.args} are populated and whose derived fields keep their
     defaults. *)
 val decode_syscall_entry :
-  resolve:(dirfd:int -> string -> string) -> read_arg:(int -> string) -> regs -> pending_syscall
+  pid -> resolve:(dirfd:int -> string -> string) -> regs -> pending_syscall
 
 (** Trace events emitted by {!trace}.