Repositories / agent-snapshot.git
agent-snapshot.git
Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git
@@ -8,52 +8,6 @@ let utf8_string (s : string) : string = let jstr (s : string) : Json.t = `String (utf8_string s) -let at_fdcwd = -100 -let o_accmode = 0o3 -let o_rdonly = 0 -let o_wronly = 1 -let o_rdwr = 2 -let o_creat = 0o100 -let o_trunc = 0o1000 -let o_append = 0o2000 -let o_directory = 0o200000 -let f_dupfd = 0 -let f_dupfd_cloexec = 1030 - -module Syscall = struct - let access = 21 - let close = 3 - let creat = 85 - let dup = 32 - let dup2 = 33 - let dup3 = 292 - let faccessat = 269 - let faccessat2 = 439 - let fchdir = 81 - let fcntl = 72 - let ftruncate = 77 - let getdents = 78 - let getdents64 = 217 - let lstat = 6 - let mkdir = 83 - let mkdirat = 258 - let newfstatat = 262 - let open_ = 2 - let openat = 257 - let openat2 = 437 - let readlink = 89 - let readlinkat = 267 - let rename = 82 - let renameat = 264 - let renameat2 = 316 - let rmdir = 84 - let stat = 4 - let truncate = 76 - let unlink = 87 - let unlinkat = 263 - let chdir = 80 -end - type metadata = { mutable exists : bool; mutable tombstone : bool; @@ -85,20 +39,10 @@ type file_record = { mutable before_recorded : bool; } -type pending_syscall = { - nr : int; - args : int64 array; - mutable path_a : string; - mutable path_b : string; - mutable dirfd : int; - mutable fd : int; - mutable flags : int; -} - type proc_state = { mutable cwd : string; fds : (int, string) Hashtbl.t; - mutable pending : pending_syscall option; + mutable pending : Ptrace.pending_syscall option; (* stored by handle_syscall_entry, read back by handle_syscall_exit *) } type repo_record = { @@ -471,18 +415,10 @@ let resolve_path (proc : proc_state) (dirfd : int) (path : string) : string = if is_absolute path then normalize_path path else let base = - if dirfd <> at_fdcwd then Option.value (Hashtbl.find_opt proc.fds dirfd) ~default:proc.cwd else proc.cwd + if dirfd <> Ptrace.at_fdcwd then Option.value (Hashtbl.find_opt proc.fds dirfd) ~default:proc.cwd else proc.cwd (* a dirfd that is not tracked in proc.fds falls back to proc.cwd *) in concat_path base path -let is_write_open (flags : int) : bool = - let access = flags land o_accmode in - access = o_wronly ||
access = o_rdwr || flags land (o_creat lor o_trunc lor o_append) <> 0 - -let is_read_open (flags : int) : bool = - let access = flags land o_accmode in - access = o_rdonly || access = o_rdwr - let readlink_opt (path : string) : string option = try Some (Unix.readlink path) with Unix.Unix_error _ -> None (** Ask /proc where a successful fd points; this avoids reimplementing kernel path resolution. *) @@ -496,102 +432,72 @@ let refresh_proc_cwd (pid : int) (proc : proc_state) : unit = | Some target -> proc.cwd <- best_effort_canonical target | None -> () -let int_arg (regs : Ptrace.regs) (i : int) : int = Int64.to_int regs.Ptrace.args.(i) - -(** Decode syscall entry arguments while tracee pointers and pre-mutation filesystem state are still available. *) +(** Snapshot-specific syscall entry side effects; argument decoding lives in {!Ptrace.decode_syscall_entry}. Records write and delete observations for the decoded paths at syscall entry, then stores the decoded record in [proc.pending] so that [handle_syscall_exit] can interpret the result. *) let handle_syscall_entry (pid : int) (proc : proc_state) (regs : Ptrace.regs) : unit = - let p = { nr = regs.Ptrace.syscall_nr; args = regs.args; path_a = ""; path_b = ""; dirfd = at_fdcwd; fd = -1; flags = 0 } in - let tracee_string i = Ptrace.read_string pid regs.args.(i) in + let read_arg i = Ptrace.read_string pid regs.Ptrace.args.(i) in + let resolve ~dirfd path = resolve_path proc dirfd path in (* adapts resolve_path to the labelled callback the decoder expects *) + let p = Ptrace.decode_syscall_entry ~resolve ~read_arg regs in begin match p.nr with - | nr when nr = Syscall.open_ -> - p.path_a <- resolve_path proc at_fdcwd (tracee_string 0); - p.flags <- int_arg regs 1; - if is_write_open p.flags then record_observation p.path_a "write" - | nr when nr = Syscall.openat || nr = Syscall.openat2 -> - p.dirfd <- int_arg regs 0; - p.path_a <- resolve_path proc p.dirfd (tracee_string 1); - p.flags <- int_arg regs 2; - if is_write_open p.flags then record_observation p.path_a "write" - | nr when nr = Syscall.creat -> - p.path_a <- resolve_path proc at_fdcwd (tracee_string 0); - p.flags <- o_creat lor o_wronly lor o_trunc; - record_observation p.path_a "write" - | nr when nr =
Syscall.stat || nr = Syscall.lstat || nr = Syscall.access || nr = Syscall.readlink -> - p.path_a <- resolve_path proc at_fdcwd (tracee_string 0) - | nr when nr = Syscall.newfstatat || nr = Syscall.faccessat || nr = Syscall.faccessat2 || nr = Syscall.readlinkat -> - p.dirfd <- int_arg regs 0; - p.path_a <- resolve_path proc p.dirfd (tracee_string 1) - | nr when nr = Syscall.unlink || nr = Syscall.rmdir -> - p.path_a <- resolve_path proc at_fdcwd (tracee_string 0); - record_observation p.path_a "delete" - | nr when nr = Syscall.unlinkat || nr = Syscall.mkdirat -> - p.dirfd <- int_arg regs 0; - p.path_a <- resolve_path proc p.dirfd (tracee_string 1); - if p.nr = Syscall.unlinkat then record_observation p.path_a "delete" - | nr when nr = Syscall.mkdir || nr = Syscall.chdir || nr = Syscall.truncate -> - p.path_a <- resolve_path proc at_fdcwd (tracee_string 0) - | nr when nr = Syscall.rename -> - p.path_a <- resolve_path proc at_fdcwd (tracee_string 0); - p.path_b <- resolve_path proc at_fdcwd (tracee_string 1); + | nr when nr = Ptrace.Syscall.open_ -> if Ptrace.is_write_open p.flags then record_observation p.path_a "write" + | nr when nr = Ptrace.Syscall.openat || nr = Ptrace.Syscall.openat2 -> + if Ptrace.is_write_open p.flags then record_observation p.path_a "write" + | nr when nr = Ptrace.Syscall.creat -> (* no flag test here: every creat is recorded as a write *) record_observation p.path_a "write" + | nr when nr = Ptrace.Syscall.unlink || nr = Ptrace.Syscall.rmdir -> record_observation p.path_a "delete" + | nr when nr = Ptrace.Syscall.unlinkat -> record_observation p.path_a "delete" + | nr when nr = Ptrace.Syscall.rename -> record_observation p.path_a "delete"; record_observation
p.path_b "write" - | nr when nr = Syscall.renameat || nr = Syscall.renameat2 -> - p.path_a <- resolve_path proc (int_arg regs 0) (tracee_string 1); - p.path_b <- resolve_path proc (int_arg regs 2) (tracee_string 3); + | nr when nr = Ptrace.Syscall.renameat || nr = Ptrace.Syscall.renameat2 -> record_observation p.path_a "delete"; record_observation p.path_b "write" - | nr when nr = Syscall.getdents || nr = Syscall.getdents64 || nr = Syscall.fchdir || nr = Syscall.ftruncate -> - p.fd <- int_arg regs 0 - | nr when nr = Syscall.close || nr = Syscall.dup || nr = Syscall.dup2 || nr = Syscall.dup3 || nr = Syscall.fcntl -> - p.fd <- int_arg regs 0 | _ -> () end; proc.pending <- Some p -let syscall_ok (result : int64) : bool = Int64.compare result 0L >= 0 - (** Interpret syscall results, update fd/cwd state, and record observations that depend on success or return fd. *) let handle_syscall_exit (pid : int) (proc : proc_state) (regs : Ptrace.regs) : unit = match proc.pending with | None -> () | Some p -> - let ok = syscall_ok regs.Ptrace.result in + let ok = Ptrace.syscall_ok regs.Ptrace.result in (* ok means the raw return value is non-negative *) begin match p.nr with - | nr when nr = Syscall.open_ || nr = Syscall.openat || nr = Syscall.openat2 || nr = Syscall.creat -> + | nr when nr = Ptrace.Syscall.open_ || nr = Ptrace.Syscall.openat || nr = Ptrace.Syscall.openat2 || nr = Ptrace.Syscall.creat -> if ok then ( - if is_read_open p.flags then record_observation p.path_a "read"; - if is_write_open p.flags then record_observation p.path_a "write"; - if p.flags land o_directory <> 0 then record_observation p.path_a "directory"; - refresh_proc_fd pid proc (Int64.to_int regs.result)) + if Ptrace.is_read_open p.flags then record_observation p.path_a "read"; + if Ptrace.is_write_open p.flags then record_observation p.path_a "write"; + if p.flags land Ptrace.o_directory <> 0 then record_observation p.path_a "directory"; + refresh_proc_fd pid proc (Int64.to_int regs.Ptrace.result)) (* on success the return value is passed on as the fd to refresh *) else record_observation p.path_a "existence" | nr - when nr = Syscall.stat || nr = Syscall.lstat || nr = Syscall.newfstatat || nr = Syscall.access || nr = Syscall.faccessat - || nr = Syscall.faccessat2 || nr = Syscall.readlink || nr = Syscall.readlinkat -> + when nr = Ptrace.Syscall.stat || nr = Ptrace.Syscall.lstat || nr = Ptrace.Syscall.newfstatat || nr = Ptrace.Syscall.access + || nr =
Ptrace.Syscall.faccessat || nr = Ptrace.Syscall.faccessat2 || nr = Ptrace.Syscall.readlink + || nr = Ptrace.Syscall.readlinkat -> record_observation p.path_a "existence" - | nr when nr = Syscall.getdents || nr = Syscall.getdents64 -> + | nr when nr = Ptrace.Syscall.getdents || nr = Ptrace.Syscall.getdents64 -> if ok && p.fd >= 0 then Option.iter (fun path -> record_observation path "directory") (Hashtbl.find_opt proc.fds p.fd) - | nr when nr = Syscall.unlink || nr = Syscall.unlinkat || nr = Syscall.rmdir -> + | nr when nr = Ptrace.Syscall.unlink || nr = Ptrace.Syscall.unlinkat || nr = Ptrace.Syscall.rmdir -> (* NOTE(review): recorded whether or not the syscall succeeded, [ok] is not consulted in this arm -- confirm this is intended *) record_observation p.path_a "delete" - | nr when nr = Syscall.rename || nr = Syscall.renameat || nr = Syscall.renameat2 -> + | nr when nr = Ptrace.Syscall.rename || nr = Ptrace.Syscall.renameat || nr = Ptrace.Syscall.renameat2 -> record_observation p.path_a "delete"; record_observation p.path_b "write" - | nr when nr = Syscall.mkdir || nr = Syscall.mkdirat || nr = Syscall.truncate -> + | nr when nr = Ptrace.Syscall.mkdir || nr = Ptrace.Syscall.mkdirat || nr = Ptrace.Syscall.truncate -> record_observation p.path_a "write" - | nr when nr = Syscall.ftruncate -> + | nr when nr = Ptrace.Syscall.ftruncate -> if p.fd >= 0 then Option.iter (fun path -> record_observation path "write") (Hashtbl.find_opt proc.fds p.fd) - | nr when nr = Syscall.chdir || nr = Syscall.fchdir -> + | nr when nr = Ptrace.Syscall.chdir || nr = Ptrace.Syscall.fchdir -> if ok then refresh_proc_cwd pid proc - | nr when nr = Syscall.close -> + | nr when nr = Ptrace.Syscall.close -> if ok then Hashtbl.remove proc.fds p.fd - | nr when nr = Syscall.dup -> - if ok then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.result) path) (Hashtbl.find_opt proc.fds p.fd) - | nr when nr = Syscall.dup2 || nr = Syscall.dup3 -> + | nr when nr = Ptrace.Syscall.dup -> + if ok then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.Ptrace.result) path) (Hashtbl.find_opt proc.fds
p.fd) + | nr when nr = Ptrace.Syscall.dup2 || nr = Ptrace.Syscall.dup3 -> (* the duplicate is registered under the second syscall argument, not under the return value *) if ok then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int p.args.(1)) path) (Hashtbl.find_opt proc.fds p.fd) - | nr when nr = Syscall.fcntl -> - if ok && (Int64.to_int p.args.(1) = f_dupfd || Int64.to_int p.args.(1) = f_dupfd_cloexec) then - Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.result) path) (Hashtbl.find_opt proc.fds p.fd) + | nr when nr = Ptrace.Syscall.fcntl -> (* only the two fd-duplicating commands add an entry to proc.fds *) + if ok + && (Int64.to_int p.args.(1) = Ptrace.f_dupfd || Int64.to_int p.args.(1) = Ptrace.f_dupfd_cloexec) + then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.Ptrace.result) path) (Hashtbl.find_opt proc.fds p.fd) | _ -> () end
@@ -6,6 +6,120 @@ type regs = { result : int64; } +(** [AT_FDCWD] for *at() syscalls on x86_64 Linux. *) +let at_fdcwd = -100 + +(** Syscall numbers the tracer dispatches on; presumably x86_64 Linux numbering like [at_fdcwd] -- verify against the syscall table. *) module Syscall = struct + let access = 21 + let close = 3 + let creat = 85 + let dup = 32 + let dup2 = 33 + let dup3 = 292 + let faccessat = 269 + let faccessat2 = 439 + let fchdir = 81 + let fcntl = 72 + let ftruncate = 77 + let getdents = 78 + let getdents64 = 217 + let lstat = 6 + let mkdir = 83 + let mkdirat = 258 + let newfstatat = 262 + let open_ = 2 + let openat = 257 + let openat2 = 437 + let readlink = 89 + let readlinkat = 267 + let rename = 82 + let renameat = 264 + let renameat2 = 316 + let rmdir = 84 + let stat = 4 + let truncate = 76 + let unlink = 87 + let unlinkat = 263 + let chdir = 80 +end + +(* Raw open flag bits (o_*) and fcntl duplicate commands (f_*) compared against decoded arguments. *) let o_accmode = 0o3 +let o_rdonly = 0 +let o_wronly = 1 +let o_rdwr = 2 +let o_creat = 0o100 +let o_trunc = 0o1000 +let o_append = 0o2000 +let o_directory = 0o200000 +let f_dupfd = 0 +let f_dupfd_cloexec = 1030 + +(** [int_arg regs i] reads syscall argument [i] as a C [int]: only the low 32 bits of the register are kept and then sign-extended. A tracee that loads [AT_FDCWD] with a 32-bit move leaves 0xFFFFFF9C in the register; truncating first makes that decode to [-100], so it compares equal to [at_fdcwd]. *) let int_arg (regs : regs) (i : int) : int = Int32.to_int (Int64.to_int32 regs.args.(i)) + +(** [syscall_ok result] is true when the raw return value is non-negative. *) let syscall_ok (result : int64) : bool = Int64.compare result 0L >= 0 + +(** True when the access mode is write-only or read-write, or when any of [o_creat], [o_trunc], [o_append] is set. *) let is_write_open (flags : int) : bool = + let access = flags land o_accmode in + access = o_wronly || access = o_rdwr || flags land (o_creat lor o_trunc lor o_append) <> 0 + +(** True when the access mode is read-only or read-write. *) let is_read_open (flags : int) : bool = + let access = flags land o_accmode in + access = o_rdonly || access = o_rdwr + +(** Register snapshot between syscall enter and exit for path-oriented syscalls. *) +type pending_syscall = { + nr : int; + args : int64 array; + mutable path_a : string; + mutable path_b : string; + mutable dirfd : int; + mutable fd : int; + mutable flags : int; +} + +(** Fill [pending_syscall] from registers at syscall entry; [read_arg] reads the tracee's C string at argument index [i]. [resolve] maps a (dirfd, path) pair to the path stored in the record. Syscalls without a matching arm keep the defaults: empty paths, [dirfd = at_fdcwd], [fd = -1], [flags = 0].
*) +let decode_syscall_entry ~(resolve : dirfd:int -> string -> string) ~(read_arg : int -> string) (regs : regs) : pending_syscall = + let p = + { nr = regs.syscall_nr; args = regs.args; path_a = ""; path_b = ""; dirfd = at_fdcwd; fd = -1; flags = 0 } + in + begin + match p.nr with + | nr when nr = Syscall.open_ -> + p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0); + p.flags <- int_arg regs 1 + | nr when nr = Syscall.openat || nr = Syscall.openat2 -> + p.dirfd <- int_arg regs 0; + p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1); + p.flags <- int_arg regs 2 (* NOTE(review): for openat2 the third argument is a pointer to struct open_how rather than a flags word, so this value is not the open flags in that case; decoding it needs a tracee memory read that read_arg does not provide -- confirm and fix with the tracer owner *) + | nr when nr = Syscall.creat -> + p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0); + p.flags <- o_creat lor o_wronly lor o_trunc (* creat carries no flags argument, so a fixed flag set is stored *) + | nr when nr = Syscall.stat || nr = Syscall.lstat || nr = Syscall.access || nr = Syscall.readlink -> + p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0) + | nr when nr = Syscall.newfstatat || nr = Syscall.faccessat || nr = Syscall.faccessat2 || nr = Syscall.readlinkat -> + p.dirfd <- int_arg regs 0; + p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1) + | nr when nr = Syscall.unlink || nr = Syscall.rmdir -> + p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0) + | nr when nr = Syscall.unlinkat || nr = Syscall.mkdirat -> + p.dirfd <- int_arg regs 0; + p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1) + | nr when nr = Syscall.mkdir || nr = Syscall.chdir || nr = Syscall.truncate -> + p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0) + | nr when nr = Syscall.rename -> + p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0); + p.path_b <- resolve ~dirfd:at_fdcwd (read_arg 1) + | nr when nr = Syscall.renameat || nr = Syscall.renameat2 -> (* two dirfds: argument 0 pairs with path 1, argument 2 with path 3; p.dirfd keeps its default *) + p.path_a <- resolve ~dirfd:(int_arg regs 0) (read_arg 1); + p.path_b <- resolve ~dirfd:(int_arg regs 2) (read_arg 3) + | nr when nr = Syscall.getdents || nr = Syscall.getdents64 || nr = Syscall.fchdir || nr = Syscall.ftruncate -> + p.fd <- int_arg regs 0 + | nr when nr = Syscall.close || nr = Syscall.dup || nr = Syscall.dup2 || nr = Syscall.dup3 || nr =
Syscall.fcntl -> + p.fd <- int_arg regs 0 + | _ -> () + end; + p + type wait_stop = | Exited of pid * int | Signaled of pid * int