Repositories / agent-snapshot.git

src/ocaml/ptrace_easy.ml

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch
8454 bytes · 0bbc150168ac
type pid = int type open_intent = { read : bool; write : bool; directory_only : bool; } type event = | Pre_open of { pid : pid; path : string; intent : open_intent } | Post_open of { pid : pid; path : string; intent : open_intent; ok : bool } | Pre_unlink of { pid : pid; path : string } | Post_unlink of { pid : pid; path : string; ok : bool } | Pre_rename of { pid : pid; src : string; dst : string } | Post_rename of { pid : pid; src : string; dst : string; ok : bool } | Stat_like of { pid : pid; path : string } | Read_dir of { pid : pid; path : string } | Make_dir of { pid : pid; path : string } | Truncate of { pid : pid; path : string } | Fork of { parent : pid; child : pid } | Process_exit of pid (** Per-tracee state needed to resolve paths and pair syscall entry with exit. *) type proc_state = { mutable cwd : string; fds : (int, string) Hashtbl.t; mutable pending : Ptrace.pending_syscall option; } let normalize_path (path : string) : string = if path = "" then "." else Fpath.to_string (Fpath.normalize (Fpath.v path)) let concat_path (base : string) (path : string) : string = if path = "" then base else Fpath.to_string (Fpath.normalize (Fpath.append (Fpath.v base) (Fpath.v path))) let is_absolute (path : string) : bool = path <> "" && Fpath.is_abs (Fpath.v path) let readlink_opt (path : string) : string option = try Some (Unix.readlink path) with Unix.Unix_error _ -> None (** Resolve a syscall path argument against per-process cwd or a directory fd. *) let resolve_path (proc : proc_state) (dirfd : int) (path : string) : string = if is_absolute path then normalize_path path else let base = if dirfd <> Ptrace.at_fdcwd then Option.value (Hashtbl.find_opt proc.fds dirfd) ~default:proc.cwd else proc.cwd in concat_path base path (** Ask /proc where a successful fd points. The kernel knows the canonical path; reimplementing path resolution here would be duplicative and racy. *) let refresh_proc_fd (pid : pid) (proc : proc_state) (fd : int) : unit = match readlink_opt (Printf.sprintf "/proc/%d/fd/%d" pid fd) with | Some target when is_absolute target -> Hashtbl.replace proc.fds fd (normalize_path target) | _ -> () let refresh_proc_cwd (pid : pid) (proc : proc_state) : unit = match readlink_opt (Printf.sprintf "/proc/%d/cwd" pid) with | Some target -> proc.cwd <- normalize_path target | None -> () let new_proc_state () : proc_state = { cwd = FileUtil.pwd (); fds = Hashtbl.create 8; pending = None } let clone_proc_state (state : proc_state) : proc_state = let fds = Hashtbl.create (Hashtbl.length state.fds) in Hashtbl.iter (fun fd path -> Hashtbl.add fds fd path) state.fds; { cwd = state.cwd; fds; pending = state.pending } let intent_of_flags (flags : int) : open_intent = { read = Ptrace.is_read_open flags; write = Ptrace.is_write_open flags; directory_only = flags land Ptrace.o_directory <> 0; } let is_open_syscall (nr : int) : bool = nr = Ptrace.Syscall.open_ || nr = Ptrace.Syscall.openat || nr = Ptrace.Syscall.openat2 || nr = Ptrace.Syscall.creat let is_unlink_syscall (nr : int) : bool = nr = Ptrace.Syscall.unlink || nr = Ptrace.Syscall.rmdir || nr = Ptrace.Syscall.unlinkat let is_rename_syscall (nr : int) : bool = nr = Ptrace.Syscall.rename || nr = Ptrace.Syscall.renameat || nr = Ptrace.Syscall.renameat2 let is_stat_like_syscall (nr : int) : bool = nr = Ptrace.Syscall.stat || nr = Ptrace.Syscall.lstat || nr = Ptrace.Syscall.newfstatat || nr = Ptrace.Syscall.access || nr = Ptrace.Syscall.faccessat || nr = Ptrace.Syscall.faccessat2 || nr = Ptrace.Syscall.readlink || nr = Ptrace.Syscall.readlinkat (** Snapshot path-oriented arguments at entry and emit Pre_* events for syscalls about to mutate the filesystem. *) let handle_syscall_entry (pid : pid) (proc : proc_state) (regs : Ptrace.regs) (emit : event -> unit) : unit = let resolve ~dirfd path = resolve_path proc dirfd path in let p = Ptrace.decode_syscall_entry pid ~resolve regs in begin match p.nr with | nr when is_open_syscall nr -> if Ptrace.is_write_open p.flags then emit (Pre_open { pid; path = p.path_a; intent = intent_of_flags p.flags }) | nr when is_unlink_syscall nr -> emit (Pre_unlink { pid; path = p.path_a }) | nr when is_rename_syscall nr -> emit (Pre_rename { pid; src = p.path_a; dst = p.path_b }) | _ -> () end; proc.pending <- Some p (** Emit Post_* / observation events at exit and update fd and cwd state. *) let handle_syscall_exit (pid : pid) (proc : proc_state) (regs : Ptrace.regs) (emit : event -> unit) : unit = match proc.pending with | None -> () | Some p -> let ok = Ptrace.syscall_ok regs.Ptrace.result in begin match p.nr with | nr when is_open_syscall nr -> let intent = intent_of_flags p.flags in emit (Post_open { pid; path = p.path_a; intent; ok }); if ok then refresh_proc_fd pid proc (Int64.to_int regs.Ptrace.result) | nr when is_stat_like_syscall nr -> emit (Stat_like { pid; path = p.path_a }) | nr when nr = Ptrace.Syscall.getdents || nr = Ptrace.Syscall.getdents64 -> if ok && p.fd >= 0 then Option.iter (fun path -> emit (Read_dir { pid; path })) (Hashtbl.find_opt proc.fds p.fd) | nr when is_unlink_syscall nr -> emit (Post_unlink { pid; path = p.path_a; ok }) | nr when is_rename_syscall nr -> emit (Post_rename { pid; src = p.path_a; dst = p.path_b; ok }) | nr when nr = Ptrace.Syscall.mkdir || nr = Ptrace.Syscall.mkdirat -> if ok then emit (Make_dir { pid; path = p.path_a }) | nr when nr = Ptrace.Syscall.truncate -> if ok then emit (Truncate { pid; path = p.path_a }) | nr when nr = Ptrace.Syscall.ftruncate -> if ok && p.fd >= 0 then Option.iter (fun path -> emit (Truncate { pid; path })) (Hashtbl.find_opt proc.fds p.fd) | nr when nr = Ptrace.Syscall.chdir || nr = Ptrace.Syscall.fchdir -> if ok then refresh_proc_cwd pid proc | nr when nr = Ptrace.Syscall.close -> if ok then Hashtbl.remove proc.fds p.fd | nr when nr = Ptrace.Syscall.dup -> if ok then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.Ptrace.result) path) (Hashtbl.find_opt proc.fds p.fd) | nr when nr = Ptrace.Syscall.dup2 || nr = Ptrace.Syscall.dup3 -> if ok then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int p.args.(1)) path) (Hashtbl.find_opt proc.fds p.fd) | nr when nr = Ptrace.Syscall.fcntl -> if ok && (Int64.to_int p.args.(1) = Ptrace.f_dupfd || Int64.to_int p.args.(1) = Ptrace.f_dupfd_cloexec) then Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.Ptrace.result) path) (Hashtbl.find_opt proc.fds p.fd) | _ -> () end let trace (command : string list) (on_event : event -> unit) : unit = let processes : (pid, proc_state) Hashtbl.t = Hashtbl.create 8 in let ensure_proc (pid : pid) : proc_state = match Hashtbl.find_opt processes pid with | Some s -> s | None -> let s = new_proc_state () in refresh_proc_cwd pid s; Hashtbl.add processes pid s; s in Ptrace.trace command (function | Ptrace.Fork { parent; child } -> let state = match Hashtbl.find_opt processes parent with | Some s -> clone_proc_state s | None -> new_proc_state () in Hashtbl.replace processes child state; on_event (Fork { parent; child }) | Ptrace.Process_exit pid -> Hashtbl.remove processes pid; on_event (Process_exit pid) | Ptrace.Syscall_enter (pid, regs) -> let proc = ensure_proc pid in handle_syscall_entry pid proc regs on_event | Ptrace.Syscall_exit (pid, regs) -> Option.iter (fun proc -> handle_syscall_exit pid proc regs on_event) (Hashtbl.find_opt processes pid) | Ptrace.Exec pid | Ptrace.Exit pid | Ptrace.Signal (pid, _) -> if not (Hashtbl.mem processes pid) then ignore (ensure_proc pid))