Repositories / agent-snapshot.git
src/ocaml/ptrace_easy.ml
Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git
type pid = int
type open_intent = {
read : bool;
write : bool;
directory_only : bool;
}
type event =
| Pre_open of { pid : pid; path : string; intent : open_intent }
| Post_open of { pid : pid; path : string; intent : open_intent; ok : bool }
| Pre_unlink of { pid : pid; path : string }
| Post_unlink of { pid : pid; path : string; ok : bool }
| Pre_rename of { pid : pid; src : string; dst : string }
| Post_rename of { pid : pid; src : string; dst : string; ok : bool }
| Stat_like of { pid : pid; path : string }
| Read_dir of { pid : pid; path : string }
| Make_dir of { pid : pid; path : string }
| Truncate of { pid : pid; path : string }
| Fork of { parent : pid; child : pid }
| Process_exit of pid
(** Per-tracee state needed to resolve paths and pair syscall entry with exit. *)
type proc_state = {
mutable cwd : string;
fds : (int, string) Hashtbl.t;
mutable pending : Ptrace.pending_syscall option;
}
let normalize_path (path : string) : string =
if path = "" then "." else Fpath.to_string (Fpath.normalize (Fpath.v path))
let concat_path (base : string) (path : string) : string =
if path = "" then base
else Fpath.to_string (Fpath.normalize (Fpath.append (Fpath.v base) (Fpath.v path)))
let is_absolute (path : string) : bool = path <> "" && Fpath.is_abs (Fpath.v path)
let readlink_opt (path : string) : string option =
try Some (Unix.readlink path) with Unix.Unix_error _ -> None
(** Resolve a syscall path argument against per-process cwd or a directory fd. *)
let resolve_path (proc : proc_state) (dirfd : int) (path : string) : string =
if is_absolute path then normalize_path path
else
let base =
if dirfd <> Ptrace.at_fdcwd then Option.value (Hashtbl.find_opt proc.fds dirfd) ~default:proc.cwd
else proc.cwd
in
concat_path base path
(** Ask /proc where a successful fd points. The kernel knows the canonical path; reimplementing path resolution here would be duplicative and racy. *)
let refresh_proc_fd (pid : pid) (proc : proc_state) (fd : int) : unit =
match readlink_opt (Printf.sprintf "/proc/%d/fd/%d" pid fd) with
| Some target when is_absolute target -> Hashtbl.replace proc.fds fd (normalize_path target)
| _ -> ()
let refresh_proc_cwd (pid : pid) (proc : proc_state) : unit =
match readlink_opt (Printf.sprintf "/proc/%d/cwd" pid) with
| Some target -> proc.cwd <- normalize_path target
| None -> ()
let new_proc_state () : proc_state =
{ cwd = FileUtil.pwd (); fds = Hashtbl.create 8; pending = None }
let clone_proc_state (state : proc_state) : proc_state =
let fds = Hashtbl.create (Hashtbl.length state.fds) in
Hashtbl.iter (fun fd path -> Hashtbl.add fds fd path) state.fds;
{ cwd = state.cwd; fds; pending = state.pending }
let intent_of_flags (flags : int) : open_intent =
{
read = Ptrace.is_read_open flags;
write = Ptrace.is_write_open flags;
directory_only = flags land Ptrace.o_directory <> 0;
}
let is_open_syscall (nr : int) : bool =
nr = Ptrace.Syscall.open_ || nr = Ptrace.Syscall.openat || nr = Ptrace.Syscall.openat2
|| nr = Ptrace.Syscall.creat
let is_unlink_syscall (nr : int) : bool =
nr = Ptrace.Syscall.unlink || nr = Ptrace.Syscall.rmdir || nr = Ptrace.Syscall.unlinkat
let is_rename_syscall (nr : int) : bool =
nr = Ptrace.Syscall.rename || nr = Ptrace.Syscall.renameat || nr = Ptrace.Syscall.renameat2
let is_stat_like_syscall (nr : int) : bool =
nr = Ptrace.Syscall.stat || nr = Ptrace.Syscall.lstat || nr = Ptrace.Syscall.newfstatat
|| nr = Ptrace.Syscall.access || nr = Ptrace.Syscall.faccessat || nr = Ptrace.Syscall.faccessat2
|| nr = Ptrace.Syscall.readlink || nr = Ptrace.Syscall.readlinkat
(** Snapshot path-oriented arguments at entry and emit Pre_* events for syscalls about to mutate the filesystem. *)
let handle_syscall_entry (pid : pid) (proc : proc_state) (regs : Ptrace.regs)
(emit : event -> unit) : unit =
let resolve ~dirfd path = resolve_path proc dirfd path in
let p = Ptrace.decode_syscall_entry pid ~resolve regs in
begin
match p.nr with
| nr when is_open_syscall nr ->
if Ptrace.is_write_open p.flags then
emit (Pre_open { pid; path = p.path_a; intent = intent_of_flags p.flags })
| nr when is_unlink_syscall nr ->
emit (Pre_unlink { pid; path = p.path_a })
| nr when is_rename_syscall nr ->
emit (Pre_rename { pid; src = p.path_a; dst = p.path_b })
| _ -> ()
end;
proc.pending <- Some p
(** Emit Post_* / observation events at exit and update fd and cwd state. *)
let handle_syscall_exit (pid : pid) (proc : proc_state) (regs : Ptrace.regs)
(emit : event -> unit) : unit =
match proc.pending with
| None -> ()
| Some p ->
let ok = Ptrace.syscall_ok regs.Ptrace.result in
begin
match p.nr with
| nr when is_open_syscall nr ->
let intent = intent_of_flags p.flags in
emit (Post_open { pid; path = p.path_a; intent; ok });
if ok then refresh_proc_fd pid proc (Int64.to_int regs.Ptrace.result)
| nr when is_stat_like_syscall nr ->
emit (Stat_like { pid; path = p.path_a })
| nr when nr = Ptrace.Syscall.getdents || nr = Ptrace.Syscall.getdents64 ->
if ok && p.fd >= 0 then
Option.iter (fun path -> emit (Read_dir { pid; path }))
(Hashtbl.find_opt proc.fds p.fd)
| nr when is_unlink_syscall nr ->
emit (Post_unlink { pid; path = p.path_a; ok })
| nr when is_rename_syscall nr ->
emit (Post_rename { pid; src = p.path_a; dst = p.path_b; ok })
| nr when nr = Ptrace.Syscall.mkdir || nr = Ptrace.Syscall.mkdirat ->
if ok then emit (Make_dir { pid; path = p.path_a })
| nr when nr = Ptrace.Syscall.truncate ->
if ok then emit (Truncate { pid; path = p.path_a })
| nr when nr = Ptrace.Syscall.ftruncate ->
if ok && p.fd >= 0 then
Option.iter (fun path -> emit (Truncate { pid; path }))
(Hashtbl.find_opt proc.fds p.fd)
| nr when nr = Ptrace.Syscall.chdir || nr = Ptrace.Syscall.fchdir ->
if ok then refresh_proc_cwd pid proc
| nr when nr = Ptrace.Syscall.close ->
if ok then Hashtbl.remove proc.fds p.fd
| nr when nr = Ptrace.Syscall.dup ->
if ok then
Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.Ptrace.result) path)
(Hashtbl.find_opt proc.fds p.fd)
| nr when nr = Ptrace.Syscall.dup2 || nr = Ptrace.Syscall.dup3 ->
if ok then
Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int p.args.(1)) path)
(Hashtbl.find_opt proc.fds p.fd)
| nr when nr = Ptrace.Syscall.fcntl ->
if ok
&& (Int64.to_int p.args.(1) = Ptrace.f_dupfd
|| Int64.to_int p.args.(1) = Ptrace.f_dupfd_cloexec)
then
Option.iter (fun path -> Hashtbl.replace proc.fds (Int64.to_int regs.Ptrace.result) path)
(Hashtbl.find_opt proc.fds p.fd)
| _ -> ()
end
let trace (command : string list) (on_event : event -> unit) : unit =
let processes : (pid, proc_state) Hashtbl.t = Hashtbl.create 8 in
let ensure_proc (pid : pid) : proc_state =
match Hashtbl.find_opt processes pid with
| Some s -> s
| None ->
let s = new_proc_state () in
refresh_proc_cwd pid s;
Hashtbl.add processes pid s;
s
in
Ptrace.trace command (function
| Ptrace.Fork { parent; child } ->
let state =
match Hashtbl.find_opt processes parent with
| Some s -> clone_proc_state s
| None -> new_proc_state ()
in
Hashtbl.replace processes child state;
on_event (Fork { parent; child })
| Ptrace.Process_exit pid ->
Hashtbl.remove processes pid;
on_event (Process_exit pid)
| Ptrace.Syscall_enter (pid, regs) ->
let proc = ensure_proc pid in
handle_syscall_entry pid proc regs on_event
| Ptrace.Syscall_exit (pid, regs) ->
Option.iter
(fun proc -> handle_syscall_exit pid proc regs on_event)
(Hashtbl.find_opt processes pid)
| Ptrace.Exec pid | Ptrace.Exit pid | Ptrace.Signal (pid, _) ->
if not (Hashtbl.mem processes pid) then ignore (ensure_proc pid))