Repositories / agent-snapshot.git
src/ocaml/ptrace.ml
Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git
type pid = int
type regs = {
syscall_nr : int;
args : int64 array;
result : int64;
}
(** [AT_FDCWD] for *at() syscalls on x86_64 Linux. *)
let at_fdcwd = -100
module Syscall = struct
let access = 21
let close = 3
let creat = 85
let dup = 32
let dup2 = 33
let dup3 = 292
let faccessat = 269
let faccessat2 = 439
let fchdir = 81
let fcntl = 72
let ftruncate = 77
let getdents = 78
let getdents64 = 217
let lstat = 6
let mkdir = 83
let mkdirat = 258
let newfstatat = 262
let open_ = 2
let openat = 257
let openat2 = 437
let readlink = 89
let readlinkat = 267
let rename = 82
let renameat = 264
let renameat2 = 316
let rmdir = 84
let stat = 4
let truncate = 76
let unlink = 87
let unlinkat = 263
let chdir = 80
end
let o_accmode = 0o3
let o_rdonly = 0
let o_wronly = 1
let o_rdwr = 2
let o_creat = 0o100
let o_trunc = 0o1000
let o_append = 0o2000
let o_directory = 0o200000
let f_dupfd = 0
let f_dupfd_cloexec = 1030
let int_arg (regs : regs) (i : int) : int = Int64.to_int regs.args.(i)
let syscall_ok (result : int64) : bool = Int64.compare result 0L >= 0
let is_write_open (flags : int) : bool =
let access = flags land o_accmode in
access = o_wronly || access = o_rdwr || flags land (o_creat lor o_trunc lor o_append) <> 0
let is_read_open (flags : int) : bool =
let access = flags land o_accmode in
access = o_rdonly || access = o_rdwr
(** Register snapshot between syscall enter and exit for path-oriented syscalls. *)
type pending_syscall = {
nr : int;
args : int64 array;
mutable path_a : string;
mutable path_b : string;
mutable dirfd : int;
mutable fd : int;
mutable flags : int;
}
external peek_word : pid -> int64 -> string = "as_peek_word"
(** Read a NUL-terminated string from tracee memory, bounded so bad pointers cannot loop forever. *)
let read_string (pid : pid) (address : int64) : string =
if Int64.equal address 0L then ""
else
let max_len = 65536 in
let word_size = Sys.word_size / 8 in
let buffer = Buffer.create 64 in
let rec loop offset =
if offset >= max_len then Buffer.contents buffer
else
match peek_word pid (Int64.add address (Int64.of_int offset)) with
| exception _ -> Buffer.contents buffer
| word ->
let rec scan i =
if i >= String.length word then loop (offset + word_size)
else
let c = word.[i] in
if Char.equal c '\000' then Buffer.contents buffer
else (
Buffer.add_char buffer c;
scan (i + 1))
in
scan 0
in
loop 0
(** Fill [pending_syscall] from registers at syscall entry. *)
let decode_syscall_entry (pid : pid) ~(resolve : dirfd:int -> string -> string) (regs : regs) : pending_syscall =
let read_arg i = read_string pid regs.args.(i) in
let p =
{ nr = regs.syscall_nr; args = regs.args; path_a = ""; path_b = ""; dirfd = at_fdcwd; fd = -1; flags = 0 }
in
begin
match p.nr with
| nr when nr = Syscall.open_ ->
p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0);
p.flags <- int_arg regs 1
| nr when nr = Syscall.openat || nr = Syscall.openat2 ->
p.dirfd <- int_arg regs 0;
p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1);
p.flags <- int_arg regs 2
| nr when nr = Syscall.creat ->
p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0);
p.flags <- o_creat lor o_wronly lor o_trunc
| nr when nr = Syscall.stat || nr = Syscall.lstat || nr = Syscall.access || nr = Syscall.readlink ->
p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0)
| nr when nr = Syscall.newfstatat || nr = Syscall.faccessat || nr = Syscall.faccessat2 || nr = Syscall.readlinkat ->
p.dirfd <- int_arg regs 0;
p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1)
| nr when nr = Syscall.unlink || nr = Syscall.rmdir ->
p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0)
| nr when nr = Syscall.unlinkat || nr = Syscall.mkdirat ->
p.dirfd <- int_arg regs 0;
p.path_a <- resolve ~dirfd:p.dirfd (read_arg 1)
| nr when nr = Syscall.mkdir || nr = Syscall.chdir || nr = Syscall.truncate ->
p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0)
| nr when nr = Syscall.rename ->
p.path_a <- resolve ~dirfd:at_fdcwd (read_arg 0);
p.path_b <- resolve ~dirfd:at_fdcwd (read_arg 1)
| nr when nr = Syscall.renameat || nr = Syscall.renameat2 ->
p.path_a <- resolve ~dirfd:(int_arg regs 0) (read_arg 1);
p.path_b <- resolve ~dirfd:(int_arg regs 2) (read_arg 3)
| nr when nr = Syscall.getdents || nr = Syscall.getdents64 || nr = Syscall.fchdir || nr = Syscall.ftruncate ->
p.fd <- int_arg regs 0
| nr when nr = Syscall.close || nr = Syscall.dup || nr = Syscall.dup2 || nr = Syscall.dup3 || nr = Syscall.fcntl ->
p.fd <- int_arg regs 0
| _ -> ()
end;
p
type wait_stop =
| Exited of pid * int
| Signaled of pid * int
| Stopped of pid * int * int
type event =
| Syscall_enter of pid * regs
| Syscall_exit of pid * regs
| Fork of { parent : pid; child : pid }
| Exec of pid
| Exit of pid
| Signal of pid * int
| Process_exit of pid
type task = {
mutable in_syscall : bool;
mutable suppress_attach_stop : bool;
}
external fork : unit -> int = "as_fork"
external traceme : unit -> unit = "as_traceme"
external setoptions : pid -> unit = "as_setoptions"
external syscall : pid -> int -> unit = "as_syscall"
external geteventmsg : pid -> int = "as_geteventmsg"
external getregs_raw : pid -> int * int64 * int64 * int64 * int64 * int64 * int64 * int64 = "as_getregs"
external wait_raw : pid -> bool -> wait_stop = "as_wait"
external const_sigtrap_sysgood : unit -> int = "as_const_sigtrap_sysgood"
external const_sigtrap : unit -> int = "as_const_sigtrap"
external const_sigstop : unit -> int = "as_const_sigstop"
external const_event_fork : unit -> int = "as_const_event_fork"
external const_event_vfork : unit -> int = "as_const_event_vfork"
external const_event_clone : unit -> int = "as_const_event_clone"
external const_event_exec : unit -> int = "as_const_event_exec"
external const_event_exit : unit -> int = "as_const_event_exit"
(** Decode x86_64 syscall registers into the small record used by the OCaml tracer. *)
let regs (pid : pid) : regs =
let nr, a0, a1, a2, a3, a4, a5, result = getregs_raw pid in
{ syscall_nr = nr; args = [| a0; a1; a2; a3; a4; a5 |]; result }
let wait_initial (pid : pid) : wait_stop = wait_raw pid false
let wait_next () : wait_stop = wait_raw (-1) true
let resume ?(signal : int = 0) (pid : pid) : unit = syscall pid signal
(** A forked child can exit before options are applied; that race is not fatal to tracing. *)
let try_setoptions (pid : pid) : unit =
try setoptions pid with Unix.Unix_error (Unix.ESRCH, _, _) -> ()
let try_resume ?(signal : int = 0) (pid : pid) : unit =
try resume ~signal pid with Unix.Unix_error (Unix.ESRCH, _, _) -> ()
let is_fork_event (event : int) : bool =
event = const_event_fork () || event = const_event_vfork () || event = const_event_clone ()
(** PTRACE_O_TRACEFORK/CLONE stops the new child with SIGSTOP before it can
run. Non-interactive parents usually never notice, but an interactive shell
may run waitpid(..., WUNTRACED) for job control as soon as we resume the
forking parent. If that attach stop is still pending, the shell reports the
foreground command as "Stopped" and returns 128 + SIGSTOP (147 on Linux).
Drain only those attach-time SIGSTOPs while the parent remains ptrace-stopped.
Return the first non-attach stop so normal syscall/event decoding still sees
the child's real execution from the beginning. *)
let rec consume_attach_stops (pid : pid) : wait_stop option =
try_resume pid;
match wait_raw pid true with
| Stopped (stopped_pid, signal, event) when stopped_pid = pid && signal = const_sigstop () && event = 0 ->
consume_attach_stops pid
| stop -> Some stop
(** Convert raw wait stops into higher-level trace events while maintaining syscall entry/exit phase. *)
let decode_stop (tasks : (pid, task) Hashtbl.t) (stop : wait_stop) : event list =
match stop with
| Exited (pid, _) | Signaled (pid, _) ->
Hashtbl.remove tasks pid;
[ Process_exit pid ]
| Stopped (pid, signal, event) when is_fork_event event ->
let child = geteventmsg pid in
let parent_task =
match Hashtbl.find_opt tasks pid with
| Some task -> { in_syscall = task.in_syscall; suppress_attach_stop = true }
| None -> { in_syscall = false; suppress_attach_stop = true }
in
Hashtbl.replace tasks child parent_task;
try_setoptions child;
[ Fork { parent = pid; child } ]
| Stopped (pid, _signal, event) when event = const_event_exec () -> [ Exec pid ]
| Stopped (pid, _signal, event) when event = const_event_exit () -> [ Exit pid ]
| Stopped (pid, signal, _event) when signal = const_sigtrap_sysgood () -> (
let task =
match Hashtbl.find_opt tasks pid with
| Some task -> task
| None ->
let task = { in_syscall = false; suppress_attach_stop = false } in
Hashtbl.replace tasks pid task;
task
in
let regs = regs pid in
task.suppress_attach_stop <- false;
if task.in_syscall then (
task.in_syscall <- false;
[ Syscall_exit (pid, regs) ])
else (
task.in_syscall <- true;
[ Syscall_enter (pid, regs) ]))
| Stopped (pid, signal, _event) ->
let task = Hashtbl.find_opt tasks pid in
let signal =
if signal = const_sigtrap () then 0
else
match task with
| Some task when signal = const_sigstop () && task.suppress_attach_stop ->
0
| _ -> signal
in
[ Signal (pid, signal) ]
(** Run [command] under ptrace and invoke [on_event] for normalized trace events. *)
let trace (command : string list) (on_event : event -> unit) : unit =
match command with
| [] -> invalid_arg "empty command"
| argv0 :: _ ->
let child = fork () in
if child = 0 then (
traceme ();
Unix.kill (Unix.getpid ()) Sys.sigstop;
Unix.execvp argv0 (Array.of_list command))
else (
match wait_initial child with
| Stopped _ ->
setoptions child;
let tasks = Hashtbl.create 8 in
Hashtbl.add tasks child { in_syscall = false; suppress_attach_stop = false };
let pending_stops : wait_stop Queue.t = Queue.create () in
try_resume child;
let stopped_pid = function
| Exited (pid, _) | Signaled (pid, _) | Stopped (pid, _, _) -> pid
in
let resume_signal = function
| [ Signal (_, signal) ] -> signal
| _ -> 0
in
let rec handle_stop stop =
let pid = stopped_pid stop in
let events = decode_stop tasks stop in
List.iter on_event events;
List.iter
(function
| Fork { child; _ } -> Option.iter (fun stop -> Queue.add stop pending_stops) (consume_attach_stops child)
| _ -> ())
events;
while not (Queue.is_empty pending_stops) do
handle_stop (Queue.take pending_stops)
done;
if Hashtbl.mem tasks pid then try_resume ~signal:(resume_signal events) pid
in
while Hashtbl.length tasks > 0 do
match wait_next () with
| exception Unix.Unix_error (Unix.ECHILD, _, _) -> Hashtbl.clear tasks
| stop -> handle_stop stop
done
| _ -> failwith "tracee did not stop at startup")