(* Repositories / agent-snapshot.git
   src/ocaml/ptrace.mli
   Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git *)
(** Linux ptrace support for following a command and decoding
    filesystem-related syscalls.

    This module is intentionally small and Linux/x86_64-specific. It owns the
    raw ptrace interaction, normalizes wait stops into trace events, and exposes
    just enough syscall decoding helpers for the snapshot layer to classify
    filesystem observations. *)
(** Operating-system process identifier.

    A plain [int] alias: it is the type of the process ids carried by {!event}
    and accepted by {!read_string} and {!decode_syscall_entry}. *)
type pid = int
(** Register snapshot taken at a syscall stop.

    All fields are decoded according to the x86_64 Linux syscall ABI. *)
type regs = {
  syscall_nr : int;  (** The syscall number. *)
  args : int64 array;  (** The six syscall arguments, in order. *)
  result : int64;
      (** The return value. Meaningful on syscall-exit stops only; failures
          appear as negative errno values. *)
}
(** Linux's [AT_FDCWD]: the special directory file descriptor that *at()
    syscalls interpret as "the current working directory".

    It is also the neutral default kept in {!pending_syscall.dirfd} when a
    decoded syscall has no directory descriptor of its own. *)
val at_fdcwd : int
(** Linux x86_64 syscall numbers recognized by the snapshot tracer.

    They are exported so that callers can compare decoded syscall records
    (see {!regs.syscall_nr} and {!pending_syscall.nr}) against named values
    instead of duplicating architecture constants.

    Entries are listed alphabetically and named after the corresponding kernel
    syscall; [open_] has a trailing underscore because [open] is a reserved
    word in OCaml. *)
module Syscall : sig
  val access : int
  val chdir : int
  val close : int
  val creat : int
  val dup : int
  val dup2 : int
  val dup3 : int
  val faccessat : int
  val faccessat2 : int
  val fchdir : int
  val fcntl : int
  val ftruncate : int
  val getdents : int
  val getdents64 : int
  val lstat : int
  val mkdir : int
  val mkdirat : int
  val newfstatat : int
  val open_ : int
  val openat : int
  val openat2 : int
  val readlink : int
  val readlinkat : int
  val rename : int
  val renameat : int
  val renameat2 : int
  val rmdir : int
  val stat : int
  val truncate : int
  val unlink : int
  val unlinkat : int
end
(** Mask selecting the access-mode bits from Linux [open] flags.

    [flags land o_accmode] isolates the access mode so that it can be compared
    with {!o_rdonly}, {!o_wronly}, or {!o_rdwr}. *)
val o_accmode : int
(** Linux [O_RDONLY]: the read-only access mode. *)
val o_rdonly : int
(** Linux [O_WRONLY]: the write-only access mode. *)
val o_wronly : int
(** Linux [O_RDWR]: the read/write access mode. *)
val o_rdwr : int
(** Linux [O_CREAT]: create the file if it does not exist. *)
val o_creat : int
(** Linux [O_TRUNC]: truncate an existing file when opening it for writing. *)
val o_trunc : int
(** Linux [O_APPEND]: position every write at the end of the file. *)
val o_append : int
(** Linux [O_DIRECTORY]: fail the open unless the path names a directory. *)
val o_directory : int
(** Linux [F_DUPFD] command for [fcntl]: duplicate a file descriptor. *)
val f_dupfd : int
(** Linux [F_DUPFD_CLOEXEC] command for [fcntl]: duplicate a file descriptor
    and mark the duplicate close-on-exec. *)
val f_dupfd_cloexec : int
(** [syscall_ok result] is [true] when [result] is a non-negative Linux syscall
    return value. Negative values represent [-errno].

    The intended input is {!regs.result} as observed on a [Syscall_exit]
    event; on other stops that field is not meaningful. *)
val syscall_ok : int64 -> bool
(** [is_write_open flags] is [true] when Linux [open]-style [flags] may create,
    truncate, append to, or otherwise open a file for writing.

    The related flag constants exported here are {!o_creat}, {!o_trunc},
    {!o_append}, {!o_wronly}, and {!o_rdwr}.
    NOTE(review): the exact bit test lives in the implementation; confirm
    there which combinations are treated as writes. *)
val is_write_open : int -> bool
(** [is_read_open flags] is [true] when Linux [open]-style [flags] open a file
    for reading.

    NOTE(review): a read/write open presumably satisfies both this predicate
    and {!is_write_open}; confirm against the implementation. *)
val is_read_open : int -> bool
(** Syscall-entry snapshot for path-oriented syscalls.

    The tracer records this at syscall entry so that the snapshot layer can
    still interpret the operation once the kernel has completed it. A derived
    field that does not apply to the syscall keeps the neutral default noted on
    that field. *)
type pending_syscall = {
  nr : int;  (** Syscall number. *)
  args : int64 array;  (** Raw syscall arguments. *)
  mutable path_a : string;
      (** Path operand; the empty string when not applicable.
          NOTE(review): presumably the first (or only) path argument; confirm
          against the implementation. *)
  mutable path_b : string;
      (** Further path operand; the empty string when not applicable.
          NOTE(review): presumably the second path of two-path syscalls such as
          [rename]; confirm against the implementation. *)
  mutable dirfd : int;
      (** Directory file descriptor; {!at_fdcwd} when not applicable. *)
  mutable fd : int;  (** File descriptor operand; [-1] when not applicable. *)
  mutable flags : int;  (** Flags operand; [0] when not applicable. *)
}
(** [decode_syscall_entry pid ~resolve regs] decodes the syscall-entry
    registers for filesystem operations known to the tracer.

    Call this when {!trace} emits [Syscall_enter (pid, regs)]. The returned
    {!pending_syscall} is the syscall-entry snapshot that a caller usually
    stores in per-process state until the matching [Syscall_exit (pid, regs)]
    arrives. This is necessary because pointer arguments, relative paths, and
    file descriptor context should be captured before the tracee continues,
    while the syscall result is only available at exit.

    A typical use looks like:
    {[
      Ptrace.trace command (function
        | Ptrace.Syscall_enter (pid, regs) ->
            let resolve ~dirfd path = resolve_path_for_process pid ~dirfd path in
            let pending = Ptrace.decode_syscall_entry pid ~resolve regs in
            remember_pending_syscall pid pending
        | Ptrace.Syscall_exit (pid, regs) ->
            let pending = take_pending_syscall pid in
            if Ptrace.syscall_ok regs.Ptrace.result then
              handle_completed_syscall pending regs
        | _ -> ())
    ]}

    Unknown syscalls produce a record whose {!pending_syscall.nr} and
    {!pending_syscall.args} are populated and whose derived fields keep their
    defaults.

    @param pid
      Identifies the tracee whose memory contains any string pointer
      arguments. The decoder uses {!read_string} internally to read those
      arguments before the tracee resumes.
    @param resolve
      [resolve ~dirfd path] converts a path argument into the caller's
      canonical path representation. For absolute paths it can usually return
      [path] unchanged. For relative paths it should resolve against either the
      tracee's current working directory when [dirfd] is {!at_fdcwd}, or the
      directory path associated with [dirfd] for *at() syscalls such as
      [openat] and [renameat]. It is a callback because ptrace only exposes the
      raw integer file descriptor; the higher-level caller must maintain cwd
      and fd-to-path state.
    @param regs The registers observed at the syscall-entry stop.
    @return The decoded syscall-entry snapshot. *)
val decode_syscall_entry :
pid -> resolve:(dirfd:int -> string -> string) -> regs -> pending_syscall
(** Trace events emitted by {!trace}.

    Syscall events come in enter/exit pairs per process unless the tracee exits
    or is interrupted mid-syscall. *)
type event =
  | Syscall_enter of pid * regs
      (** The process stopped at syscall entry; the registers carry the syscall
          number and arguments. *)
  | Syscall_exit of pid * regs
      (** The process stopped at syscall exit; {!regs.result} carries the
          return value. *)
  | Fork of { parent : pid; child : pid }
      (** A fork, vfork, or clone by [parent] created the traceable [child]. *)
  | Exec of pid
      (** NOTE(review): presumably reported when the process performs an exec;
          confirm against the implementation. *)
  | Exit of pid
      (** NOTE(review): presumably an exit notification distinct from
          [Process_exit], e.g. a stop delivered while the process is still
          exiting; confirm against the implementation. *)
  | Signal of pid * int
      (** NOTE(review): presumably a signal stop, with the [int] being the
          signal number; confirm against the implementation. *)
  | Process_exit of pid
      (** The process has left the traced task set. *)
(** [read_string pid address] reads a NUL-terminated string from the memory of
    tracee [pid], starting at [address].

    A null pointer, unreadable memory, or the configured maximum length ends
    the read and returns the bytes collected so far, so the result may be
    empty or truncated.

    @param pid The tracee whose memory is read.
    @param address The address of the string in the tracee's address space.
    @return The bytes read before the terminating NUL or an early stop. *)
val read_string : pid -> int64 -> string
(** [trace command on_event] runs [command] under ptrace and calls [on_event]
    for each normalized event until all traced processes exit.

    @param command
      The executable name followed by its arguments; must be non-empty.
    @param on_event Callback invoked once per emitted {!event}.
    @raise Invalid_argument if [command] is empty.
    @raise Unix.Unix_error on ptrace, wait, fork, or exec failures. *)
val trace : string list -> (event -> unit) -> unit