Repositories / agent-snapshot.git
agent-snapshot.git
Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git
@@ -29,7 +29,7 @@ At a high level, the snapshot includes: The snapshot is intentionally compact. Clean Git-tracked files that are only read are represented by Git metadata instead of copied into the snapshot. Files owned by another user and not writable by the current user are treated as part of the -external system environment and are not copied. +external system environment and are not recorded. There are important exceptions:
@@ -34,6 +34,8 @@ type file_record = { mutable after : metadata; mutable before_git : git_info; mutable after_git : git_info; + mutable before_external_system : bool; + mutable after_external_system : bool; mutable before_recorded : bool; } @@ -511,13 +513,28 @@ let should_capture_content (path : string) (meta : metadata) (git : git_info) : let record_observation (raw_path : string) (operation : string) : unit = if raw_path <> "" then let path = best_effort_canonical raw_path in - if (not (is_ignored_path path)) && (not (path_has_non_directory_prefix path)) && not (path_is_special_file path) then ( + if + (not (is_ignored_path path)) + && (not (path_has_non_directory_prefix path)) + && (not (path_is_special_file path)) + && not (owned_by_other_and_not_writable path) + then ( let recd = match Hashtbl.find_opt files path with | Some recd -> recd | None -> let recd = - { path; operations = Hashtbl.create 5; before = empty_metadata (); after = empty_metadata (); before_git = empty_git (); after_git = empty_git (); before_recorded = false } + { + path; + operations = Hashtbl.create 5; + before = empty_metadata (); + after = empty_metadata (); + before_git = empty_git (); + after_git = empty_git (); + before_external_system = false; + after_external_system = false; + before_recorded = false; + } in Hashtbl.add files path recd; recd @@ -526,6 +543,7 @@ let record_observation (raw_path : string) (operation : string) : unit = if not recd.before_recorded then ( recd.before_recorded <- true; recd.before <- Option.value (stat_metadata path) ~default:(empty_metadata ()); + recd.before_external_system <- recd.before.exists && owned_by_other_and_not_writable path; recd.before_git <- classify_git path; if should_capture_content path recd.before recd.before_git then recd.before.blob <- store_blob "before" path recd.before)) @@ -536,6 +554,7 @@ let finalize_records () : unit = if not (is_ignored_path recd.path) then ( recd.after <- Option.value (stat_metadata recd.path) 
~default:(empty_metadata ()); if not recd.after.exists then recd.after.tombstone <- Hashtbl.mem recd.operations "delete"; + recd.after_external_system <- recd.after.exists && owned_by_other_and_not_writable recd.path; recd.after_git <- classify_git recd.path; let written_regular = Hashtbl.mem recd.operations "write" && recd.after.exists && recd.after.regular in if (written_regular && not (owned_by_other_and_not_writable recd.path)) || should_capture_content recd.path recd.after recd.after_git then @@ -593,6 +612,8 @@ let record_is_transient_mutation (recd : file_record) : bool = let record_should_be_manifested (recd : file_record) : bool = (not (metadata_is_special recd.before)) && (not (metadata_is_special recd.after)) + && (not recd.before_external_system) + && (not recd.after_external_system) && not (record_is_transient_mutation recd) let write_manifest (out : string) (command : string list) (exit_status : int) : unit =
@@ -319,18 +319,16 @@ def test_fork_usr_and_directory_traversal(tmp_path):
     # ptrace must follow the process tree, not just the initial pid. The helper
     # forks and writes from the child; missing that write means fork/clone events
     # are not being attached early enough. The same helper reads /usr/bin/env to
-    # assert that root-owned, non-writable system files are observed but not
-    # blobbed, and iterates testdata to verify directory traversal is recorded.
+    # assert that root-owned, non-writable system files are excluded, and iterates
+    # testdata to verify directory traversal is recorded.
     snap = capture(tmp_path, PYTHON, "test_programs/fork_and_usr.py")
 
     child = snap.file(WORKTREE / "child_output.txt")
     assert "write" in child["operations"]
     assert snap.blob_text(child["after"]["blob"]) == "child final\n"
 
-    usr_env = snap.file(Path("/usr/bin/env"))
-    assert "read" in usr_env["operations"]
-    assert usr_env["before"].get("blob") is None
-    assert usr_env["after"].get("blob") is None
+    manifest_paths = {item["path"] for item in snap.manifest["files"]}
+    assert str(Path("/usr/bin/env").resolve()) not in manifest_paths
 
     directory = snap.file(WORKTREE)
     assert "directory" in directory["operations"]