Repositories / agent-snapshot.git
agent-snapshot.git
Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git
@@ -581,6 +581,20 @@ let manifest_git_of_git_info (git : git_info) : Manifest_json.git = let manifest_repo_of_repo_record (repo : repo_record) : Manifest_json.repo = { root = utf8_string repo.root; head = utf8_string repo.head; dirty = repo.dirty } +let operation_was_recorded (recd : file_record) (operation : string) : bool = + Hashtbl.mem recd.operations operation + +let record_has_mutation (recd : file_record) : bool = + operation_was_recorded recd "write" || operation_was_recorded recd "delete" + +let record_is_transient_mutation (recd : file_record) : bool = + (not recd.before.exists) && (not recd.after.exists) && record_has_mutation recd + +let record_should_be_manifested (recd : file_record) : bool = + (not (metadata_is_special recd.before)) + && (not (metadata_is_special recd.after)) + && not (record_is_transient_mutation recd) + let write_manifest (out : string) (command : string list) (exit_status : int) : unit = let git_repositories = Hashtbl.fold (fun _ (repo : repo_record) acc -> manifest_repo_of_repo_record repo :: acc) repos [] @@ -589,7 +603,7 @@ let write_manifest (out : string) (command : string list) (exit_status : int) : let manifest_files = Hashtbl.fold (fun _ recd acc -> - if metadata_is_special recd.before || metadata_is_special recd.after then acc + if not (record_should_be_manifested recd) then acc else let operations = Hashtbl.fold (fun op () acc -> op :: acc) recd.operations [] |> List.sort String.compare |> List.map utf8_string @@ -620,19 +634,15 @@ let write_manifest (out : string) (command : string list) (exit_status : int) : in Json.to_file ~std:true (concat_path out "manifest.json") (Manifest_json.to_yojson manifest) -let operation_was_recorded (recd : file_record) (operation : string) : bool = - Hashtbl.mem recd.operations operation - let print_snapshot_summary () : unit = let updated_files = ref 0 in let uncommitted_read_files = ref 0 in Hashtbl.iter (fun _ recd -> - if - (operation_was_recorded recd "write" || operation_was_recorded recd "delete") - && (Option.is_some recd.after.blob || recd.after.tombstone) + if record_should_be_manifested recd && record_has_mutation recd && (Option.is_some recd.after.blob || recd.after.tombstone) then incr updated_files; - if operation_was_recorded recd "read" && Option.is_some recd.before.blob then incr uncommitted_read_files) + if record_should_be_manifested recd && operation_was_recorded recd "read" && Option.is_some recd.before.blob then + incr uncommitted_read_files) files; Printf.eprintf "Snapshot directory: %s\nWorked in %d repositories. Saved %d updated files. Saved %d read files in the snapshot that were not committed.\n%!"
@@ -0,0 +1,9 @@ +import os +from pathlib import Path + +testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) +transient = testdata / "transient_runtime.txt" + +transient.write_text("temporary payload\n") +assert transient.read_text() == "temporary payload\n" +transient.unlink()
@@ -303,6 +303,18 @@ def test_dirty_untracked_created_and_deleted_files_are_captured(tmp_path): assert deleted["after"]["tombstone"] is True +def test_created_then_deleted_file_is_not_manifested_or_blobbed(tmp_path): + transient = WORKTREE / "transient_runtime.txt" + + snap = capture(tmp_path, PYTHON, "test_programs/create_read_delete_transient.py") + manifest_paths = {item["path"] for item in snap.manifest["files"]} + blob_keys = set(snap._blobs_frame()["key"]) + + assert not transient.exists() + assert str(transient.resolve()) not in manifest_paths + assert not any(str(transient.resolve()) in key for key in blob_keys) + + def test_fork_usr_and_directory_traversal(tmp_path): # ptrace must follow the process tree, not just the initial pid. The helper # forks and writes from the child; missing that write means fork/clone events