Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch

Add edge case snapshot tests

Author
Arjun Guha <a.guha@northeastern.edu>
Date
2026-05-02 07:39:08 -0400
Commit
515f62dae444ab501c952ab8fd8a0e86bf2e0bef
test_programs/read_git_dirty_modes.py
new file mode 100644
index 0000000..c26a573
--- /dev/null
+++ b/test_programs/read_git_dirty_modes.py
@@ -0,0 +1,7 @@
+from pathlib import Path
+
+# Read two files from the repository's testdata directory; contents are discarded.
+testdata = Path(__file__).resolve().parents[1] / "testdata"
+
+for name in ("clean.txt", "dirty.txt"):
+    (testdata / name).read_text()
test_programs/read_peculiar_names.py
new file mode 100644
index 0000000..547ce95
--- /dev/null
+++ b/test_programs/read_peculiar_names.py
@@ -0,0 +1,12 @@
+import os
+from pathlib import Path
+
+# Read files whose names contain spaces, a newline, and a non-UTF-8 byte.
+testdata = Path(__file__).resolve().parents[1] / "testdata"
+
+for name in ("name with spaces.txt", "name with\nnewline.txt"):
+    (testdata / name).read_text()
+
+raw_path = os.path.join(os.fsencode(testdata), b"name-with-\xff-byte.txt")
+with open(raw_path, "rb") as handle:
+    handle.read()
test_programs/rename_paths.py
new file mode 100644
index 0000000..0d4ab32
--- /dev/null
+++ b/test_programs/rename_paths.py
@@ -0,0 +1,6 @@
+from pathlib import Path
+
+# Move rename_source.txt onto a new name (with a space) inside testdata.
+testdata = Path(__file__).resolve().parents[1] / "testdata"
+source = testdata / "rename_source.txt"
+source.rename(testdata / "rename destination.txt")
tests/test_agent_snapshot.py
index fcdb58c..e6f5756 100644
--- a/tests/test_agent_snapshot.py
+++ b/tests/test_agent_snapshot.py
@@ -38,9 +38,11 @@ def pristine_testdata():
     # The snapshot policy depends on Git's clean/dirty/untracked distinctions.
     # Each test is allowed to dirty or create files under testdata, then this
     # fixture restores the committed baseline so later tests see known Git state.
+    run(["git", "reset", "--", "testdata"])
     run(["git", "checkout", "--", "testdata"])
     run(["git", "clean", "-fd", "--", "testdata"])
     yield
+    run(["git", "reset", "--", "testdata"])
     run(["git", "checkout", "--", "testdata"])
     run(["git", "clean", "-fd", "--", "testdata"])
 
@@ -55,6 +57,9 @@ class Snapshot:
 
     def file(self, path: Path):
         target = str(path.resolve())
+        return self.file_by_manifest_path(target)
+
+    def file_by_manifest_path(self, target: str):
         for item in self.manifest["files"]:
             if item["path"] == target:
                 return item
@@ -159,3 +164,74 @@ def test_restore_applies_final_state(tmp_path):
 
     assert (TESTDATA / "created_by_program.txt").read_text() == "created final\n"
     assert not (TESTDATA / "deleted_by_program.txt").exists()
+
+
+def test_rename_records_source_tombstone_and_destination_content(tmp_path):
+    # A rename changes two paths: the old name vanishes and the new name gains
+    # the bytes. Recording only the destination write would miss the removal.
+    old_path = TESTDATA / "rename_source.txt"
+    new_path = TESTDATA / "rename destination.txt"
+    old_path.write_text("renamed payload\n")
+    snap = capture(tmp_path, PYTHON, "test_programs/rename_paths.py")
+
+    removed = snap.file(old_path)
+    assert "delete" in removed["operations"]
+    assert removed["before"]["exists"] is True
+    assert removed["after"]["exists"] is False
+    assert removed["after"]["tombstone"] is True
+
+    created = snap.file(new_path)
+    assert "write" in created["operations"]
+    assert created["before"]["exists"] is False
+    assert snap.blob_text(created["after"]["blob"]) == "renamed payload\n"
+
+
+def test_staged_and_unstaged_dirty_git_files_are_both_captured(tmp_path):
+    # "Dirty" covers two Git states: changes already added to the index and
+    # changes that exist only in the worktree. Neither can be rebuilt from HEAD,
+    # so the snapshot must store a blob for each despite the differing status.
+    index_dirty = TESTDATA / "clean.txt"
+    worktree_dirty = TESTDATA / "dirty.txt"
+    index_dirty.write_text("staged dirty content\n")
+    run(["git", "add", "testdata/clean.txt"])
+    worktree_dirty.write_text("unstaged dirty content\n")
+
+    snap = capture(tmp_path, PYTHON, "test_programs/read_git_dirty_modes.py")
+
+    expectations = (
+        (index_dirty, "staged dirty content\n"),
+        (worktree_dirty, "unstaged dirty content\n"),
+    )
+    for path, content in expectations:
+        entry = snap.file(path)
+        assert entry["git"]["tracked"] is True
+        assert entry["git"]["dirty"] is True
+        assert snap.blob_text(entry["before"]["blob"]) == content
+
+
+def test_peculiar_file_names_are_recorded_and_blobbed(tmp_path):
+    # Path handling should not accidentally normalize away meaningful bytes or
+    # delimiters. Spaces, embedded newlines, and non-UTF-8 bytes are all legal on
+    # Linux filesystems and are exactly the kind of names that reveal whether the
+    # manifest format has a real path encoding strategy.
+    (TESTDATA / "name with spaces.txt").write_text("space payload\n")
+    (TESTDATA / "name with\nnewline.txt").write_text("newline payload\n")
+    bytes_path = os.path.join(os.fsencode(TESTDATA), b"name-with-\xff-byte.txt")
+    with open(bytes_path, "wb") as handle:
+        handle.write(b"byte payload\n")
+
+    snap = capture(tmp_path, PYTHON, "test_programs/read_peculiar_names.py")
+
+    spaced = snap.file(TESTDATA / "name with spaces.txt")
+    assert "read" in spaced["operations"]
+    assert snap.blob_text(spaced["before"]["blob"]) == "space payload\n"
+
+    newline = snap.file(TESTDATA / "name with\nnewline.txt")
+    assert "read" in newline["operations"]
+    assert snap.blob_text(newline["before"]["blob"]) == "newline payload\n"
+
+    # Resolve symlinks as Snapshot.file() does; abspath() leaves them in place.
+    byte_path_text = os.fsdecode(os.path.realpath(bytes_path))
+    unusual = snap.file_by_manifest_path(byte_path_text)
+    assert "read" in unusual["operations"]
+    assert (snap.path / "blobs" / unusual["before"]["blob"]).read_bytes() == b"byte payload\n"