Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Add filesystem edge case tests

Author: Arjun Guha <a.guha@northeastern.edu>
Date: 2026-05-02 07:47:51 -0400
Commit: 7681c1527041c6c27fdd4de64faee390a493888c

test_programs/read_git_dirty_modes.py

new file mode 100644
index 0000000..c26a573
--- /dev/null
+++ b/test_programs/read_git_dirty_modes.py
@@ -0,0 +1,7 @@
+from pathlib import Path
+
+root = Path(__file__).resolve().parents[1]  # directory that contains test_programs/
+testdata = root / "testdata"
+
+(testdata / "clean.txt").read_text()  # contents are discarded (presumably only the read needs to be observed)
+(testdata / "dirty.txt").read_text()  # same pattern for dirty.txt

test_programs/read_non_utf8_filename.py

new file mode 100644
index 0000000..431fd20
--- /dev/null
+++ b/test_programs/read_non_utf8_filename.py
@@ -0,0 +1,9 @@
+import os
+from pathlib import Path
+
+root = Path(__file__).resolve().parents[1]  # directory that contains test_programs/
+testdata = root / "testdata"
+
+path = os.path.join(os.fsencode(testdata), b"non-utf8-\xff.txt")  # bytes path; the 0xFF byte is never valid UTF-8
+with open(path, "rb") as handle:  # open by raw bytes name, binary mode
+    handle.read()  # contents are discarded

test_programs/read_peculiar_text_names.py

new file mode 100644
index 0000000..e5cc5b0
--- /dev/null
+++ b/test_programs/read_peculiar_text_names.py
@@ -0,0 +1,7 @@
+from pathlib import Path
+
+root = Path(__file__).resolve().parents[1]  # directory that contains test_programs/
+testdata = root / "testdata"
+
+(testdata / "name with spaces.txt").read_text()  # file name containing spaces; contents are discarded
+(testdata / "name with\nnewline.txt").read_text()  # "\n" is a real newline character inside the file name

test_programs/rename_paths.py

new file mode 100644
index 0000000..0d4ab32
--- /dev/null
+++ b/test_programs/rename_paths.py
@@ -0,0 +1,6 @@
+from pathlib import Path
+
+root = Path(__file__).resolve().parents[1]  # directory that contains test_programs/
+testdata = root / "testdata"
+
+(testdata / "rename_source.txt").rename(testdata / "rename destination.txt")  # both paths are in testdata/; the new name contains a space

tests/test_agent_snapshot.py

index fcdb58c..c4cc711 100644
--- a/tests/test_agent_snapshot.py
+++ b/tests/test_agent_snapshot.py
@@ -38,9 +38,11 @@ def pristine_testdata():
     # The snapshot policy depends on Git's clean/dirty/untracked distinctions.
     # Each test is allowed to dirty or create files under testdata, then this
     # fixture restores the committed baseline so later tests see known Git state.
+    run(["git", "reset", "--", "testdata"])
     run(["git", "checkout", "--", "testdata"])
     run(["git", "clean", "-fd", "--", "testdata"])
     yield
+    run(["git", "reset", "--", "testdata"])
     run(["git", "checkout", "--", "testdata"])
     run(["git", "clean", "-fd", "--", "testdata"])
 
@@ -159,3 +161,73 @@ def test_restore_applies_final_state(tmp_path):
 
     assert (TESTDATA / "created_by_program.txt").read_text() == "created final\n"
     assert not (TESTDATA / "deleted_by_program.txt").exists()
+
+
+def test_rename_records_source_tombstone_and_destination_content(tmp_path):
+    # Rename is not just a write: a replay-equivalent snapshot needs to know that
+    # the source path stopped existing and that the destination path acquired the
+    # content. This catches implementations that only model the destination open.
+    (TESTDATA / "rename_source.txt").write_text("renamed payload\n")  # create the file that rename_paths.py renames
+
+    snap = capture(tmp_path, PYTHON, "test_programs/rename_paths.py")  # NOTE(review): presumably runs the script under the snapshot tool - confirm against capture()
+
+    source = snap.file(TESTDATA / "rename_source.txt")  # manifest entry for the old path
+    assert "delete" in source["operations"]  # the old path must be recorded as deleted
+    assert source["before"]["exists"] is True
+    assert source["after"]["exists"] is False
+    assert source["after"]["tombstone"] is True  # absence must be recorded explicitly
+
+    destination = snap.file(TESTDATA / "rename destination.txt")  # manifest entry for the new path
+    assert "write" in destination["operations"]  # the new path must be recorded as written
+    assert destination["before"]["exists"] is False
+    assert snap.blob_text(destination["after"]["blob"]) == "renamed payload\n"  # payload must be readable at the new path
+
+
+def test_staged_and_unstaged_dirty_git_files_are_both_captured(tmp_path):
+    # Git has more than one kind of "dirty". A staged change and an unstaged
+    # worktree change are both unreconstructable from HEAD alone, so both should
+    # receive blobs even though their status bits differ.
+    staged_path = TESTDATA / "clean.txt"
+    unstaged_path = TESTDATA / "dirty.txt"
+    staged_path.write_text("staged dirty content\n")
+    run(["git", "add", "testdata/clean.txt"])  # stage the edit; NOTE(review): relative path, so this relies on run()'s working directory - confirm
+    unstaged_path.write_text("unstaged dirty content\n")  # edited in the worktree only, never added
+
+    snap = capture(tmp_path, PYTHON, "test_programs/read_git_dirty_modes.py")  # the script reads both files
+
+    staged = snap.file(staged_path)
+    assert staged["git"]["tracked"] is True
+    assert staged["git"]["dirty"] is True  # a staged change must still be reported as dirty
+    assert snap.blob_text(staged["before"]["blob"]) == "staged dirty content\n"  # blob must hold the edited text
+
+    unstaged = snap.file(unstaged_path)
+    assert unstaged["git"]["tracked"] is True
+    assert unstaged["git"]["dirty"] is True  # an unstaged worktree change must be reported as dirty too
+    assert snap.blob_text(unstaged["before"]["blob"]) == "unstaged dirty content\n"  # blob must hold the edited text
+
+
+def test_text_peculiar_file_names_are_recorded_and_blobbed(tmp_path):
+    # Spaces and embedded newlines are valid UTF-8 paths and should work with the
+    # current manifest design. The raw non-UTF-8 byte case is kept separate below
+    # because nlohmann/json rejects invalid UTF-8 in JSON strings.
+    (TESTDATA / "name with spaces.txt").write_text("space payload\n")  # file name containing spaces
+    (TESTDATA / "name with\nnewline.txt").write_text("newline payload\n")  # file name containing a real newline character
+
+    snap = capture(tmp_path, PYTHON, "test_programs/read_peculiar_text_names.py")  # the script reads both files
+
+    spaced = snap.file(TESTDATA / "name with spaces.txt")
+    assert "read" in spaced["operations"]
+    assert snap.blob_text(spaced["before"]["blob"]) == "space payload\n"  # blob must hold the text written above
+
+    newline = snap.file(TESTDATA / "name with\nnewline.txt")
+    assert "read" in newline["operations"]
+    assert snap.blob_text(newline["before"]["blob"]) == "newline payload\n"  # blob must hold the text written above
+
+
+@pytest.mark.skip(reason="nlohmann/json rejects non-UTF-8 std::string values when dumping JSON")
+def test_non_utf8_filename_exposes_json_string_limitation(tmp_path):
+    bytes_path = os.path.join(os.fsencode(TESTDATA), b"non-utf8-\xff.txt")  # bytes path; the 0xFF byte is never valid UTF-8
+    with open(bytes_path, "wb") as handle:  # create the file by its raw bytes name
+        handle.write(b"non utf8 payload\n")
+
+    capture(tmp_path, PYTHON, "test_programs/read_non_utf8_filename.py")  # no assertions follow; the test is skipped for the reason given in the decorator