Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch

Use disposable git repo in tests

Author
Arjun Guha <a.guha@northeastern.edu>
Date
2026-05-02 10:40:45 -0400
Commit
fb8cd3cb5103feb1ec17dacc06e9be12ac417e28
test_programs/create_and_commit_file.py
new file mode 100644
index 0000000..2b4a2e9
--- /dev/null
+++ b/test_programs/create_and_commit_file.py
@@ -0,0 +1,10 @@
+import os
+import subprocess
+from pathlib import Path
+
+repo = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
+path = repo / "committed_by_program.txt"
+
+path.write_text("created and committed by traced program\n")
+subprocess.run(["git", "add", "committed_by_program.txt"], cwd=repo, check=True)
+subprocess.run(["git", "commit", "-m", "Add traced file"], cwd=repo, check=True)
test_programs/dirty_untracked_write.py
index 9600648..13c4e39 100644
--- a/test_programs/dirty_untracked_write.py
+++ b/test_programs/dirty_untracked_write.py
@@ -1,7 +1,7 @@
+import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-testdata = root / "testdata"
+testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
 
 (testdata / "dirty.txt").read_text()
 (testdata / "untracked_runtime.txt").read_text()
test_programs/fork_and_usr.py
index 41c397b..1d6f822 100644
--- a/test_programs/fork_and_usr.py
+++ b/test_programs/fork_and_usr.py
@@ -1,8 +1,7 @@
 import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-testdata = root / "testdata"
+testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
 
 pid = os.fork()
 if pid == 0:
test_programs/read_clean.py
index 2310ff0..ce0d0f4 100644
--- a/test_programs/read_clean.py
+++ b/test_programs/read_clean.py
@@ -1,4 +1,4 @@
+import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-print((root / "testdata" / "clean.txt").read_text())
+print((Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) / "clean.txt").read_text())
test_programs/read_git_dirty_modes.py
index c26a573..92802d5 100644
--- a/test_programs/read_git_dirty_modes.py
+++ b/test_programs/read_git_dirty_modes.py
@@ -1,7 +1,7 @@
+import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-testdata = root / "testdata"
+testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
 
 (testdata / "clean.txt").read_text()
 (testdata / "dirty.txt").read_text()
test_programs/read_ignored_paths.py
index db2f5d0..a78d26f 100644
--- a/test_programs/read_ignored_paths.py
+++ b/test_programs/read_ignored_paths.py
@@ -1,8 +1,7 @@
 import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-testdata = root / "testdata"
+testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
 config_home = Path(os.environ.get("XDG_CONFIG_HOME", Path(os.environ["HOME"]) / ".config"))
 ignore_config = config_home / "agent-snapshot" / "ignore.json"
 
test_programs/read_non_utf8_filename.py
index 431fd20..fc2e08a 100644
--- a/test_programs/read_non_utf8_filename.py
+++ b/test_programs/read_non_utf8_filename.py
@@ -1,8 +1,7 @@
 import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-testdata = root / "testdata"
+testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
 
 path = os.path.join(os.fsencode(testdata), b"non-utf8-\xff.txt")
 with open(path, "rb") as handle:
test_programs/read_peculiar_text_names.py
index e5cc5b0..7661be8 100644
--- a/test_programs/read_peculiar_text_names.py
+++ b/test_programs/read_peculiar_text_names.py
@@ -1,7 +1,7 @@
+import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-testdata = root / "testdata"
+testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
 
 (testdata / "name with spaces.txt").read_text()
 (testdata / "name with\nnewline.txt").read_text()
test_programs/rename_paths.py
index 0d4ab32..74d4bec 100644
--- a/test_programs/rename_paths.py
+++ b/test_programs/rename_paths.py
@@ -1,6 +1,6 @@
+import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-testdata = root / "testdata"
+testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
 
 (testdata / "rename_source.txt").rename(testdata / "rename destination.txt")
test_programs/rewrite_clean_tracked.py
index dabe208..7037bbe 100644
--- a/test_programs/rewrite_clean_tracked.py
+++ b/test_programs/rewrite_clean_tracked.py
@@ -1,6 +1,6 @@
+import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
-path = root / "testdata" / "clean.txt"
+path = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) / "clean.txt"
 
 path.write_text(path.read_text())
test_programs/write_git_internal.py
index b174018..29cd347 100644
--- a/test_programs/write_git_internal.py
+++ b/test_programs/write_git_internal.py
@@ -1,5 +1,6 @@
+import os
 from pathlib import Path
 
-root = Path(__file__).resolve().parents[1]
+root = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
 
 (root / ".git" / "delete_me").write_text("internal git scratch\n")
tests/test_agent_snapshot.py
index e6419a0..a4de765 100644
--- a/tests/test_agent_snapshot.py
+++ b/tests/test_agent_snapshot.py
@@ -11,6 +11,7 @@ ROOT = Path(__file__).resolve().parents[1]
 BUILD = ROOT / "build" / "pytest"
 BIN = BUILD / "agent-snapshot"
 TESTDATA = ROOT / "testdata"
+WORKTREE = TESTDATA / "runtime_repo"
 # Use the system Python rather than uv's managed interpreter. The snapshotter
 # intentionally observes interpreter and loader activity too, and a uv-managed
 # Python in the user's home directory can be writable by the current user. That
@@ -34,25 +35,30 @@ def build_agent_snapshot():
 
 
 @pytest.fixture(autouse=True)
-def pristine_testdata():
-    # The snapshot policy depends on Git's clean/dirty/untracked distinctions.
-    # Each test is allowed to dirty or create files under testdata, then this
-    # fixture restores the committed baseline so later tests see known Git state.
-    run(["git", "reset", "--", "testdata"])
-    run(["git", "checkout", "--", "testdata"])
-    run(["git", "clean", "-fd", "--", "testdata"])
-    yield
-    run(["git", "reset", "--", "testdata"])
-    run(["git", "checkout", "--", "testdata"])
-    run(["git", "clean", "-fd", "--", "testdata"])
-
-
-@pytest.fixture(autouse=True)
-def pristine_git_internal_file():
-    internal = ROOT / ".git" / "delete_me"
-    internal.unlink(missing_ok=True)
+def runtime_git_repo(monkeypatch):
+    # Most of the snapshot policy depends on Git state, but tests should not mutate
+    # the project checkout's own index or .git directory. Each test gets a disposable
+    # repo at testdata/runtime_repo; helpers find it via AGENT_SNAPSHOT_TEST_REPO.
+    shutil.rmtree(WORKTREE, ignore_errors=True)
+    WORKTREE.mkdir(parents=True)
+    (WORKTREE / "clean.txt").write_text("clean tracked fixture\nline two\n")
+    (WORKTREE / "dirty.txt").write_text("dirty tracked fixture original\n")
+    (WORKTREE / "nested").mkdir()
+    (WORKTREE / "nested" / "info.txt").write_text("nested tracked fixture\n")
+    subprocess.run(["git", "init"], cwd=WORKTREE, text=True, check=True, capture_output=True)
+    subprocess.run(["git", "config", "user.email", "tests@example.invalid"], cwd=WORKTREE, check=True)
+    subprocess.run(["git", "config", "user.name", "Agent Snapshot Tests"], cwd=WORKTREE, check=True)
+    subprocess.run(["git", "add", "."], cwd=WORKTREE, check=True)
+    subprocess.run(
+        ["git", "commit", "-m", "Initial fixture"],
+        cwd=WORKTREE,
+        text=True,
+        check=True,
+        capture_output=True,
+    )
+    monkeypatch.setenv("AGENT_SNAPSHOT_TEST_REPO", str(WORKTREE))
     yield
-    internal.unlink(missing_ok=True)
+    shutil.rmtree(WORKTREE, ignore_errors=True)
 
 
 @pytest.fixture(autouse=True)
@@ -107,8 +113,8 @@ def test_missing_ignore_config_aborts_at_startup(tmp_path, ignore_config):
 
 
 def test_ignore_config_suppresses_files_directories_and_itself(tmp_path, ignore_config):
-    ignored_file = TESTDATA / "ignored_file.txt"
-    ignored_dir = TESTDATA / "ignored_dir"
+    ignored_file = WORKTREE / "ignored_file.txt"
+    ignored_dir = WORKTREE / "ignored_dir"
     ignored_file.write_text("ignored file payload\n")
     ignored_dir.mkdir()
     (ignored_dir / "nested.txt").write_text("ignored nested payload\n")
@@ -117,19 +123,19 @@ def test_ignore_config_suppresses_files_directories_and_itself(tmp_path, ignore_
     snap = capture(tmp_path, PYTHON, "test_programs/read_ignored_paths.py")
     manifest_paths = {item["path"] for item in snap.manifest["files"]}
 
-    assert str((TESTDATA / "clean.txt").resolve()) in manifest_paths
+    assert str((WORKTREE / "clean.txt").resolve()) in manifest_paths
     assert str(ignored_file.resolve()) not in manifest_paths
     assert str((ignored_dir / "nested.txt").resolve()) not in manifest_paths
     assert str(ignore_config.resolve()) not in manifest_paths
 
 
 def test_ignore_config_expands_home_prefix(tmp_path, ignore_config):
-    ignored_file = TESTDATA / "ignored_file.txt"
-    ignored_dir = TESTDATA / "ignored_dir"
+    ignored_file = WORKTREE / "ignored_file.txt"
+    ignored_dir = WORKTREE / "ignored_dir"
     ignored_file.write_text("ignored file payload\n")
     ignored_dir.mkdir()
     (ignored_dir / "nested.txt").write_text("nested payload\n")
-    ignore_config.write_text(json.dumps(["$HOME/testdata/ignored_file.txt"]) + "\n")
+    ignore_config.write_text(json.dumps(["$HOME/testdata/runtime_repo/ignored_file.txt"]) + "\n")
 
     snap = capture(tmp_path, PYTHON, "test_programs/read_ignored_paths.py")
     manifest_paths = {item["path"] for item in snap.manifest["files"]}
@@ -140,8 +146,8 @@ def test_ignore_config_expands_home_prefix(tmp_path, ignore_config):
 def test_ignore_config_expands_xdg_config_home_fallback(tmp_path, ignore_config):
     home = tmp_path / "home"
     config_path = home / ".config" / "agent-snapshot" / "ignore.json"
-    ignored_file = TESTDATA / "ignored_file.txt"
-    ignored_dir = TESTDATA / "ignored_dir"
+    ignored_file = WORKTREE / "ignored_file.txt"
+    ignored_dir = WORKTREE / "ignored_dir"
     ignored_file.write_text("ignored file payload\n")
     ignored_dir.mkdir()
     (ignored_dir / "nested.txt").write_text("nested payload\n")
@@ -170,31 +176,42 @@ def test_clean_git_tracked_read_records_repo_without_blob(tmp_path):
     # reads testdata/clean.txt, but the snapshot should rely on Git repository
     # root + HEAD + relative path instead of copying file contents into blobs.
     snap = capture(tmp_path, PYTHON, "test_programs/read_clean.py")
-    clean = snap.file(TESTDATA / "clean.txt")
+    clean = snap.file(WORKTREE / "clean.txt")
 
     assert "read" in clean["operations"]
     assert clean["git"]["tracked"] is True
     assert clean["git"]["dirty"] is False
     assert clean["before"].get("blob") is None
     assert clean["after"].get("blob") is None
-    assert any(repo["root"] == str(ROOT.resolve()) for repo in snap.manifest["git_repositories"])
+    assert any(repo["root"] == str(WORKTREE.resolve()) for repo in snap.manifest["git_repositories"])
 
 
 def test_git_internal_directory_writes_are_ignored(tmp_path):
     snap = capture(tmp_path, PYTHON, "test_programs/write_git_internal.py")
     manifest_paths = {item["path"] for item in snap.manifest["files"]}
 
-    assert str((ROOT / ".git" / "delete_me").resolve()) not in manifest_paths
+    assert str((WORKTREE / ".git" / "delete_me").resolve()) not in manifest_paths
 
 
 def test_written_clean_git_tracked_file_gets_after_blob(tmp_path):
     snap = capture(tmp_path, PYTHON, "test_programs/rewrite_clean_tracked.py")
-    clean = snap.file(TESTDATA / "clean.txt")
+    clean = snap.file(WORKTREE / "clean.txt")
 
     assert "write" in clean["operations"]
     assert clean["git"]["tracked"] is True
     assert clean["git"]["dirty"] is False
-    assert snap.blob_text(clean["after"]["blob"]) == (TESTDATA / "clean.txt").read_text()
+    assert snap.blob_text(clean["after"]["blob"]) == (WORKTREE / "clean.txt").read_text()
+
+
+def test_file_created_and_committed_by_tracee_still_gets_after_blob(tmp_path):
+    snap = capture(tmp_path, PYTHON, "test_programs/create_and_commit_file.py")
+    created = snap.file(WORKTREE / "committed_by_program.txt")
+
+    assert "write" in created["operations"]
+    assert created["git"]["tracked"] is True
+    assert created["git"]["dirty"] is False
+    assert snap.blob_text(created["after"]["blob"]) == "created and committed by traced program\n"
+    assert not any("/.git/" in item["path"] for item in snap.manifest["files"])
 
 
 def test_dirty_untracked_created_and_deleted_files_are_captured(tmp_path):
@@ -203,27 +220,27 @@ def test_dirty_untracked_created_and_deleted_files_are_captured(tmp_path):
     # - untracked files have no commit object to reconstruct from
     # - created files need before=false and after content
     # - deleted files need a tombstone so restore can reproduce non-existence
-    (TESTDATA / "dirty.txt").write_text("dirty tracked fixture changed before run\n")
-    (TESTDATA / "untracked_runtime.txt").write_text("untracked input\n")
-    (TESTDATA / "deleted_by_program.txt").write_text("delete me\n")
+    (WORKTREE / "dirty.txt").write_text("dirty tracked fixture changed before run\n")
+    (WORKTREE / "untracked_runtime.txt").write_text("untracked input\n")
+    (WORKTREE / "deleted_by_program.txt").write_text("delete me\n")
 
     snap = capture(tmp_path, PYTHON, "test_programs/dirty_untracked_write.py")
 
-    dirty = snap.file(TESTDATA / "dirty.txt")
+    dirty = snap.file(WORKTREE / "dirty.txt")
     assert dirty["git"]["tracked"] is True
     assert dirty["git"]["dirty"] is True
     assert snap.blob_text(dirty["before"]["blob"]) == "dirty tracked fixture changed before run\n"
 
-    untracked = snap.file(TESTDATA / "untracked_runtime.txt")
+    untracked = snap.file(WORKTREE / "untracked_runtime.txt")
     assert untracked["git"]["tracked"] is False
     assert snap.blob_text(untracked["before"]["blob"]) == "untracked input\n"
 
-    created = snap.file(TESTDATA / "created_by_program.txt")
+    created = snap.file(WORKTREE / "created_by_program.txt")
     assert "write" in created["operations"]
     assert created["before"]["exists"] is False
     assert snap.blob_text(created["after"]["blob"]) == "created final\n"
 
-    deleted = snap.file(TESTDATA / "deleted_by_program.txt")
+    deleted = snap.file(WORKTREE / "deleted_by_program.txt")
     assert "delete" in deleted["operations"]
     assert deleted["after"]["exists"] is False
     assert deleted["after"]["tombstone"] is True
@@ -237,7 +254,7 @@ def test_fork_usr_and_directory_traversal(tmp_path):
     # blobbed, and iterates testdata to verify directory traversal is recorded.
     snap = capture(tmp_path, PYTHON, "test_programs/fork_and_usr.py")
 
-    child = snap.file(TESTDATA / "child_output.txt")
+    child = snap.file(WORKTREE / "child_output.txt")
     assert "write" in child["operations"]
     assert snap.blob_text(child["after"]["blob"]) == "child final\n"
 
@@ -246,7 +263,7 @@ def test_fork_usr_and_directory_traversal(tmp_path):
     assert usr_env["before"].get("blob") is None
     assert usr_env["after"].get("blob") is None
 
-    directory = snap.file(TESTDATA)
+    directory = snap.file(WORKTREE)
     assert "directory" in directory["operations"]
 
 
@@ -255,37 +272,37 @@ def test_restore_applies_final_state(tmp_path):
     # immediately after capture. That proves the bundle contains enough payload
     # to recreate final captured files and enough tombstone information to remove
     # files that should not exist after the traced command.
-    (TESTDATA / "dirty.txt").write_text("changed before capture\n")
-    (TESTDATA / "untracked_runtime.txt").write_text("untracked input\n")
-    (TESTDATA / "deleted_by_program.txt").write_text("delete me\n")
+    (WORKTREE / "dirty.txt").write_text("changed before capture\n")
+    (WORKTREE / "untracked_runtime.txt").write_text("untracked input\n")
+    (WORKTREE / "deleted_by_program.txt").write_text("delete me\n")
     snap = capture(tmp_path, PYTHON, "test_programs/dirty_untracked_write.py")
 
-    shutil.rmtree(TESTDATA)
-    TESTDATA.mkdir()
-    (TESTDATA / "created_by_program.txt").write_text("wrong\n")
-    (TESTDATA / "deleted_by_program.txt").write_text("should disappear\n")
+    shutil.rmtree(WORKTREE)
+    WORKTREE.mkdir()
+    (WORKTREE / "created_by_program.txt").write_text("wrong\n")
+    (WORKTREE / "deleted_by_program.txt").write_text("should disappear\n")
 
     run([str(BIN), "restore", str(snap.path)])
 
-    assert (TESTDATA / "created_by_program.txt").read_text() == "created final\n"
-    assert not (TESTDATA / "deleted_by_program.txt").exists()
+    assert (WORKTREE / "created_by_program.txt").read_text() == "created final\n"
+    assert not (WORKTREE / "deleted_by_program.txt").exists()
 
 
 def test_rename_records_source_tombstone_and_destination_content(tmp_path):
     # Rename is not just a write: a replay-equivalent snapshot needs to know that
     # the source path stopped existing and that the destination path acquired the
     # content. This catches implementations that only model the destination open.
-    (TESTDATA / "rename_source.txt").write_text("renamed payload\n")
+    (WORKTREE / "rename_source.txt").write_text("renamed payload\n")
 
     snap = capture(tmp_path, PYTHON, "test_programs/rename_paths.py")
 
-    source = snap.file(TESTDATA / "rename_source.txt")
+    source = snap.file(WORKTREE / "rename_source.txt")
     assert "delete" in source["operations"]
     assert source["before"]["exists"] is True
     assert source["after"]["exists"] is False
     assert source["after"]["tombstone"] is True
 
-    destination = snap.file(TESTDATA / "rename destination.txt")
+    destination = snap.file(WORKTREE / "rename destination.txt")
     assert "write" in destination["operations"]
     assert destination["before"]["exists"] is False
     assert snap.blob_text(destination["after"]["blob"]) == "renamed payload\n"
@@ -295,10 +312,10 @@ def test_staged_and_unstaged_dirty_git_files_are_both_captured(tmp_path):
     # Git has more than one kind of "dirty". A staged change and an unstaged
     # worktree change are both unreconstructable from HEAD alone, so both should
     # receive blobs even though their status bits differ.
-    staged_path = TESTDATA / "clean.txt"
-    unstaged_path = TESTDATA / "dirty.txt"
+    staged_path = WORKTREE / "clean.txt"
+    unstaged_path = WORKTREE / "dirty.txt"
     staged_path.write_text("staged dirty content\n")
-    run(["git", "add", "testdata/clean.txt"])
+    subprocess.run(["git", "add", "clean.txt"], cwd=WORKTREE, check=True)
     unstaged_path.write_text("unstaged dirty content\n")
 
     snap = capture(tmp_path, PYTHON, "test_programs/read_git_dirty_modes.py")
@@ -318,23 +335,23 @@ def test_text_peculiar_file_names_are_recorded_and_blobbed(tmp_path):
     # Spaces and embedded newlines are valid UTF-8 paths and should work with the
     # current manifest design. The raw non-UTF-8 byte case is kept separate below
     # because nlohmann/json rejects invalid UTF-8 in JSON strings.
-    (TESTDATA / "name with spaces.txt").write_text("space payload\n")
-    (TESTDATA / "name with\nnewline.txt").write_text("newline payload\n")
+    (WORKTREE / "name with spaces.txt").write_text("space payload\n")
+    (WORKTREE / "name with\nnewline.txt").write_text("newline payload\n")
 
     snap = capture(tmp_path, PYTHON, "test_programs/read_peculiar_text_names.py")
 
-    spaced = snap.file(TESTDATA / "name with spaces.txt")
+    spaced = snap.file(WORKTREE / "name with spaces.txt")
     assert "read" in spaced["operations"]
     assert snap.blob_text(spaced["before"]["blob"]) == "space payload\n"
 
-    newline = snap.file(TESTDATA / "name with\nnewline.txt")
+    newline = snap.file(WORKTREE / "name with\nnewline.txt")
     assert "read" in newline["operations"]
     assert snap.blob_text(newline["before"]["blob"]) == "newline payload\n"
 
 
 @pytest.mark.skip(reason="nlohmann/json rejects non-UTF-8 std::string values when dumping JSON")
 def test_non_utf8_filename_exposes_json_string_limitation(tmp_path):
-    bytes_path = os.path.join(os.fsencode(TESTDATA), b"non-utf8-\xff.txt")
+    bytes_path = os.path.join(os.fsencode(WORKTREE), b"non-utf8-\xff.txt")
     with open(bytes_path, "wb") as handle:
         handle.write(b"non utf8 payload\n")