Repositories / agent-snapshot.git
agent-snapshot.git
Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git
@@ -0,0 +1,10 @@ +import os +import subprocess +from pathlib import Path + +repo = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) +path = repo / "committed_by_program.txt" + +path.write_text("created and committed by traced program\n") +subprocess.run(["git", "add", "committed_by_program.txt"], cwd=repo, check=True) +subprocess.run(["git", "commit", "-m", "Add traced file"], cwd=repo, check=True)
@@ -1,7 +1,7 @@ +import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -testdata = root / "testdata" +testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) (testdata / "dirty.txt").read_text() (testdata / "untracked_runtime.txt").read_text()
@@ -1,8 +1,7 @@ import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -testdata = root / "testdata" +testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) pid = os.fork() if pid == 0:
@@ -1,4 +1,4 @@ +import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -print((root / "testdata" / "clean.txt").read_text()) +print((Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) / "clean.txt").read_text())
@@ -1,7 +1,7 @@ +import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -testdata = root / "testdata" +testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) (testdata / "clean.txt").read_text() (testdata / "dirty.txt").read_text()
@@ -1,8 +1,7 @@ import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -testdata = root / "testdata" +testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) config_home = Path(os.environ.get("XDG_CONFIG_HOME", Path(os.environ["HOME"]) / ".config")) ignore_config = config_home / "agent-snapshot" / "ignore.json"
@@ -1,8 +1,7 @@ import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -testdata = root / "testdata" +testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) path = os.path.join(os.fsencode(testdata), b"non-utf8-\xff.txt") with open(path, "rb") as handle:
@@ -1,7 +1,7 @@ +import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -testdata = root / "testdata" +testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) (testdata / "name with spaces.txt").read_text() (testdata / "name with\nnewline.txt").read_text()
@@ -1,6 +1,6 @@ +import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -testdata = root / "testdata" +testdata = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) (testdata / "rename_source.txt").rename(testdata / "rename destination.txt")
@@ -1,6 +1,6 @@ +import os from pathlib import Path -root = Path(__file__).resolve().parents[1] -path = root / "testdata" / "clean.txt" +path = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) / "clean.txt" path.write_text(path.read_text())
@@ -1,5 +1,6 @@ +import os from pathlib import Path -root = Path(__file__).resolve().parents[1] +root = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"]) (root / ".git" / "delete_me").write_text("internal git scratch\n")
@@ -11,6 +11,7 @@ ROOT = Path(__file__).resolve().parents[1] BUILD = ROOT / "build" / "pytest" BIN = BUILD / "agent-snapshot" TESTDATA = ROOT / "testdata" +WORKTREE = TESTDATA / "runtime_repo" # Use the system Python rather than uv's managed interpreter. The snapshotter # intentionally observes interpreter and loader activity too, and a uv-managed # Python in the user's home directory can be writable by the current user. That @@ -34,25 +35,30 @@ def build_agent_snapshot(): @pytest.fixture(autouse=True) -def pristine_testdata(): - # The snapshot policy depends on Git's clean/dirty/untracked distinctions. - # Each test is allowed to dirty or create files under testdata, then this - # fixture restores the committed baseline so later tests see known Git state. - run(["git", "reset", "--", "testdata"]) - run(["git", "checkout", "--", "testdata"]) - run(["git", "clean", "-fd", "--", "testdata"]) - yield - run(["git", "reset", "--", "testdata"]) - run(["git", "checkout", "--", "testdata"]) - run(["git", "clean", "-fd", "--", "testdata"]) - - -@pytest.fixture(autouse=True) -def pristine_git_internal_file(): - internal = ROOT / ".git" / "delete_me" - internal.unlink(missing_ok=True) +def runtime_git_repo(monkeypatch): + # Most snapshot policy depends on Git state, but tests should not mutate the + # project checkout's own index or .git directory. Each test gets a disposable + # repository under testdata and the helper programs operate there via env. + shutil.rmtree(WORKTREE, ignore_errors=True) + WORKTREE.mkdir(parents=True) + (WORKTREE / "clean.txt").write_text("clean tracked fixture\nline two\n") + (WORKTREE / "dirty.txt").write_text("dirty tracked fixture original\n") + (WORKTREE / "nested").mkdir() + (WORKTREE / "nested" / "info.txt").write_text("nested tracked fixture\n") + subprocess.run(["git", "init"], cwd=WORKTREE, text=True, check=True, capture_output=True) + subprocess.run(["git", "config", "user.email", "tests@example.invalid"], cwd=WORKTREE, check=True) + subprocess.run(["git", "config", "user.name", "Agent Snapshot Tests"], cwd=WORKTREE, check=True) + subprocess.run(["git", "add", "."], cwd=WORKTREE, check=True) + subprocess.run( + ["git", "commit", "-m", "Initial fixture"], + cwd=WORKTREE, + text=True, + check=True, + capture_output=True, + ) + monkeypatch.setenv("AGENT_SNAPSHOT_TEST_REPO", str(WORKTREE)) yield - internal.unlink(missing_ok=True) + shutil.rmtree(WORKTREE, ignore_errors=True) @pytest.fixture(autouse=True) @@ -107,8 +113,8 @@ def test_missing_ignore_config_aborts_at_startup(tmp_path, ignore_config): def test_ignore_config_suppresses_files_directories_and_itself(tmp_path, ignore_config): - ignored_file = TESTDATA / "ignored_file.txt" - ignored_dir = TESTDATA / "ignored_dir" + ignored_file = WORKTREE / "ignored_file.txt" + ignored_dir = WORKTREE / "ignored_dir" ignored_file.write_text("ignored file payload\n") ignored_dir.mkdir() (ignored_dir / "nested.txt").write_text("ignored nested payload\n") @@ -117,19 +123,19 @@ def test_ignore_config_suppresses_files_directories_and_itself(tmp_path, ignore_ snap = capture(tmp_path, PYTHON, "test_programs/read_ignored_paths.py") manifest_paths = {item["path"] for item in snap.manifest["files"]} - assert str((TESTDATA / "clean.txt").resolve()) in manifest_paths + assert str((WORKTREE / "clean.txt").resolve()) in manifest_paths assert str(ignored_file.resolve()) not in manifest_paths assert str((ignored_dir / "nested.txt").resolve()) not in manifest_paths assert str(ignore_config.resolve()) not in manifest_paths def test_ignore_config_expands_home_prefix(tmp_path, ignore_config): - ignored_file = TESTDATA / "ignored_file.txt" - ignored_dir = TESTDATA / "ignored_dir" + ignored_file = WORKTREE / "ignored_file.txt" + ignored_dir = WORKTREE / "ignored_dir" ignored_file.write_text("ignored file payload\n") ignored_dir.mkdir() (ignored_dir / "nested.txt").write_text("nested payload\n") - ignore_config.write_text(json.dumps(["$HOME/testdata/ignored_file.txt"]) + "\n") + ignore_config.write_text(json.dumps(["$HOME/testdata/runtime_repo/ignored_file.txt"]) + "\n") snap = capture(tmp_path, PYTHON, "test_programs/read_ignored_paths.py") manifest_paths = {item["path"] for item in snap.manifest["files"]} @@ -140,8 +146,8 @@ def test_ignore_config_expands_home_prefix(tmp_path, ignore_config): def test_ignore_config_expands_xdg_config_home_fallback(tmp_path, ignore_config): home = tmp_path / "home" config_path = home / ".config" / "agent-snapshot" / "ignore.json" - ignored_file = TESTDATA / "ignored_file.txt" - ignored_dir = TESTDATA / "ignored_dir" + ignored_file = WORKTREE / "ignored_file.txt" + ignored_dir = WORKTREE / "ignored_dir" ignored_file.write_text("ignored file payload\n") ignored_dir.mkdir() (ignored_dir / "nested.txt").write_text("nested payload\n") @@ -170,31 +176,42 @@ def test_clean_git_tracked_read_records_repo_without_blob(tmp_path): # reads testdata/clean.txt, but the snapshot should rely on Git repository # root + HEAD + relative path instead of copying file contents into blobs. snap = capture(tmp_path, PYTHON, "test_programs/read_clean.py") - clean = snap.file(TESTDATA / "clean.txt") + clean = snap.file(WORKTREE / "clean.txt") assert "read" in clean["operations"] assert clean["git"]["tracked"] is True assert clean["git"]["dirty"] is False assert clean["before"].get("blob") is None assert clean["after"].get("blob") is None - assert any(repo["root"] == str(ROOT.resolve()) for repo in snap.manifest["git_repositories"]) + assert any(repo["root"] == str(WORKTREE.resolve()) for repo in snap.manifest["git_repositories"]) def test_git_internal_directory_writes_are_ignored(tmp_path): snap = capture(tmp_path, PYTHON, "test_programs/write_git_internal.py") manifest_paths = {item["path"] for item in snap.manifest["files"]} - assert str((ROOT / ".git" / "delete_me").resolve()) not in manifest_paths + assert str((WORKTREE / ".git" / "delete_me").resolve()) not in manifest_paths def test_written_clean_git_tracked_file_gets_after_blob(tmp_path): snap = capture(tmp_path, PYTHON, "test_programs/rewrite_clean_tracked.py") - clean = snap.file(TESTDATA / "clean.txt") + clean = snap.file(WORKTREE / "clean.txt") assert "write" in clean["operations"] assert clean["git"]["tracked"] is True assert clean["git"]["dirty"] is False - assert snap.blob_text(clean["after"]["blob"]) == (TESTDATA / "clean.txt").read_text() + assert snap.blob_text(clean["after"]["blob"]) == (WORKTREE / "clean.txt").read_text() + + +def test_file_created_and_committed_by_tracee_still_gets_after_blob(tmp_path): + snap = capture(tmp_path, PYTHON, "test_programs/create_and_commit_file.py") + created = snap.file(WORKTREE / "committed_by_program.txt") + + assert "write" in created["operations"] + assert created["git"]["tracked"] is True + assert created["git"]["dirty"] is False + assert snap.blob_text(created["after"]["blob"]) == "created and committed by traced program\n" + assert not any("/.git/" in item["path"] for item in snap.manifest["files"]) def test_dirty_untracked_created_and_deleted_files_are_captured(tmp_path): @@ -203,27 +220,27 @@ def test_dirty_untracked_created_and_deleted_files_are_captured(tmp_path): # - untracked files have no commit object to reconstruct from # - created files need before=false and after content # - deleted files need a tombstone so restore can reproduce non-existence - (TESTDATA / "dirty.txt").write_text("dirty tracked fixture changed before run\n") - (TESTDATA / "untracked_runtime.txt").write_text("untracked input\n") - (TESTDATA / "deleted_by_program.txt").write_text("delete me\n") + (WORKTREE / "dirty.txt").write_text("dirty tracked fixture changed before run\n") + (WORKTREE / "untracked_runtime.txt").write_text("untracked input\n") + (WORKTREE / "deleted_by_program.txt").write_text("delete me\n") snap = capture(tmp_path, PYTHON, "test_programs/dirty_untracked_write.py") - dirty = snap.file(TESTDATA / "dirty.txt") + dirty = snap.file(WORKTREE / "dirty.txt") assert dirty["git"]["tracked"] is True assert dirty["git"]["dirty"] is True assert snap.blob_text(dirty["before"]["blob"]) == "dirty tracked fixture changed before run\n" - untracked = snap.file(TESTDATA / "untracked_runtime.txt") + untracked = snap.file(WORKTREE / "untracked_runtime.txt") assert untracked["git"]["tracked"] is False assert snap.blob_text(untracked["before"]["blob"]) == "untracked input\n" - created = snap.file(TESTDATA / "created_by_program.txt") + created = snap.file(WORKTREE / "created_by_program.txt") assert "write" in created["operations"] assert created["before"]["exists"] is False assert snap.blob_text(created["after"]["blob"]) == "created final\n" - deleted = snap.file(TESTDATA / "deleted_by_program.txt") + deleted = snap.file(WORKTREE / "deleted_by_program.txt") assert "delete" in deleted["operations"] assert deleted["after"]["exists"] is False assert deleted["after"]["tombstone"] is True @@ -237,7 +254,7 @@ def test_fork_usr_and_directory_traversal(tmp_path): # blobbed, and iterates testdata to verify directory traversal is recorded. snap = capture(tmp_path, PYTHON, "test_programs/fork_and_usr.py") - child = snap.file(TESTDATA / "child_output.txt") + child = snap.file(WORKTREE / "child_output.txt") assert "write" in child["operations"] assert snap.blob_text(child["after"]["blob"]) == "child final\n" @@ -246,7 +263,7 @@ def test_fork_usr_and_directory_traversal(tmp_path): assert usr_env["before"].get("blob") is None assert usr_env["after"].get("blob") is None - directory = snap.file(TESTDATA) + directory = snap.file(WORKTREE) assert "directory" in directory["operations"] @@ -255,37 +272,37 @@ def test_restore_applies_final_state(tmp_path): # immediately after capture. That proves the bundle contains enough payload # to recreate final captured files and enough tombstone information to remove # files that should not exist after the traced command. - (TESTDATA / "dirty.txt").write_text("changed before capture\n") - (TESTDATA / "untracked_runtime.txt").write_text("untracked input\n") - (TESTDATA / "deleted_by_program.txt").write_text("delete me\n") + (WORKTREE / "dirty.txt").write_text("changed before capture\n") + (WORKTREE / "untracked_runtime.txt").write_text("untracked input\n") + (WORKTREE / "deleted_by_program.txt").write_text("delete me\n") snap = capture(tmp_path, PYTHON, "test_programs/dirty_untracked_write.py") - shutil.rmtree(TESTDATA) - TESTDATA.mkdir() - (TESTDATA / "created_by_program.txt").write_text("wrong\n") - (TESTDATA / "deleted_by_program.txt").write_text("should disappear\n") + shutil.rmtree(WORKTREE) + WORKTREE.mkdir() + (WORKTREE / "created_by_program.txt").write_text("wrong\n") + (WORKTREE / "deleted_by_program.txt").write_text("should disappear\n") run([str(BIN), "restore", str(snap.path)]) - assert (TESTDATA / "created_by_program.txt").read_text() == "created final\n" - assert not (TESTDATA / "deleted_by_program.txt").exists() + assert (WORKTREE / "created_by_program.txt").read_text() == "created final\n" + assert not (WORKTREE / "deleted_by_program.txt").exists() def test_rename_records_source_tombstone_and_destination_content(tmp_path): # Rename is not just a write: a replay-equivalent snapshot needs to know that # the source path stopped existing and that the destination path acquired the # content. This catches implementations that only model the destination open. - (TESTDATA / "rename_source.txt").write_text("renamed payload\n") + (WORKTREE / "rename_source.txt").write_text("renamed payload\n") snap = capture(tmp_path, PYTHON, "test_programs/rename_paths.py") - source = snap.file(TESTDATA / "rename_source.txt") + source = snap.file(WORKTREE / "rename_source.txt") assert "delete" in source["operations"] assert source["before"]["exists"] is True assert source["after"]["exists"] is False assert source["after"]["tombstone"] is True - destination = snap.file(TESTDATA / "rename destination.txt") + destination = snap.file(WORKTREE / "rename destination.txt") assert "write" in destination["operations"] assert destination["before"]["exists"] is False assert snap.blob_text(destination["after"]["blob"]) == "renamed payload\n" @@ -295,10 +312,10 @@ def test_staged_and_unstaged_dirty_git_files_are_both_captured(tmp_path): # Git has more than one kind of "dirty". A staged change and an unstaged # worktree change are both unreconstructable from HEAD alone, so both should # receive blobs even though their status bits differ. - staged_path = TESTDATA / "clean.txt" - unstaged_path = TESTDATA / "dirty.txt" + staged_path = WORKTREE / "clean.txt" + unstaged_path = WORKTREE / "dirty.txt" staged_path.write_text("staged dirty content\n") - run(["git", "add", "testdata/clean.txt"]) + subprocess.run(["git", "add", "clean.txt"], cwd=WORKTREE, check=True) unstaged_path.write_text("unstaged dirty content\n") snap = capture(tmp_path, PYTHON, "test_programs/read_git_dirty_modes.py") @@ -318,23 +335,23 @@ def test_text_peculiar_file_names_are_recorded_and_blobbed(tmp_path): # Spaces and embedded newlines are valid UTF-8 paths and should work with the # current manifest design. The raw non-UTF-8 byte case is kept separate below # because nlohmann/json rejects invalid UTF-8 in JSON strings. - (TESTDATA / "name with spaces.txt").write_text("space payload\n") - (TESTDATA / "name with\nnewline.txt").write_text("newline payload\n") + (WORKTREE / "name with spaces.txt").write_text("space payload\n") + (WORKTREE / "name with\nnewline.txt").write_text("newline payload\n") snap = capture(tmp_path, PYTHON, "test_programs/read_peculiar_text_names.py") - spaced = snap.file(TESTDATA / "name with spaces.txt") + spaced = snap.file(WORKTREE / "name with spaces.txt") assert "read" in spaced["operations"] assert snap.blob_text(spaced["before"]["blob"]) == "space payload\n" - newline = snap.file(TESTDATA / "name with\nnewline.txt") + newline = snap.file(WORKTREE / "name with\nnewline.txt") assert "read" in newline["operations"] assert snap.blob_text(newline["before"]["blob"]) == "newline payload\n" @pytest.mark.skip(reason="nlohmann/json rejects non-UTF-8 std::string values when dumping JSON") def test_non_utf8_filename_exposes_json_string_limitation(tmp_path): - bytes_path = os.path.join(os.fsencode(TESTDATA), b"non-utf8-\xff.txt") + bytes_path = os.path.join(os.fsencode(WORKTREE), b"non-utf8-\xff.txt") with open(bytes_path, "wb") as handle: handle.write(b"non utf8 payload\n")