Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch

Handle non-directory path prefixes

Author
Arjun Guha <a.guha@northeastern.edu>
Date
2026-05-03 15:04:38 -0400
Commit
2cca5938285fd7d858a627ee7bfa0fa63edf0b84
src/ocaml/agent_snapshot.ml
index 89a3115..62fa87a 100644
--- a/src/ocaml/agent_snapshot.ml
+++ b/src/ocaml/agent_snapshot.ml
@@ -254,6 +254,20 @@ let stat_metadata (path : string) : metadata option =
       }
   with Unix.Unix_error _ -> None
 
+(** [path_has_non_directory_prefix path] is [true] when some proper prefix of
+    [path] (every component except the last) exists but is not a directory,
+    so that [path] itself cannot name a file.
+
+    Prefixes are examined from the root (or ["."] for a relative path)
+    downwards. A missing prefix, or any Unix error other than [ENOTDIR],
+    yields [false]. *)
+let path_has_non_directory_prefix (path : string) : bool =
+  let rec loop prefix = function
+    (* The final component is the path itself, not a prefix: never checked. *)
+    | [] | [ _ ] -> false
+    | seg :: rest -> (
+        let prefix = concat_path prefix seg in
+        (* [stat] rather than [lstat]: path resolution follows symlinks in
+           intermediate components, so a symlink to a directory is a valid
+           prefix, and a dangling one fails like a missing component. *)
+        match Unix.LargeFile.stat prefix with
+        | st ->
+            if st.Unix.LargeFile.st_kind = Unix.S_DIR then loop prefix rest
+            else true
+        | exception Unix.Unix_error (Unix.ENOTDIR, _, _) -> true
+        (* ENOENT and every other error: no evidence of a non-directory. *)
+        | exception Unix.Unix_error _ -> false)
+  in
+  loop (if is_absolute path then "/" else ".") (split_path path)
+
 (** Treat non-owned, non-writable paths as external system environment instead of snapshot payloads. *)
 let owned_by_other_and_not_writable (path : string) : bool =
   try
@@ -271,11 +285,16 @@ let writable_by_current_user (meta : metadata) : bool =
   else if tracer_uid = 0 then true
   else meta.mode land 0o222 <> 0
 
+(** [path_exists path] is [true] exactly when [stat_metadata path] yields
+    metadata for [path]. *)
+let path_exists (path : string) : bool = Option.is_some (stat_metadata path)
+
 (** Find the nearest existing path Git can use to discover a repository for a possibly deleted or not-yet-created file. *)
 let existing_anchor (path : string) : string option =
   let rec loop path =
     if path = "" || path = "." then None
-    else if FileUtil.test FileUtil.Exists path then Some path
+    else if path_exists path then Some path
     else
       let parent = dirname path in
       if parent = path then None else loop parent
@@ -456,7 +475,7 @@ let should_capture_content (path : string) (meta : metadata) (git : git_info) : 
 let record_observation (raw_path : string) (operation : string) : unit =
   if raw_path <> "" then
     let path = best_effort_canonical raw_path in
-    if not (is_ignored_path path) then (
+    if (not (is_ignored_path path)) && not (path_has_non_directory_prefix path) then (
       let recd =
         match Hashtbl.find_opt files path with
         | Some recd -> recd
test_programs/read_non_directory_component.py
new file mode 100644
index 0000000..ff4809e
--- /dev/null
+++ b/test_programs/read_non_directory_component.py
@@ -0,0 +1,11 @@
+import os
+from pathlib import Path
+
+root = Path(os.environ["AGENT_SNAPSHOT_TEST_REPO"])
+not_directory = root / "not_directory"
+not_directory.write_text("not a directory\n")
+
+try:
+    os.lstat(not_directory / "2851767" / "ns")
+except OSError:
+    pass
tests/test_agent_snapshot.py
index e8941f1..b80cf1c 100644
--- a/tests/test_agent_snapshot.py
+++ b/tests/test_agent_snapshot.py
@@ -216,6 +216,14 @@ def test_git_internal_directory_writes_are_ignored(tmp_path):
     assert str((WORKTREE / ".git" / "delete_me").resolve()) not in manifest_paths
 
 
+def test_non_directory_path_component_does_not_crash(tmp_path):
+    snap = capture(tmp_path, PYTHON, "test_programs/read_non_directory_component.py")
+    impossible_path = (WORKTREE / "not_directory" / "2851767" / "ns").resolve(strict=False)
+    manifest_paths = {item["path"] for item in snap.manifest["files"]}
+
+    assert str(impossible_path) not in manifest_paths
+
+
 def test_written_clean_git_tracked_file_gets_after_blob(tmp_path):
     snap = capture(tmp_path, PYTHON, "test_programs/rewrite_clean_tracked.py")
     clean = snap.file(WORKTREE / "clean.txt")