Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch

Omit metadata sizes from manifests

Author: Arjun Guha <a.guha@northeastern.edu>
Date: 2026-05-03 20:11:26 -0400
Commit: e13d47e606cb15705486e22ee32fc0c650656148
README.md
index 443b028..da3030f 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,7 @@ Each file record contains:
 - `after`: state captured after the traced process tree exited.
 - `git`: Git classification for the path when applicable.
 
-Metadata records include whether the path exists, file type, mode, size, mtime,
+Metadata records include whether the path exists, file type, mode, mtime,
 and optionally a `blob` key. Blob keys are state-qualified absolute paths such
 as `before:/repo/input.txt` or `after:/repo/generated.txt`; payloads for those
 keys are stored in `blobs.parquet` as described above.
@@ -101,14 +101,12 @@ Clean Git-tracked reads typically have no blob:
     "exists": true,
     "type": "file",
     "mode": 33188,
-    "size": 12,
     "mtime": 1770000000
   },
   "after": {
     "exists": true,
     "type": "file",
     "mode": 33188,
-    "size": 12,
     "mtime": 1770000000
   },
   "git": {
@@ -136,7 +134,6 @@ Captured file contents appear as blob references:
     "exists": true,
     "type": "file",
     "mode": 33188,
-    "size": 18,
     "mtime": 1770000001,
     "blob": "after:/repo/generated.txt"
   }
src/ocaml/agent_snapshot.ml
index 7fc33a7..f7a774a 100644
--- a/src/ocaml/agent_snapshot.ml
+++ b/src/ocaml/agent_snapshot.ml
@@ -65,7 +65,7 @@ module Manifest_json = struct
   [@@deriving yojson { strict = true }]
 
   (** ["before"] / ["after"]: keys mirror [manifest_metadata_of_metadata] output exactly
-      ([exists], optional [tombstone], and when [exists] then [type], [mode], [size], [mtime];
+      ([exists], optional [tombstone], and when [exists] then [type], [mode], [mtime];
       optional [blob]). *)
   type metadata = {
     exists : bool;
@@ -570,7 +570,7 @@ let manifest_metadata_of_metadata (meta : metadata) : Manifest_json.metadata =
          Some (utf8_string (if meta.directory then "directory" else if meta.regular then "file" else "other"))
        else None);
     mode = if meta.exists then Some meta.mode else None;
-    size = if meta.exists then Some meta.size else None;
+    size = None;
     mtime = if meta.exists then Some meta.mtime else None;
     blob = Option.map utf8_string meta.blob;
   }
tests/test_agent_snapshot.py
index 7f9bd23..c0e2938 100644
--- a/tests/test_agent_snapshot.py
+++ b/tests/test_agent_snapshot.py
@@ -216,6 +216,8 @@ def test_clean_git_tracked_read_records_repo_without_blob(tmp_path):
     assert "read" in clean["operations"]
     assert clean["git"]["tracked"] is True
     assert clean["git"]["dirty"] is False
+    assert "size" not in clean["before"]
+    assert "size" not in clean["after"]
     assert clean["before"].get("blob") is None
     assert clean["after"].get("blob") is None
     assert any(repo["root"] == str(WORKTREE.resolve()) for repo in snap.manifest["git_repositories"])