Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch

Encode manifest.json with typed Manifest_json Yojson derivations

Replace ad hoc Yojson.Safe construction with a Manifest_json module,
strict record codecs, and converters from runtime metadata; trim README
manifest field list.

Co-authored-by: Cursor <cursoragent@cursor.com>
Author
Arjun Guha <a.guha@northeastern.edu>
Date
2026-05-03 04:18:52 -0400
Commit
0c6865ef66ec053693ac3aa7115fe56493ee5e0c
README.md
index 43fb199..11003b5 100644
--- a/README.md
+++ b/README.md
@@ -66,7 +66,6 @@ snapshot-dir/
 
 `manifest.json` contains:
 
-- `format_version`: snapshot format version.
 - `command`: command and arguments that were launched.
 - `exit_status`: recorded command status field.
 - `start_cwd`: working directory where Agent Snapshot was launched.
src/ocaml/agent_snapshot.ml
index f7adf07..e66c6eb 100644
--- a/src/ocaml/agent_snapshot.ml
+++ b/src/ocaml/agent_snapshot.ml
@@ -6,8 +6,6 @@ let utf8_string (s : string) : string =
   try Enc.recode_string ~in_enc:Enc.utf8 ~out_enc:Enc.utf8 s
   with Enc.Malformed_code -> Enc.recode_string ~in_enc:Enc.latin1 ~out_enc:Enc.utf8 s
 
-let jstr (s : string) : Json.t = `String (utf8_string s)
-
 type metadata = {
   mutable exists : bool;
   mutable tombstone : bool;
@@ -54,23 +52,64 @@ type repo_record = {
 (** JSON array of strings in the ignore config file. *)
 type ignore_file_entries = string list [@@deriving yojson]
 
-(** One repository row in manifest.json ["git_repositories"]. *)
-type manifest_repo = {
-  root : string;
-  head : string;
-  dirty : bool;
-}
-[@@deriving yojson]
-
-(** Subset of the per-file ["after"] object needed by [restore_snapshot]. *)
-type manifest_after = {
-  exists : bool;
-  tombstone : bool [@default false];
-  blob : string option [@default None];
-  mode : int option [@default None];
-  mtime : int option [@default None];
-}
-[@@deriving yojson { strict = false }]
+(** Wire encoding of [manifest.json] ([write_manifest]); restore reads subsets by hand. *)
+module Manifest_json = struct
+  (** One row of ["git_repositories"]. *)
+  type repo = {
+    root : string;
+    head : string;
+    dirty : bool;
+  }
+  [@@deriving yojson { strict = true }]
+
+  (** ["before"] / ["after"]: keys mirror [manifest_metadata_of_metadata] output exactly
+      ([exists], optional [tombstone], and when [exists] then [type], [mode], [size], [mtime];
+      optional [blob]). *)
+  type metadata = {
+    exists : bool;
+    tombstone : bool [@default false];
+    type_ : string option [@key "type"] [@default None];
+    mode : int option [@default None];
+    size : int64 option [@default None];
+    mtime : int option [@default None];
+    blob : string option [@default None];
+  }
+  [@@deriving yojson { strict = true }]
+
+  (** When [in_repo] is false only [["in_repo"]] appears; when true, all repository fields appear. *)
+  type git = {
+    in_repo : bool;
+    root : string option [@default None];
+    head : string option [@default None];
+    relative_path : string option [@default None];
+    tracked : bool option [@default None];
+    dirty : bool option [@default None];
+    ignored : bool option [@default None];
+  }
+  [@@deriving yojson { strict = true }]
+
+  (** One element of [["files"]]. *)
+  type file_entry = {
+    path : string;
+    operations : string list;
+    before : metadata;
+    after : metadata;
+    git : git;
+  }
+  [@@deriving yojson { strict = true }]
+
+  (** Root object ([manifest.json]). *)
+  type t = {
+    command : string list;
+    exit_status : int;
+    start_cwd : string;
+    uid : int;
+    gid : int;
+    git_repositories : repo list;
+    files : file_entry list;
+  }
+  [@@deriving yojson { strict = true }]
+end
 
 let empty_metadata () : metadata =
   { exists = false; tombstone = false; regular = false; directory = false; mode = 0; size = 0L; mtime = 0; blob = None }
@@ -341,74 +380,81 @@ let finalize_records () : unit =
           recd.after.blob <- Some (store_blob recd.path)))
     files
 
-let metadata_json (meta : metadata) : Json.t =
-  let base = [ ("exists", `Bool meta.exists) ] in
-  let base = if meta.tombstone then ("tombstone", `Bool true) :: base else base in
-  let base =
-    if meta.exists then
-      ("type", `String (if meta.directory then "directory" else if meta.regular then "file" else "other"))
-      :: ("mode", `Int meta.mode)
-      :: ("size", `Intlit (Int64.to_string meta.size))
-      :: ("mtime", `Int meta.mtime)
-      :: base
-    else base
-  in
-  let base = match meta.blob with Some blob -> ("blob", jstr blob) :: base | None -> base in
-  `Assoc (List.rev base)
-
-let git_json (git : git_info) : Json.t =
-  if not git.in_repo then `Assoc [ ("in_repo", `Bool false) ]
+let manifest_metadata_of_metadata (meta : metadata) : Manifest_json.metadata =
+  {
+    exists = meta.exists;
+    tombstone = meta.tombstone;
+    type_ =
+      (if meta.exists then
+         Some (utf8_string (if meta.directory then "directory" else if meta.regular then "file" else "other"))
+       else None);
+    mode = if meta.exists then Some meta.mode else None;
+    size = if meta.exists then Some meta.size else None;
+    mtime = if meta.exists then Some meta.mtime else None;
+    blob = Option.map utf8_string meta.blob;
+  }
+
+let manifest_git_of_git_info (git : git_info) : Manifest_json.git =
+  if not git.in_repo then
+    {
+      in_repo = false;
+      root = None;
+      head = None;
+      relative_path = None;
+      tracked = None;
+      dirty = None;
+      ignored = None;
+    }
   else
-    `Assoc
-      [
-        ("in_repo", `Bool true);
-        ("root", jstr git.root);
-        ("head", jstr git.head);
-        ("relative_path", jstr git.relative_path);
-        ("tracked", `Bool git.tracked);
-        ("dirty", `Bool git.dirty);
-        ("ignored", `Bool git.ignored);
-      ]
+    {
+      in_repo = true;
+      root = Some (utf8_string git.root);
+      head = Some (utf8_string git.head);
+      relative_path = Some (utf8_string git.relative_path);
+      tracked = Some git.tracked;
+      dirty = Some git.dirty;
+      ignored = Some git.ignored;
+    }
+
+let manifest_repo_of_repo_record (repo : repo_record) : Manifest_json.repo =
+  { root = utf8_string repo.root; head = utf8_string repo.head; dirty = repo.dirty }
 
 let write_manifest (out : string) (command : string list) (exit_status : int) : unit =
-  let repo_items =
-    Hashtbl.fold
-      (fun _ (repo : repo_record) acc ->
-        manifest_repo_to_yojson { root = repo.root; head = repo.head; dirty = repo.dirty } :: acc)
-      repos []
+  let git_repositories =
+    Hashtbl.fold (fun _ (repo : repo_record) acc -> manifest_repo_of_repo_record repo :: acc) repos []
     |> List.sort Stdlib.compare
   in
-  let file_items =
+  let manifest_files =
     Hashtbl.fold
       (fun _ recd acc ->
-        let ops = Hashtbl.fold (fun op () acc -> op :: acc) recd.operations [] |> List.sort String.compare in
+        let operations =
+          Hashtbl.fold (fun op () acc -> op :: acc) recd.operations [] |> List.sort String.compare |> List.map utf8_string
+        in
         let git = if recd.after_git.in_repo then recd.after_git else recd.before_git in
-        `Assoc
-          [
-            ("path", jstr recd.path);
-            ("operations", `List (List.map jstr ops));
-            ("before", metadata_json recd.before);
-            ("after", metadata_json recd.after);
-            ("git", git_json git);
-          ]
+        ({
+           path = utf8_string recd.path;
+           operations;
+           before = manifest_metadata_of_metadata recd.before;
+           after = manifest_metadata_of_metadata recd.after;
+           git = manifest_git_of_git_info git;
+         }
+          : Manifest_json.file_entry)
         :: acc)
       files []
     |> List.sort Stdlib.compare
   in
-  let manifest =
-    `Assoc
-      [
-        ("format_version", `Int 1);
-        ("command", `List (List.map jstr command));
-        ("exit_status", `Int exit_status);
-        ("start_cwd", jstr (FileUtil.pwd ()));
-        ("uid", `Int tracer_uid);
-        ("gid", `Int tracer_gid);
-        ("git_repositories", `List repo_items);
-        ("files", `List file_items);
-      ]
+  let manifest : Manifest_json.t =
+    {
+      command = List.map utf8_string command;
+      exit_status;
+      start_cwd = utf8_string (FileUtil.pwd ());
+      uid = tracer_uid;
+      gid = tracer_gid;
+      git_repositories;
+      files = manifest_files;
+    }
   in
-  Json.to_file ~std:true (concat_path out "manifest.json") manifest
+  Json.to_file ~std:true (concat_path out "manifest.json") (Manifest_json.to_yojson manifest)
 
 (** Resolve relative syscall paths against cwd or a directory fd as required by *at syscalls. *)
 let resolve_path (proc : proc_state) (dirfd : int) (path : string) : string =
@@ -552,7 +598,7 @@ let restore_snapshot (dir : string) : unit =
       | `Assoc item -> (
           match (List.assoc_opt "path" item, List.assoc_opt "after" item : Json.t option * Json.t option) with
           | Some (`String path), Some (`Assoc after_fields) -> (
-              match manifest_after_of_yojson (`Assoc after_fields) with
+              match Manifest_json.metadata_of_yojson (`Assoc after_fields) with
               | Error _ -> ()
               | Ok after ->
                   if not after.exists then (