Repositories / agent-snapshot.git

agent-snapshot.git

Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git

Branch

Revert "Preserve non-UTF-8 path bytes in manifest"

This reverts commit ffb9868db7ba77dc06dc623566cbbb072a6f81db.
Author
Arjun Guha <a.guha@northeastern.edu>
Date
2026-05-02 07:45:49 -0400
Commit
68f6dcdb888ff92f4ff713a194d4c39d42ca909d
src/main.cpp
index 55beaff..1dc1d10 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -130,65 +130,6 @@ std::string errno_message(const std::string& prefix) {
   return prefix + ": " + std::strerror(errno);
 }
 
-std::string bytes_hex(const std::string& input) {
-  std::ostringstream out;
-  for (unsigned char c : input) {
-    out << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(c);
-  }
-  return out.str();
-}
-
-bool append_utf8_codepoint(std::string& out, const std::string& input, size_t& index) {
-  const unsigned char first = static_cast<unsigned char>(input[index]);
-  size_t length = 0;
-  uint32_t codepoint = 0;
-  if (first <= 0x7f) {
-    out.push_back(static_cast<char>(first));
-    ++index;
-    return true;
-  } else if ((first & 0xe0) == 0xc0) {
-    length = 2;
-    codepoint = first & 0x1f;
-  } else if ((first & 0xf0) == 0xe0) {
-    length = 3;
-    codepoint = first & 0x0f;
-  } else if ((first & 0xf8) == 0xf0) {
-    length = 4;
-    codepoint = first & 0x07;
-  } else {
-    return false;
-  }
-  if (index + length > input.size()) return false;
-  for (size_t offset = 1; offset < length; ++offset) {
-    const unsigned char next = static_cast<unsigned char>(input[index + offset]);
-    if ((next & 0xc0) != 0x80) return false;
-    codepoint = (codepoint << 6) | (next & 0x3f);
-  }
-  if ((length == 2 && codepoint < 0x80) || (length == 3 && codepoint < 0x800) ||
-      (length == 4 && codepoint < 0x10000) ||
-      (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff) {
-    return false;
-  }
-  out.append(input, index, length);
-  index += length;
-  return true;
-}
-
-std::string json_safe_path_text(const std::string& bytes) {
-  std::string out;
-  for (size_t i = 0; i < bytes.size();) {
-    size_t before = i;
-    if (append_utf8_codepoint(out, bytes, i)) continue;
-    const unsigned char bad = static_cast<unsigned char>(bytes[before]);
-    out += "\\x";
-    constexpr char kHex[] = "0123456789abcdef";
-    out.push_back(kHex[bad >> 4]);
-    out.push_back(kHex[bad & 0x0f]);
-    i = before + 1;
-  }
-  return out;
-}
-
 std::string readlink_string(const fs::path& path) {
   std::vector<char> buffer(4096);
   ssize_t n = readlink(path.c_str(), buffer.data(), buffer.size() - 1);
@@ -402,8 +343,7 @@ json git_json(const GitInfo& git) {
   if (git.in_repo) {
     j["root"] = git.root;
     j["head"] = git.head;
-    j["relative_path"] = json_safe_path_text(git.relative_path);
-    j["relative_path_bytes_hex"] = bytes_hex(git.relative_path);
+    j["relative_path"] = git.relative_path;
     j["tracked"] = git.tracked;
     j["dirty"] = git.dirty;
     j["ignored"] = git.ignored;
@@ -793,8 +733,7 @@ void write_manifest(const fs::path& out, const std::vector<std::string>& command
     json ops = json::array();
     for (const auto& op : rec.operations) ops.push_back(op);
     manifest["files"].push_back({
-        {"path", json_safe_path_text(rec.path)},
-        {"path_bytes_hex", bytes_hex(rec.path)},
+        {"path", rec.path},
         {"operations", ops},
         {"before", metadata_json(rec.before)},
         {"after", metadata_json(rec.after)},
tests/test_agent_snapshot.py
index 41e5667..e6f5756 100644
--- a/tests/test_agent_snapshot.py
+++ b/tests/test_agent_snapshot.py
@@ -60,9 +60,8 @@ class Snapshot:
         return self.file_by_manifest_path(target)
 
     def file_by_manifest_path(self, target: str):
-        target_bytes_hex = os.fsencode(target).hex()
         for item in self.manifest["files"]:
-            if item["path"] == target or item.get("path_bytes_hex") == target_bytes_hex:
+            if item["path"] == target:
                 return item
         raise AssertionError(f"{target} not present in snapshot")