Repositories / agent-snapshot.git
agent-snapshot.git
Clone (read-only): git clone http://git.guha-anderson.com/git/agent-snapshot.git
@@ -130,6 +130,65 @@ std::string errno_message(const std::string& prefix) { return prefix + ": " + std::strerror(errno); } +std::string bytes_hex(const std::string& input) { + std::ostringstream out; + for (unsigned char c : input) { + out << std::hex << std::setw(2) << std::setfill('0') << static_cast<int>(c); + } + return out.str(); +} + +bool append_utf8_codepoint(std::string& out, const std::string& input, size_t& index) { + const unsigned char first = static_cast<unsigned char>(input[index]); + size_t length = 0; + uint32_t codepoint = 0; + if (first <= 0x7f) { + out.push_back(static_cast<char>(first)); + ++index; + return true; + } else if ((first & 0xe0) == 0xc0) { + length = 2; + codepoint = first & 0x1f; + } else if ((first & 0xf0) == 0xe0) { + length = 3; + codepoint = first & 0x0f; + } else if ((first & 0xf8) == 0xf0) { + length = 4; + codepoint = first & 0x07; + } else { + return false; + } + if (index + length > input.size()) return false; + for (size_t offset = 1; offset < length; ++offset) { + const unsigned char next = static_cast<unsigned char>(input[index + offset]); + if ((next & 0xc0) != 0x80) return false; + codepoint = (codepoint << 6) | (next & 0x3f); + } + if ((length == 2 && codepoint < 0x80) || (length == 3 && codepoint < 0x800) || + (length == 4 && codepoint < 0x10000) || + (codepoint >= 0xd800 && codepoint <= 0xdfff) || codepoint > 0x10ffff) { + return false; + } + out.append(input, index, length); + index += length; + return true; +} + +std::string json_safe_path_text(const std::string& bytes) { + std::string out; + for (size_t i = 0; i < bytes.size();) { + size_t before = i; + if (append_utf8_codepoint(out, bytes, i)) continue; + const unsigned char bad = static_cast<unsigned char>(bytes[before]); + out += "\\x"; + constexpr char kHex[] = "0123456789abcdef"; + out.push_back(kHex[bad >> 4]); + out.push_back(kHex[bad & 0x0f]); + i = before + 1; + } + return out; +} + std::string readlink_string(const fs::path& path) { std::vector<char> buffer(4096); ssize_t n = readlink(path.c_str(), buffer.data(), buffer.size() - 1); @@ -343,7 +402,8 @@ json git_json(const GitInfo& git) { if (git.in_repo) { j["root"] = git.root; j["head"] = git.head; - j["relative_path"] = git.relative_path; + j["relative_path"] = json_safe_path_text(git.relative_path); + j["relative_path_bytes_hex"] = bytes_hex(git.relative_path); j["tracked"] = git.tracked; j["dirty"] = git.dirty; j["ignored"] = git.ignored; @@ -733,7 +793,8 @@ void write_manifest(const fs::path& out, const std::vector<std::string>& command json ops = json::array(); for (const auto& op : rec.operations) ops.push_back(op); manifest["files"].push_back({ - {"path", rec.path}, + {"path", json_safe_path_text(rec.path)}, + {"path_bytes_hex", bytes_hex(rec.path)}, {"operations", ops}, {"before", metadata_json(rec.before)}, {"after", metadata_json(rec.after)},
@@ -60,8 +60,9 @@ class Snapshot: return self.file_by_manifest_path(target) def file_by_manifest_path(self, target: str): + target_bytes_hex = os.fsencode(target).hex() for item in self.manifest["files"]: - if item["path"] == target: + if item["path"] == target or item.get("path_bytes_hex") == target_bytes_hex: return item raise AssertionError(f"{target} not present in snapshot")