Repositories / gitweb2.git

gitweb2.git

Clone (read-only): git clone http://git.guha-anderson.com/git/gitweb2.git

Branch

Serve repositories over read-only smart HTTP at /git/<repo>

Spawns `git http-backend` as CGI for /git/* requests so that clones over HTTP
work; the existing /repo/* paths still serve the HTML viewer. Each repo page
now shows its `git clone` URL.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Author
Arjun Guha <a.guha@northeastern.edu>
Date
2026-05-04 08:46:09 -0400
Commit
22487cce17e59aacdc9b6c980aae74c13e8ea407
README.md
index 2bb310b..a1374ef 100644
--- a/README.md
+++ b/README.md
@@ -49,3 +49,20 @@ paths and branch names that contain slashes are percent-encoded in URLs:
 /repo/homebox%2Focaml-git/main/README.md
 /repo/homebox%2Focaml-git/main/-/commits
 ```
+
+## URLs
+
+For a repository discovered with key `<repo>`, the server exposes two URLs:
+
+| Purpose                        | URL                                       |
+| ------------------------------ | ----------------------------------------- |
+| HTML viewer                    | `http://<host>:<port>/repo/<repo>`        |
+| Read-only clone (smart HTTP)   | `http://<host>:<port>/git/<repo>`         |
+
+The clone URL is also displayed at the top of every repository page. Cloning
+is implemented by spawning `git http-backend` as CGI, so the system `git`
+binary must be on `PATH`. Push is not supported.
+
+```sh
+git clone http://<host>:<port>/git/<repo>
+```
service/README.md
index f03c77a..90dae2d 100644
--- a/service/README.md
+++ b/service/README.md
@@ -21,4 +21,9 @@ journalctl --user-unit gitweb2.service -f
 
 The unit listens on **0.0.0.0:8002**, serves **`/home/git/repos`**, and uses this checkout at **`/media/external0/arjun/repos/homebox/gitweb2`**. **`ExecStart`** goes through **`bash -c`** so the **`--pygments`** command stays a single argument (systemd splits unquoted strings). Edit the unit file if any of that moves.
 
+For a repository with key `<repo>`, the URLs on this host are:
+
+- HTML viewer: `http://homebox:8002/repo/<repo>`
+- Read-only clone: `git clone http://homebox:8002/git/<repo>`
+
 Without [lingering](https://www.freedesktop.org/software/systemd/man/latest/loginctl.html), the user service stops when you log out (`loginctl enable-linger "$USER"` to keep it).
src/gitweb2.ml
index 6df0027..c860c34 100644
--- a/src/gitweb2.ml
+++ b/src/gitweb2.ml
@@ -1,8 +1,15 @@
 type response = {
   status : int;
+  headers : (string * string) list;
   body : string;
 }
 
+let html_headers : (string * string) list =
+  [ ("Cache-Control", "no-store"); ("Content-Type", "text/html; charset=utf-8") ]
+
+let html_response (status : int) (body : string) : response =
+  { status; headers = html_headers; body }
+
 type config = {
   root : string;
   host : string;
@@ -143,6 +150,7 @@ let page (title : string) (body : string) : string =
      .highlight, .highlight pre { font: 13px/1.45 ui-monospace, SFMono-Regular, Consolas, monospace; white-space: pre; }\n\
      .readme { margin-top: 24px; } .file-meta { padding: 10px 12px; border-bottom: 1px solid var(--line); background: #fff; }\n\
      .notice { padding: 16px; border: 1px solid var(--line); border-radius: 8px; background: var(--wash); }\n\
+     .clone { margin: 6px 0 12px; color: var(--muted); } .clone code { background: var(--wash); border: 1px solid var(--line); border-radius: 6px; padding: 2px 8px; font: 13px ui-monospace, SFMono-Regular, Consolas, monospace; color: var(--ink); user-select: all; }\n\
      @media (max-width: 640px) { main { padding: 20px 12px 40px; } h1 { font-size: 23px; } .kind { width: 78px; } }\n\
      </style>\n</head>\n<body><main>";
   Buffer.add_string buffer body;
@@ -220,12 +228,22 @@ let repo_summary (repo : repo_info) : repo_summary =
 let entries_for_display (entries : Ocaml_git.tree_entry list) : Ocaml_git.tree_entry list =
   List.sort (fun (a : Ocaml_git.tree_entry) b -> String.compare (String.lowercase_ascii a.name) (String.lowercase_ascii b.name)) entries
 
-let repo_header (repo : repo_info) (branch : string) (path : string) (branches : Ocaml_git.branch list) (selected : string) : string =
+let clone_url (clone_base : string option) (repo : repo_info) : string option =
+  Option.map (fun base -> base ^ "/git/" ^ url_encode repo.key) clone_base
+
+let repo_header ?(clone_base : string option) (repo : repo_info) (branch : string) (path : string)
+    (branches : Ocaml_git.branch list) (selected : string) : string =
   let buffer = Buffer.create 512 in
   let heading_title = if String.length path = 0 then repo.name else path in
   Printf.bprintf buffer
     [%i
-      {|<section class="repo-head"><p><a href="/">Repositories</a> / {%s html repo.name}</p><h1>{%s html heading_title}</h1><div class="toolbar"><span>Branch</span><nav>|} ];
+      {|<section class="repo-head"><p><a href="/">Repositories</a> / {%s html repo.name}</p><h1>{%s html heading_title}</h1>|} ];
+  (match clone_url clone_base repo with
+  | None -> ()
+  | Some url ->
+      let command = "git clone " ^ url in
+      Printf.bprintf buffer [%i {|<p class="clone"><span>Clone (read-only):</span> <code>{%s html command}</code></p>|} ]);
+  Printf.bprintf buffer [%i {|<div class="toolbar"><span>Branch</span><nav>|} ];
   List.iter
     (fun (item : Ocaml_git.branch) ->
       let active = if item.name = branch then " class=\"active\"" else "" in
@@ -241,12 +259,12 @@ let repo_header (repo : repo_info) (branch : string) (path : string) (branches :
   Printf.bprintf buffer [%i {|</div></section>|} ];
   Buffer.contents buffer
 
-let tree_view ~(pygments_command : string) (repo : repo_info) (git : Ocaml_git.t) (branch : string) (path : string)
-    (branches : Ocaml_git.branch list) : string =
+let tree_view ?(clone_base : string option) ~(pygments_command : string) (repo : repo_info) (git : Ocaml_git.t)
+    (branch : string) (path : string) (branches : Ocaml_git.branch list) : string =
   let commit = Ocaml_git.branch_commit git branch in
   let listing = Ocaml_git.tree ~commit ~path git () in
   let buffer = Buffer.create 4096 in
-  Buffer.add_string buffer (repo_header repo branch path branches "code");
+  Buffer.add_string buffer (repo_header ?clone_base repo branch path branches "code");
   Printf.bprintf buffer [%i {|<div class="panel"><table><tbody>|} ];
   if String.length path <> 0 then (
     let parent = try Filename.dirname path with _ -> "" in
@@ -275,20 +293,20 @@ let tree_view ~(pygments_command : string) (repo : repo_info) (git : Ocaml_git.t
       else ());
   Buffer.contents buffer
 
-let blob_view ~(pygments_command : string) (repo : repo_info) (blob : Ocaml_git.blob) (branch : string) (path : string)
-    (branches : Ocaml_git.branch list) : string =
+let blob_view ?(clone_base : string option) ~(pygments_command : string) (repo : repo_info) (blob : Ocaml_git.blob)
+    (branch : string) (path : string) (branches : Ocaml_git.branch list) : string =
   let buffer = Buffer.create 4096 in
-  Buffer.add_string buffer (repo_header repo branch path branches "code");
+  Buffer.add_string buffer (repo_header ?clone_base repo branch path branches "code");
   Printf.bprintf buffer [%i {|<div class="panel file"><div class="file-meta">{%d String.length blob.Ocaml_git.bytes} bytes &middot; {%s html (String.sub blob.id 0 (min 12 (String.length blob.id)))}</div>|} ];
   if Ocaml_git.blob_is_binary blob then Buffer.add_string buffer "<p class=\"notice\">Binary file</p>"
   else Buffer.add_string buffer (file_render ~pygments_command ~file_path:(join_path repo.path path) (Ocaml_git.blob_text blob));
   Buffer.add_string buffer "</div>";
   Buffer.contents buffer
 
-let commits_page (repo : repo_info) (branch : string) : string =
+let commits_page ?(clone_base : string option) (repo : repo_info) (branch : string) : string =
   Ocaml_git.with_repo repo.path @@ fun git ->
   let buffer = Buffer.create 4096 in
-  Buffer.add_string buffer (repo_header repo branch "" (local_branches git) "history");
+  Buffer.add_string buffer (repo_header ?clone_base repo branch "" (local_branches git) "history");
   Printf.bprintf buffer [%i {|<ol class="commits">|} ];
   List.iter
     (fun (commit : Ocaml_git.commit) ->
@@ -297,23 +315,27 @@ let commits_page (repo : repo_info) (branch : string) : string =
   Printf.bprintf buffer [%i {|</ol>|} ];
   page (branch ^ " history") (Buffer.contents buffer)
 
-let repo_page ~(pygments_command : string) (repo : repo_info) (branch : string) (path : string) : string =
+let repo_page ?(clone_base : string option) ~(pygments_command : string) (repo : repo_info) (branch : string)
+    (path : string) : string =
   Ocaml_git.with_repo repo.path @@ fun git ->
   let branches = local_branches git in
   let commit = Ocaml_git.branch_commit git branch in
   let entry = if path = "" then None else try Some (Ocaml_git.tree_entry ~commit git path) with _ -> None in
   let content =
     match entry with
-    | None | Some { kind = Tree; _ } -> tree_view ~pygments_command repo git branch path branches
-    | Some _ -> blob_view ~pygments_command repo (Ocaml_git.blob ~commit git path) branch path branches
+    | None | Some { kind = Tree; _ } -> tree_view ?clone_base ~pygments_command repo git branch path branches
+    | Some _ ->
+        blob_view ?clone_base ~pygments_command repo (Ocaml_git.blob ~commit git path) branch path branches
   in
   page repo.name content
 
-let not_found (message : string) : response = { status = 404; body = page "Not found" ("<p class=\"notice\">" ^ message ^ "</p>") }
+let not_found (message : string) : response =
+  html_response 404 (page "Not found" ("<p class=\"notice\">" ^ message ^ "</p>"))
 
 (* Route handling is kept pure so tests can cover all page decisions without
    opening sockets; [start] is only responsible for adapting this to HTTP. *)
-let route ?(pygments_command : string = default_pygments_command) ~(root : string) (raw_path : string) : response =
+let route ?(pygments_command : string = default_pygments_command) ?(clone_base : string option)
+    ~(root : string) (raw_path : string) : response =
   let repos = discover_repos root in
   let repo_by_key key = List.find_opt (fun repo -> repo.key = key) repos in
   let parts = split_path raw_path in
@@ -333,7 +355,7 @@ let route ?(pygments_command : string = default_pygments_command) ~(root : strin
               Printf.bprintf buffer [%i {|</span><small>{%s html summary.summary}</small></a></li>|} ])
             repos;
           Buffer.add_string buffer "</ol>");
-        { status = 200; body = page "Repositories" (Buffer.contents buffer) }
+        html_response 200 (page "Repositories" (Buffer.contents buffer))
     | "repo" :: repo_key :: rest -> (
         let repo_key = url_decode repo_key in
         match repo_by_key repo_key with
@@ -350,15 +372,163 @@ let route ?(pygments_command : string = default_pygments_command) ~(root : strin
                       | b :: _ -> b.name
                       | [] -> "main")
                 in
-                { status = 200; body = repo_page ~pygments_command repo branch "" }
-            | branch :: "-" :: "commits" :: [] -> { status = 200; body = commits_page repo (url_decode branch) }
+                html_response 200 (repo_page ?clone_base ~pygments_command repo branch "")
+            | branch :: "-" :: "commits" :: [] ->
+                html_response 200 (commits_page ?clone_base repo (url_decode branch))
             | branch :: path_parts ->
                 let path = path_parts |> List.map url_decode |> String.concat "/" in
-                { status = 200; body = repo_page ~pygments_command repo (url_decode branch) path }))
+                html_response 200 (repo_page ?clone_base ~pygments_command repo (url_decode branch) path)))
     | _ -> not_found ("No route for " ^ html raw_path)
   with
-  | Ocaml_git.Git_error message -> { status = 404; body = page "Not found" ("<p class=\"notice\">" ^ html message ^ "</p>") }
-  | exn -> { status = 500; body = page "Server error" ("<p class=\"notice\">" ^ html (Printexc.to_string exn) ^ "</p>") }
+  | Ocaml_git.Git_error message ->
+      html_response 404 (page "Not found" ("<p class=\"notice\">" ^ html message ^ "</p>"))
+  | exn ->
+      html_response 500 (page "Server error" ("<p class=\"notice\">" ^ html (Printexc.to_string exn) ^ "</p>"))
+
+(* Working trees expose their `.git/` directory; bare repositories are passed
+   through directly.  http-backend wants GIT_PROJECT_ROOT plus a PATH_INFO that
+   begins with the repository's directory name. *)
+let split_for_http_backend (repo : repo_info) : string * string =
+  if Sys.file_exists (join_path repo.path ".git") then (repo.path, ".git")
+  else (Filename.dirname repo.path, Filename.basename repo.path)
+
+let cgi_split_headers_body (data : string) : (string * string) list * string =
+  let len = String.length data in
+  let rec find i =
+    if i + 1 >= len then None
+    else if data.[i] = '\n' && data.[i + 1] = '\n' then Some (i, i + 2)
+    else if i + 3 < len && data.[i] = '\r' && data.[i + 1] = '\n' && data.[i + 2] = '\r' && data.[i + 3] = '\n'
+    then Some (i, i + 4)
+    else find (i + 1)
+  in
+  match find 0 with
+  | None -> ([], data)
+  | Some (header_end, body_start) ->
+      let headers_str = String.sub data 0 header_end in
+      let body = String.sub data body_start (len - body_start) in
+      let strip_cr s =
+        let l = String.length s in
+        if l > 0 && s.[l - 1] = '\r' then String.sub s 0 (l - 1) else s
+      in
+      let lines = String.split_on_char '\n' headers_str |> List.map strip_cr in
+      let headers =
+        List.filter_map
+          (fun line ->
+            match String.index_opt line ':' with
+            | None -> None
+            | Some idx ->
+                let key = String.trim (String.sub line 0 idx) in
+                let value = String.trim (String.sub line (idx + 1) (String.length line - idx - 1)) in
+                if key = "" then None else Some (key, value))
+          lines
+      in
+      (headers, body)
+
+let cgi_extract_status (headers : (string * string) list) : int * (string * string) list =
+  let is_status (key, _) = String.lowercase_ascii key = "status" in
+  match List.find_opt is_status headers with
+  | None -> (200, headers)
+  | Some (_, raw) ->
+      let value = match String.split_on_char ' ' raw with first :: _ -> first | [] -> "" in
+      let code = int_of_string_opt value |> Option.value ~default:200 in
+      (code, List.filter (fun h -> not (is_status h)) headers)
+
+let read_fd_all (fd : Unix.file_descr) : string =
+  let buffer = Buffer.create 4096 in
+  let bytes = Bytes.create 4096 in
+  let rec loop () =
+    match Unix.read fd bytes 0 (Bytes.length bytes) with
+    | 0 -> ()
+    | n ->
+        Buffer.add_subbytes buffer bytes 0 n;
+        loop ()
+    | exception Unix.Unix_error (Unix.EINTR, _, _) -> loop ()
+  in
+  loop ();
+  Buffer.contents buffer
+
+let write_fd_all (fd : Unix.file_descr) (data : string) : unit =
+  let len = String.length data in
+  let rec loop offset =
+    if offset >= len then ()
+    else
+      match Unix.write_substring fd data offset (len - offset) with
+      | n -> loop (offset + n)
+      | exception Unix.Unix_error (Unix.EINTR, _, _) -> loop offset
+  in
+  loop 0
+
+(* Spawns `git http-backend` as a CGI process.  stdin is fully written before
+   stdout is read, which is fine because the upload-pack request body is small
+   (negotiation only) and http-backend consumes it before producing output. *)
+let run_http_backend ~(env : string array) ~(stdin_data : string) : int * string =
+  let in_r, in_w = Unix.pipe () in
+  let out_r, out_w = Unix.pipe () in
+  let err_r, err_w = Unix.pipe () in
+  Unix.set_close_on_exec in_w;
+  Unix.set_close_on_exec out_r;
+  Unix.set_close_on_exec err_r;
+  let pid = Unix.create_process_env "git" [| "git"; "http-backend" |] env in_r out_w err_w in
+  Unix.close in_r;
+  Unix.close out_w;
+  Unix.close err_w;
+  (try write_fd_all in_w stdin_data with _ -> ());
+  (try Unix.close in_w with _ -> ());
+  let output = read_fd_all out_r in
+  Unix.close out_r;
+  let _stderr = read_fd_all err_r in
+  Unix.close err_r;
+  let _, status = Unix.waitpid [] pid in
+  let exit_code = match status with Unix.WEXITED n -> n | _ -> -1 in
+  (exit_code, output)
+
+let plain_response (status : int) (message : string) : response =
+  { status; headers = [ ("Content-Type", "text/plain; charset=utf-8") ]; body = message }
+
+(* Smart-HTTP serving for `git clone`.  Read-only: write paths (receive-pack)
+   are not enabled because the spawned http-backend is given no env to allow
+   them. *)
+let git_serve ~(root : string) ~(method_ : string) ~(content_type : string) ~(content_length : string)
+    ~(query_string : string) ~(remote_addr : string) ~(request_body : string) (raw_path : string) : response =
+  match split_path raw_path with
+  | "git" :: encoded_key :: rest -> (
+      let key = url_decode encoded_key in
+      let repos = discover_repos root in
+      match List.find_opt (fun (r : repo_info) -> r.key = key) repos with
+      | None -> plain_response 404 "no such repository\n"
+      | Some repo ->
+          let project_root, base = split_for_http_backend repo in
+          let rest_decoded = List.map url_decode rest in
+          let rest_str = String.concat "/" rest_decoded in
+          let path_info = "/" ^ base ^ if rest_str = "" then "" else "/" ^ rest_str in
+          let path_env = Sys.getenv_opt "PATH" |> Option.value ~default:"/usr/local/bin:/usr/bin:/bin" in
+          let home_env = Sys.getenv_opt "HOME" |> Option.value ~default:"/tmp" in
+          let env =
+            [|
+              "GATEWAY_INTERFACE=CGI/1.1";
+              "SERVER_PROTOCOL=HTTP/1.1";
+              "REQUEST_METHOD=" ^ method_;
+              "GIT_PROJECT_ROOT=" ^ project_root;
+              "PATH_INFO=" ^ path_info;
+              "QUERY_STRING=" ^ query_string;
+              "CONTENT_TYPE=" ^ content_type;
+              "CONTENT_LENGTH=" ^ content_length;
+              "REMOTE_ADDR=" ^ remote_addr;
+              "GIT_HTTP_EXPORT_ALL=1";
+              "PATH=" ^ path_env;
+              "HOME=" ^ home_env;
+            |]
+          in
+          let exit_code, output = run_http_backend ~env ~stdin_data:request_body in
+          let headers, body = cgi_split_headers_body output in
+          let status, header_list = cgi_extract_status headers in
+          let status = if exit_code <> 0 && status = 200 then 500 else status in
+          let header_list =
+            if List.exists (fun (k, _) -> String.lowercase_ascii k = "content-type") header_list then header_list
+            else ("Content-Type", "application/octet-stream") :: header_list
+          in
+          { status; headers = header_list; body })
+  | _ -> plain_response 404 "not found\n"
 
 let start (config : config) : unit =
   Logs.set_reporter (Logs_fmt.reporter ());
@@ -381,19 +551,44 @@ let start (config : config) : unit =
   Printf.printf "gitweb2 serving repositories from %s at http://%s:%d/\n%!" config.root config.host config.port;
   let server =
     Cohttp_eio.Server.make
-      ~callback:(fun conn request _body ->
+      ~callback:(fun conn request body ->
         let client = snd (fst conn) in
         let started = Unix.gettimeofday () in
         let resource = Cohttp.Request.resource request in
         let meth = Cohttp.Request.meth request in
         let version = Cohttp.Request.version request in
-        let response = route ~pygments_command:config.pygments_command ~root:config.root resource in
+        let uri = Uri.of_string resource in
+        let path = Uri.path uri in
+        let query = Uri.verbatim_query uri |> Option.value ~default:"" in
+        let req_headers = Cohttp.Request.headers request in
+        let header value = Cohttp.Header.get req_headers value |> Option.value ~default:"" in
+        let host = header "host" in
+        let clone_base = if host = "" then None else Some ("http://" ^ host) in
+        let response =
+          if String.starts_with ~prefix:"/git/" path then
+            let request_body =
+              try Eio.Buf_read.of_flow ~max_size:max_int body |> Eio.Buf_read.take_all with _ -> ""
+            in
+            let remote_addr =
+              match client with
+              | `Tcp (ip, _) -> Format.asprintf "%a" Eio.Net.Ipaddr.pp ip
+              | _ -> ""
+            in
+            git_serve ~root:config.root
+              ~method_:(Cohttp.Code.string_of_method meth)
+              ~content_type:(header "content-type")
+              ~content_length:(header "content-length")
+              ~query_string:query ~remote_addr ~request_body path
+          else route ~pygments_command:config.pygments_command ?clone_base ~root:config.root path
+        in
         let elapsed_ms = (Unix.gettimeofday () -. started) *. 1000. in
         let size = String.length response.body in
         Logs.info ~src:access_src (fun m ->
             m "%a \"%s %s %s\" %d %d %.1fms" Eio.Net.Sockaddr.pp client (Cohttp.Code.string_of_method meth)
               resource (Cohttp.Code.string_of_version version) response.status size elapsed_ms);
-        let headers = Cohttp.Header.init_with "Cache-Control" "no-store" |> fun h -> Cohttp.Header.add h "Content-Type" "text/html; charset=utf-8" in
+        let headers =
+          List.fold_left (fun h (k, v) -> Cohttp.Header.add h k v) (Cohttp.Header.init ()) response.headers
+        in
         Cohttp_eio.Server.respond_string ~headers ~status:(`Code response.status) ~body:response.body ())
       ()
   in
src/gitweb2.mli
index 758735f..ace0a02 100644
--- a/src/gitweb2.mli
+++ b/src/gitweb2.mli
@@ -1,5 +1,6 @@
 type response = {
   status : int;
+  headers : (string * string) list;
   body : string;
 }
 
@@ -14,7 +15,7 @@ val default_host : string
 val default_port : int
 val default_pygments_command : string
 val parse_args : string array -> (config, string) result
-val route : ?pygments_command:string -> root:string -> string -> response
+val route : ?pygments_command:string -> ?clone_base:string -> root:string -> string -> response
 val start : config -> unit
 val file_render : ?pygments_command:string -> file_path:string -> string -> string
 val run : string array -> unit