about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-05-01T20·10+0300
committerclbot <clbot@tvl.fyi>2024-05-02T13·28+0000
commit4033d4c50f833f56c07da519827b331a521f8d0e (patch)
treeb26659d758fa731d906e88cb5100f5a3ce046cef
parentc0d54393627748187c05bf0c7dd59cc16efd7f61 (diff)
feat(tvix/castore/path): implement PathBuf::from_host_path r/8065
This allows converting from std::path::Path to castore PathBufs.

A flag is present to control .. canonicalization, and the usual caveats
about platform-specific differences apply.

Currently only added for unix, we'll carefully consider other platforms
on a case-by-case basis.

Change-Id: If289a92f75a2e5c3eec132b6a91a28d225fc1989
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11577
Reviewed-by: edef <edef@edef.eu>
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
-rw-r--r--tvix/castore/src/path.rs106
1 files changed, 105 insertions, 1 deletions
diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path.rs
index 68e4b75ec8..fcc2bd01fb 100644
--- a/tvix/castore/src/path.rs
+++ b/tvix/castore/src/path.rs
@@ -227,6 +227,71 @@ impl PathBuf {
         PathBuf { inner: bytes }
     }
 
+    /// Convert from a [&std::path::Path] to [Self].
+    ///
+    /// - Self uses `/` as path separator.
+    /// - Absolute paths are always rejected, are are these with custom prefixes.
+    /// - Repeated separators are deduplicated.
+    /// - Occurrences of `.` are normalized away.
+    /// - A trailing slash is normalized away.
+    ///
+    /// A `canonicalize_dotdot` boolean controls whether `..` will get
+    /// canonicalized if possible, or should return an error.
+    ///
+    /// For more exotic paths, this conversion might produce different results
+    /// on different platforms, due to different underlying byte
+    /// representations, which is why it's restricted to unix for now.
+    #[cfg(unix)]
+    pub fn from_host_path(
+        host_path: &std::path::Path,
+        canonicalize_dotdot: bool,
+    ) -> Result<Self, std::io::Error> {
+        let mut p = PathBuf::with_capacity(host_path.as_os_str().len());
+
+        for component in host_path.components() {
+            match component {
+                std::path::Component::Prefix(_) | std::path::Component::RootDir => {
+                    return Err(std::io::Error::new(
+                        std::io::ErrorKind::InvalidData,
+                        "found disallowed prefix or rootdir",
+                    ))
+                }
+                std::path::Component::CurDir => continue, // ignore
+                std::path::Component::ParentDir => {
+                    if canonicalize_dotdot {
+                        // Try popping the last element from the path being constructed.
+                        // FUTUREWORK: pop method?
+                        p = p
+                            .parent()
+                            .ok_or_else(|| {
+                                std::io::Error::new(
+                                    std::io::ErrorKind::InvalidData,
+                                    "found .. going too far up",
+                                )
+                            })?
+                            .to_owned();
+                    } else {
+                        return Err(std::io::Error::new(
+                            std::io::ErrorKind::InvalidData,
+                            "found disallowed ..",
+                        ));
+                    }
+                }
+                std::path::Component::Normal(s) => {
+                    // append the new component to the path being constructed.
+                    p.try_push(s.as_encoded_bytes()).map_err(|_| {
+                        std::io::Error::new(
+                            std::io::ErrorKind::InvalidData,
+                            "encountered invalid node in sub_path component",
+                        )
+                    })?
+                }
+            }
+        }
+
+        Ok(p)
+    }
+
     pub fn into_boxed_path(self) -> Box<Path> {
         // SAFETY: Box<[u8]> and Box<Path> have the same representation,
         // and PathBuf always contains a valid Path.
@@ -257,7 +322,7 @@ mod test {
     // but maybe we want to disallow constructing paths like this as it's a
     // bad idea.
     #[case::cursed("C:\\a/b", 2)]
-    #[case::cursed("\\tvix-store", 1)]
+    #[case::cursed("\\\\tvix-store", 1)]
     pub fn from_str(#[case] s: &str, #[case] num_components: usize) {
         let p: PathBuf = s.parse().expect("must parse");
 
@@ -339,4 +404,43 @@ mod test {
                 .collect::<Vec<_>>()
         );
     }
+
+    #[rstest]
+    #[case::empty("", "", false)]
+    #[case::path("a", "a", false)]
+    #[case::path2("a/b", "a/b", false)]
+    #[case::double_slash_middle("a//b", "a/b", false)]
+    #[case::dot(".", "", false)]
+    #[case::dot_start("./a/b", "a/b", false)]
+    #[case::dot_middle("a/./b", "a/b", false)]
+    #[case::dot_end("a/b/.", "a/b", false)]
+    #[case::trailing_slash("a/b/", "a/b", false)]
+    #[case::dotdot_canonicalize("a/..", "", true)]
+    #[case::dotdot_canonicalize2("a/../b", "b", true)]
+    #[cfg_attr(unix, case::faux_prefix("\\\\nix-store", "\\\\nix-store", false))]
+    #[cfg_attr(unix, case::faux_letter("C:\\foo.txt", "C:\\foo.txt", false))]
+    pub fn from_host_path(
+        #[case] host_path: std::path::PathBuf,
+        #[case] exp_path: PathBuf,
+        #[case] canonicalize_dotdot: bool,
+    ) {
+        let p = PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect("must succeed");
+
+        assert_eq!(exp_path, p);
+    }
+
+    #[rstest]
+    #[case::absolute("/", false)]
+    #[case::dotdot_root("..", false)]
+    #[case::dotdot_root_canonicalize("..", true)]
+    #[case::dotdot_root_no_canonicalize("a/..", false)]
+    #[case::invalid_name("foo/bar\0", false)]
+    // #[cfg_attr(windows, case::prefix("\\\\nix-store", false))]
+    // #[cfg_attr(windows, case::letter("C:\\foo.txt", false))]
+    pub fn from_host_path_fail(
+        #[case] host_path: std::path::PathBuf,
+        #[case] canonicalize_dotdot: bool,
+    ) {
+        PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect_err("must fail");
+    }
 }