about summary refs log tree commit diff
path: root/tvix
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-10-12T20·26+0200
committerflokli <flokli@flokli.de>2023-10-12T21·05+0000
commit9b7629826f45af70ed2668353ff4d28446cd1417 (patch)
treee3c0b9b0ec1dde6ff048ef5655caeaa89bdccd6c /tvix
parentc9e90b4dd79d113514a853abd298752d87860f98 (diff)
refactor(tvix/castore): factor out node checks r/6794
Implement `validate()` on `node::Node`, and call it from PathInfo's
validate() too. Node-related errors are moved to a ValidateNodeError
error type.

This additionally adds some more validations for symlink targets (they
must not be empty, and not contain null bytes).

Change-Id: Ib9b89f1c9c795e868a1533281239bc8a36d97c5d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9715
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: edef <edef@edef.eu>
Diffstat (limited to 'tvix')
-rw-r--r--tvix/castore/src/proto/mod.rs90
-rw-r--r--tvix/castore/src/proto/tests/directory.rs16
-rw-r--r--tvix/store/src/proto/mod.rs38
-rw-r--r--tvix/store/src/proto/tests/pathinfo.rs31
4 files changed, 108 insertions, 67 deletions
diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs
index 66ed5b0f1f..ae00d4f158 100644
--- a/tvix/castore/src/proto/mod.rs
+++ b/tvix/castore/src/proto/mod.rs
@@ -2,7 +2,6 @@
 // https://github.com/hyperium/tonic/issues/1056
 use bstr::ByteSlice;
 use std::{collections::HashSet, iter::Peekable};
-use thiserror::Error;
 
 use prost::Message;
 
@@ -26,7 +25,7 @@ pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix
 mod tests;
 
 /// Errors that can occur during the validation of Directory messages.
-#[derive(Debug, PartialEq, Eq, Error)]
+#[derive(Debug, PartialEq, Eq, thiserror::Error)]
 pub enum ValidateDirectoryError {
     /// Elements are not in sorted order
     #[error("{:?} is not sorted", .0.as_bstr())]
@@ -34,28 +33,37 @@ pub enum ValidateDirectoryError {
     /// Multiple elements with the same name encountered
     #[error("{:?} is a duplicate name", .0.as_bstr())]
     DuplicateName(Vec<u8>),
-    /// Invalid name encountered
-    #[error("Invalid name in {:?}", .0.as_bstr())]
-    InvalidName(Vec<u8>),
+    /// Invalid node
+    #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())]
+    InvalidNode(Vec<u8>, ValidateNodeError),
+    #[error("Total size exceeds u32::MAX")]
+    SizeOverflow,
+}
+
+/// Errors that occur during Node validation
+#[derive(Debug, PartialEq, Eq, thiserror::Error)]
+pub enum ValidateNodeError {
     /// Invalid digest length encountered
     #[error("Invalid Digest length: {0}")]
     InvalidDigestLen(usize),
-    #[error("Total size exceeds u32::MAX")]
-    SizeOverflow,
+    /// Invalid name encountered
+    #[error("Invalid name")]
+    InvalidName(),
+    /// Invalid symlink target
+    #[error("Invalid symlink target: {}", .0.as_bstr())]
+    InvalidSymlinkTarget(Vec<u8>),
 }
 
-/// Checks a Node name for validity as an intermediate node, and returns an
-/// error that's generated from the supplied constructor.
-///
+/// Checks a Node name for validity as an intermediate node.
 /// We disallow slashes, null bytes, '.', '..' and the empty string.
-fn validate_node_name<E>(name: &[u8], err: fn(Vec<u8>) -> E) -> Result<(), E> {
+fn validate_node_name(name: &[u8]) -> Result<(), ValidateNodeError> {
     if name.is_empty()
         || name == b".."
         || name == b"."
         || name.contains(&0x00)
         || name.contains(&b'/')
     {
-        return Err(err(name.to_vec()));
+        Err(ValidateNodeError::InvalidName())?;
     }
     Ok(())
 }
@@ -103,6 +111,39 @@ impl node::Node {
             node::Node::Symlink(n) => node::Node::Symlink(SymlinkNode { name, ..n }),
         }
     }
+    pub fn validate(&self) -> Result<(), ValidateNodeError> {
+        match self {
+            // for a directory root node, ensure the digest has the appropriate size.
+            node::Node::Directory(directory_node) => {
+                if TryInto::<B3Digest>::try_into(directory_node.digest.clone()).is_err() {
+                    Err(ValidateNodeError::InvalidDigestLen(
+                        directory_node.digest.len(),
+                    ))?;
+                };
+                validate_node_name(&directory_node.name)?;
+            }
+            // for a file root node, ensure the digest has the appropriate size.
+            node::Node::File(file_node) => {
+                if TryInto::<B3Digest>::try_into(file_node.digest.clone()).is_err() {
+                    Err(ValidateNodeError::InvalidDigestLen(file_node.digest.len()))?;
+                };
+                validate_node_name(&file_node.name)?;
+            }
+            // ensure the symlink target is not empty and doesn't contain null bytes.
+            node::Node::Symlink(symlink_node) => {
+                if symlink_node.target.len() == 0 {
+                    Err(ValidateNodeError::InvalidSymlinkTarget(vec![]))?;
+                }
+                if symlink_node.target.contains(&b'\0') {
+                    Err(ValidateNodeError::InvalidSymlinkTarget(
+                        symlink_node.target.to_vec(),
+                    ))?;
+                }
+                validate_node_name(&symlink_node.name)?;
+            }
+        }
+        Ok(())
+    }
 }
 
 /// Accepts a name, and a mutable reference to the previous name.
@@ -183,13 +224,11 @@ impl Directory {
 
         // check directories
         for directory_node in &self.directories {
-            validate_node_name(&directory_node.name, ValidateDirectoryError::InvalidName)?;
-            // ensure the digest has the appropriate size.
-            if TryInto::<B3Digest>::try_into(directory_node.digest.clone()).is_err() {
-                return Err(ValidateDirectoryError::InvalidDigestLen(
-                    directory_node.digest.len(),
-                ));
-            }
+            node::Node::Directory(directory_node.clone())
+                .validate()
+                .map_err(|e| {
+                    ValidateDirectoryError::InvalidNode(directory_node.name.to_vec(), e)
+                })?;
 
             update_if_lt_prev(&mut last_directory_name, &directory_node.name)?;
             insert_once(&mut seen_names, &directory_node.name)?;
@@ -197,12 +236,9 @@ impl Directory {
 
         // check files
         for file_node in &self.files {
-            validate_node_name(&file_node.name, ValidateDirectoryError::InvalidName)?;
-            if TryInto::<B3Digest>::try_into(file_node.digest.clone()).is_err() {
-                return Err(ValidateDirectoryError::InvalidDigestLen(
-                    file_node.digest.len(),
-                ));
-            }
+            node::Node::File(file_node.clone())
+                .validate()
+                .map_err(|e| ValidateDirectoryError::InvalidNode(file_node.name.to_vec(), e))?;
 
             update_if_lt_prev(&mut last_file_name, &file_node.name)?;
             insert_once(&mut seen_names, &file_node.name)?;
@@ -210,7 +246,9 @@ impl Directory {
 
         // check symlinks
         for symlink_node in &self.symlinks {
-            validate_node_name(&symlink_node.name, ValidateDirectoryError::InvalidName)?;
+            node::Node::Symlink(symlink_node.clone())
+                .validate()
+                .map_err(|e| ValidateDirectoryError::InvalidNode(symlink_node.name.to_vec(), e))?;
 
             update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?;
             insert_once(&mut seen_names, &symlink_node.name)?;
diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs
index d46e8cb6c3..69d9b5b4ef 100644
--- a/tvix/castore/src/proto/tests/directory.rs
+++ b/tvix/castore/src/proto/tests/directory.rs
@@ -1,4 +1,6 @@
-use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError};
+use crate::proto::{
+    Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError, ValidateNodeError,
+};
 use lazy_static::lazy_static;
 
 lazy_static! {
@@ -171,7 +173,7 @@ fn validate_invalid_names() {
             ..Default::default()
         };
         match d.validate().expect_err("must fail") {
-            ValidateDirectoryError::InvalidName(n) => {
+            ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
                 assert_eq!(n, b"")
             }
             _ => panic!("unexpected error"),
@@ -188,7 +190,7 @@ fn validate_invalid_names() {
             ..Default::default()
         };
         match d.validate().expect_err("must fail") {
-            ValidateDirectoryError::InvalidName(n) => {
+            ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
                 assert_eq!(n, b".")
             }
             _ => panic!("unexpected error"),
@@ -206,7 +208,7 @@ fn validate_invalid_names() {
             ..Default::default()
         };
         match d.validate().expect_err("must fail") {
-            ValidateDirectoryError::InvalidName(n) => {
+            ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
                 assert_eq!(n, b"..")
             }
             _ => panic!("unexpected error"),
@@ -222,7 +224,7 @@ fn validate_invalid_names() {
             ..Default::default()
         };
         match d.validate().expect_err("must fail") {
-            ValidateDirectoryError::InvalidName(n) => {
+            ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
                 assert_eq!(n, b"\x00")
             }
             _ => panic!("unexpected error"),
@@ -238,7 +240,7 @@ fn validate_invalid_names() {
             ..Default::default()
         };
         match d.validate().expect_err("must fail") {
-            ValidateDirectoryError::InvalidName(n) => {
+            ValidateDirectoryError::InvalidNode(n, ValidateNodeError::InvalidName()) => {
                 assert_eq!(n, b"foo/bar")
             }
             _ => panic!("unexpected error"),
@@ -257,7 +259,7 @@ fn validate_invalid_digest() {
         ..Default::default()
     };
     match d.validate().expect_err("must fail") {
-        ValidateDirectoryError::InvalidDigestLen(n) => {
+        ValidateDirectoryError::InvalidNode(_, ValidateNodeError::InvalidDigestLen(n)) => {
             assert_eq!(n, 2)
         }
         _ => panic!("unexpected error"),
diff --git a/tvix/store/src/proto/mod.rs b/tvix/store/src/proto/mod.rs
index 033fb79dbb..7921adc406 100644
--- a/tvix/store/src/proto/mod.rs
+++ b/tvix/store/src/proto/mod.rs
@@ -3,10 +3,7 @@ use data_encoding::BASE64;
 // https://github.com/hyperium/tonic/issues/1056
 use nix_compat::store_path::{self, StorePath};
 use thiserror::Error;
-use tvix_castore::{
-    proto::{self as castorepb, NamedNode},
-    B3Digest, B3_LEN,
-};
+use tvix_castore::proto::{self as castorepb, NamedNode, ValidateNodeError};
 
 mod grpc_pathinfoservice_wrapper;
 
@@ -34,14 +31,14 @@ pub enum ValidatePathInfoError {
     #[error("No node present")]
     NoNodePresent(),
 
-    /// Invalid node name encountered.
+    /// Node fails validation
+    #[error("Invalid root node: {:?}", .0.to_string())]
+    InvalidRootNode(ValidateNodeError),
+
+    /// Invalid node name encountered. Root nodes in PathInfos have more strict name requirements
     #[error("Failed to parse {0:?} as StorePath: {1}")]
     InvalidNodeName(Vec<u8>, store_path::Error),
 
-    /// The digest the (root) node refers to has invalid length.
-    #[error("Invalid Digest length: expected {}, got {}", B3_LEN, .0)]
-    InvalidNodeDigestLen(usize),
-
     /// The digest in narinfo.nar_sha256 has an invalid len.
     #[error("Invalid narinfo.nar_sha256 length: expected {}, got {}", 32, .0)]
     InvalidNarSha256DigestLen(usize),
@@ -146,27 +143,8 @@ impl PathInfo {
                 Err(ValidatePathInfoError::NoNodePresent())?
             }
             Some(castorepb::Node { node: Some(node) }) => {
-                match node {
-                    // for a directory root node, ensure the digest has the appropriate size.
-                    castorepb::node::Node::Directory(directory_node) => {
-                        if TryInto::<B3Digest>::try_into(directory_node.digest.clone()).is_err() {
-                            return Err(ValidatePathInfoError::InvalidNodeDigestLen(
-                                directory_node.digest.len(),
-                            ));
-                        }
-                    }
-                    // for a file root node, ensure the digest has the appropriate size.
-                    castorepb::node::Node::File(file_node) => {
-                        // ensure the digest has the appropriate size.
-                        if TryInto::<B3Digest>::try_into(file_node.digest.clone()).is_err() {
-                            return Err(ValidatePathInfoError::InvalidNodeDigestLen(
-                                file_node.digest.len(),
-                            ));
-                        }
-                    }
-                    // nothing to do specifically for symlinks
-                    castorepb::node::Node::Symlink(_) => {}
-                }
+                node.validate()
+                    .map_err(|e| ValidatePathInfoError::InvalidRootNode(e))?;
                 // parse the name of the node itself and return
                 parse_node_name_root(node.get_name(), ValidatePathInfoError::InvalidNodeName)?
             }
diff --git a/tvix/store/src/proto/tests/pathinfo.rs b/tvix/store/src/proto/tests/pathinfo.rs
index cfecbac3b8..5e1ae9c45b 100644
--- a/tvix/store/src/proto/tests/pathinfo.rs
+++ b/tvix/store/src/proto/tests/pathinfo.rs
@@ -43,7 +43,7 @@ fn validate_no_node(
         digest: Bytes::new(),
         size: 0,
     },
-    Err(ValidatePathInfoError::InvalidNodeDigestLen(0));
+    Err(ValidatePathInfoError::InvalidRootNode(castorepb::ValidateNodeError::InvalidDigestLen(0)));
     "invalid digest length"
 )]
 #[test_case(
@@ -88,7 +88,7 @@ fn validate_directory(
         digest: Bytes::new(),
         ..Default::default()
     },
-    Err(ValidatePathInfoError::InvalidNodeDigestLen(0));
+    Err(ValidatePathInfoError::InvalidRootNode(castorepb::ValidateNodeError::InvalidDigestLen(0)));
     "invalid digest length"
 )]
 #[test_case(
@@ -120,7 +120,7 @@ fn validate_file(
 #[test_case(
     castorepb::SymlinkNode {
         name: DUMMY_NAME.into(),
-        ..Default::default()
+        target: "foo".into(),
     },
     Ok(StorePath::from_str(DUMMY_NAME).expect("must succeed"));
     "ok"
@@ -128,7 +128,7 @@ fn validate_file(
 #[test_case(
     castorepb::SymlinkNode {
         name: "invalid".into(),
-        ..Default::default()
+        target: "foo".into(),
     },
     Err(ValidatePathInfoError::InvalidNodeName(
         "invalid".into(),
@@ -239,3 +239,26 @@ fn validate_inconsistent_narinfo_reference_name_digest() {
         e => panic!("unexpected error: {:?}", e),
     }
 }
+
+/// Create a node with an empty symlink target, and ensure it fails validation.
+#[test]
+fn validate_symlink_empty_target_invalid() {
+    let node = castorepb::node::Node::Symlink(castorepb::SymlinkNode {
+        name: "foo".into(),
+        target: "".into(),
+    });
+
+    node.validate().expect_err("must fail validation");
+}
+
+/// Create a node with a symlink target including null bytes, and ensure it
+/// fails validation.
+#[test]
+fn validate_symlink_target_null_byte_invalid() {
+    let node = castorepb::node::Node::Symlink(castorepb::SymlinkNode {
+        name: "foo".into(),
+        target: "foo\0".into(),
+    });
+
+    node.validate().expect_err("must fail validation");
+}