about summary refs log tree commit diff
path: root/tvix
diff options
context:
space:
mode:
authoredef <edef@edef.eu>2024-05-08T07·13+0000
committeredef <edef@edef.eu>2024-05-08T15·30+0000
commit31d73cd443311c5ef4bfbd8c2b66e3b9691340ec (patch)
tree906c13131daf87bcc726db9453c8226d68a3728b /tvix
parent17a7dac94f29d8151ecebfbf11d5b83cf0c4f415 (diff)
refactor(nix-compat/nar/reader): reuse prev_name allocation r/8095
We reuse the prev_name allocation for Entry, instead of allocating and
returning a separate Vec.

We encode the `prev_name: None` case as an empty vector, since we don't
allow empty names anyway, and the sorting is equivalent.

Change-Id: I975b37ff873805f5ff099bc82128706891052247
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11607
Reviewed-by: Brian Olsen <me@griff.name>
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix')
-rw-r--r--tvix/nix-compat/src/nar/reader/async/mod.rs31
-rw-r--r--tvix/nix-compat/src/nar/reader/async/test.rs12
-rw-r--r--tvix/nix-compat/src/nar/reader/mod.rs31
-rw-r--r--tvix/nix-compat/src/nar/reader/read.rs32
-rw-r--r--tvix/nix-compat/src/nar/reader/test.rs12
-rw-r--r--tvix/nix-compat/src/wire/bytes/mod.rs1
-rw-r--r--tvix/store/src/nar/import.rs2
-rw-r--r--tvix/tools/crunch-v2/src/main.rs2
8 files changed, 73 insertions, 50 deletions
diff --git a/tvix/nix-compat/src/nar/reader/async/mod.rs b/tvix/nix-compat/src/nar/reader/async/mod.rs
index 048283a67c..0808fba38c 100644
--- a/tvix/nix-compat/src/nar/reader/async/mod.rs
+++ b/tvix/nix-compat/src/nar/reader/async/mod.rs
@@ -1,4 +1,5 @@
 use std::{
+    mem::MaybeUninit,
     pin::Pin,
     task::{self, Poll},
 };
@@ -110,11 +111,11 @@ pub struct DirReader<'a, 'r> {
     reader: &'a mut Reader<'r>,
     /// Previous directory entry name.
     /// We have to hang onto this to enforce name monotonicity.
-    prev_name: Option<Vec<u8>>,
+    prev_name: Vec<u8>,
 }
 
 pub struct Entry<'a, 'r> {
-    pub name: Vec<u8>,
+    pub name: &'a [u8],
     pub node: Node<'a, 'r>,
 }
 
@@ -122,7 +123,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
     fn new(reader: &'a mut Reader<'r>) -> Self {
         Self {
             reader,
-            prev_name: None,
+            prev_name: vec![],
         }
     }
 
@@ -138,7 +139,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
     pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> {
         // COME FROM the previous iteration: if we've already read an entry,
         // read its terminating TOK_PAR here.
-        if self.prev_name.is_some() {
+        if !self.prev_name.is_empty() {
             read::token(self.reader, &nar::wire::TOK_PAR).await?;
         }
 
@@ -146,30 +147,26 @@ impl<'a, 'r> DirReader<'a, 'r> {
             return Ok(None);
         }
 
-        let name = wire::read_bytes(self.reader, 1..=nar::wire::MAX_NAME_LEN).await?;
+        let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1];
+        let name =
+            wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?;
 
         if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." {
             return Err(InvalidData.into());
         }
 
         // Enforce strict monotonicity of directory entry names.
-        match &mut self.prev_name {
-            None => {
-                self.prev_name = Some(name.clone());
-            }
-            Some(prev_name) => {
-                if *prev_name >= name {
-                    return Err(InvalidData.into());
-                }
-
-                name[..].clone_into(prev_name);
-            }
+        if &self.prev_name[..] >= name {
+            return Err(InvalidData.into());
         }
 
+        self.prev_name.clear();
+        self.prev_name.extend_from_slice(name);
+
         read::token(self.reader, &nar::wire::TOK_NOD).await?;
 
         Ok(Some(Entry {
-            name,
+            name: &self.prev_name,
             node: Node::new(self.reader).await?,
         }))
     }
diff --git a/tvix/nix-compat/src/nar/reader/async/test.rs b/tvix/nix-compat/src/nar/reader/async/test.rs
index 58bb651fca..7bc1f8942f 100644
--- a/tvix/nix-compat/src/nar/reader/async/test.rs
+++ b/tvix/nix-compat/src/nar/reader/async/test.rs
@@ -80,7 +80,7 @@ async fn complicated() {
                     .expect("next must be some")
                     .expect("must be some");
 
-                assert_eq!(&b"keep"[..], &entry.name);
+                assert_eq!(b"keep", entry.name);
 
                 match entry.node {
                     nar::reader::Node::Directory(mut subdir_reader) => {
@@ -133,7 +133,7 @@ async fn file_read_abandoned() {
                     .expect("next must succeed")
                     .expect("must be some");
 
-                assert_eq!(&b".keep"[..], &entry.name);
+                assert_eq!(b".keep", entry.name);
                 // don't bother to finish reading it.
             };
 
@@ -183,7 +183,7 @@ async fn dir_read_abandoned() {
                     .expect("next must be some")
                     .expect("must be some");
 
-                assert_eq!(&b"keep"[..], &entry.name);
+                assert_eq!(b"keep", entry.name);
 
                 match entry.node {
                     nar::reader::Node::Directory(_) => {
@@ -239,7 +239,7 @@ async fn dir_read_after_none() {
                     .expect("next must be some")
                     .expect("must be some");
 
-                assert_eq!(&b"keep"[..], &entry.name);
+                assert_eq!(b"keep", entry.name);
 
                 match entry.node {
                     nar::reader::Node::Directory(mut subdir_reader) => {
@@ -280,7 +280,7 @@ async fn dir_read_after_none() {
 }
 
 async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);
 
     match entry.node {
         nar::reader::Node::File {
@@ -299,7 +299,7 @@ fn must_be_symlink(
     exp_target: &'static str,
     entry: nar::reader::Entry<'_, '_>,
 ) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);
 
     match entry.node {
         nar::reader::Node::Symlink { target } => {
diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs
index bddf175080..9e9237ead3 100644
--- a/tvix/nix-compat/src/nar/reader/mod.rs
+++ b/tvix/nix-compat/src/nar/reader/mod.rs
@@ -264,11 +264,11 @@ pub struct DirReader<'a, 'r> {
     reader: ArchiveReader<'a, 'r>,
     /// Previous directory entry name.
     /// We have to hang onto this to enforce name monotonicity.
-    prev_name: Option<Vec<u8>>,
+    prev_name: Vec<u8>,
 }
 
 pub struct Entry<'a, 'r> {
-    pub name: Vec<u8>,
+    pub name: &'a [u8],
     pub node: Node<'a, 'r>,
 }
 
@@ -276,7 +276,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
     fn new(reader: ArchiveReader<'a, 'r>) -> Self {
         Self {
             reader,
-            prev_name: None,
+            prev_name: vec![],
         }
     }
 
@@ -295,7 +295,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
 
         // COME FROM the previous iteration: if we've already read an entry,
         // read its terminating TOK_PAR here.
-        if self.prev_name.is_some() {
+        if !self.prev_name.is_empty() {
             try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
         }
 
@@ -306,9 +306,10 @@ impl<'a, 'r> DirReader<'a, 'r> {
             return Ok(None);
         }
 
+        let mut name = [0; wire::MAX_NAME_LEN + 1];
         let name = try_or_poison!(
             self.reader,
-            read::bytes(self.reader.inner, wire::MAX_NAME_LEN)
+            read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN)
         );
 
         if name.is_empty()
@@ -322,24 +323,18 @@ impl<'a, 'r> DirReader<'a, 'r> {
         }
 
         // Enforce strict monotonicity of directory entry names.
-        match &mut self.prev_name {
-            None => {
-                self.prev_name = Some(name.clone());
-            }
-            Some(prev_name) => {
-                if *prev_name >= name {
-                    self.reader.status.poison();
-                    return Err(InvalidData.into());
-                }
-
-                name[..].clone_into(prev_name);
-            }
+        if &self.prev_name[..] >= name {
+            self.reader.status.poison();
+            return Err(InvalidData.into());
         }
 
+        self.prev_name.clear();
+        self.prev_name.extend_from_slice(name);
+
         try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD));
 
         Ok(Some(Entry {
-            name,
+            name: &self.prev_name,
             // Don't need to worry about poisoning here: Node::new will do it for us if needed
             node: Node::new(self.reader.child())?,
         }))
diff --git a/tvix/nix-compat/src/nar/reader/read.rs b/tvix/nix-compat/src/nar/reader/read.rs
index 1ce1613764..9938581f2a 100644
--- a/tvix/nix-compat/src/nar/reader/read.rs
+++ b/tvix/nix-compat/src/nar/reader/read.rs
@@ -15,6 +15,38 @@ pub fn u64(reader: &mut Reader) -> io::Result<u64> {
     Ok(u64::from_le_bytes(buf))
 }
 
+/// Consume a byte string from the reader into a provided buffer,
+/// returning the data bytes.
+pub fn bytes_buf<'a, const N: usize>(
+    reader: &mut Reader,
+    buf: &'a mut [u8; N],
+    max_len: usize,
+) -> io::Result<&'a [u8]> {
+    assert_eq!(N % 8, 0);
+    assert!(max_len <= N);
+
+    // read the length, and reject excessively large values
+    let len = self::u64(reader)?;
+    if len > max_len as u64 {
+        return Err(InvalidData.into());
+    }
+    // we know the length fits in a usize now
+    let len = len as usize;
+
+    // read the data and padding into a buffer
+    let buf_len = (len + 7) & !7;
+    reader.read_exact(&mut buf[..buf_len])?;
+
+    // verify that the padding is all zeroes
+    for &b in &buf[len..buf_len] {
+        if b != 0 {
+            return Err(InvalidData.into());
+        }
+    }
+
+    Ok(&buf[..len])
+}
+
 /// Consume a byte string of up to `max_len` bytes from the reader.
 pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> {
     assert!(max_len <= isize::MAX as usize);
diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs
index 02dc4767c9..63e4fb289f 100644
--- a/tvix/nix-compat/src/nar/reader/test.rs
+++ b/tvix/nix-compat/src/nar/reader/test.rs
@@ -71,7 +71,7 @@ fn complicated() {
                     .expect("next must be some")
                     .expect("must be some");
 
-                assert_eq!(&b"keep"[..], &entry.name);
+                assert_eq!(b"keep", entry.name);
 
                 match entry.node {
                     nar::reader::Node::Directory(mut subdir_reader) => {
@@ -117,7 +117,7 @@ fn file_read_abandoned() {
                     .expect("next must succeed")
                     .expect("must be some");
 
-                assert_eq!(&b".keep"[..], &entry.name);
+                assert_eq!(b".keep", entry.name);
                 // don't bother to finish reading it.
             };
 
@@ -162,7 +162,7 @@ fn dir_read_abandoned() {
                     .expect("next must be some")
                     .expect("must be some");
 
-                assert_eq!(&b"keep"[..], &entry.name);
+                assert_eq!(b"keep", entry.name);
 
                 match entry.node {
                     nar::reader::Node::Directory(_) => {
@@ -213,7 +213,7 @@ fn dir_read_after_none() {
                     .expect("next must be some")
                     .expect("must be some");
 
-                assert_eq!(&b"keep"[..], &entry.name);
+                assert_eq!(b"keep", entry.name);
 
                 match entry.node {
                     nar::reader::Node::Directory(mut subdir_reader) => {
@@ -248,7 +248,7 @@ fn dir_read_after_none() {
 }
 
 fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);
 
     match entry.node {
         nar::reader::Node::File {
@@ -267,7 +267,7 @@ fn must_be_symlink(
     exp_target: &'static str,
     entry: nar::reader::Entry<'_, '_>,
 ) {
-    assert_eq!(name.as_bytes(), &entry.name);
+    assert_eq!(name.as_bytes(), entry.name);
 
     match entry.node {
         nar::reader::Node::Symlink { target } => {
diff --git a/tvix/nix-compat/src/wire/bytes/mod.rs b/tvix/nix-compat/src/wire/bytes/mod.rs
index f48b5000a5..2ed071e379 100644
--- a/tvix/nix-compat/src/wire/bytes/mod.rs
+++ b/tvix/nix-compat/src/wire/bytes/mod.rs
@@ -82,7 +82,6 @@ where
     Ok(buf)
 }
 
-#[allow(dead_code)]
 pub(crate) async fn read_bytes_buf<'a, const N: usize, R: ?Sized>(
     reader: &mut R,
     buf: &'a mut [MaybeUninit<u8>; N],
diff --git a/tvix/store/src/nar/import.rs b/tvix/store/src/nar/import.rs
index 7a9f1231e7..3d7c50014a 100644
--- a/tvix/store/src/nar/import.rs
+++ b/tvix/store/src/nar/import.rs
@@ -87,7 +87,7 @@ where
                 let mut path = path.clone();
 
                 // valid NAR names are valid castore names
-                path.try_push(&entry.name)
+                path.try_push(entry.name)
                     .expect("Tvix bug: failed to join name");
 
                 let entry = Box::pin(produce_nar_inner(
diff --git a/tvix/tools/crunch-v2/src/main.rs b/tvix/tools/crunch-v2/src/main.rs
index a5d538f6be..5be8c28e29 100644
--- a/tvix/tools/crunch-v2/src/main.rs
+++ b/tvix/tools/crunch-v2/src/main.rs
@@ -147,7 +147,7 @@ fn ingest(node: nar::Node, name: Vec<u8>, avg_chunk_size: u32) -> Result<proto::
             let mut symlinks = vec![];
 
             while let Some(node) = reader.next()? {
-                match ingest(node.node, node.name, avg_chunk_size)? {
+                match ingest(node.node, node.name.to_owned(), avg_chunk_size)? {
                     proto::path::Node::Directory(node) => {
                         directories.push(node);
                     }